From b37b03b7051493c9f9a6b336c9c0f81334885b7d Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Fri, 25 Nov 2005 17:10:06 -0500 Subject: NFS: Fix a spinlock recursion inside nfs_update_inode() In cases where the server has gone insane, nfs_update_inode() may end up calling nfs_invalidate_inode(), which again calls stuff that takes the inode->i_lock that we're already holding. In addition, given the sort of things we have in NFS these days that need to be cleaned up on inode release, I'm not sure we should ever be calling make_bad_inode(). Fix up spinlock recursion, and limit nfs_invalidate_inode() to clearing the caches, and marking the inode as being stale. Thanks to Steve Dickson <SteveD@redhat.com> for spotting this. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6391d89..aaab1a5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -643,14 +643,11 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) /* * Invalidate the local caches */ -void -nfs_zap_caches(struct inode *inode) +static void nfs_zap_caches_locked(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; - spin_lock(&inode->i_lock); - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -659,7 +656,12 @@ nfs_zap_caches(struct inode *inode) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; +} +void nfs_zap_caches(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); } @@ -676,16 +678,13 @@ static void nfs_zap_acl_cache(struct inode *inode) } /* - * Invalidate, but do not unhash, the inode + * Invalidate, but do not unhash, the inode. + * NB: must be called with inode->i_lock held! */ -static void -nfs_invalidate_inode(struct inode *inode) +static void nfs_invalidate_inode(struct inode *inode) { - umode_t save_mode = inode->i_mode; - - make_bad_inode(inode); - inode->i_mode = save_mode; - nfs_zap_caches(inode); + set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); + nfs_zap_caches_locked(inode); } struct nfs_find_desc { @@ -1528,14 +1527,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); #endif + out_err: /* * No need to worry about unhashing the dentry, as the * lookup validation will know that the inode is bad. * (But we fall through to invalidate the caches.) */ nfs_invalidate_inode(inode); - out_err: - set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); return -ESTALE; } -- cgit v1.1 From 223db122bfccd463751d8b0c09a638abee03681d Mon Sep 17 00:00:00 2001 From: Steve Dickson <steved@redhat.com> Date: Wed, 30 Nov 2005 09:25:33 -0500 Subject: NFS: Fix cache consistency regression Make sure cache_change_attribute is initialized to jiffies so when the mtime changes on directory, the directory will be refreshed. Signed-off by: Steve Dickson <steved@redhat.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aaab1a5..b551b19 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2066,6 +2066,7 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; + nfsi->cache_change_attribute = jiffies; #ifdef CONFIG_NFS_V3_ACL nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); -- cgit v1.1 From 24aa1fe6779eaddb3e0b1b802585dcf6faf9cc44 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Sat, 3 Dec 2005 15:20:07 -0500 Subject: NFS: Fix a few further cache consistency regressions Steve Dickson writes: Doing the following: 1. On server: $ mkdir ~/t $ echo Hello > ~/t/tmp 2. On client, wait for a string to appear in this file: $ until grep -q foo t/tmp ; do echo -n . ; sleep 1 ; done 3. On server, create a *new* file with the same name containing that string: $ mv ~/t/tmp ~/t/tmp.old; echo foo > ~/t/tmp will show how the client will never (and I mean never ;-) ) see the updated file. The problem is that we do not update nfsi->cache_change_attribute when the file changes on the server (we only update it when our client makes the changes). This again means that functions like nfs_check_verifier() will fail to register when the parent directory has changed and should trigger a dentry lookup revalidation. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 54 ++++++++++++++++++++---------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b551b19..afd75d0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -54,7 +54,7 @@ #define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) static void nfs_invalidate_inode(struct inode *); -static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long); +static int nfs_update_inode(struct inode *, struct nfs_fattr *); static struct inode *nfs_alloc_inode(struct super_block *sb); static void nfs_destroy_inode(struct inode *); @@ -1080,8 +1080,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) int status = -ESTALE; struct nfs_fattr fattr; struct nfs_inode *nfsi = NFS_I(inode); - unsigned long verifier; - unsigned long cache_validity; dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1106,8 +1104,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } } - /* Protect against RPC races by saving the change attribute */ - verifier = nfs_save_change_attribute(inode); status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", @@ -1122,7 +1118,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } spin_lock(&inode->i_lock); - status = nfs_update_inode(inode, &fattr, verifier); + status = nfs_update_inode(inode, &fattr); if (status) { spin_unlock(&inode->i_lock); dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", @@ -1130,20 +1126,11 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode), status); goto out; } - cache_validity = nfsi->cache_validity; - nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; - - /* - * We may need to keep the attributes marked as invalid if - * we raced with nfs_end_attr_update(). - */ - if (time_after_eq(verifier, nfsi->cache_change_attribute)) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); nfs_revalidate_mapping(inode, inode->i_mapping); - if (cache_validity & NFS_INO_INVALID_ACL) + if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", @@ -1346,10 +1333,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) return 0; spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; - if (nfs_verify_change_attribute(inode, fattr->time_start)) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); if (time_after(fattr->time_start, nfsi->last_updated)) - status = nfs_update_inode(inode, fattr, fattr->time_start); + status = nfs_update_inode(inode, fattr); else status = nfs_check_inode_attributes(inode, fattr); @@ -1375,10 +1360,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; goto out; } - status = nfs_update_inode(inode, fattr, fattr->time_start); - if (time_after_eq(fattr->time_start, nfsi->cache_change_attribute)) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); - nfsi->cache_change_attribute = jiffies; + status = nfs_update_inode(inode, fattr); out: spin_unlock(&inode->i_lock); return status; @@ -1396,12 +1378,12 @@ out: * * A very similar scenario holds for the dir cache. */ -static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) +static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; unsigned int invalid = 0; - int data_unstable; + int data_stable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino, @@ -1432,8 +1414,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign nfsi->last_updated = jiffies; /* Are we racing with known updates of the metadata on the server? */ - data_unstable = ! (nfs_verify_change_attribute(inode, verifier) || - (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)); + data_stable = nfs_verify_change_attribute(inode, fattr->time_start); + if (data_stable) + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); @@ -1442,7 +1425,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign /* Do we perhaps have any outstanding writes? */ if (nfsi->npages == 0) { /* No, but did we race with nfs_end_data_update()? */ - if (time_after_eq(verifier, nfsi->cache_change_attribute)) { + if (data_stable) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_DATA; } @@ -1451,6 +1434,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } + nfsi->cache_change_attribute = jiffies; dprintk("NFS: isize change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } @@ -1460,8 +1444,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); dprintk("NFS: mtime change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); - if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + nfsi->cache_change_attribute = jiffies; } if ((fattr->valid & NFS_ATTR_FATTR_V4) @@ -1469,15 +1453,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); nfsi->change_attr = fattr->change_attr; - if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = jiffies; } /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { - if (!data_unstable) - invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + nfsi->cache_change_attribute = jiffies; } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); @@ -1515,6 +1499,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; + if (data_stable) + invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); if (!nfs_have_delegation(inode, FMODE_READ)) nfsi->cache_validity |= invalid; -- cgit v1.1 From 29884df0d89c1df0dec3449405bc41569bb44800 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Tue, 13 Dec 2005 16:13:54 -0500 Subject: NFS: Fix another O_DIRECT race Ensure we call unmap_mapping_range() and sync dirty pages to disk before doing an NFS direct write. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index afd75d0..432f41c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -640,6 +640,27 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } +/** + * nfs_sync_mapping - helper to flush all mmapped dirty data to disk + */ +int nfs_sync_mapping(struct address_space *mapping) +{ + int ret; + + if (mapping->nrpages == 0) + return 0; + unmap_mapping_range(mapping, 0, 0, 0); + ret = filemap_fdatawrite(mapping); + if (ret != 0) + goto out; + ret = filemap_fdatawait(mapping); + if (ret != 0) + goto out; + ret = nfs_wb_all(mapping->host); +out: + return ret; +} + /* * Invalidate the local caches */ @@ -1179,11 +1200,8 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { - if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(mapping) == 0) - filemap_fdatawait(mapping); - nfs_wb_all(inode); - } + if (S_ISREG(inode->i_mode)) + nfs_sync_mapping(mapping); invalidate_inode_pages2(mapping); spin_lock(&inode->i_lock); -- cgit v1.1 From 286d7d6a0cb38d3d4316a1dfea9b0c0fc5a6455b Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Tue, 3 Jan 2006 09:55:26 +0100 Subject: NFSv4: Remove requirement for machine creds for the "setclientid" operation Use a cred from the nfs4_client->cl_state_owners list. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 432f41c..740d3cd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1822,23 +1822,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clp->cl_rpcclient = clnt; - clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); - if (IS_ERR(clp->cl_cred)) { - up_write(&clp->cl_sem); - err = PTR_ERR(clp->cl_cred); - clp->cl_cred = NULL; - goto out_fail; - } memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); nfs_idmap_new(clp); } - if (list_empty(&clp->cl_superblocks)) { - err = nfs4_init_client(clp); - if (err != 0) { - up_write(&clp->cl_sem); - goto out_fail; - } - } list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); if (!IS_ERR(clnt)) -- cgit v1.1 From 6b59a75460eb9527145d7bd55068e5d32bee8a44 Mon Sep 17 00:00:00 2001 From: Chuck Lever <cel@netapp.com> Date: Wed, 30 Nov 2005 18:08:19 -0500 Subject: NFS: Fix error recovery code in fs/nfs/inode.c:__init_nfs() Red Hat found a problem in the error recovery logic in __init_nfs. Signed-off-by: Chuck Lever <cel@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 740d3cd..da58207 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2152,11 +2152,11 @@ out: #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif - nfs_destroy_writepagecache(); #ifdef CONFIG_NFS_DIRECTIO -out0: nfs_destroy_directcache(); +out0: #endif + nfs_destroy_writepagecache(); out1: nfs_destroy_readpagecache(); out2: -- cgit v1.1 From dc20f803904dbf30f834dcc43c14701dfce32491 Mon Sep 17 00:00:00 2001 From: Chuck Lever <cel@netapp.com> Date: Wed, 30 Nov 2005 18:08:57 -0500 Subject: NFS: get rid of useless kernel log message nfs_statfs() generates a log message when GETATTR returns an error. This is usually a useless message. Make it a dprintk. Test plan: None Signed-off-by: Chuck Lever <cel@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index da58207..cc75392 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -575,11 +575,10 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf) buf->f_namelen = server->namelen; out: unlock_kernel(); - return 0; out_err: - printk(KERN_WARNING "nfs_statfs: statfs error = %d\n", -error); + dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; goto out; -- cgit v1.1 From 325cfed9ae901320e9234b18c21434b783dbe342 Mon Sep 17 00:00:00 2001 From: Chuck Lever <cel@netapp.com> Date: Wed, 30 Nov 2005 18:08:55 -0500 Subject: NFS: make "inode number mismatch" message more useful To help NFS users and server developers, make the "inode number mismatch" message display more useful information. Test-plan: None. Signed-off-by: Chuck Lever <cel@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cc75392..4e6558d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1409,14 +1409,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; - if (nfsi->fileid != fattr->fileid) { - printk(KERN_ERR "%s: inode number mismatch\n" - "expected (%s/0x%Lx), got (%s/0x%Lx)\n", - __FUNCTION__, - inode->i_sb->s_id, (long long)nfsi->fileid, - inode->i_sb->s_id, (long long)fattr->fileid); - goto out_err; - } + if (nfsi->fileid != fattr->fileid) + goto out_fileid; /* * Make sure the inode's type hasn't changed. @@ -1538,6 +1532,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfs_invalidate_inode(inode); return -ESTALE; + + out_fileid: + printk(KERN_ERR "NFS: server %s error: fileid changed\n" + "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", + NFS_SERVER(inode)->hostname, inode->i_sb->s_id, + (long long)nfsi->fileid, (long long)fattr->fileid); + goto out_err; } /* -- cgit v1.1 From 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff Mon Sep 17 00:00:00 2001 From: Chuck Lever <cel@netapp.com> Date: Wed, 30 Nov 2005 18:09:02 -0500 Subject: NFS: support large reads and writes on the wire Most NFS server implementations allow up to 64KB reads and writes on the wire. The Solaris NFS server allows up to a megabyte, for instance. Now the Linux NFS client supports transfer sizes up to 1MB, too. This will help reduce protocol and context switch overhead on read/write intensive NFS workloads, and support larger atomic read and write operations on servers that support them. Test-plan: Connectathon and iozone on mount point with wsize=rsize>32768 over TCP. Tests with NFS over UDP to verify the maximum RPC payload size cap. Signed-off-by: Chuck Lever <cel@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4e6558d..acde2c5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -221,10 +221,10 @@ nfs_calc_block_size(u64 tsize) static inline unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) { - if (bsize < 1024) - bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) - bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + if (bsize < NFS_MIN_FILE_IO_SIZE) + bsize = NFS_DEF_FILE_IO_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_SIZE) + bsize = NFS_MAX_FILE_IO_SIZE; return nfs_block_bits(bsize, nrbitsp); } @@ -307,20 +307,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); if (server->rsize > max_rpc_payload) server->rsize = max_rpc_payload; - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->rpages > NFS_READ_MAXIOV) { - server->rpages = NFS_READ_MAXIOV; - server->rsize = server->rpages << PAGE_CACHE_SHIFT; - } + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->wpages > NFS_WRITE_MAXIOV) { - server->wpages = NFS_WRITE_MAXIOV; - server->wsize = server->wpages << PAGE_CACHE_SHIFT; - } if (sb->s_blocksize == 0) sb->s_blocksize = nfs_block_bits(server->wsize, -- cgit v1.1 From 70b9ecbdb9c5fdc731f8780bffd45d9519020c4a Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Tue, 3 Jan 2006 09:55:34 +0100 Subject: NFS: Make stat() return updated mtimes after a write() The SuS states that a call to write() will cause mtime to be updated on the file. In order to satisfy that requirement, we need to flush out any cached writes in nfs_getattr(). Speed things up slightly by not committing the writes. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index acde2c5..2c7f8aa 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -952,6 +952,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; + /* Flush out writes to the server in order to update c/mtime */ + nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); if (__IS_FLG(inode, MS_NOATIME)) need_atime = 0; else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) -- cgit v1.1 From a895b4a198dd06f8353328867e4f6cfd28b63081 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Tue, 3 Jan 2006 09:55:40 +0100 Subject: NFS: Clean up weak cache consistency code ...and ensure that nfs_update_inode() respects wcc Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 60 ++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 20 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2c7f8aa..7270b1d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1248,6 +1248,33 @@ void nfs_end_data_update(struct inode *inode) atomic_dec(&nfsi->data_updates); } +static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 + && nfsi->change_attr == fattr->pre_change_attr) { + nfsi->change_attr = fattr->change_attr; + nfsi->cache_change_attribute = jiffies; + } + + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_WCC) != 0) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + nfsi->cache_change_attribute = jiffies; + } + if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + nfsi->cache_change_attribute = jiffies; + } + if (inode->i_size == fattr->pre_size && nfsi->npages == 0) { + inode->i_size = fattr->size; + nfsi->cache_change_attribute = jiffies; + } + } +} + /** * nfs_check_inode_attributes - verify consistency of the inode attribute cache * @inode - pointer to inode @@ -1264,22 +1291,20 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat int data_unstable; + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); - if (fattr->valid & NFS_ATTR_FATTR_V4) { - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 - && nfsi->change_attr == fattr->pre_change_attr) - nfsi->change_attr = fattr->change_attr; - if (nfsi->change_attr != fattr->change_attr) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (!data_unstable) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } - } + /* Do atomic weak cache consistency updates */ + nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR) == 0) { - return 0; + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + nfsi->change_attr != fattr->change_attr) { + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } /* Has the inode gone and changed behind our back? */ @@ -1291,14 +1316,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - /* If we have atomic WCC data, we may update some attributes */ - if ((fattr->valid & NFS_ATTR_WCC) != 0) { - if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } - /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; @@ -1426,6 +1443,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (data_stable) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + /* Do atomic weak cache consistency updates */ + nfs_wcc_update_inode(inode, fattr); + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); -- cgit v1.1 From a72b44222d222749d54b3e370d825094352e389f Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Tue, 3 Jan 2006 09:55:41 +0100 Subject: NFSv4: Allow user to set the port used by the NFSv4 callback channel Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7270b1d..648cb1a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -40,6 +40,7 @@ #include <asm/uaccess.h> #include "nfs4_fs.h" +#include "callback.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -2036,6 +2037,21 @@ static struct file_system_type nfs4_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +static int param_set_port(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + return -EINVAL; + *((int *)kp->arg) = num; + return 0; +} + +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ @@ -2043,8 +2059,25 @@ static struct file_system_type nfs4_fs_type = { nfsi->delegation_state = 0; \ init_rwsem(&nfsi->rwsem); \ } while(0) -#define register_nfs4fs() register_filesystem(&nfs4_fs_type) -#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) + +static inline int register_nfs4fs(void) +{ + int ret; + + ret = nfs_register_sysctl(); + if (ret != 0) + return ret; + ret = register_filesystem(&nfs4_fs_type); + if (ret != 0) + nfs_unregister_sysctl(); + return ret; +} + +static inline void unregister_nfs4fs(void) +{ + unregister_filesystem(&nfs4_fs_type); + nfs_unregister_sysctl(); +} #else #define nfs4_init_once(nfsi) \ do { } while (0) -- cgit v1.1 From f518e35aec984036903c1003e867f833747a9d79 Mon Sep 17 00:00:00 2001 From: Chuck Lever <cel@netapp.com> Date: Tue, 3 Jan 2006 09:55:52 +0100 Subject: SUNRPC: get rid of cl_chatty Clean up: Every ULP that uses the in-kernel RPC client, except the NLM client, sets cl_chatty. There's no reason why NLM shouldn't set it, so just get rid of cl_chatty and always be verbose. Test-plan: Compile with CONFIG_NFS enabled. Signed-off-by: Chuck Lever <cel@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 648cb1a..4625479 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -413,7 +413,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) clnt->cl_intr = 1; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; return clnt; @@ -1838,7 +1837,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } clnt->cl_intr = 1; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clp->cl_rpcclient = clnt; memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); nfs_idmap_new(clp); -- cgit v1.1 From 58df095b732529ade8f4051b41d7c29731afecd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Tue, 3 Jan 2006 09:55:57 +0100 Subject: NFSv4: Allow entries in the idmap cache to expire If someone changes the uid/gid mapping in userland, then we do eventually want those changes to be propagated to the kernel. Currently the kernel assumes that it may cache entries forever. Add an expiration time + garbage collector for idmap entries. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4625479..e7bd0d9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2050,6 +2050,20 @@ static int param_set_port(const char *val, struct kernel_param *kp) module_param_call(callback_tcpport, param_set_port, param_get_int, &nfs_callback_set_tcpport, 0644); +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ -- cgit v1.1 From 28fd129827b00e12829d48a5290f46277600619b Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Date: Sun, 8 Jan 2006 01:02:14 -0800 Subject: [PATCH] Fix and add EXPORT_SYMBOL(filemap_write_and_wait) This patch add EXPORT_SYMBOL(filemap_write_and_wait) and use it. See mm/filemap.c: And changes the filemap_write_and_wait() and filemap_write_and_wait_range(). Current filemap_write_and_wait() doesn't wait if filemap_fdatawrite() returns error. However, even if filemap_fdatawrite() returned an error, it may have submitted the partially data pages to the device. (e.g. in the case of -ENOSPC) <quotation> Andrew Morton writes, If filemap_fdatawrite() returns an error, this might be due to some I/O problem: dead disk, unplugged cable, etc. Given the generally crappy quality of the kernel's handling of such exceptions, there's a good chance that the filemap_fdatawait() will get stuck in D state forever. </quotation> So, this patch doesn't wait if filemap_fdatawrite() returns the -EIO. Trond, could you please review the nfs part? Especially I'm not sure, nfs must use the "filemap_fdatawrite(inode->i_mapping) == 0", or not. Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfs/inode.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e7bd0d9..3e4ba9c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -644,10 +644,7 @@ int nfs_sync_mapping(struct address_space *mapping) if (mapping->nrpages == 0) return 0; unmap_mapping_range(mapping, 0, 0, 0); - ret = filemap_fdatawrite(mapping); - if (ret != 0) - goto out; - ret = filemap_fdatawait(mapping); + ret = filemap_write_and_wait(mapping); if (ret != 0) goto out; ret = nfs_wb_all(mapping->host); @@ -864,8 +861,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) nfs_begin_data_update(inode); /* Write all dirty data if we're changing file permissions or size */ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); nfs_wb_all(inode); } /* -- cgit v1.1 From fc33a7bb9c6dd8f6e4a014976200f8fdabb3a45c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Mon, 9 Jan 2006 20:52:17 -0800 Subject: [PATCH] per-mountpoint noatime/nodiratime Turn noatime and nodiratime into per-mount instead of per-sb flags. After all the preparations this is a rather trivial patch. The mount code needs to treat the two options as per-mount instead of per-superblock, and touch_atime needs to be changed to check the new MNT_ flags in addition to the MS_ flags that are kept for filesystems that are always noatime/nodiratime but not user settable anymore. Besides that core code only nfs needed an update because it's leaving atime updates to the server and thus sets the S_NOATIME flag on every inode, but needs to know whether it's a real noatime mount for an getattr optimization. While we're at it I've killed the IS_NOATIME/IS_NODIRATIME macros that were only used by touch_atime. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfs/inode.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3e4ba9c..a77ee95 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -950,11 +950,20 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) /* Flush out writes to the server in order to update c/mtime */ nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); - if (__IS_FLG(inode, MS_NOATIME)) - need_atime = 0; - else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + + /* + * We may force a getattr if the user cares about atime. + * + * Note that we only have to check the vfsmount flags here: + * - NFS always sets S_NOATIME by so checking it would give a + * bogus result + * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is + * no point in checking those. + */ + if ((mnt->mnt_flags & MNT_NOATIME) || + ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; - /* We may force a getattr if the user cares about atime */ + if (need_atime) err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); else -- cgit v1.1