From ddeff520f02b92128132c282c350fa72afffb84a Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Wed, 9 Aug 2006 13:53:47 -0400 Subject: NFS: Fix a potential deadlock in nfs_release_page nfs_wb_page() waits on request completion and, as a result, is not safe to be called from nfs_release_page() invoked by VM scanner as part of GFP_NOFS allocation. Fix possible deadlock by analyzing gfp mask and refusing to release page if __GFP_FS is not set. Signed-off-by: Nikita Danilov Signed-off-by: Trond Myklebust (cherry picked from 374d969debfb290bafcb41d28918dc6f7e43ce31 commit) --- fs/nfs/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cc2b874..48e8928 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) static int nfs_release_page(struct page *page, gfp_t gfp) { - return !nfs_wb_page(page->mapping->host, page); + if (gfp & __GFP_FS) + return !nfs_wb_page(page->mapping->host, page); + else + /* + * Avoid deadlock on nfs_wait_on_request(). + */ + return 0; } const struct address_space_operations nfs_file_aops = { -- cgit v1.1 From a634904a7de0d3a0bc606f608007a34e8c05bfee Mon Sep 17 00:00:00 2001 From: ASANO Masahiro Date: Tue, 22 Aug 2006 20:06:02 -0400 Subject: VFS: add lookup hint for network file systems I'm trying to speeding up mkdir(2) for network file systems. A typical mkdir(2) calls two inode_operations: lookup and mkdir. The lookup operation would fail with ENOENT in common case. I think it is unnecessary because the subsequent mkdir operation can check it. In case of creat(2), lookup operation is called with the LOOKUP_CREATE flag, so individual filesystem can omit real lookup. e.g. nfs_lookup(). Here is a sample patch which uses LOOKUP_CREATE and O_EXCL on mkdir, symlink and mknod. This uses the gadget for creat(2). And here is the result of a benchmark on NFSv3. mkdir(2) 10,000 times: original 50.5 sec patched 29.0 sec Signed-off-by: ASANO Masahiro Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust (cherry picked from fab7bf44449b29f9d5572a5dd8adcf7c91d5bf0f commit) --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 55a1312..8631664 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1767,6 +1767,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) if (nd->last_type != LAST_NORM) goto fail; nd->flags &= ~LOOKUP_PARENT; + nd->flags |= LOOKUP_CREATE; + nd->intent.open.flags = O_EXCL; /* * Do the final lookup. -- cgit v1.1 From 5d67476fff2df6ff12f60b540fd0e74cf2a668f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 31 Jul 2006 14:11:48 -0700 Subject: SUNRPC: make rpc_unlink() take a dentry argument instead of a path Signe-off-by: Trond Myklebust (cherry picked from 88bf6d811b01a4be7fd507d18bf5f1c527989089 commit) --- fs/nfs/idmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b81e7ed..df0be12 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -130,9 +130,8 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; + rpc_unlink(idmap->idmap_dentry); dput(idmap->idmap_dentry); - idmap->idmap_dentry = NULL; - rpc_unlink(idmap->idmap_path); clp->cl_idmap = NULL; kfree(idmap); } -- cgit v1.1 From 8f8e7a50f450fcb86a5b2ffb94543c57a14f8260 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Aug 2006 13:11:15 -0400 Subject: SUNRPC: Fix dentry refcounting issues with users of rpc_pipefs rpc_unlink() and rpc_rmdir() will dput the dentry reference for you. Signed-off-by: Trond Myklebust (cherry picked from a05a57effa71a1f67ccbfc52335c10c8b85f3f6a commit) --- fs/nfs/idmap.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index df0be12..07a5dd5 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -131,7 +131,6 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; rpc_unlink(idmap->idmap_dentry); - dput(idmap->idmap_dentry); clp->cl_idmap = NULL; kfree(idmap); } -- cgit v1.1 From 01df9c5e918ae5559f2d96da0143f8bfbb9e6171 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Aug 2006 11:58:57 -0400 Subject: LOCKD: Fix a deadlock in nlm_traverse_files() nlm_traverse_files() is not allowed to hold the nlm_file_mutex while calling nlm_inspect file, since it may end up calling nlm_release_file() when releaseing the blocks. Signed-off-by: Trond Myklebust (cherry picked from e558d3cde986e04f68afe8c790ad68ef4b94587a commit) --- fs/lockd/svcsubs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 2a4df9b..01b4db9e 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -237,19 +237,22 @@ static int nlm_traverse_files(struct nlm_host *host, int action) { struct nlm_file *file, **fp; - int i; + int i, ret = 0; mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { fp = nlm_files + i; while ((file = *fp) != NULL) { + file->f_count++; + mutex_unlock(&nlm_file_mutex); + /* Traverse locks, blocks and shares of this file * and update file->f_locks count */ - if (nlm_inspect_file(host, file, action)) { - mutex_unlock(&nlm_file_mutex); - return 1; - } + if (nlm_inspect_file(host, file, action)) + ret = 1; + mutex_lock(&nlm_file_mutex); + file->f_count--; /* No more references to this file. Let go of it. */ if (!file->f_blocks && !file->f_locks && !file->f_shares && !file->f_count) { @@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action) } } mutex_unlock(&nlm_file_mutex); - return 0; + return ret; } /* -- cgit v1.1 From 79558f3610efd7928e8882b2eaca3093b283630e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 13:44:32 -0400 Subject: NFS: Fix issue with EIO on NFS read The problem is that we may be caching writes that would extend the file and create a hole in the region that we are reading. In this case, we need to detect the eof from the server, ensure that we zero out the pages that are part of the hole and mark them as up to date. Signed-off-by: Trond Myklebust (cherry picked from 856b603b01b99146918c093969b6cb1b1b0f1c01 commit) --- fs/nfs/read.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 65c0c5b..da9cf11 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -116,10 +116,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; base &= ~PAGE_CACHE_MASK; pglen = PAGE_CACHE_SIZE - base; - if (pglen < remainder) + for (;;) { + if (remainder <= pglen) { + memclear_highpage_flush(*pages, base, remainder); + break; + } memclear_highpage_flush(*pages, base, pglen); - else - memclear_highpage_flush(*pages, base, remainder); + pages++; + remainder -= pglen; + pglen = PAGE_CACHE_SIZE; + base = 0; + } } /* @@ -476,6 +483,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) unsigned int base = data->args.pgbase; struct page **pages; + if (data->res.eof) + count = data->args.count; if (unlikely(count == 0)) return; pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; @@ -483,11 +492,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) count += base; for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) SetPageUptodate(*pages); - /* - * Was this an eof or a short read? If the latter, don't mark the page - * as uptodate yet. - */ - if (count > 0 && (data->res.eof || data->args.count == data->res.count)) + if (count != 0) SetPageUptodate(*pages); } @@ -502,6 +507,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data) count += base; for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) SetPageError(*pages); + if (count != 0) + SetPageError(*pages); } /* -- cgit v1.1 From e8896495bca8490a427409e0886d63d05419ec65 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 24 Aug 2006 15:44:19 -0400 Subject: NFS: Check lengths more thoroughly in NFS4 readdir XDR decode Check the bounds of length specifiers more thoroughly in the XDR decoding of NFS4 readdir reply data. Currently, if the server returns a bitmap or attr length that causes the current decode point pointer to wrap, this could go undetected (consider a small "negative" length on a 32-bit machine). Also add a check into the main XDR decode handler to make sure that the amount of data is a multiple of four bytes (as specified by RFC-1014). This makes sure that we can do u32* pointer subtraction in the NFS client without risking an undefined result (the result is undefined if the pointers are not correctly aligned with respect to one another). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust (cherry picked from 5861fddd64a7eaf7e8b1a9997455a24e7f688092 commit) --- fs/nfs/nfs4xdr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1750d99..730ec8f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n struct kvec *iov = rcvbuf->head; unsigned int nr, pglen = rcvbuf->page_len; uint32_t *end, *entry, *p, *kaddr; - uint32_t len, attrlen; + uint32_t len, attrlen, xlen; int hdrlen, recvd, status; status = decode_op_hdr(xdr, OP_READDIR); @@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); - end = (uint32_t *) ((char *)p + pglen + readdir->pgbase); + end = p + ((pglen + readdir->pgbase) >> 2); entry = p; for (nr = 0; *p++; nr++) { - if (p + 3 > end) + if (end - p < 3) goto short_pkt; dprintk("cookie = %Lu, ", *((unsigned long long *)p)); p += 2; /* cookie */ @@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); goto err_unmap; } - dprintk("filename = %*s\n", len, (char *)p); - p += XDR_QUADLEN(len); - if (p + 1 > end) + xlen = XDR_QUADLEN(len); + if (end - p < xlen + 1) goto short_pkt; + dprintk("filename = %*s\n", len, (char *)p); + p += xlen; len = ntohl(*p++); /* bitmap length */ - p += len; - if (p + 1 > end) + if (end - p < len + 1) goto short_pkt; + p += len; attrlen = XDR_QUADLEN(ntohl(*p++)); - p += attrlen; /* attributes */ - if (p + 2 > end) + if (end - p < attrlen + 2) goto short_pkt; + p += attrlen; /* attributes */ entry = p; } if (!nr && (entry[0] != 0 || entry[1] == 0)) -- cgit v1.1 From 16b4289c7460ba9c04af40c574949dcca9029658 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Aug 2006 12:27:15 -0400 Subject: NFSv4: Add v4 exception handling for the ACL functions. This is needed in order to handle any NFS4ERR_DELAY errors that might be returned by the server. It also ensures that we map the NFSv4 errors before they are returned to userland. Signed-off-by: Trond Myklebust (cherry picked from 71c12b3f0abc7501f6ed231a6d17bc9c05a238dc commit) --- fs/nfs/nfs4proc.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e6ee97f..153898e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2668,7 +2668,7 @@ out: nfs4_set_cached_acl(inode, acl); } -static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { struct page *pages[NFS4ACL_MAXPAGES]; struct nfs_getaclargs args = { @@ -2721,6 +2721,19 @@ out_free: return ret; } +static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + ssize_t ret; + do { + ret = __nfs4_get_acl_uncached(inode, buf, buflen); + if (ret >= 0) + break; + ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception); + } while (exception.retry); + return ret; +} + static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); @@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) return nfs4_get_acl_uncached(inode, buf, buflen); } -static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFS4ACL_MAXPAGES]; @@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen return ret; } +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + __nfs4_proc_set_acl(inode, buf, buflen), + &exception); + } while (exception.retry); + return err; +} + static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { -- cgit v1.1 From a343bb7750e6a098909c34f5c5dfddbc4fa40053 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:03 -0400 Subject: VFS: Fix access("file", X_OK) in the presence of ACLs Currently, the access() call will return incorrect information on NFS if there exists an ACL that grants execute access to the user on a regular file. The reason the information is incorrect is that the VFS overrides this execute access in open_exec() by checking (inode->i_mode & 0111). This patch propagates the VFS execute bit check back into the generic permission() call. Signed-off-by: Trond Myklebust (cherry picked from 64cbae98848c4c99851cb0a405f0b4982cd76c1e commit) --- fs/namei.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 8631664..432d6bc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask, int permission(struct inode *inode, int mask, struct nameidata *nd) { + umode_t mode = inode->i_mode; int retval, submask; if (mask & MAY_WRITE) { - umode_t mode = inode->i_mode; /* * Nobody gets write access to a read-only fs. @@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) } + /* + * MAY_EXEC on regular files requires special handling: We override + * filesystem execute permissions if the mode bits aren't set. + */ + if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO)) + return -EACCES; + /* Ordinary permission routines do not understand MAY_APPEND. */ submask = mask & ~MAY_APPEND; if (inode->i_op && inode->i_op->permission) -- cgit v1.1 From 9167b0b9a0ab7907191523f5a0528e3b9c288e21 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:03 -0400 Subject: VFS: Remove redundant open-coded mode bit check in prepare_binfmt(). The check in prepare_binfmt() for inode->i_mode & 0111 is redundant, since open_exec() will already have done that. Signed-off-by: Trond Myklebust (cherry picked from 822dec482ced07af32c378cd936d77345786572b commit) --- fs/exec.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 8344ba7..a6f64a9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -922,12 +922,6 @@ int prepare_binprm(struct linux_binprm *bprm) int retval; mode = inode->i_mode; - /* - * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, - * generic_permission lets a non-executable through - */ - if (!(mode & 0111)) /* with at least _one_ execute bit set */ - return -EACCES; if (bprm->file->f_op == NULL) return -EACCES; -- cgit v1.1 From a969fd5a4e162c4485ae8f3e49d674656a18fa36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:04 -0400 Subject: VFS: Remove redundant open-coded mode bit checks in open_exec(). The check in open_exec() for inode->i_mode & 0111 has been made redundant by the fix to permission(). Signed-off-by: Trond Myklebust (cherry picked from 1d3741c5d991686699f100b65b9956f7ee7ae0ae commit) --- fs/exec.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index a6f64a9..f7aabfe 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -486,8 +486,6 @@ struct file *open_exec(const char *name) if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && S_ISREG(inode->i_mode)) { int err = vfs_permission(&nd, MAY_EXEC); - if (!err && !(inode->i_mode & 0111)) - err = -EACCES; file = ERR_PTR(err); if (!err) { file = nameidata_to_filp(&nd, O_RDONLY); -- cgit v1.1