From 72cb77f4a5ace37b12dcb47a0e8637a2c28ad881 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:10:30 -0400 Subject: NFS: Throttle page dirtying while we're flushing to disk The following patch is a combination of a patch by myself and Peter Staubach. Trond: If we allow other processes to dirty pages while a process is doing a consistency sync to disk, we can end up never making progress. Peter: Attached is a patch which addresses a continuing problem with the NFS client generating out of order WRITE requests. While this is compliant with all of the current protocol specifications, there are servers in the market which can not handle out of order WRITE requests very well. Also, this may lead to sub-optimal block allocations in the underlying file system on the server. This may cause the read throughputs to be reduced when reading the file from the server. Peter: There has been a lot of work recently done to address out of order issues on a systemic level. However, the NFS client is still susceptible to the problem. Out of order WRITE requests can occur when pdflush is in the middle of writing out pages while the process dirtying the pages calls generic_file_buffered_write which calls generic_perform_write which calls balance_dirty_pages_rate_limited which ends up calling writeback_inodes which ends up calling back into the NFS client to writes out dirty pages for the same file that pdflush happens to be working with. Signed-off-by: Peter Staubach [modification by Trond to merge the two similar patches] Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 90f292b..404c19c 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -354,6 +354,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); + /* + * Prevent starvation issues if someone is doing a consistency + * sync-to-disk + */ + ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (ret) + return ret; + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; -- cgit v1.1 From e1ebfd33be068ec933f8954060a499bd22ad6f69 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:37:54 -0400 Subject: NFS: Kill the "defined but not used" compile error on nommu machines Bryan Wu reports that when compiling NFS on nommu machines he gets a "defined but not used" error on nfs_file_mmap(). The easiest fix is simply to get rid of the special casing in NFS, and just always call generic_file_mmap() to set up the file. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 404c19c..1eab9c9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -64,11 +64,7 @@ const struct file_operations nfs_file_operations = { .write = do_sync_write, .aio_read = nfs_file_read, .aio_write = nfs_file_write, -#ifdef CONFIG_MMU .mmap = nfs_file_mmap, -#else - .mmap = generic_file_mmap, -#endif .open = nfs_file_open, .flush = nfs_file_flush, .release = nfs_file_release, @@ -304,11 +300,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dprintk("NFS: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_mapping(inode, file->f_mapping); + /* Note: generic_file_mmap() returns ENOSYS on nommu systems + * so we call that before revalidating the mapping + */ + status = generic_file_mmap(file, vma); if (!status) { vma->vm_ops = &nfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; - file_accessed(file); + status = nfs_revalidate_mapping(inode, file->f_mapping); } return status; } -- cgit v1.1 From 7fe5c398fc2186ed586db11106a6692d871d0d58 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Mar 2009 15:35:50 -0400 Subject: NFS: Optimise NFS close() Close-to-open cache consistency rules really only require us to flush out writes on calls to close(), and require us to revalidate attributes on the very last close of the file. Currently we appear to be doing a lot of extra attribute revalidation and cache flushes. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1eab9c9..d451073 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -137,9 +137,6 @@ nfs_file_release(struct inode *inode, struct file *filp) dentry->d_parent->d_name.name, dentry->d_name.name); - /* Ensure that dirty pages are flushed out with the right creds */ - if (filp->f_mode & FMODE_WRITE) - nfs_wb_all(dentry->d_inode); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); } @@ -231,7 +228,6 @@ nfs_file_flush(struct file *file, fl_owner_t id) struct nfs_open_context *ctx = nfs_file_open_context(file); struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - int status; dprintk("NFS: flush(%s/%s)\n", dentry->d_parent->d_name.name, @@ -241,11 +237,8 @@ nfs_file_flush(struct file *file, fl_owner_t id) return 0; nfs_inc_stats(inode, NFSIOS_VFSFLUSH); - /* Ensure that data+attribute caches are up to date after close() */ - status = nfs_do_fsync(ctx, inode); - if (!status) - nfs_revalidate_inode(NFS_SERVER(inode), inode); - return status; + /* Flush writes to the server and return any errors */ + return nfs_do_fsync(ctx, inode); } static ssize_t -- cgit v1.1