author     Chuck Lever <cel@netapp.com>                     2005-11-30 18:09:02 -0500
committer  Trond Myklebust <Trond.Myklebust@netapp.com>     2006-01-06 14:58:49 -0500
commit     40859d7ee64ed6bfad8a4e93f9bb5c1074afadff (patch)
tree       ed4069423c3d6551035d5b6116f50452cdac4103 /fs
parent     325cfed9ae901320e9234b18c21434b783dbe342 (diff)
NFS: support large reads and writes on the wire
Most NFS server implementations allow up to 64KB reads and writes on the wire. The Solaris NFS server allows up to a megabyte, for instance.

Now the Linux NFS client supports transfer sizes up to 1MB, too. This will help reduce protocol and context switch overhead on read/write intensive NFS workloads, and support larger atomic read and write operations on servers that support them.

Test-plan:
Connectathon and iozone on a mount point with wsize=rsize>32768 over TCP. Tests with NFS over UDP to verify the maximum RPC payload size cap.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
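For context, the clamping that this patch adds to nfs_sb_init() can be summarized by the small standalone sketch below. It is illustrative only, not kernel code: the constants EXAMPLE_MAX_IO_SIZE and EXAMPLE_DEF_IO_SIZE and the main() driver are assumptions standing in for NFS_MAX_FILE_IO_SIZE, NFS_DEF_FILE_IO_SIZE, and the real superblock setup.

/*
 * Minimal userspace sketch (not the actual kernel code) of how a
 * requested rsize/wsize is clamped and converted to a page count,
 * mirroring the logic this patch adds to nfs_sb_init().
 */
#include <stdio.h>

#define PAGE_SIZE            4096UL
#define PAGE_SHIFT           12
#define EXAMPLE_MAX_IO_SIZE  (1024UL * 1024)   /* 1MB client cap, as in the patch */
#define EXAMPLE_DEF_IO_SIZE  4096UL            /* illustrative default */

static unsigned long clamp_io_size(unsigned long requested,
                                   unsigned long max_rpc_payload)
{
        unsigned long size = requested ? requested : EXAMPLE_DEF_IO_SIZE;

        if (size > max_rpc_payload)        /* transport limit, e.g. NFS over UDP */
                size = max_rpc_payload;
        if (size > EXAMPLE_MAX_IO_SIZE)    /* hard client-side cap */
                size = EXAMPLE_MAX_IO_SIZE;
        return size;
}

int main(void)
{
        /* e.g. a 1MB mount option over a transport limited to 64KB */
        unsigned long rsize  = clamp_io_size(1024 * 1024, 64 * 1024);
        /* round up to the number of pages needed for one RPC, as in the patch */
        unsigned long rpages = (rsize + PAGE_SIZE - 1) >> PAGE_SHIFT;

        printf("rsize=%lu rpages=%lu\n", rsize, rpages);
        return 0;
}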
Diffstat (limited to 'fs')
-rw-r--r--   fs/nfs/direct.c    5
-rw-r--r--   fs/nfs/inode.c    25
-rw-r--r--   fs/nfs/nfsroot.c   4
-rw-r--r--   fs/nfs/read.c      6
-rw-r--r--   fs/nfs/write.c    29
5 files changed, 40 insertions, 29 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f69d95a..fd7ac5e 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -154,6 +154,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
struct list_head *list;
struct nfs_direct_req *dreq;
unsigned int reads = 0;
+ unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
if (!dreq)
@@ -167,7 +168,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
list = &dreq->list;
for(;;) {
- struct nfs_read_data *data = nfs_readdata_alloc();
+ struct nfs_read_data *data = nfs_readdata_alloc(rpages);
if (unlikely(!data)) {
while (!list_empty(list)) {
@@ -431,7 +432,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode,
struct nfs_writeverf first_verf;
struct nfs_write_data *wdata;
- wdata = nfs_writedata_alloc();
+ wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
if (!wdata)
return -ENOMEM;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4e6558d..acde2c5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -221,10 +221,10 @@ nfs_calc_block_size(u64 tsize)
static inline unsigned long
nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
{
- if (bsize < 1024)
- bsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
- else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
- bsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
+ if (bsize < NFS_MIN_FILE_IO_SIZE)
+ bsize = NFS_DEF_FILE_IO_SIZE;
+ else if (bsize >= NFS_MAX_FILE_IO_SIZE)
+ bsize = NFS_MAX_FILE_IO_SIZE;
return nfs_block_bits(bsize, nrbitsp);
}
@@ -307,20 +307,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
if (server->rsize > max_rpc_payload)
server->rsize = max_rpc_payload;
- if (server->wsize > max_rpc_payload)
- server->wsize = max_rpc_payload;
-
+ if (server->rsize > NFS_MAX_FILE_IO_SIZE)
+ server->rsize = NFS_MAX_FILE_IO_SIZE;
server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (server->rpages > NFS_READ_MAXIOV) {
- server->rpages = NFS_READ_MAXIOV;
- server->rsize = server->rpages << PAGE_CACHE_SHIFT;
- }
+ if (server->wsize > max_rpc_payload)
+ server->wsize = max_rpc_payload;
+ if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ server->wsize = NFS_MAX_FILE_IO_SIZE;
server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (server->wpages > NFS_WRITE_MAXIOV) {
- server->wpages = NFS_WRITE_MAXIOV;
- server->wsize = server->wpages << PAGE_CACHE_SHIFT;
- }
if (sb->s_blocksize == 0)
sb->s_blocksize = nfs_block_bits(server->wsize,
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 1b272a1..985cc53 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -296,8 +296,8 @@ static int __init root_nfs_name(char *name)
nfs_port = -1;
nfs_data.version = NFS_MOUNT_VERSION;
nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */
- nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
- nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
+ nfs_data.rsize = NFS_DEF_FILE_IO_SIZE;
+ nfs_data.wsize = NFS_DEF_FILE_IO_SIZE;
nfs_data.acregmin = 3;
nfs_data.acregmax = 60;
nfs_data.acdirmin = 30;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2148624..05eb43f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -83,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
int result;
struct nfs_read_data *rdata;
- rdata = nfs_readdata_alloc();
+ rdata = nfs_readdata_alloc(1);
if (!rdata)
return -ENOMEM;
@@ -283,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
nbytes = req->wb_bytes;
for(;;) {
- data = nfs_readdata_alloc();
+ data = nfs_readdata_alloc(1);
if (!data)
goto out_bad;
INIT_LIST_HEAD(&data->pages);
@@ -339,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
return nfs_pagein_multi(head, inode);
- data = nfs_readdata_alloc();
+ data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
if (!data)
goto out_bad;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80bc4ea..1ce0c20 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -89,18 +89,33 @@ static mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-static inline struct nfs_write_data *nfs_commit_alloc(void)
+static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kmalloc(size, GFP_NOFS);
+ if (p->pagevec) {
+ memset(p->pagevec, 0, size);
+ } else {
+ mempool_free(p, nfs_commit_mempool);
+ p = NULL;
+ }
+ }
}
return p;
}
static inline void nfs_commit_free(struct nfs_write_data *p)
{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
mempool_free(p, nfs_commit_mempool);
}
@@ -167,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
int result, written = 0;
struct nfs_write_data *wdata;
- wdata = nfs_writedata_alloc();
+ wdata = nfs_writedata_alloc(1);
if (!wdata)
return -ENOMEM;
@@ -909,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
nbytes = req->wb_bytes;
for (;;) {
- data = nfs_writedata_alloc();
+ data = nfs_writedata_alloc(1);
if (!data)
goto out_bad;
list_add(&data->pages, &list);
@@ -973,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
return nfs_flush_multi(head, inode, how);
- data = nfs_writedata_alloc();
+ data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
if (!data)
goto out_bad;
@@ -1241,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head,
* Commit dirty pages
*/
static int
-nfs_commit_list(struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
struct nfs_write_data *data;
struct nfs_page *req;
- data = nfs_commit_alloc();
+ data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
if (!data)
goto out_bad;
@@ -1351,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how)
res = nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&nfsi->req_lock);
if (res) {
- error = nfs_commit_list(&head, how);
+ error = nfs_commit_list(inode, &head, how);
if (error < 0)
return error;
}