diff options
Diffstat (limited to 'fs/nfs/dir.c')
-rw-r--r-- | fs/nfs/dir.c | 1015 |
1 files changed, 618 insertions, 397 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e257172..07ac384 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,11 +33,12 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/sched.h> +#include <linux/vmalloc.h> -#include "nfs4_fs.h" #include "delegation.h" #include "iostat.h" #include "internal.h" +#include "fscache.h" /* #define NFS_DEBUG_VERBOSE 1 */ @@ -55,6 +56,7 @@ static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); +static int nfs_readdir_clear_array(struct page*, gfp_t); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, @@ -80,6 +82,10 @@ const struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; +const struct address_space_operations nfs_dir_addr_space_ops = { + .releasepage = nfs_readdir_clear_array, +}; + #ifdef CONFIG_NFS_V3 const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, @@ -104,8 +110,9 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_create, + .create = nfs_open_create, .lookup = nfs_atomic_lookup, .link = nfs_link, .unlink = nfs_unlink, @@ -150,51 +157,197 @@ nfs_opendir(struct inode *inode, struct file *filp) return res; } -typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); +struct nfs_cache_array_entry { + u64 cookie; + u64 ino; + struct qstr string; +}; + +struct nfs_cache_array { + unsigned int size; + int eof_index; + u64 last_cookie; + struct nfs_cache_array_entry array[0]; +}; + +#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)) + +typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); typedef struct { struct file *file; struct page *page; unsigned long page_index; - __be32 *ptr; u64 *dir_cookie; loff_t current_index; - struct nfs_entry *entry; decode_dirent_t decode; - int plus; + unsigned long timestamp; unsigned long gencount; - int timestamp_valid; + unsigned int cache_entry_index; + unsigned int plus:1; + unsigned int eof:1; } nfs_readdir_descriptor_t; -/* Now we cache directories properly, by stuffing the dirent - * data directly in the page cache. - * - * Inode invalidation due to refresh etc. takes care of - * _everything_, no sloppy entry flushing logic, no extraneous - * copying, network direct to page cache, the way it was meant - * to be. - * - * NOTE: Dirent information verification is done always by the - * page-in of the RPC reply, nowhere else, this simplies - * things substantially. +/* + * The caller is responsible for calling nfs_readdir_release_array(page) */ static -int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) +struct nfs_cache_array *nfs_readdir_get_array(struct page *page) +{ + if (page == NULL) + return ERR_PTR(-EIO); + return (struct nfs_cache_array *)kmap(page); +} + +static +void nfs_readdir_release_array(struct page *page) +{ + kunmap(page); +} + +/* + * we are freeing strings created by nfs_add_to_readdir_array() + */ +static +int nfs_readdir_clear_array(struct page *page, gfp_t mask) +{ + struct nfs_cache_array *array = nfs_readdir_get_array(page); + int i; + for (i = 0; i < array->size; i++) + kfree(array->array[i].string.name); + nfs_readdir_release_array(page); + return 0; +} + +/* + * the caller is responsible for freeing qstr.name + * when called by nfs_readdir_add_to_array, the strings will be freed in + * nfs_clear_readdir_array() + */ +static +int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len) +{ + string->len = len; + string->name = kmemdup(name, len, GFP_KERNEL); + if (string->name == NULL) + return -ENOMEM; + string->hash = full_name_hash(name, len); + return 0; +} + +static +int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) +{ + struct nfs_cache_array *array = nfs_readdir_get_array(page); + struct nfs_cache_array_entry *cache_entry; + int ret; + + if (IS_ERR(array)) + return PTR_ERR(array); + ret = -EIO; + if (array->size >= MAX_READDIR_ARRAY) + goto out; + + cache_entry = &array->array[array->size]; + cache_entry->cookie = entry->prev_cookie; + cache_entry->ino = entry->ino; + ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); + if (ret) + goto out; + array->last_cookie = entry->cookie; + if (entry->eof == 1) + array->eof_index = array->size; + array->size++; +out: + nfs_readdir_release_array(page); + return ret; +} + +static +int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) +{ + loff_t diff = desc->file->f_pos - desc->current_index; + unsigned int index; + + if (diff < 0) + goto out_eof; + if (diff >= array->size) { + if (array->eof_index > 0) + goto out_eof; + desc->current_index += array->size; + return -EAGAIN; + } + + index = (unsigned int)diff; + *desc->dir_cookie = array->array[index].cookie; + desc->cache_entry_index = index; + if (index == array->eof_index) + desc->eof = 1; + return 0; +out_eof: + desc->eof = 1; + return -EBADCOOKIE; +} + +static +int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) +{ + int i; + int status = -EAGAIN; + + for (i = 0; i < array->size; i++) { + if (i == array->eof_index) { + desc->eof = 1; + status = -EBADCOOKIE; + } + if (array->array[i].cookie == *desc->dir_cookie) { + desc->cache_entry_index = i; + status = 0; + break; + } + } + + return status; +} + +static +int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) +{ + struct nfs_cache_array *array; + int status = -EBADCOOKIE; + + if (desc->dir_cookie == NULL) + goto out; + + array = nfs_readdir_get_array(desc->page); + if (IS_ERR(array)) { + status = PTR_ERR(array); + goto out; + } + + if (*desc->dir_cookie == 0) + status = nfs_readdir_search_for_pos(array, desc); + else + status = nfs_readdir_search_for_cookie(array, desc); + + nfs_readdir_release_array(desc->page); +out: + return status; +} + +/* Fill a page with xdr information before transferring to the cache page */ +static +int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, + struct nfs_entry *entry, struct file *file, struct inode *inode) { - struct file *file = desc->file; - struct inode *inode = file->f_path.dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); unsigned long timestamp, gencount; int error; - dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", - __func__, (long long)desc->entry->cookie, - page->index); - again: timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); - error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, + error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ @@ -208,190 +361,292 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) } desc->timestamp = timestamp; desc->gencount = gencount; - desc->timestamp_valid = 1; - SetPageUptodate(page); - /* Ensure consistent page alignment of the data. - * Note: assumes we have exclusive access to this mapping either - * through inode->i_mutex or some other mechanism. - */ - if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { - /* Should never happen */ - nfs_zap_mapping(inode, inode->i_mapping); - } - unlock_page(page); - return 0; - error: - unlock_page(page); - return -EIO; +error: + return error; } -static inline -int dir_decode(nfs_readdir_descriptor_t *desc) +/* Fill in an entry based on the xdr code stored in desc->page */ +static +int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream) { - __be32 *p = desc->ptr; - p = desc->decode(p, desc->entry, desc->plus); + __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus); if (IS_ERR(p)) return PTR_ERR(p); - desc->ptr = p; - if (desc->timestamp_valid) { - desc->entry->fattr->time_start = desc->timestamp; - desc->entry->fattr->gencount = desc->gencount; - } else - desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; + + entry->fattr->time_start = desc->timestamp; + entry->fattr->gencount = desc->gencount; return 0; } -static inline -void dir_page_release(nfs_readdir_descriptor_t *desc) +static +int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) { - kunmap(desc->page); - page_cache_release(desc->page); - desc->page = NULL; - desc->ptr = NULL; + struct nfs_inode *node; + if (dentry->d_inode == NULL) + goto different; + node = NFS_I(dentry->d_inode); + if (node->fh.size != entry->fh->size) + goto different; + if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0) + goto different; + return 1; +different: + return 0; } -/* - * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry with cookie '*desc->dir_cookie'. - * - * If the end of the buffer has been reached, return -EAGAIN, if not, - * return the offset within the buffer of the next entry to be - * read. - */ -static inline -int find_dirent(nfs_readdir_descriptor_t *desc) +static +void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { - struct nfs_entry *entry = desc->entry; - int loop_count = 0, - status; + struct qstr filename = { + .len = entry->len, + .name = entry->name, + }; + struct dentry *dentry; + struct dentry *alias; + struct inode *dir = parent->d_inode; + struct inode *inode; - while((status = dir_decode(desc)) == 0) { - dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", - __func__, (unsigned long long)entry->cookie); - if (entry->prev_cookie == *desc->dir_cookie) - break; - if (loop_count++ > 200) { - loop_count = 0; - schedule(); + if (filename.name[0] == '.') { + if (filename.len == 1) + return; + if (filename.len == 2 && filename.name[1] == '.') + return; + } + filename.hash = full_name_hash(filename.name, filename.len); + + dentry = d_lookup(parent, &filename); + if (dentry != NULL) { + if (nfs_same_file(dentry, entry)) { + nfs_refresh_inode(dentry->d_inode, entry->fattr); + goto out; + } else { + d_drop(dentry); + dput(dentry); } } - return status; + + dentry = d_alloc(parent, &filename); + if (dentry == NULL) + return; + + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); + if (IS_ERR(inode)) + goto out; + + alias = d_materialise_unique(dentry, inode); + if (IS_ERR(alias)) + goto out; + else if (alias) { + nfs_set_verifier(alias, nfs_save_change_attribute(dir)); + dput(alias); + } else + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + +out: + dput(dentry); +} + +/* Perform conversion from xdr to cache array */ +static +void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, + void *xdr_page, struct page *page, unsigned int buflen) +{ + struct xdr_stream stream; + struct xdr_buf buf; + __be32 *ptr = xdr_page; + int status; + struct nfs_cache_array *array; + + buf.head->iov_base = xdr_page; + buf.head->iov_len = buflen; + buf.tail->iov_len = 0; + buf.page_base = 0; + buf.page_len = 0; + buf.buflen = buf.head->iov_len; + buf.len = buf.head->iov_len; + + xdr_init_decode(&stream, &buf, ptr); + + + do { + status = xdr_decode(desc, entry, &stream); + if (status != 0) + break; + + if (nfs_readdir_add_to_array(entry, page) == -1) + break; + if (desc->plus == 1) + nfs_prime_dcache(desc->file->f_path.dentry, entry); + } while (!entry->eof); + + if (status == -EBADCOOKIE && entry->eof) { + array = nfs_readdir_get_array(page); + array->eof_index = array->size - 1; + status = 0; + nfs_readdir_release_array(page); + } +} + +static +void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages) +{ + unsigned int i; + for (i = 0; i < npages; i++) + put_page(pages[i]); +} + +static +void nfs_readdir_free_large_page(void *ptr, struct page **pages, + unsigned int npages) +{ + vm_unmap_ram(ptr, npages); + nfs_readdir_free_pagearray(pages, npages); } /* - * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the entry at offset 'desc->file->f_pos'. - * - * If the end of the buffer has been reached, return -EAGAIN, if not, - * return the offset within the buffer of the next entry to be - * read. + * nfs_readdir_large_page will allocate pages that must be freed with a call + * to nfs_readdir_free_large_page */ -static inline -int find_dirent_index(nfs_readdir_descriptor_t *desc) +static +void *nfs_readdir_large_page(struct page **pages, unsigned int npages) { - struct nfs_entry *entry = desc->entry; - int loop_count = 0, - status; + void *ptr; + unsigned int i; + + for (i = 0; i < npages; i++) { + struct page *page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out_freepages; + pages[i] = page; + } - for(;;) { - status = dir_decode(desc); - if (status) - break; + ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL); + if (!IS_ERR_OR_NULL(ptr)) + return ptr; +out_freepages: + nfs_readdir_free_pagearray(pages, i); + return NULL; +} + +static +int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) +{ + struct page *pages[NFS_MAX_READDIR_PAGES]; + void *pages_ptr = NULL; + struct nfs_entry entry; + struct file *file = desc->file; + struct nfs_cache_array *array; + int status = 0; + unsigned int array_size = ARRAY_SIZE(pages); + + entry.prev_cookie = 0; + entry.cookie = *desc->dir_cookie; + entry.eof = 0; + entry.fh = nfs_alloc_fhandle(); + entry.fattr = nfs_alloc_fattr(); + if (entry.fh == NULL || entry.fattr == NULL) + goto out; - dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", - (unsigned long long)entry->cookie, desc->current_index); + array = nfs_readdir_get_array(page); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; - if (desc->file->f_pos == desc->current_index) { - *desc->dir_cookie = entry->cookie; + pages_ptr = nfs_readdir_large_page(pages, array_size); + if (!pages_ptr) + goto out_release_array; + do { + status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); + + if (status < 0) break; - } - desc->current_index++; - if (loop_count++ > 200) { - loop_count = 0; - schedule(); - } - } + nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE); + } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY); + + nfs_readdir_free_large_page(pages_ptr, pages, array_size); +out_release_array: + nfs_readdir_release_array(page); +out: + nfs_free_fattr(entry.fattr); + nfs_free_fhandle(entry.fh); return status; } /* - * Find the given page, and call find_dirent() or find_dirent_index in - * order to try to return the next entry. + * Now we cache directories properly, by converting xdr information + * to an array that can be used for lookups later. This results in + * fewer cache pages, since we can store more information on each page. + * We only need to convert from xdr once so future lookups are much simpler */ -static inline -int find_dirent_page(nfs_readdir_descriptor_t *desc) +static +int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) { struct inode *inode = desc->file->f_path.dentry->d_inode; - struct page *page; - int status; - dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", - __func__, desc->page_index, - (long long) *desc->dir_cookie); + if (nfs_readdir_xdr_to_array(desc, page, inode) < 0) + goto error; + SetPageUptodate(page); - /* If we find the page in the page_cache, we cannot be sure - * how fresh the data is, so we will ignore readdir_plus attributes. - */ - desc->timestamp_valid = 0; - page = read_cache_page(inode->i_mapping, desc->page_index, - (filler_t *)nfs_readdir_filler, desc); - if (IS_ERR(page)) { - status = PTR_ERR(page); - goto out; + if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { + /* Should never happen */ + nfs_zap_mapping(inode, inode->i_mapping); } + unlock_page(page); + return 0; + error: + unlock_page(page); + return -EIO; +} - /* NOTE: Someone else may have changed the READDIRPLUS flag */ - desc->page = page; - desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - if (*desc->dir_cookie != 0) - status = find_dirent(desc); - else - status = find_dirent_index(desc); - if (status < 0) - dir_page_release(desc); - out: - dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); - return status; +static +void cache_page_release(nfs_readdir_descriptor_t *desc) +{ + page_cache_release(desc->page); + desc->page = NULL; +} + +static +struct page *get_cache_page(nfs_readdir_descriptor_t *desc) +{ + struct page *page; + page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, + desc->page_index, (filler_t *)nfs_readdir_filler, desc); + if (IS_ERR(page)) + desc->eof = 1; + return page; } /* - * Recurse through the page cache pages, and return a - * filled nfs_entry structure of the next directory entry if possible. - * - * The target for the search is '*desc->dir_cookie' if non-0, - * 'desc->file->f_pos' otherwise + * Returns 0 if desc->dir_cookie was found on page desc->page_index */ +static +int find_cache_page(nfs_readdir_descriptor_t *desc) +{ + int res; + + desc->page = get_cache_page(desc); + if (IS_ERR(desc->page)) + return PTR_ERR(desc->page); + + res = nfs_readdir_search_array(desc); + if (res == 0) + return 0; + cache_page_release(desc); + return res; +} + +/* Search for desc->dir_cookie from the beginning of the page cache */ static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) { - int loop_count = 0; - int res; - - /* Always search-by-index from the beginning of the cache */ - if (*desc->dir_cookie == 0) { - dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n", - (long long)desc->file->f_pos); - desc->page_index = 0; - desc->entry->cookie = desc->entry->prev_cookie = 0; - desc->entry->eof = 0; - desc->current_index = 0; - } else - dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", - (unsigned long long)*desc->dir_cookie); + int res = -EAGAIN; - for (;;) { - res = find_dirent_page(desc); + while (1) { + res = find_cache_page(desc); if (res != -EAGAIN) break; - /* Align to beginning of next page */ - desc->page_index ++; - if (loop_count++ > 200) { - loop_count = 0; - schedule(); - } + desc->page_index++; } - - dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res); return res; } @@ -400,8 +655,6 @@ static inline unsigned int dt_type(struct inode *inode) return (inode->i_mode >> 12) & 15; } -static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc); - /* * Once we've found the start of the dirent within a page: fill 'er up... */ @@ -410,49 +663,36 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, filldir_t filldir) { struct file *file = desc->file; - struct nfs_entry *entry = desc->entry; - struct dentry *dentry = NULL; - u64 fileid; - int loop_count = 0, - res; - - dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", - (unsigned long long)entry->cookie); - - for(;;) { - unsigned d_type = DT_UNKNOWN; - /* Note: entry->prev_cookie contains the cookie for - * retrieving the current dirent on the server */ - fileid = entry->ino; - - /* Get a dentry if we have one */ - if (dentry != NULL) - dput(dentry); - dentry = nfs_readdir_lookup(desc); + int i = 0; + int res = 0; + struct nfs_cache_array *array = NULL; + unsigned int d_type = DT_UNKNOWN; + struct dentry *dentry = NULL; - /* Use readdirplus info */ - if (dentry != NULL && dentry->d_inode != NULL) { - d_type = dt_type(dentry->d_inode); - fileid = NFS_FILEID(dentry->d_inode); - } + array = nfs_readdir_get_array(desc->page); - res = filldir(dirent, entry->name, entry->len, - file->f_pos, nfs_compat_user_ino64(fileid), - d_type); + for (i = desc->cache_entry_index; i < array->size; i++) { + d_type = DT_UNKNOWN; + + res = filldir(dirent, array->array[i].string.name, + array->array[i].string.len, file->f_pos, + nfs_compat_user_ino64(array->array[i].ino), d_type); if (res < 0) break; file->f_pos++; - *desc->dir_cookie = entry->cookie; - if (dir_decode(desc) != 0) { - desc->page_index ++; + desc->cache_entry_index = i; + if (i < (array->size-1)) + *desc->dir_cookie = array->array[i+1].cookie; + else + *desc->dir_cookie = array->last_cookie; + if (i == array->eof_index) { + desc->eof = 1; break; } - if (loop_count++ > 200) { - loop_count = 0; - schedule(); - } } - dir_page_release(desc); + + nfs_readdir_release_array(desc->page); + cache_page_release(desc); if (dentry != NULL) dput(dentry); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", @@ -476,12 +716,9 @@ static inline int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, filldir_t filldir) { - struct file *file = desc->file; - struct inode *inode = file->f_path.dentry->d_inode; - struct rpc_cred *cred = nfs_file_cred(file); struct page *page = NULL; int status; - unsigned long timestamp, gencount; + struct inode *inode = desc->file->f_path.dentry->d_inode; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); @@ -491,38 +728,22 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, status = -ENOMEM; goto out; } - timestamp = jiffies; - gencount = nfs_inc_attr_generation_counter(); - status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, - *desc->dir_cookie, page, - NFS_SERVER(inode)->dtsize, - desc->plus); - desc->page = page; - desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - if (status >= 0) { - desc->timestamp = timestamp; - desc->gencount = gencount; - desc->timestamp_valid = 1; - if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = *desc->dir_cookie; - } else + + if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) { status = -EIO; - if (status < 0) goto out_release; + } + desc->page_index = 0; + desc->page = page; status = nfs_do_filldir(desc, dirent, filldir); - /* Reset read descriptor so it searches the page cache from - * the start upon the next call to readdir_search_pagecache() */ - desc->page_index = 0; - desc->entry->cookie = desc->entry->prev_cookie = 0; - desc->entry->eof = 0; out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; out_release: - dir_page_release(desc); + cache_page_release(desc); goto out; } @@ -536,7 +757,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct inode *inode = dentry->d_inode; nfs_readdir_descriptor_t my_desc, *desc = &my_desc; - struct nfs_entry my_entry; int res = -ENOMEM; dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", @@ -557,26 +777,17 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); - my_entry.cookie = my_entry.prev_cookie = 0; - my_entry.eof = 0; - my_entry.fh = nfs_alloc_fhandle(); - my_entry.fattr = nfs_alloc_fattr(); - if (my_entry.fh == NULL || my_entry.fattr == NULL) - goto out_alloc_failed; - - desc->entry = &my_entry; - nfs_block_sillyrename(dentry); res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) goto out; - while(!desc->entry->eof) { + while (desc->eof != 1) { res = readdir_search_pagecache(desc); if (res == -EBADCOOKIE) { /* This means either end of directory */ - if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { + if (*desc->dir_cookie && desc->eof == 0) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); if (res >= 0) @@ -588,8 +799,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (res == -ETOOSMALL && desc->plus) { clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); nfs_zap_caches(inode); + desc->page_index = 0; desc->plus = 0; - desc->entry->eof = 0; + desc->eof = 0; continue; } if (res < 0) @@ -605,9 +817,6 @@ out: nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; -out_alloc_failed: - nfs_free_fattr(my_entry.fattr); - nfs_free_fhandle(my_entry.fh); dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, res); @@ -1029,10 +1238,63 @@ static int is_atomic_open(struct nameidata *nd) return 1; } +static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd) +{ + struct path path = { + .mnt = nd->path.mnt, + .dentry = dentry, + }; + struct nfs_open_context *ctx; + struct rpc_cred *cred; + fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return ERR_CAST(cred); + ctx = alloc_nfs_open_context(&path, cred, fmode); + put_rpccred(cred); + if (ctx == NULL) + return ERR_PTR(-ENOMEM); + return ctx; +} + +static int do_open(struct inode *inode, struct file *filp) +{ + nfs_fscache_set_inode_cookie(inode, filp); + return 0; +} + +static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) +{ + struct file *filp; + int ret = 0; + + /* If the open_intent is for execute, we have an extra check to make */ + if (ctx->mode & FMODE_EXEC) { + ret = nfs_may_open(ctx->path.dentry->d_inode, + ctx->cred, + nd->intent.open.flags); + if (ret < 0) + goto out; + } + filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open); + if (IS_ERR(filp)) + ret = PTR_ERR(filp); + else + nfs_file_set_open_context(filp, ctx); +out: + put_nfs_open_context(ctx); + return ret; +} + static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct nfs_open_context *ctx; + struct iattr attr; struct dentry *res = NULL; - int error; + struct inode *inode; + int open_flags; + int err; dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1054,13 +1316,32 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry goto out; } + ctx = nameidata_to_nfs_open_context(dentry, nd); + res = ERR_CAST(ctx); + if (IS_ERR(ctx)) + goto out; + + open_flags = nd->intent.open.flags; + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current_umask(); + } else { + open_flags &= ~(O_EXCL | O_CREAT); + attr.ia_valid = 0; + } + /* Open the file on the server */ - res = nfs4_atomic_open(dir, dentry, nd); - if (IS_ERR(res)) { - error = PTR_ERR(res); - switch (error) { + nfs_block_sillyrename(dentry->d_parent); + inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); + if (IS_ERR(inode)) { + nfs_unblock_sillyrename(dentry->d_parent); + put_nfs_open_context(ctx); + switch (PTR_ERR(inode)) { /* Make a negative dentry */ case -ENOENT: + d_add(dentry, NULL); res = NULL; goto out; /* This turned out not to be a regular file */ @@ -1072,11 +1353,25 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry goto no_open; /* case -EINVAL: */ default: + res = ERR_CAST(inode); goto out; } - } else if (res != NULL) + } + res = d_add_unique(dentry, inode); + nfs_unblock_sillyrename(dentry->d_parent); + if (res != NULL) { + dput(ctx->path.dentry); + ctx->path.dentry = dget(res); dentry = res; + } + err = nfs_intent_set_file(nd, ctx); + if (err < 0) { + if (res != NULL) + dput(res); + return ERR_PTR(err); + } out: + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); return res; no_open: return nfs_lookup(dir, dentry, nd); @@ -1087,12 +1382,15 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct dentry *parent = NULL; struct inode *inode = dentry->d_inode; struct inode *dir; + struct nfs_open_context *ctx; int openflags, ret = 0; if (!is_atomic_open(nd) || d_mountpoint(dentry)) goto no_open; + parent = dget_parent(dentry); dir = parent->d_inode; + /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ @@ -1112,99 +1410,96 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We can't create new files, or truncate existing ones here */ openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); + ctx = nameidata_to_nfs_open_context(dentry, nd); + ret = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; /* * Note: we're not holding inode->i_mutex and so may be racing with * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ - ret = nfs4_open_revalidate(dir, dentry, openflags, nd); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + switch (ret) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + goto out_put_ctx; + default: + goto out_drop; + } + } + iput(inode); + if (inode != dentry->d_inode) + goto out_drop; + + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + ret = nfs_intent_set_file(nd, ctx); + if (ret >= 0) + ret = 1; out: dput(parent); - if (!ret) - d_drop(dentry); return ret; +out_drop: + d_drop(dentry); + ret = 0; +out_put_ctx: + put_nfs_open_context(ctx); + goto out; + no_open_dput: dput(parent); no_open: return nfs_lookup_revalidate(dentry, nd); } -#endif /* CONFIG_NFSV4 */ -static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) +static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { - struct dentry *parent = desc->file->f_path.dentry; - struct inode *dir = parent->d_inode; - struct nfs_entry *entry = desc->entry; - struct dentry *dentry, *alias; - struct qstr name = { - .name = entry->name, - .len = entry->len, - }; - struct inode *inode; - unsigned long verf = nfs_save_change_attribute(dir); + struct nfs_open_context *ctx = NULL; + struct iattr attr; + int error; + int open_flags = 0; - switch (name.len) { - case 2: - if (name.name[0] == '.' && name.name[1] == '.') - return dget_parent(parent); - break; - case 1: - if (name.name[0] == '.') - return dget(parent); - } + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - spin_lock(&dir->i_lock); - if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { - spin_unlock(&dir->i_lock); - return NULL; - } - spin_unlock(&dir->i_lock); + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; - name.hash = full_name_hash(name.name, name.len); - dentry = d_lookup(parent, &name); - if (dentry != NULL) { - /* Is this a positive dentry that matches the readdir info? */ - if (dentry->d_inode != NULL && - (NFS_FILEID(dentry->d_inode) == entry->ino || - d_mountpoint(dentry))) { - if (!desc->plus || entry->fh->size == 0) - return dentry; - if (nfs_compare_fh(NFS_FH(dentry->d_inode), - entry->fh) == 0) - goto out_renew; - } - /* No, so d_drop to allow one to be created */ - d_drop(dentry); - dput(dentry); - } - if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) - return NULL; - if (name.len > NFS_SERVER(dir)->namelen) - return NULL; - /* Note: caller is already holding the dir->i_mutex! */ - dentry = d_alloc(parent, &name); - if (dentry == NULL) - return NULL; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; - inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); - if (IS_ERR(inode)) { - dput(dentry); - return NULL; - } + if ((nd->flags & LOOKUP_CREATE) != 0) { + open_flags = nd->intent.open.flags; - alias = d_materialise_unique(dentry, inode); - if (alias != NULL) { - dput(dentry); - if (IS_ERR(alias)) - return NULL; - dentry = alias; + ctx = nameidata_to_nfs_open_context(dentry, nd); + error = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out_err_drop; } -out_renew: - nfs_set_verifier(dentry, verf); - return dentry; + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); + if (error != 0) + goto out_put_ctx; + if (ctx != NULL) { + error = nfs_intent_set_file(nd, ctx); + if (error < 0) + goto out_err; + } + return 0; +out_put_ctx: + if (ctx != NULL) + put_nfs_open_context(ctx); +out_err_drop: + d_drop(dentry); +out_err: + return error; } +#endif /* CONFIG_NFSV4 */ + /* * Code common to create, mkdir, and mknod. */ @@ -1258,7 +1553,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, { struct iattr attr; int error; - int open_flags = 0; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1266,10 +1560,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if ((nd->flags & LOOKUP_CREATE) != 0) - open_flags = nd->intent.open.flags; - - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL); if (error != 0) goto out_err; return 0; @@ -1351,76 +1642,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) return error; } -static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) -{ - static unsigned int sillycounter; - const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; - const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs")+fileidsize+countersize-1; - char silly[slen+1]; - struct qstr qsilly; - struct dentry *sdentry; - int error = -EIO; - - dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - atomic_read(&dentry->d_count)); - nfs_inc_stats(dir, NFSIOS_SILLYRENAME); - - /* - * We don't allow a dentry to be silly-renamed twice. - */ - error = -EBUSY; - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) - goto out; - - sprintf(silly, ".nfs%*.*Lx", - fileidsize, fileidsize, - (unsigned long long)NFS_FILEID(dentry->d_inode)); - - /* Return delegation in anticipation of the rename */ - nfs_inode_return_delegation(dentry->d_inode); - - sdentry = NULL; - do { - char *suffix = silly + slen - countersize; - - dput(sdentry); - sillycounter++; - sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); - - dfprintk(VFS, "NFS: trying to rename %s to %s\n", - dentry->d_name.name, silly); - - sdentry = lookup_one_len(silly, dentry->d_parent, slen); - /* - * N.B. Better to return EBUSY here ... it could be - * dangerous to delete the file while it's in use. - */ - if (IS_ERR(sdentry)) - goto out; - } while(sdentry->d_inode != NULL); /* need negative lookup */ - - qsilly.name = silly; - qsilly.len = strlen(silly); - if (dentry->d_inode) { - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, - dir, &qsilly); - nfs_mark_for_revalidate(dentry->d_inode); - } else - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, - dir, &qsilly); - if (!error) { - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - d_move(dentry, sdentry); - error = nfs_async_unlink(dir, dentry); - /* If we return 0 we don't unlink */ - } - dput(sdentry); -out: - return error; -} - /* * Remove a file after making sure there are no pending writes, * and after checking that the file has only one user. @@ -1580,7 +1801,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { - atomic_inc(&inode->i_count); + ihold(inode); d_add(dentry, inode); } return error; @@ -1711,14 +1932,14 @@ static void nfs_access_free_list(struct list_head *head) int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(head); - struct nfs_inode *nfsi; + struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 0 : -1; spin_lock(&nfs_access_lru_lock); - list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { + list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; if (nr_to_scan-- == 0) |