diff options
author | Steven J. Magnani <steve@digidescorp.com> | 2012-10-04 17:14:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-06 03:05:09 +0900 |
commit | 21b6633d516c4f5d03ec02ede6374e320191003f (patch) | |
tree | 1117879875a138230c3fb2dea435026012270141 /fs/fat/inode.c | |
parent | 4b63709861e431e73f0be6b83f420fdd8fc518f5 (diff) | |
download | op-kernel-dev-21b6633d516c4f5d03ec02ede6374e320191003f.zip op-kernel-dev-21b6633d516c4f5d03ec02ede6374e320191003f.tar.gz |
fat (exportfs): move NFS support code
Under memory pressure, the system may evict dentries from cache. When the
FAT driver receives an NFS request involving an evicted dentry, it is
unable to reconnect it to the filesystem root. This causes the request to
fail, often with ENOENT.
This is partially due to ineffectiveness of the current FAT NFS
implementation, and partially due to an unimplemented fh_to_parent method.
The latter can cause file accesses to fail on shares exported with
subtree_check.
This patch set provides the FAT driver with the ability to
reconnect dentries. NFS file handle generation and lookups are simplified
and made congruent with ext2.
Testing has involved a memory-starved virtual machine running 3.5-rc5 that
exports a ~2 GB vfat filesystem containing a kernel tree (~770 MB, ~40000
files, 9 levels). Both 'cp -r' and 'ls -lR' operations were performed
from a client, some overlapping, some consecutive. Exports with
'subtree_check' and 'no_subtree_check' have been tested.
Note that while this patch set improves FAT's NFS support, it does not
eliminate ESTALE errors completely.
The following should be considered for NFS clients who are sensitive to ESTALE:
* Mounting with lookupcache=none
Unfortunately, this can degrade performance severely, particularly for deep
filesystems.
* Incorporating VFS patches to retry ESTALE failures on the client-side,
such as https://lkml.org/lkml/2012/6/29/381
* Handling ESTALE errors in client application code
This patch:
Move NFS-related code into its own C file. No functional changes.
Signed-off-by: Steven J. Magnani <steve@digidescorp.com>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/fat/inode.c')
-rw-r--r-- | fs/fat/inode.c | 130 |
1 files changed, 0 insertions, 130 deletions
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4e5a6ac..169f6eb 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -562,20 +562,6 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, - struct inode *inode) -{ - loff_t i_pos; -#if BITS_PER_LONG == 32 - spin_lock(&sbi->inode_hash_lock); -#endif - i_pos = MSDOS_I(inode)->i_pos; -#if BITS_PER_LONG == 32 - spin_unlock(&sbi->inode_hash_lock); -#endif - return i_pos; -} - static int __fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; @@ -668,122 +654,6 @@ static const struct super_operations fat_sops = { .show_options = fat_show_options, }; -/* - * a FAT file handle with fhtype 3 is - * 0/ i_ino - for fast, reliable lookup if still in the cache - * 1/ i_generation - to see if i_ino is still valid - * bit 0 == 0 iff directory - * 2/ i_pos(8-39) - if ino has changed, but still in cache - * 3/ i_pos(4-7)|i_logstart - to semi-verify inode found at i_pos - * 4/ i_pos(0-3)|parent->i_logstart - maybe used to hunt for the file on disc - * - * Hack for NFSv2: Maximum FAT entry number is 28bits and maximum - * i_pos is 40bits (blocknr(32) + dir offset(8)), so two 4bits - * of i_logstart is used to store the directory entry offset. 
- */ - -static struct dentry *fat_fh_to_dentry(struct super_block *sb, - struct fid *fid, int fh_len, int fh_type) -{ - struct inode *inode = NULL; - u32 *fh = fid->raw; - - if (fh_len < 5 || fh_type != 3) - return NULL; - - inode = ilookup(sb, fh[0]); - if (!inode || inode->i_generation != fh[1]) { - if (inode) - iput(inode); - inode = NULL; - } - if (!inode) { - loff_t i_pos; - int i_logstart = fh[3] & 0x0fffffff; - - i_pos = (loff_t)fh[2] << 8; - i_pos |= ((fh[3] >> 24) & 0xf0) | (fh[4] >> 28); - - /* try 2 - see if i_pos is in F-d-c - * require i_logstart to be the same - * Will fail if you truncate and then re-write - */ - - inode = fat_iget(sb, i_pos); - if (inode && MSDOS_I(inode)->i_logstart != i_logstart) { - iput(inode); - inode = NULL; - } - } - - /* - * For now, do nothing if the inode is not found. - * - * What we could do is: - * - * - follow the file starting at fh[4], and record the ".." entry, - * and the name of the fh[2] entry. - * - then follow the ".." file finding the next step up. - * - * This way we build a path to the root of the tree. If this works, we - * lookup the path and so get this inode into the cache. Finally try - * the fat_iget lookup again. If that fails, then we are totally out - * of luck. 
But all that is for another day - */ - return d_obtain_alias(inode); -} - -static int -fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) -{ - int len = *lenp; - struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); - loff_t i_pos; - - if (len < 5) { - *lenp = 5; - return 255; /* no room */ - } - - i_pos = fat_i_pos_read(sbi, inode); - *lenp = 5; - fh[0] = inode->i_ino; - fh[1] = inode->i_generation; - fh[2] = i_pos >> 8; - fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart; - fh[4] = (i_pos & 0x0f) << 28; - if (parent) - fh[4] |= MSDOS_I(parent)->i_logstart; - return 3; -} - -static struct dentry *fat_get_parent(struct dentry *child) -{ - struct super_block *sb = child->d_sb; - struct buffer_head *bh; - struct msdos_dir_entry *de; - loff_t i_pos; - struct dentry *parent; - struct inode *inode; - int err; - - lock_super(sb); - - err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); - if (err) { - parent = ERR_PTR(err); - goto out; - } - inode = fat_build_inode(sb, de, i_pos); - brelse(bh); - - parent = d_obtain_alias(inode); -out: - unlock_super(sb); - - return parent; -} - static const struct export_operations fat_export_ops = { .encode_fh = fat_encode_fh, .fh_to_dentry = fat_fh_to_dentry, |