diff options
author | wengang wang <wen.gang.wang@oracle.com> | 2009-03-06 21:29:10 +0800 |
---|---|---|
committer | Mark Fasheh <mfasheh@suse.com> | 2009-04-03 11:39:25 -0700 |
commit | 6ca497a83e592d64e050c4d04b6dedb8c915f39a (patch) | |
tree | 0b9cd611d6d907881841eca73d12a7f3b85f1716 /fs/ocfs2/inode.c | |
parent | 9405dccfd3201d2b76e120949bec81ba8cfbd2d0 (diff) | |
download | op-kernel-dev-6ca497a83e592d64e050c4d04b6dedb8c915f39a.zip op-kernel-dev-6ca497a83e592d64e050c4d04b6dedb8c915f39a.tar.gz |
ocfs2: fix rare stale inode errors when exporting via nfs
For nfs exporting, ocfs2_get_dentry() returns the dentry for fh.
ocfs2_get_dentry() may read from disk when the inode is not in memory,
without any cross cluster lock. This leads to the file system loading a
stale inode.
This patch fixes the above problem.
The solution is that, when the inode is not in memory, we take the cluster
lock (PR) of the alloc inode from which the inode in question is allocated (this
causes the node on which the deletion is done to sync the alloc inode) before
reading out the inode itself. Then we check the bitmap in the group (the one the
inode in question is allocated from) to see if the bit is clear. If it's clear
then the inode is stale. If the bit is set, we then check the generation as the
existing code does.
We have to read out the inode in question from disk first to know its alloc
slot and alloc bit. If it's not stale, we then read it out using ocfs2_iget().
The second read should then be from cache.
We also have to add a per-superblock nfs_sync_lock to cover the lock for the
alloc inode and the lock for the inode in question. This is because
ocfs2_get_dentry() and ocfs2_delete_inode() lock them in reverse order.
nfs_sync_lock is taken in EX mode in ocfs2_get_dentry() and in PR mode in
ocfs2_delete_inode(), so that multiple ocfs2_delete_inode() calls can run
concurrently in the normal case.
[mfasheh@suse.com: build warning fixes and comment cleanups]
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Acked-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs/ocfs2/inode.c')
-rw-r--r-- | fs/ocfs2/inode.c | 28 |
1 files changed, 27 insertions, 1 deletions
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 4a88bce..10e1fa87 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -113,6 +113,17 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) oi->ip_attr |= OCFS2_DIRSYNC_FL; } +struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno) +{ + struct ocfs2_find_inode_args args; + + args.fi_blkno = blkno; + args.fi_flags = 0; + args.fi_ino = ino_from_blkno(sb, blkno); + args.fi_sysfile_type = 0; + + return ilookup5(sb, blkno, ocfs2_find_actor, &args); +} struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, int sysfile_type) { @@ -961,6 +972,17 @@ void ocfs2_delete_inode(struct inode *inode) goto bail; } + /* + * Synchronize us against ocfs2_get_dentry. We take this in + * shared mode so that all nodes can still concurrently + * process deletes. + */ + status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0); + if (status < 0) { + mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status); + ocfs2_cleanup_delete_inode(inode, 0); + goto bail_unblock; + } /* Lock down the inode. This gives us an up to date view of * it's metadata (for verification), and allows us to * serialize delete_inode on multiple nodes. @@ -974,7 +996,7 @@ void ocfs2_delete_inode(struct inode *inode) if (status != -ENOENT) mlog_errno(status); ocfs2_cleanup_delete_inode(inode, 0); - goto bail_unblock; + goto bail_unlock_nfs_sync; } /* Query the cluster. This will be the final decision made @@ -1017,6 +1039,10 @@ void ocfs2_delete_inode(struct inode *inode) bail_unlock_inode: ocfs2_inode_unlock(inode, 1); brelse(di_bh); + +bail_unlock_nfs_sync: + ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); + bail_unblock: status = sigprocmask(SIG_SETMASK, &oldset, NULL); if (status < 0) |