diff options
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 81 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 1 |
3 files changed, 64 insertions, 29 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index c9b863e..525260c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -706,12 +706,43 @@ __xfs_inode_clear_reclaim_tag( XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); } +/* + * Inodes in different states need to be treated differently, and the return + * value of xfs_iflush is not sufficient to get this right. The following table + * lists the inode states and the reclaim actions necessary for non-blocking + * reclaim: + * + * + * inode state iflush ret required action + * --------------- ---------- --------------- + * bad - reclaim + * shutdown EIO unpin and reclaim + * clean, unpinned 0 reclaim + * stale, unpinned 0 reclaim + * clean, pinned(*) 0 unpin and reclaim + * stale, pinned 0 unpin and reclaim + * dirty, async 0 block on flush lock, reclaim + * dirty, sync flush 0 block on flush lock, reclaim + * + * (*) dgc: I don't think the clean, pinned state is possible but it gets + * handled anyway given the order of checks implemented. + * + * Hence the order of actions after gaining the locks should be: + * bad => reclaim + * shutdown => unpin and reclaim + * pinned => unpin + * stale => reclaim + * clean => reclaim + * dirty => flush, wait and reclaim + */ STATIC int xfs_reclaim_inode( struct xfs_inode *ip, struct xfs_perag *pag, int sync_mode) { + int error; + /* * The radix tree lock here protects a thread in xfs_iget from racing * with us starting reclaim on the inode. Once we have the @@ -729,30 +760,42 @@ xfs_reclaim_inode( spin_unlock(&ip->i_flags_lock); write_unlock(&pag->pag_ici_lock); - /* - * If the inode is still dirty, then flush it out. If the inode - * is not in the AIL, then it will be OK to flush it delwri as - * long as xfs_iflush() does not keep any references to the inode. - * We leave that decision up to xfs_iflush() since it has the - * knowledge of whether it's OK to simply do a delwri flush of - * the inode or whether we need to wait until the inode is - * pulled from the AIL. - * We get the flush lock regardless, though, just to make sure - * we don't free it while it is being flushed. - */ xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); - /* - * In the case of a forced shutdown we rely on xfs_iflush() to - * wait for the inode to be unpinned before returning an error. - */ - if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { - /* synchronize with xfs_iflush_done */ - xfs_iflock(ip); - xfs_ifunlock(ip); + if (is_bad_inode(VFS_I(ip))) + goto reclaim; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_iunpin_wait(ip); + goto reclaim; + } + if (xfs_ipincount(ip)) + xfs_iunpin_wait(ip); + if (xfs_iflags_test(ip, XFS_ISTALE)) + goto reclaim; + if (xfs_inode_clean(ip)) + goto reclaim; + + /* Now we have an inode that needs flushing */ + error = xfs_iflush(ip, sync_mode); + if (!error) { + switch(sync_mode) { + case XFS_IFLUSH_DELWRI_ELSE_ASYNC: + case XFS_IFLUSH_DELWRI: + case XFS_IFLUSH_ASYNC: + case XFS_IFLUSH_DELWRI_ELSE_SYNC: + case XFS_IFLUSH_SYNC: + /* IO issued, synchronise with IO completion */ + xfs_iflock(ip); + break; + default: + ASSERT(0); + break; + } } +reclaim: + xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_ireclaim(ip); return 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d0d1b5a..8d0666d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2493,7 +2493,7 @@ __xfs_iunpin_wait( wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); } -static inline void +void xfs_iunpin_wait( xfs_inode_t *ip) { @@ -2849,15 +2849,6 @@ xfs_iflush( mp = ip->i_mount; /* - * If the inode isn't dirty, then just release the inode flush lock and - * do nothing. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - return 0; - } - - /* * We can't flush the inode until it is unpinned, so wait for it if we * are allowed to block. We know noone new can pin it, because we are * holding the inode lock shared and you need to hold it exclusively to diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ec1f28c..8b618ea 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -483,6 +483,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); +void xfs_iunpin_wait(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); void xfs_ichgtime(xfs_inode_t *, int); void xfs_lock_inodes(xfs_inode_t **, int, uint); |