summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2011-06-16 12:02:23 +0000
committerAlex Elder <aelder@sgi.com>2011-06-16 10:52:39 -0500
commita27a263bae072a499acc77b632238a6dacccf888 (patch)
treeb2728b2900c3926cba5833e171b56bc7371dbc07 /fs
parentc46a131c0c0f4c2457e6b1e430c578a5cb057334 (diff)
downloadop-kernel-dev-a27a263bae072a499acc77b632238a6dacccf888.zip
op-kernel-dev-a27a263bae072a499acc77b632238a6dacccf888.tar.gz
xfs: make log devices with write back caches work
There's no reason not to support cache flushing on external log devices. The only thing this really requires is flushing the data device first both in fsync and log commits. A side effect is that we also have to remove the barrier write test during mount, which has been superflous since the new FLUSH+FUA code anyway. Also use the chance to flush the RT subvolume write cache before the fsync commit, which is required for correct semantics. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c75
-rw-r--r--fs/xfs/xfs_log.c11
3 files changed, 41 insertions, 95 deletions
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4213ba..7f782af 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -131,19 +131,34 @@ xfs_file_fsync(
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
trace_xfs_file_fsync(ip);
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
xfs_iflags_clear(ip, XFS_ITRUNCATED);
xfs_ioend_wait(ip);
+ if (mp->m_flags & XFS_MOUNT_BARRIER) {
+ /*
+ * If we have an RT and/or log subvolume we need to make sure
+ * to flush the write cache the device used for file data
+ * first. This is to ensure newly written file data make
+ * it to disk before logging the new inode size in case of
+ * an extending write.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+ else if (mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
+ }
+
/*
* We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the
@@ -175,9 +190,9 @@ xfs_file_fsync(
* updates. The sync transaction will also force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, 0,
- XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
+ XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return -error;
@@ -209,28 +224,25 @@ xfs_file_fsync(
* force the log.
*/
if (xfs_ipincount(ip)) {
- error = _xfs_log_force_lsn(ip->i_mount,
+ error = _xfs_log_force_lsn(mp,
ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed);
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
}
- if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
- /*
- * If the log write didn't issue an ordered tag we need
- * to flush the disk cache for the data device now.
- */
- if (!log_flushed)
- xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
-
- /*
- * If this inode is on the RT dev we need to flush that
- * cache as well.
- */
- if (XFS_IS_REALTIME_INODE(ip))
- xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
- }
+ /*
+ * If we only have a single device, and the log force about was
+ * a no-op we might have to flush the data device cache here.
+ * This can only happen for fdatasync/O_DSYNC if we were overwriting
+ * an already allocated file and thus do not have any metadata to
+ * commit.
+ */
+ if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
+ mp->m_logdev_targp == mp->m_ddev_targp &&
+ !XFS_IS_REALTIME_INODE(ip) &&
+ !log_flushed)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
return -error;
}
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1e3a7ce..a1a881e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -627,68 +627,6 @@ xfs_blkdev_put(
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
-/*
- * Try to write out the superblock using barriers.
- */
-STATIC int
-xfs_barrier_test(
- xfs_mount_t *mp)
-{
- xfs_buf_t *sbp = xfs_getsb(mp, 0);
- int error;
-
- XFS_BUF_UNDONE(sbp);
- XFS_BUF_UNREAD(sbp);
- XFS_BUF_UNDELAYWRITE(sbp);
- XFS_BUF_WRITE(sbp);
- XFS_BUF_UNASYNC(sbp);
- XFS_BUF_ORDERED(sbp);
-
- xfsbdstrat(mp, sbp);
- error = xfs_buf_iowait(sbp);
-
- /*
- * Clear all the flags we set and possible error state in the
- * buffer. We only did the write to try out whether barriers
- * worked and shouldn't leave any traces in the superblock
- * buffer.
- */
- XFS_BUF_DONE(sbp);
- XFS_BUF_ERROR(sbp, 0);
- XFS_BUF_UNORDERED(sbp);
-
- xfs_buf_relse(sbp);
- return error;
-}
-
-STATIC void
-xfs_mountfs_check_barriers(xfs_mount_t *mp)
-{
- int error;
-
- if (mp->m_logdev_targp != mp->m_ddev_targp) {
- xfs_notice(mp,
- "Disabling barriers, not supported with external log device");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
- xfs_notice(mp,
- "Disabling barriers, underlying device is readonly");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- error = xfs_barrier_test(mp);
- if (error) {
- xfs_notice(mp,
- "Disabling barriers, trial barrier write failed");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-}
-
void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
@@ -1240,14 +1178,6 @@ xfs_fs_remount(
switch (token) {
case Opt_barrier:
mp->m_flags |= XFS_MOUNT_BARRIER;
-
- /*
- * Test if barriers are actually working if we can,
- * else delay this check until the filesystem is
- * marked writeable.
- */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY))
- xfs_mountfs_check_barriers(mp);
break;
case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1212,6 @@ xfs_fs_remount(
/* ro -> rw */
if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
mp->m_flags &= ~XFS_MOUNT_RDONLY;
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
/*
* If this is the first remount to writeable state we
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
if (error)
goto out_free_sb;
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
-
error = xfs_filestream_mount(mp);
if (error)
goto out_free_sb;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2119302..41d5b8f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log,
XFS_BUF_ASYNC(bp);
bp->b_flags |= XBF_LOG_BUFFER;
- if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+ if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
+ /*
+ * If we have an external log device, flush the data device
+ * before flushing the log to make sure all meta data
+ * written back from the AIL actually made it to disk
+ * before writing out the new log tail LSN in the log buffer.
+ */
+ if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
XFS_BUF_ORDERED(bp);
+ }
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
OpenPOWER on IntegriCloud