xfs: handle DIO overwrite EOF update completion correctly

Currently a DIO overwrite that extends the EOF (e.g sub-block IO or write into allocated blocks beyond EOF) requires a transaction for the EOF update. Thi is done in IO completion context, but we aren't explicitly handling this situation properly and so it can run in interrupt context. Ensure that we defer IO that spans EOF correctly to the DIO completion workqueue, and now that we have an ioend in IO completion we can use the common ioend completion path to do all the work. Note: we do not preallocate the append transaction as we can have multiple mapping and allocation calls per direct IO. hence preallocating can still leave us with nested transactions by attempting to map and allocate more blocks after we've preallocated an append transaction. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
author: Dave Chinner <dchinner@redhat.com> 2015-04-16 21:59:34 +1000
committer: Dave Chinner <david@fromorbit.com> 2015-04-16 21:59:34 +1000
commit: 6dfa1b67e3b3a9bf536e2fb9ed99001c219822a5 (patch)
tree: 142857ff25d5d43393bdbc9c1ff18bffba4d7257
parent: d5cc2e3f968ff60f247fdef15b04fac788ef46d2 (diff)
download: op-kernel-dev-6dfa1b67e3b3a9bf536e2fb9ed99001c219822a5.zip
op-kernel-dev-6dfa1b67e3b3a9bf536e2fb9ed99001c219822a5.tar.gz
2 files changed, 31 insertions, 31 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 60d6466..a59443d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1293,7 +1293,7 @@ xfs_map_direct(
 					   imap);
 	}
 
-	if (ioend->io_type == XFS_IO_UNWRITTEN)
+	if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend))
 		set_buffer_defer_completion(bh_result);
 }
 
@@ -1535,8 +1535,10 @@ xfs_end_io_direct_write(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_ioend	*ioend = private;
 
+	trace_xfs_gbmap_direct_endio(ip, offset, size, ioend->io_type, NULL);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
-		goto out_destroy_ioend;
+		goto out_end_io;
 
 	/*
 	 * dio completion end_io functions are only called on writes if more
@@ -1557,40 +1559,37 @@ xfs_end_io_direct_write(
 	ioend->io_offset = offset;
 
 	/*
-	 * While the generic direct I/O code updates the inode size, it does
-	 * so only after the end_io handler is called, which means our
-	 * end_io handler thinks the on-disk size is outside the in-core
-	 * size.  To prevent this just update it a little bit earlier here.
+	 * The ioend tells us whether we are doing unwritten extent conversion
+	 * or an append transaction that updates the on-disk file size. These
+	 * cases are the only cases where we should *potentially* be needing
+	 * to update the VFS inode size. When the ioend indicates this, we
+	 * are *guaranteed* to be running in non-interrupt context.
+	 *
+	 * We need to update the in-core inode size here so that we don't end up
+	 * with the on-disk inode size being outside the in-core inode size.
+	 * While we can do this in the process context after the IO has
+	 * completed, this does not work for AIO and hence we always update
+	 * the in-core inode size here if necessary.
 	 */
-	if (offset + size > i_size_read(inode))
-		i_size_write(inode, offset + size);
+	if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) {
+		if (offset + size > i_size_read(inode))
+			i_size_write(inode, offset + size);
+	} else
+		ASSERT(offset + size <= i_size_read(inode));
 
 	/*
-	 * For direct I/O we do not know if we need to allocate blocks or not,
-	 * so we can't preallocate an append transaction, as that results in
-	 * nested reservations and log space deadlocks. Hence allocate the
-	 * transaction here. While this is sub-optimal and can block IO
-	 * completion for some time, we're stuck with doing it this way until
-	 * we can pass the ioend to the direct IO allocation callbacks and
-	 * avoid nesting that way.
+	 * If we are doing an append IO that needs to update the EOF on disk,
+	 * do the transaction reserve now so we can use common end io
+	 * processing. Stashing the error (if there is one) in the ioend will
+	 * result in the ioend processing passing on the error if it is
+	 * possible as we can't return it from here.
 	 */
-	if (ioend->io_type == XFS_IO_UNWRITTEN) {
-		xfs_iomap_write_unwritten(ip, offset, size);
-	} else if (offset + size > ip->i_d.di_size) {
-		struct xfs_trans	*tp;
-		int			error;
-
-		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-		if (error) {
-			xfs_trans_cancel(tp, 0);
-			goto out_destroy_ioend;
-		}
+	if (ioend->io_type == XFS_IO_OVERWRITE && xfs_ioend_is_append(ioend))
+		ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
 
-		xfs_setfilesize(ip, tp, offset, size);
-	}
-out_destroy_ioend:
-	xfs_destroy_ioend(ioend);
+out_end_io:
+	xfs_end_io(&ioend->io_work);
+	return;
 }
 
 STATIC ssize_t
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2de8556..0ae50e9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1220,6 +1220,7 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
 DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
 DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
author	Dave Chinner <dchinner@redhat.com>	2015-04-16 21:59:34 +1000
committer	Dave Chinner <david@fromorbit.com>	2015-04-16 21:59:34 +1000
commit	6dfa1b67e3b3a9bf536e2fb9ed99001c219822a5 (patch)
tree	142857ff25d5d43393bdbc9c1ff18bffba4d7257
parent	d5cc2e3f968ff60f247fdef15b04fac788ef46d2 (diff)
download	op-kernel-dev-6dfa1b67e3b3a9bf536e2fb9ed99001c219822a5.zip op-kernel-dev-6dfa1b67e3b3a9bf536e2fb9ed99001c219822a5.tar.gz