diff options
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 14 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 169 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 30 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_buf.c | 19 |
8 files changed, 117 insertions, 128 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2f1e30d..c53cc03 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1157,12 +1157,7 @@ xfs_zero_remaining_bytes( XFS_BUF_READ(bp); XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); - if (XFS_FORCED_SHUTDOWN(mp)) { - error = -EIO; - break; - } - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); + error = xfs_buf_submit_wait(bp); if (error) { xfs_buf_ioerror_alert(bp, "xfs_zero_remaining_bytes(read)"); @@ -1175,12 +1170,7 @@ xfs_zero_remaining_bytes( XFS_BUF_UNREAD(bp); XFS_BUF_WRITE(bp); - if (XFS_FORCED_SHUTDOWN(mp)) { - error = -EIO; - break; - } - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); + error = xfs_buf_submit_wait(bp); if (error) { xfs_buf_ioerror_alert(bp, "xfs_zero_remaining_bytes(write)"); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 108eba7..d99ec83 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -623,10 +623,11 @@ _xfs_buf_read( bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); - xfs_buf_iorequest(bp); - if (flags & XBF_ASYNC) + if (flags & XBF_ASYNC) { + xfs_buf_submit(bp); return 0; - return xfs_buf_iowait(bp); + } + return xfs_buf_submit_wait(bp); } xfs_buf_t * @@ -708,12 +709,7 @@ xfs_buf_read_uncached( bp->b_flags |= XBF_READ; bp->b_ops = ops; - if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { - xfs_buf_relse(bp); - return NULL; - } - xfs_buf_iorequest(bp); - xfs_buf_iowait(bp); + xfs_buf_submit_wait(bp); return bp; } @@ -1028,12 +1024,8 @@ xfs_buf_ioend( (*(bp->b_iodone))(bp); else if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); - else { + else complete(&bp->b_iowait); - - /* release the !XBF_ASYNC ref now we are done. */ - xfs_buf_rele(bp); - } } static void @@ -1086,21 +1078,7 @@ xfs_bwrite( bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL | XBF_DONE); - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - - xfs_buf_ioerror(bp, -EIO); - xfs_buf_stale(bp); - - /* sync IO, xfs_buf_ioend is going to remove a ref here */ - xfs_buf_hold(bp); - xfs_buf_ioend(bp); - return -EIO; - } - - xfs_buf_iorequest(bp); - - error = xfs_buf_iowait(bp); + error = xfs_buf_submit_wait(bp); if (error) { xfs_force_shutdown(bp->b_target->bt_mount, SHUTDOWN_META_IO_ERROR); @@ -1209,7 +1187,7 @@ next_chunk: } else { /* * This is guaranteed not to be the last io reference count - * because the caller (xfs_buf_iorequest) holds a count itself. + * because the caller (xfs_buf_submit) holds a count itself. */ atomic_dec(&bp->b_io_remaining); xfs_buf_ioerror(bp, -EIO); @@ -1299,13 +1277,29 @@ _xfs_buf_ioapply( blk_finish_plug(&plug); } +/* + * Asynchronous IO submission path. This transfers the buffer lock ownership and + * the current reference to the IO. It is not safe to reference the buffer after + * a call to this function unless the caller holds an additional reference + * itself. + */ void -xfs_buf_iorequest( - xfs_buf_t *bp) +xfs_buf_submit( + struct xfs_buf *bp) { - trace_xfs_buf_iorequest(bp, _RET_IP_); + trace_xfs_buf_submit(bp, _RET_IP_); ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); + ASSERT(bp->b_flags & XBF_ASYNC); + + /* on shutdown we stale and complete the buffer immediately */ + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + xfs_buf_ioerror(bp, -EIO); + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); + xfs_buf_ioend(bp); + return; + } if (bp->b_flags & XBF_WRITE) xfs_buf_wait_unpin(bp); @@ -1314,25 +1308,14 @@ xfs_buf_iorequest( bp->b_io_error = 0; /* - * Take references to the buffer. For XBF_ASYNC buffers, holding a - * reference for as long as submission takes is all that is necessary - * here. The IO inherits the lock and hold count from the submitter, - * and these are release during IO completion processing. Taking a hold - * over submission ensures that the buffer is not freed until we have - * completed all processing, regardless of when IO errors occur or are - * reported. - * - * However, for synchronous IO, the IO does not inherit the submitters - * reference count, nor the buffer lock. Hence we need to take an extra - * reference to the buffer for the for the IO context so that we can - * guarantee the buffer is not freed until all IO completion processing - * is done. Otherwise the caller can drop their reference while the IO - * is still in progress and hence trigger a use-after-free situation. + * The caller's reference is released during I/O completion. + * This occurs some time after the last b_io_remaining reference is + * released, so after we drop our Io reference we have to have some + * other reference to ensure the buffer doesn't go away from underneath + * us. Take a direct reference to ensure we have safe access to the + * buffer until we are finished with it. */ xfs_buf_hold(bp); - if (!(bp->b_flags & XBF_ASYNC)) - xfs_buf_hold(bp); - /* * Set the count to 1 initially, this will stop an I/O completion @@ -1343,40 +1326,82 @@ xfs_buf_iorequest( _xfs_buf_ioapply(bp); /* - * If _xfs_buf_ioapply failed or we are doing synchronous IO that - * completes extremely quickly, we can get back here with only the IO - * reference we took above. If we drop it to zero, run completion - * processing synchronously so that we don't return to the caller with - * completion still pending. This avoids unnecessary context switches - * associated with the end_io workqueue. + * If _xfs_buf_ioapply failed, we can get back here with only the IO + * reference we took above. If we drop it to zero, run completion so + * that we don't return to the caller with completion still pending. */ if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { - if (bp->b_error || !(bp->b_flags & XBF_ASYNC)) + if (bp->b_error) xfs_buf_ioend(bp); else xfs_buf_ioend_async(bp); } xfs_buf_rele(bp); + /* Note: it is not safe to reference bp now we've dropped our ref */ } /* - * Waits for I/O to complete on the buffer supplied. It returns immediately if - * no I/O is pending or there is already a pending error on the buffer, in which - * case nothing will ever complete. It returns the I/O error code, if any, or - * 0 if there was no error. + * Synchronous buffer IO submission path, read or write. */ int -xfs_buf_iowait( - xfs_buf_t *bp) +xfs_buf_submit_wait( + struct xfs_buf *bp) { - trace_xfs_buf_iowait(bp, _RET_IP_); + int error; - if (!bp->b_error) - wait_for_completion(&bp->b_iowait); + trace_xfs_buf_submit_wait(bp, _RET_IP_); + + ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC))); + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + xfs_buf_ioerror(bp, -EIO); + xfs_buf_stale(bp); + bp->b_flags &= ~XBF_DONE; + return -EIO; + } + + if (bp->b_flags & XBF_WRITE) + xfs_buf_wait_unpin(bp); + + /* clear the internal error state to avoid spurious errors */ + bp->b_io_error = 0; + + /* + * For synchronous IO, the IO does not inherit the submitters reference + * count, nor the buffer lock. Hence we cannot release the reference we + * are about to take until we've waited for all IO completion to occur, + * including any xfs_buf_ioend_async() work that may be pending. + */ + xfs_buf_hold(bp); + + /* + * Set the count to 1 initially, this will stop an I/O completion + * callout which happens before we have started all the I/O from calling + * xfs_buf_ioend too early. + */ + atomic_set(&bp->b_io_remaining, 1); + _xfs_buf_ioapply(bp); + + /* + * make sure we run completion synchronously if it raced with us and is + * already complete. + */ + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + xfs_buf_ioend(bp); + + /* wait for completion before gathering the error from the buffer */ + trace_xfs_buf_iowait(bp, _RET_IP_); + wait_for_completion(&bp->b_iowait); trace_xfs_buf_iowait_done(bp, _RET_IP_); - return bp->b_error; + error = bp->b_error; + + /* + * all done now, we can release the hold that keeps the buffer + * referenced for the entire IO. + */ + xfs_buf_rele(bp); + return error; } xfs_caddr_t @@ -1782,15 +1807,7 @@ __xfs_buf_delwri_submit( else list_del_init(&bp->b_list); - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - - xfs_buf_ioerror(bp, -EIO); - xfs_buf_stale(bp); - xfs_buf_ioend(bp); - continue; - } - xfs_buf_iorequest(bp); + xfs_buf_submit(bp); } blk_finish_plug(&plug); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d8f57f6..0acfc30 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -290,8 +290,8 @@ extern int xfs_bwrite(struct xfs_buf *bp); extern void xfs_buf_ioend(struct xfs_buf *bp); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); -extern void xfs_buf_iorequest(xfs_buf_t *); -extern int xfs_buf_iowait(xfs_buf_t *); +extern void xfs_buf_submit(struct xfs_buf *bp); +extern int xfs_buf_submit_wait(struct xfs_buf *bp); extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); #define xfs_buf_zero(bp, off, len) \ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 4fd41b5..cbea909 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1081,7 +1081,7 @@ xfs_buf_iodone_callbacks( * a way to shut the filesystem down if the writes keep failing. * * In practice we'll shut the filesystem down soon as non-transient - * erorrs tend to affect the whole device and a failing log write + * errors tend to affect the whole device and a failing log write * will make us give up. But we really ought to do better here. */ if (XFS_BUF_ISASYNC(bp)) { @@ -1094,7 +1094,7 @@ xfs_buf_iodone_callbacks( if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE | XBF_WRITE_FAIL; - xfs_buf_iorequest(bp); + xfs_buf_submit(bp); } else { xfs_buf_relse(bp); } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3567396..fe88ef6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1688,7 +1688,7 @@ xlog_bdstrat( return 0; } - xfs_buf_iorequest(bp); + xfs_buf_submit(bp); return 0; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4ba19bf..980e296 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -193,12 +193,8 @@ xlog_bread_noalign( bp->b_io_length = nbblks; bp->b_error = 0; - if (XFS_FORCED_SHUTDOWN(log->l_mp)) - return -EIO; - - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); - if (error) + error = xfs_buf_submit_wait(bp); + if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) xfs_buf_ioerror_alert(bp, __func__); return error; } @@ -378,9 +374,11 @@ xlog_recover_iodone( * We're not going to bother about retrying * this during recovery. One strike! */ - xfs_buf_ioerror_alert(bp, __func__); - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); + if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + xfs_buf_ioerror_alert(bp, __func__); + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); + } } bp->b_iodone = NULL; xfs_buf_ioend(bp); @@ -4427,16 +4425,12 @@ xlog_do_recover( XFS_BUF_UNASYNC(bp); bp->b_ops = &xfs_sb_buf_ops; - if (XFS_FORCED_SHUTDOWN(log->l_mp)) { - xfs_buf_relse(bp); - return -EIO; - } - - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); + error = xfs_buf_submit_wait(bp); if (error) { - xfs_buf_ioerror_alert(bp, __func__); - ASSERT(0); + if (!XFS_FORCED_SHUTDOWN(log->l_mp)) { + xfs_buf_ioerror_alert(bp, __func__); + ASSERT(0); + } xfs_buf_relse(bp); return error; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 152f827..51372e3 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -349,7 +349,8 @@ DEFINE_BUF_EVENT(xfs_buf_free); DEFINE_BUF_EVENT(xfs_buf_hold); DEFINE_BUF_EVENT(xfs_buf_rele); DEFINE_BUF_EVENT(xfs_buf_iodone); -DEFINE_BUF_EVENT(xfs_buf_iorequest); +DEFINE_BUF_EVENT(xfs_buf_submit); +DEFINE_BUF_EVENT(xfs_buf_submit_wait); DEFINE_BUF_EVENT(xfs_buf_bawrite); DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index db4be5b..e2b2216 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -318,23 +318,10 @@ xfs_trans_read_buf_map( XFS_BUF_READ(bp); bp->b_ops = ops; - /* - * XXX(hch): clean up the error handling here to be less - * of a mess.. - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - bp->b_flags &= ~(XBF_READ | XBF_DONE); - xfs_buf_ioerror(bp, -EIO); - xfs_buf_stale(bp); - xfs_buf_relse(bp); - return -EIO; - } - - xfs_buf_iorequest(bp); - error = xfs_buf_iowait(bp); + error = xfs_buf_submit_wait(bp); if (error) { - xfs_buf_ioerror_alert(bp, __func__); + if (!XFS_FORCED_SHUTDOWN(mp)) + xfs_buf_ioerror_alert(bp, __func__); xfs_buf_relse(bp); /* * We can gracefully recover from most read |