From 9bdd9bd69b826875531bb1b2efb6aeb8d70e6f72 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 18 May 2016 10:56:41 +1000 Subject: xfs: buffer ->bi_end_io function requires irq-safe lock Reports have surfaced of a lockdep splat complaining about an irq-safe -> irq-unsafe locking order in the xfs_buf_bio_end_io() bio completion handler. This only occurs when I/O errors are present because bp->b_lock is only acquired in this context to protect setting an error on the buffer. The problem is that this lock can be acquired with the (request_queue) q->queue_lock held. See scsi_end_request() or ata_qc_schedule_eh(), for example. Replace the locked test/set of b_io_error with a cmpxchg() call. This eliminates the need for the lock and thus the lock ordering problem goes away. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9a2191b..e71cfbd 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1100,22 +1100,18 @@ xfs_bwrite( return error; } -STATIC void +static void xfs_buf_bio_end_io( struct bio *bio) { - xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; + struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private; /* * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. 
*/ - if (bio->bi_error) { - spin_lock(&bp->b_lock); - if (!bp->b_io_error) - bp->b_io_error = bio->bi_error; - spin_unlock(&bp->b_lock); - } + if (bio->bi_error) + cmpxchg(&bp->b_io_error, 0, bio->bi_error); if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); -- cgit v1.1 From 192852be8b5fb14268c2133fe9ce5312e4745963 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 18 May 2016 10:58:51 +1000 Subject: xfs: configurable error behavior via sysfs We need to be able to change the way XFS behaves in error conditions depending on the type of underlying storage. This is necessary for handling non-traditional block devices with extended error cases, such as thin provisioned devices that can return ENOSPC as an IO error. Introduce the basic sysfs infrastructure needed to define and configure error behaviours. This is done to be generic enough to extend to configuring behaviour in other error conditions, such as ENOMEM, which also has different desired behaviours according to machine configuration.
Signed-off-by: Dave Chinner Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_mount.c | 10 +++++++++- fs/xfs/xfs_mount.h | 20 ++++++++++++++++++++ fs/xfs/xfs_sysfs.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_sysfs.h | 3 +++ 4 files changed, 84 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 536a0ee..677c3e0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -690,10 +690,15 @@ xfs_mountfs( if (error) goto out_remove_sysfs; - error = xfs_uuid_mount(mp); + error = xfs_error_sysfs_init(mp); if (error) goto out_del_stats; + + error = xfs_uuid_mount(mp); + if (error) + goto out_remove_error_sysfs; + /* * Set the minimum read and write sizes */ @@ -968,6 +973,8 @@ xfs_mountfs( xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); + out_remove_error_sysfs: + xfs_error_sysfs_del(mp); out_del_stats: xfs_sysfs_del(&mp->m_stats.xs_kobj); out_remove_sysfs: @@ -1056,6 +1063,7 @@ xfs_unmountfs( #endif xfs_free_perag(mp); + xfs_error_sysfs_del(mp); xfs_sysfs_del(&mp->m_stats.xs_kobj); xfs_sysfs_del(&mp->m_kobj); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index bac6b34..d639795 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -37,6 +37,24 @@ enum { XFS_LOWSP_MAX, }; +/* + * Error Configuration + * + * Error classes define the subsystem the configuration belongs to. + * Error numbers define the errors that are configurable. 
+ */ +enum { + XFS_ERR_CLASS_MAX, +}; +enum { + XFS_ERR_ERRNO_MAX, +}; + +struct xfs_error_cfg { + struct xfs_kobj kobj; + int max_retries; +}; + typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ @@ -127,6 +145,8 @@ typedef struct xfs_mount { int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ struct xfs_kobj m_kobj; + struct xfs_kobj m_error_kobj; + struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xstats m_stats; /* per-fs stats */ struct workqueue_struct *m_buf_workqueue; diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 6ced4f1..74e3940 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -17,10 +17,11 @@ */ #include "xfs.h" -#include "xfs_sysfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" +#include "xfs_sysfs.h" #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_stats.h" @@ -362,3 +363,53 @@ struct kobj_type xfs_log_ktype = { .sysfs_ops = &xfs_sysfs_ops, .default_attrs = xfs_log_attrs, }; + +/* + * Metadata IO error configuration + * + * The sysfs structure here is: + * ...xfs//error/// + * + * where allows us to discriminate between data IO and metadata IO, + * and any other future type of IO (e.g. special inode or directory error + * handling) we care to support. 
+ */ +static struct attribute *xfs_error_attrs[] = { + NULL, +}; + +static inline struct xfs_error_cfg * +to_error_cfg(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + return container_of(kobj, struct xfs_error_cfg, kobj); +} + +struct kobj_type xfs_error_cfg_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_error_attrs, +}; + +struct kobj_type xfs_error_ktype = { + .release = xfs_sysfs_release, +}; + +int +xfs_error_sysfs_init( + struct xfs_mount *mp) +{ + int error; + + /* .../xfs//error/ */ + error = xfs_sysfs_init(&mp->m_error_kobj, &xfs_error_ktype, + &mp->m_kobj, "error"); + return error; +} + +void +xfs_error_sysfs_del( + struct xfs_mount *mp) +{ + xfs_sysfs_del(&mp->m_error_kobj); +} diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index be692e5..d046371 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -58,4 +58,7 @@ xfs_sysfs_del( wait_for_completion(&kobj->complete); } +int xfs_error_sysfs_init(struct xfs_mount *mp); +void xfs_error_sysfs_del(struct xfs_mount *mp); + #endif /* __XFS_SYSFS_H__ */ -- cgit v1.1 From ffd40ef697dfd3e06f44b1bb5fea93079de8c77d Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 18 May 2016 11:01:00 +1000 Subject: xfs: introduce metadata IO error class Now we have the basic infrastructure, add the first error class so we can build up the infrastructure in a meaningful way. Add the metadata async write IO error class and sysfs entry, and introduce a default configuration that matches the existing "retry forever" behavior for async write metadata buffers. 
Signed-off-by: Dave Chinner Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_mount.h | 3 +++ fs/xfs/xfs_sysfs.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d639795..352a5c8 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -44,9 +44,11 @@ enum { * Error numbers define the errors that are configurable. */ enum { + XFS_ERR_METADATA, XFS_ERR_CLASS_MAX, }; enum { + XFS_ERR_DEFAULT, XFS_ERR_ERRNO_MAX, }; @@ -146,6 +148,7 @@ typedef struct xfs_mount { /* low free space thresholds */ struct xfs_kobj m_kobj; struct xfs_kobj m_error_kobj; + struct xfs_kobj m_error_meta_kobj; struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xstats m_stats; /* per-fs stats */ diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 74e3940..07c9599 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -399,11 +399,34 @@ int xfs_error_sysfs_init( struct xfs_mount *mp) { + struct xfs_error_cfg *cfg; int error; /* .../xfs//error/ */ error = xfs_sysfs_init(&mp->m_error_kobj, &xfs_error_ktype, &mp->m_kobj, "error"); + if (error) + return error; + + /* .../xfs//error/metadata/ */ + error = xfs_sysfs_init(&mp->m_error_meta_kobj, &xfs_error_ktype, + &mp->m_error_kobj, "metadata"); + if (error) + goto out_error; + + cfg = &mp->m_error_cfg[XFS_ERR_METADATA][XFS_ERR_DEFAULT]; + error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype, + &mp->m_error_meta_kobj, "default"); + if (error) + goto out_error_meta; + cfg->max_retries = -1; + + return 0; + +out_error_meta: + xfs_sysfs_del(&mp->m_error_meta_kobj); +out_error: + xfs_sysfs_del(&mp->m_error_kobj); return error; } @@ -411,5 +434,16 @@ void xfs_error_sysfs_del( struct xfs_mount *mp) { + struct xfs_error_cfg *cfg; + int i, j; + + for (i = 0; i < XFS_ERR_CLASS_MAX; i++) { + for (j = 0; j < XFS_ERR_ERRNO_MAX; j++) { + cfg = &mp->m_error_cfg[i][j]; + 
+ xfs_sysfs_del(&cfg->kobj); + } + } + xfs_sysfs_del(&mp->m_error_meta_kobj); xfs_sysfs_del(&mp->m_error_kobj); } -- cgit v1.1 From df3093907ccc718459c54c99da29dd774af41186 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 18 May 2016 11:05:33 +1000 Subject: xfs: add configurable error support to metadata buffers With the error configuration handle for async metadata write errors in place, we can now add initial support to the IO error processing in xfs_buf_iodone_error(). Add an infrastructure function to look up the configuration handle, and rearrange the error handling to prepare the way for different error handling configurations to be used. Signed-off-by: Dave Chinner Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.h | 1 + fs/xfs/xfs_buf_item.c | 112 ++++++++++++++++++++++++++++++-------------------- fs/xfs/xfs_mount.h | 3 ++ fs/xfs/xfs_sysfs.c | 17 ++++++++ fs/xfs/xfs_trace.h | 1 - 5 files changed, 88 insertions(+), 46 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 4eb89bd..adef116 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -183,6 +183,7 @@ typedef struct xfs_buf { unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ int b_error; /* error code on I/O */ + int b_last_error; /* previous async I/O error */ const struct xfs_buf_ops *b_ops; #ifdef XFS_BUF_LOCK_TRACKING diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 99e91a0..b8d0cd4 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1042,35 +1042,22 @@ xfs_buf_do_callbacks( } } -/* - * This is the iodone() function for buffers which have had callbacks - * attached to them by xfs_buf_attach_iodone(). It should remove each - * log item from the buffer's list and call the callback of each in turn. - * When done, the buffer's fsprivate field is set to NULL and the buffer - * is unlocked with a call to iodone().
- */ -void -xfs_buf_iodone_callbacks( +static bool +xfs_buf_iodone_callback_error( struct xfs_buf *bp) { struct xfs_log_item *lip = bp->b_fspriv; struct xfs_mount *mp = lip->li_mountp; static ulong lasttime; static xfs_buftarg_t *lasttarg; - - if (likely(!bp->b_error)) - goto do_callbacks; + struct xfs_error_cfg *cfg; /* * If we've already decided to shutdown the filesystem because of * I/O errors, there's no point in giving this a retry. */ - if (XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_stale(bp); - bp->b_flags |= XBF_DONE; - trace_xfs_buf_item_iodone(bp, _RET_IP_); - goto do_callbacks; - } + if (XFS_FORCED_SHUTDOWN(mp)) + goto out_stale; if (bp->b_target != lasttarg || time_after(jiffies, (lasttime + 5*HZ))) { @@ -1079,45 +1066,80 @@ xfs_buf_iodone_callbacks( } lasttarg = bp->b_target; + /* synchronous writes will have callers process the error */ + if (!(bp->b_flags & XBF_ASYNC)) + goto out_stale; + + trace_xfs_buf_item_iodone_async(bp, _RET_IP_); + ASSERT(bp->b_iodone != NULL); + /* * If the write was asynchronous then no one will be looking for the - * error. Clear the error state and write the buffer out again. - * - * XXX: This helps against transient write errors, but we need to find - * a way to shut the filesystem down if the writes keep failing. - * - * In practice we'll shut the filesystem down soon as non-transient - * errors tend to affect the whole device and a failing log write - * will make us give up. But we really ought to do better here. + * error. If this is the first failure of this type, clear the error + * state and write the buffer out again. This means we always retry an + * async write failure at least once, but we also need to set the buffer + * up to behave correctly now for repeated failures. 
*/ - if (bp->b_flags & XBF_ASYNC) { - ASSERT(bp->b_iodone != NULL); - - trace_xfs_buf_item_iodone_async(bp, _RET_IP_); + if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL)) || + bp->b_last_error != bp->b_error) { + bp->b_flags |= (XBF_WRITE | XBF_ASYNC | + XBF_DONE | XBF_WRITE_FAIL); + bp->b_last_error = bp->b_error; + xfs_buf_ioerror(bp, 0); + xfs_buf_submit(bp); + return true; + } - xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ + /* + * Repeated failure on an async write. Take action according to the + * error configuration we have been set up to use. + */ + cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); + if (!cfg->max_retries) + goto permanent_error; - if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { - bp->b_flags |= XBF_WRITE | XBF_ASYNC | - XBF_DONE | XBF_WRITE_FAIL; - xfs_buf_submit(bp); - } else { - xfs_buf_relse(bp); - } - - return; - } + /* still a transient error, higher layers will retry */ + xfs_buf_ioerror(bp, 0); + xfs_buf_relse(bp); + return true; /* - * If the write of the buffer was synchronous, we want to make - * sure to return the error to the caller of xfs_bwrite(). + * Permanent error - we need to trigger a shutdown if we haven't already + * to indicate that inconsistency will result from this action. */ +permanent_error: + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); +out_stale: xfs_buf_stale(bp); bp->b_flags |= XBF_DONE; - trace_xfs_buf_error_relse(bp, _RET_IP_); + return false; +} + +/* + * This is the iodone() function for buffers which have had callbacks attached + * to them by xfs_buf_attach_iodone(). We need to iterate the items on the + * callback list, mark the buffer as having no more callbacks and then push the + * buffer through IO completion processing. + */ +void +xfs_buf_iodone_callbacks( + struct xfs_buf *bp) +{ + /* + * If there is an error, process it. Some errors require us + * to run callbacks after failure processing is done so we + * detect that and take appropriate action. 
+ */ + if (bp->b_error && xfs_buf_iodone_callback_error(bp)) + return; + + /* + * Successful IO or permanent error. Either way, we can clear the + * retry state here in preparation for the next error that may occur. + */ + bp->b_last_error = 0; -do_callbacks: xfs_buf_do_callbacks(bp); bp->b_fspriv = NULL; bp->b_iodone = NULL; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 352a5c8..0c5a976 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -387,4 +387,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *); int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, xfs_off_t count_fsb); +struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, + int error_class, int error); + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 07c9599..1cb5a85 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -447,3 +447,20 @@ xfs_error_sysfs_del( xfs_sysfs_del(&mp->m_error_meta_kobj); xfs_sysfs_del(&mp->m_error_kobj); } + +struct xfs_error_cfg * +xfs_error_get_cfg( + struct xfs_mount *mp, + int error_class, + int error) +{ + struct xfs_error_cfg *cfg; + + switch (error) { + default: + cfg = &mp->m_error_cfg[error_class][XFS_ERR_DEFAULT]; + break; + } + + return cfg; +} diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index c8d5842..a133dd4 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -364,7 +364,6 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_bdstrat_shut); DEFINE_BUF_EVENT(xfs_buf_item_relse); -DEFINE_BUF_EVENT(xfs_buf_item_iodone); DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); DEFINE_BUF_EVENT(xfs_buf_error_relse); DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); -- cgit v1.1 From ef6a50fbb1bba7951aa23adcfb40e99ca72dc51c Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 18 May 2016 11:06:44 +1000 Subject: xfs: introduce table-based init for error behaviors Before we start expanding the number of error classes and 
errors we can configure behaviour for, we need a simple and clear way to define the default behaviour that we initialized each mount with. Introduce a table based method for keeping the initial configuration in, and apply that to the existing initialization code. Signed-off-by: Dave Chinner Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_sysfs.c | 72 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 12 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 1cb5a85..71046d9 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -395,11 +395,67 @@ struct kobj_type xfs_error_ktype = { .release = xfs_sysfs_release, }; +/* + * Error initialization tables. These need to be ordered in the same + * order as the enums used to index the array. All class init tables need to + * define a "default" behaviour as the first entry, all other entries can be + * empty. + */ +struct xfs_error_init { + char *name; + int max_retries; +}; + +static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = { + { .name = "default", + .max_retries = -1, + }, +}; + +static int +xfs_error_sysfs_init_class( + struct xfs_mount *mp, + int class, + const char *parent_name, + struct xfs_kobj *parent_kobj, + const struct xfs_error_init init[]) +{ + struct xfs_error_cfg *cfg; + int error; + int i; + + ASSERT(class < XFS_ERR_CLASS_MAX); + + error = xfs_sysfs_init(parent_kobj, &xfs_error_ktype, + &mp->m_error_kobj, parent_name); + if (error) + return error; + + for (i = 0; i < XFS_ERR_ERRNO_MAX; i++) { + cfg = &mp->m_error_cfg[class][i]; + error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype, + parent_kobj, init[i].name); + if (error) + goto out_error; + + cfg->max_retries = init[i].max_retries; + } + return 0; + +out_error: + /* unwind the entries that succeeded */ + for (i--; i >= 0; i--) { + cfg = &mp->m_error_cfg[class][i]; + 
xfs_sysfs_del(&cfg->kobj); + } + xfs_sysfs_del(parent_kobj); + return error; +} + int xfs_error_sysfs_init( struct xfs_mount *mp) { - struct xfs_error_cfg *cfg; int error; /* .../xfs//error/ */ @@ -409,22 +465,14 @@ xfs_error_sysfs_init( return error; /* .../xfs//error/metadata/ */ - error = xfs_sysfs_init(&mp->m_error_meta_kobj, &xfs_error_ktype, - &mp->m_error_kobj, "metadata"); + error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA, + "metadata", &mp->m_error_meta_kobj, + xfs_error_meta_init); if (error) goto out_error; - cfg = &mp->m_error_cfg[XFS_ERR_METADATA][XFS_ERR_DEFAULT]; - error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype, - &mp->m_error_meta_kobj, "default"); - if (error) - goto out_error_meta; - cfg->max_retries = -1; - return 0; -out_error_meta: - xfs_sysfs_del(&mp->m_error_meta_kobj); out_error: xfs_sysfs_del(&mp->m_error_kobj); return error; -- cgit v1.1 From a5ea70d25d76950e11690110b526374307d05d81 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 18 May 2016 11:08:15 +1000 Subject: xfs: add configuration of error failure speed On reception of an error, we can fail immediately, perform some bound amount of retries or retry indefinitely. The current behaviour we have is to retry forever. However, we'd like the ability to choose how long the filesystem should try after an error, it can either fail immediately, retry a few times, or retry forever. This is implemented by using max_retries sysfs attribute, to hold the amount of times we allow the filesystem to retry after an error. Being -1 a special case where the filesystem will retry indefinitely. Add both a maximum retry count and a retry timeout so that we can bound by time and/or physical IO attempts. Finally, plumb these into xfs_buf_iodone error processing so that the error behaviour follows the selected configuration. 
Signed-off-by: Dave Chinner Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.h | 21 ++++++++++++- fs/xfs/xfs_buf_item.c | 13 +++++++-- fs/xfs/xfs_mount.h | 3 ++ fs/xfs/xfs_sysfs.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 111 insertions(+), 7 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index adef116..8bfb974 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -183,7 +183,26 @@ typedef struct xfs_buf { unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ int b_error; /* error code on I/O */ - int b_last_error; /* previous async I/O error */ + + /* + * async write failure retry count. Initialised to zero on the first + * failure, then when it exceeds the maximum configured without a + * success the write is considered to be failed permanently and the + * iodone handler will take appropriate action. + * + * For retry timeouts, we record the jiffie of the first failure. This + * means that we can change the retry timeout for buffers already under + * I/O and thus avoid getting stuck in a retry loop with a long timeout. + * + * last_error is used to ensure that we are getting repeated errors, not + * different errors. e.g. a block device might change ENOSPC to EIO when + * a failure timeout occurs, so we want to re-initialise the error + * retry behaviour appropriately when that happens. 
+ */ + int b_retries; + unsigned long b_first_retry_time; /* in jiffies */ + int b_last_error; + const struct xfs_buf_ops *b_ops; #ifdef XFS_BUF_LOCK_TRACKING diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index b8d0cd4..0d95c59 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1085,6 +1085,9 @@ xfs_buf_iodone_callback_error( bp->b_flags |= (XBF_WRITE | XBF_ASYNC | XBF_DONE | XBF_WRITE_FAIL); bp->b_last_error = bp->b_error; + bp->b_retries = 0; + bp->b_first_retry_time = jiffies; + xfs_buf_ioerror(bp, 0); xfs_buf_submit(bp); return true; @@ -1095,8 +1098,13 @@ xfs_buf_iodone_callback_error( * error configuration we have been set up to use. */ cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); - if (!cfg->max_retries) - goto permanent_error; + + if (cfg->max_retries != XFS_ERR_RETRY_FOREVER && + ++bp->b_retries > cfg->max_retries) + goto permanent_error; + if (cfg->retry_timeout && + time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) + goto permanent_error; /* still a transient error, higher layers will retry */ xfs_buf_ioerror(bp, 0); @@ -1139,6 +1147,7 @@ xfs_buf_iodone_callbacks( * retry state here in preparation for the next error that may occur. */ bp->b_last_error = 0; + bp->b_retries = 0; xfs_buf_do_callbacks(bp); bp->b_fspriv = NULL; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0c5a976..2fafa94 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -52,9 +52,12 @@ enum { XFS_ERR_ERRNO_MAX, }; +#define XFS_ERR_RETRY_FOREVER -1 + struct xfs_error_cfg { struct xfs_kobj kobj; int max_retries; + unsigned long retry_timeout; /* in jiffies, 0 = no timeout */ }; typedef struct xfs_mount { diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 71046d9..918d144 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -374,10 +374,6 @@ struct kobj_type xfs_log_ktype = { * and any other future type of IO (e.g. special inode or directory error * handling) we care to support. 
*/ -static struct attribute *xfs_error_attrs[] = { - NULL, -}; - static inline struct xfs_error_cfg * to_error_cfg(struct kobject *kobject) { @@ -385,6 +381,79 @@ to_error_cfg(struct kobject *kobject) return container_of(kobj, struct xfs_error_cfg, kobj); } +static ssize_t +max_retries_show( + struct kobject *kobject, + char *buf) +{ + struct xfs_error_cfg *cfg = to_error_cfg(kobject); + + return snprintf(buf, PAGE_SIZE, "%d\n", cfg->max_retries); +} + +static ssize_t +max_retries_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + struct xfs_error_cfg *cfg = to_error_cfg(kobject); + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val < -1) + return -EINVAL; + + cfg->max_retries = val; + return count; +} +XFS_SYSFS_ATTR_RW(max_retries); + +static ssize_t +retry_timeout_seconds_show( + struct kobject *kobject, + char *buf) +{ + struct xfs_error_cfg *cfg = to_error_cfg(kobject); + + return snprintf(buf, PAGE_SIZE, "%ld\n", + jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC); +} + +static ssize_t +retry_timeout_seconds_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + struct xfs_error_cfg *cfg = to_error_cfg(kobject); + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + /* 1 day timeout maximum */ + if (val < 0 || val > 86400) + return -EINVAL; + + cfg->retry_timeout = msecs_to_jiffies(val * MSEC_PER_SEC); + return count; +} +XFS_SYSFS_ATTR_RW(retry_timeout_seconds); + +static struct attribute *xfs_error_attrs[] = { + ATTR_LIST(max_retries), + ATTR_LIST(retry_timeout_seconds), + NULL, +}; + + struct kobj_type xfs_error_cfg_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, @@ -404,11 +473,13 @@ struct kobj_type xfs_error_ktype = { struct xfs_error_init { char *name; int max_retries; + int retry_timeout; /* in seconds */ }; static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = { { .name = "default", 
.max_retries = -1, + .retry_timeout = 0, }, }; @@ -439,6 +510,8 @@ xfs_error_sysfs_init_class( goto out_error; cfg->max_retries = init[i].max_retries; + cfg->retry_timeout = msecs_to_jiffies( + init[i].retry_timeout * MSEC_PER_SEC); } return 0; -- cgit v1.1 From e0a431b3a3cc3d0a4c38ccfca8c7320fde40efb6 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 18 May 2016 11:09:28 +1000 Subject: xfs: add configuration handlers for specific errors Now that most of the infrastructure is in place, we can start adding support for configuring specific errors such as ENODEV, ENOSPC, EIO, etc. Add these error configurations and configure them all to have appropriate behaviours. That is, all will be configured to retry forever by default, except for ENODEV, which is an unrecoverable error, so it will be configured to not retry on error. Signed-off-by: Dave Chinner Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_mount.h | 3 +++ fs/xfs/xfs_sysfs.c | 22 +++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 2fafa94..72ec3e3 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -49,6 +49,9 @@ enum { }; enum { XFS_ERR_DEFAULT, + XFS_ERR_EIO, + XFS_ERR_ENOSPC, + XFS_ERR_ENODEV, XFS_ERR_ERRNO_MAX, }; diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 918d144..084a606 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -478,9 +478,20 @@ struct xfs_error_init { static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = { { .name = "default", - .max_retries = -1, + .max_retries = XFS_ERR_RETRY_FOREVER, .retry_timeout = 0, }, + { .name = "EIO", + .max_retries = XFS_ERR_RETRY_FOREVER, + .retry_timeout = 0, + }, + { .name = "ENOSPC", + .max_retries = XFS_ERR_RETRY_FOREVER, + .retry_timeout = 0, + }, + { .name = "ENODEV", + .max_retries = 0, + }, }; static int @@ -578,6 +589,15 @@ xfs_error_get_cfg( struct
xfs_error_cfg *cfg; switch (error) { + case EIO: + cfg = &mp->m_error_cfg[error_class][XFS_ERR_EIO]; + break; + case ENOSPC: + cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENOSPC]; + break; + case ENODEV: + cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENODEV]; + break; default: cfg = &mp->m_error_cfg[error_class][XFS_ERR_DEFAULT]; break; -- cgit v1.1 From e6b3bb78962e65c4ad125598755cfbf2a8779e86 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 18 May 2016 11:11:27 +1000 Subject: xfs: add "fail at unmount" error handling configuration If we take "retry forever" literally on metadata IO errors, we can hang at unmount, once it retries those writes forever. This is the default behavior, unfortunately. Add an error configuration option for this behavior and default it to "fail" so that an unmount will trigger actual errors, a shutdown and allow the unmount to succeed. It will be noisy, though, as it will log the errors and shutdown that occur. To fix this, we need to mark the filesystem as being in the process of unmounting. Do this with a mount flag that is added at the appropriate time (i.e. before the blocking AIL sync). We also need to add this flag if mount fails after the initial phase of log recovery has been run.
Signed-off-by: Dave Chinner Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf_item.c | 4 ++++ fs/xfs/xfs_mount.c | 12 ++++++++++++ fs/xfs/xfs_mount.h | 2 ++ fs/xfs/xfs_sysfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 0d95c59..3425799 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1106,6 +1106,10 @@ xfs_buf_iodone_callback_error( time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) goto permanent_error; + /* At unmount we may treat errors differently */ + if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount) + goto permanent_error; + /* still a transient error, higher layers will retry */ xfs_buf_ioerror(bp, 0); xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 677c3e0..7c05a22 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -681,6 +681,9 @@ xfs_mountfs( xfs_set_maxicount(mp); + /* enable fail_at_unmount as default */ + mp->m_fail_unmount = 1; + error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); if (error) goto out; @@ -962,6 +965,7 @@ xfs_mountfs( cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp, SYNC_WAIT); out_log_dealloc: + mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) @@ -1013,6 +1017,14 @@ xfs_unmountfs( xfs_log_force(mp, XFS_LOG_SYNC); /* + * We now need to tell the world we are unmounting. This will allow + * us to detect that the filesystem is going away and we should error + * out anything that we have been retrying in the background. This will + * prevent neverending retries in AIL pushing from hanging the unmount. + */ + mp->m_flags |= XFS_MOUNT_UNMOUNTING; + + /* * Flush all pending changes from the AIL. 
*/ xfs_ail_push_all_sync(mp->m_ail); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 72ec3e3..9063a9c 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -177,6 +177,7 @@ typedef struct xfs_mount { */ __uint32_t m_generation; + bool m_fail_unmount; #ifdef DEBUG /* * DEBUG mode instrumentation to test and/or trigger delayed allocation @@ -195,6 +196,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops must be synchronous except for space allocations */ +#define XFS_MOUNT_UNMOUNTING (1ULL << 1) /* filesystem is unmounting */ #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, typically for diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 084a606..4c2c550 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -381,6 +381,13 @@ to_error_cfg(struct kobject *kobject) return container_of(kobj, struct xfs_error_cfg, kobj); } +static inline struct xfs_mount * +err_to_mp(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + return container_of(kobj, struct xfs_mount, m_error_kobj); +} + static ssize_t max_retries_show( struct kobject *kobject, @@ -447,6 +454,38 @@ retry_timeout_seconds_store( } XFS_SYSFS_ATTR_RW(retry_timeout_seconds); +static ssize_t +fail_at_unmount_show( + struct kobject *kobject, + char *buf) +{ + struct xfs_mount *mp = err_to_mp(kobject); + + return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount); +} + +static ssize_t +fail_at_unmount_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + struct xfs_mount *mp = err_to_mp(kobject); + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val < 0 || val > 1) + return -EINVAL; + + mp->m_fail_unmount = val; + return count; +} +XFS_SYSFS_ATTR_RW(fail_at_unmount); + static struct attribute *xfs_error_attrs[] = { ATTR_LIST(max_retries), ATTR_LIST(retry_timeout_seconds), @@ -462,6 
+501,7 @@ struct kobj_type xfs_error_cfg_ktype = { struct kobj_type xfs_error_ktype = { .release = xfs_sysfs_release, + .sysfs_ops = &xfs_sysfs_ops, }; /* @@ -548,6 +588,12 @@ xfs_error_sysfs_init( if (error) return error; + error = sysfs_create_file(&mp->m_error_kobj.kobject, + ATTR_LIST(fail_at_unmount)); + + if (error) + goto out_error; + /* .../xfs//error/metadata/ */ error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA, "metadata", &mp->m_error_meta_kobj, -- cgit v1.1