summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-cache-metadata.c8
-rw-r--r--drivers/md/dm-cache-target.c1
-rw-r--r--drivers/md/dm-core.h1
-rw-r--r--drivers/md/dm-crypt.c1
-rw-r--r--drivers/md/dm-io.c18
-rw-r--r--drivers/md/dm-kcopyd.c6
-rw-r--r--drivers/md/dm-linear.c1
-rw-r--r--drivers/md/dm-mpath.c3
-rw-r--r--drivers/md/dm-raid.c8
-rw-r--r--drivers/md/dm-raid1.c1
-rw-r--r--drivers/md/dm-rq.c16
-rw-r--r--drivers/md/dm-stripe.c2
-rw-r--r--drivers/md/dm-table.c49
-rw-r--r--drivers/md/dm-thin.c2
-rw-r--r--drivers/md/dm-verity-fec.c18
-rw-r--r--drivers/md/dm-verity-fec.h4
-rw-r--r--drivers/md/dm.c32
-rw-r--r--drivers/md/linear.c1
-rw-r--r--drivers/md/md.h7
-rw-r--r--drivers/md/multipath.c1
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/md/raid1.c4
-rw-r--r--drivers/md/raid10.c1
-rw-r--r--drivers/md/raid5.c53
24 files changed, 151 insertions, 89 deletions
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index e4c2c1a..6735c8d 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -932,7 +932,7 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
*result = true;
r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
- from_cblock(begin), &cmd->dirty_cursor);
+ from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
if (r) {
DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__);
return r;
@@ -959,14 +959,16 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
return 0;
}
+ begin = to_cblock(from_cblock(begin) + 1);
+ if (begin == end)
+ break;
+
r = dm_bitset_cursor_next(&cmd->dirty_cursor);
if (r) {
DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__);
dm_bitset_cursor_end(&cmd->dirty_cursor);
return r;
}
-
- begin = to_cblock(from_cblock(begin) + 1);
}
dm_bitset_cursor_end(&cmd->dirty_cursor);
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 9c689b3..975922c8 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2773,7 +2773,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->num_discard_bios = 1;
ti->discards_supported = true;
- ti->discard_zeroes_data_unsupported = true;
ti->split_discard_bios = false;
cache->features = ca->features;
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 136fda3..fea5bd5 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -132,6 +132,7 @@ void dm_init_md_queue(struct mapped_device *md);
void dm_init_normal_md_queue(struct mapped_device *md);
int md_in_flight(struct mapped_device *md);
void disable_write_same(struct mapped_device *md);
+void disable_write_zeroes(struct mapped_device *md);
static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 389a363..ef1d836 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2030,7 +2030,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
wake_up_process(cc->write_thread);
ti->num_flush_bios = 1;
- ti->discard_zeroes_data_unsupported = true;
return 0;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 03940bf..3702e50 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -312,9 +312,12 @@ static void do_region(int op, int op_flags, unsigned region,
*/
if (op == REQ_OP_DISCARD)
special_cmd_max_sectors = q->limits.max_discard_sectors;
+ else if (op == REQ_OP_WRITE_ZEROES)
+ special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
else if (op == REQ_OP_WRITE_SAME)
special_cmd_max_sectors = q->limits.max_write_same_sectors;
- if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_SAME) &&
+ if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
+ op == REQ_OP_WRITE_SAME) &&
special_cmd_max_sectors == 0) {
dec_count(io, region, -EOPNOTSUPP);
return;
@@ -328,11 +331,18 @@ static void do_region(int op, int op_flags, unsigned region,
/*
* Allocate a suitably sized-bio.
*/
- if ((op == REQ_OP_DISCARD) || (op == REQ_OP_WRITE_SAME))
+ switch (op) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_WRITE_ZEROES:
+ num_bvecs = 0;
+ break;
+ case REQ_OP_WRITE_SAME:
num_bvecs = 1;
- else
+ break;
+ default:
num_bvecs = min_t(int, BIO_MAX_PAGES,
dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
+ }
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
@@ -341,7 +351,7 @@ static void do_region(int op, int op_flags, unsigned region,
bio_set_op_attrs(bio, op, op_flags);
store_io_and_region_in_bio(bio, io, region);
- if (op == REQ_OP_DISCARD) {
+ if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
remaining -= num_sectors;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 9e9d04cb..f858467 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -733,11 +733,11 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
job->pages = &zero_page_list;
/*
- * Use WRITE SAME to optimize zeroing if all dests support it.
+ * Use WRITE ZEROES to optimize zeroing if all dests support it.
*/
- job->rw = REQ_OP_WRITE_SAME;
+ job->rw = REQ_OP_WRITE_ZEROES;
for (i = 0; i < job->num_dests; i++)
- if (!bdev_write_same(job->dests[i].bdev)) {
+ if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) {
job->rw = WRITE;
break;
}
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 4788b0b..e17fd44 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -59,6 +59,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
+ ti->num_write_zeroes_bios = 1;
ti->private = lc;
return 0;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 7f223db..2950b14 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1103,6 +1103,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
+ ti->num_write_zeroes_bios = 1;
if (m->queue_mode == DM_TYPE_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
else
@@ -1491,7 +1492,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
*/
int r = DM_ENDIO_REQUEUE;
- if (!error && !clone->errors)
+ if (!error)
return 0; /* I/O complete */
if (noretry_error(error))
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index f8564d6..2dae3e5 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2813,7 +2813,9 @@ static void configure_discard_support(struct raid_set *rs)
/* Assume discards not supported until after checks below. */
ti->discards_supported = false;
- /* RAID level 4,5,6 require discard_zeroes_data for data integrity! */
+ /*
+ * XXX: RAID level 4,5,6 require zeroing for safety.
+ */
raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6);
for (i = 0; i < rs->raid_disks; i++) {
@@ -2827,8 +2829,6 @@ static void configure_discard_support(struct raid_set *rs)
return;
if (raid456) {
- if (!q->limits.discard_zeroes_data)
- return;
if (!devices_handle_discard_safely) {
DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty.");
DMERR("Set dm-raid.devices_handle_discard_safely=Y to override.");
@@ -3726,7 +3726,7 @@ static int raid_preresume(struct dm_target *ti)
return r;
/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
- if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) &&
+ if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
r = bitmap_resize(mddev->bitmap, mddev->dev_sectors,
to_bytes(rs->requested_bitmap_chunk_sectors), 0);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 2ddc2d2..a95cbb8 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1124,7 +1124,6 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->per_io_data_size = sizeof(struct dm_raid1_bio_record);
- ti->discard_zeroes_data_unsupported = true;
ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0);
if (!ms->kmirrord_wq) {
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 28955b9..bff7e3b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -298,9 +298,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
r = rq_end_io(tio->ti, clone, error, &tio->info);
}
- if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
- !clone->q->limits.max_write_same_sectors))
- disable_write_same(tio->md);
+ if (unlikely(r == -EREMOTEIO)) {
+ if (req_op(clone) == REQ_OP_WRITE_SAME &&
+ !clone->q->limits.max_write_same_sectors)
+ disable_write_same(tio->md);
+ if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
+ !clone->q->limits.max_write_zeroes_sectors)
+ disable_write_zeroes(tio->md);
+ }
if (r <= 0)
/* The target wants to complete the I/O */
@@ -358,7 +363,7 @@ static void dm_complete_request(struct request *rq, int error)
if (!rq->q->mq_ops)
blk_complete_request(rq);
else
- blk_mq_complete_request(rq, error);
+ blk_mq_complete_request(rq);
}
/*
@@ -755,13 +760,14 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
/* Undo dm_start_request() before requeuing */
rq_end_stats(md, rq);
rq_completed(md, rq_data_dir(rq), false);
+ blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
return BLK_MQ_RQ_QUEUE_BUSY;
}
return BLK_MQ_RQ_QUEUE_OK;
}
-static struct blk_mq_ops dm_mq_ops = {
+static const struct blk_mq_ops dm_mq_ops = {
.queue_rq = dm_mq_queue_rq,
.complete = dm_softirq_done,
.init_request = dm_mq_init_request,
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 28193a5..5ef49c1 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -169,6 +169,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_flush_bios = stripes;
ti->num_discard_bios = stripes;
ti->num_write_same_bios = stripes;
+ ti->num_write_zeroes_bios = stripes;
sc->chunk_size = chunk_size;
if (chunk_size & (chunk_size - 1))
@@ -293,6 +294,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
+ unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) ||
unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
target_bio_nr = dm_bio_get_target_bio_nr(bio);
BUG_ON(target_bio_nr >= sc->stripes);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 3ad16d9..958275a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1449,22 +1449,6 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
return false;
}
-static bool dm_table_discard_zeroes_data(struct dm_table *t)
-{
- struct dm_target *ti;
- unsigned i = 0;
-
- /* Ensure that all targets supports discard_zeroes_data. */
- while (i < dm_table_get_num_targets(t)) {
- ti = dm_table_get_target(t, i++);
-
- if (ti->discard_zeroes_data_unsupported)
- return false;
- }
-
- return true;
-}
-
static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
@@ -1533,6 +1517,34 @@ static bool dm_table_supports_write_same(struct dm_table *t)
return true;
}
+static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && !q->limits.max_write_zeroes_sectors;
+}
+
+static bool dm_table_supports_write_zeroes(struct dm_table *t)
+{
+ struct dm_target *ti;
+ unsigned i = 0;
+
+ while (i < dm_table_get_num_targets(t)) {
+ ti = dm_table_get_target(t, i++);
+
+ if (!ti->num_write_zeroes_bios)
+ return false;
+
+ if (!ti->type->iterate_devices ||
+ ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
+ return false;
+ }
+
+ return true;
+}
+
+
static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
@@ -1592,9 +1604,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
blk_queue_write_cache(q, wc, fua);
- if (!dm_table_discard_zeroes_data(t))
- q->limits.discard_zeroes_data = 0;
-
/* Ensure that all underlying devices are non-rotational. */
if (dm_table_all_devices_attribute(t, device_is_nonrot))
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
@@ -1603,6 +1612,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_supports_write_same(t))
q->limits.max_write_same_sectors = 0;
+ if (!dm_table_supports_write_zeroes(t))
+ q->limits.max_write_zeroes_sectors = 0;
if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 2b266a2..a5f1916 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3263,7 +3263,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
* them down to the data device. The thin device's discard
* processing will cause mappings to be removed from the btree.
*/
- ti->discard_zeroes_data_unsupported = true;
if (pf.discard_enabled && pf.discard_passdown) {
ti->num_discard_bios = 1;
@@ -4119,7 +4118,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->per_io_data_size = sizeof(struct dm_thin_endio_hook);
/* In case the pool supports discards, pass them on. */
- ti->discard_zeroes_data_unsupported = true;
if (tc->pool->pf.discard_enabled) {
ti->discards_supported = true;
ti->num_discard_bios = 1;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 0f0eb8a..78f3601 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -146,8 +146,6 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
block = fec_buffer_rs_block(v, fio, n, i);
res = fec_decode_rs8(v, fio, block, &par[offset], neras);
if (res < 0) {
- dm_bufio_release(buf);
-
r = res;
goto error;
}
@@ -172,6 +170,8 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
done:
r = corrected;
error:
+ dm_bufio_release(buf);
+
if (r < 0 && neras)
DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
v->data_dev->name, (unsigned long long)rsb, r);
@@ -269,7 +269,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
&is_zero) == 0) {
/* skip known zero blocks entirely */
if (is_zero)
- continue;
+ goto done;
/*
* skip if we have already found the theoretical
@@ -439,6 +439,13 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
if (!verity_fec_is_enabled(v))
return -EOPNOTSUPP;
+ if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) {
+ DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name);
+ return -EIO;
+ }
+
+ fio->level++;
+
if (type == DM_VERITY_BLOCK_TYPE_METADATA)
block += v->data_blocks;
@@ -470,7 +477,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
if (r < 0) {
r = fec_decode_rsb(v, io, fio, rsb, offset, true);
if (r < 0)
- return r;
+ goto done;
}
if (dest)
@@ -480,6 +487,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
r = verity_for_bv_block(v, io, iter, fec_bv_copy);
}
+done:
+ fio->level--;
return r;
}
@@ -520,6 +529,7 @@ void verity_fec_init_io(struct dm_verity_io *io)
memset(fio->bufs, 0, sizeof(fio->bufs));
fio->nbufs = 0;
fio->output = NULL;
+ fio->level = 0;
}
/*
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
index 7fa0298..bb31ce8 100644
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -27,6 +27,9 @@
#define DM_VERITY_FEC_BUF_MAX \
(1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
+/* maximum recursion level for verity_fec_decode */
+#define DM_VERITY_FEC_MAX_RECURSION 4
+
#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device"
#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks"
#define DM_VERITY_OPT_FEC_START "fec_start"
@@ -58,6 +61,7 @@ struct dm_verity_fec_io {
unsigned nbufs; /* number of buffers allocated */
u8 *output; /* buffer for corrected output */
size_t output_pos;
+ unsigned level; /* recursion level */
};
#ifdef CONFIG_DM_VERITY_FEC
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dfb7597..8bf3977 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -810,7 +810,6 @@ static void dec_pending(struct dm_io *io, int error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- trace_block_bio_complete(md->queue, bio, io_error);
bio->bi_error = io_error;
bio_endio(bio);
}
@@ -825,6 +824,14 @@ void disable_write_same(struct mapped_device *md)
limits->max_write_same_sectors = 0;
}
+void disable_write_zeroes(struct mapped_device *md)
+{
+ struct queue_limits *limits = dm_get_queue_limits(md);
+
+ /* device doesn't really support WRITE ZEROES, disable it */
+ limits->max_write_zeroes_sectors = 0;
+}
+
static void clone_endio(struct bio *bio)
{
int error = bio->bi_error;
@@ -851,9 +858,14 @@ static void clone_endio(struct bio *bio)
}
}
- if (unlikely(r == -EREMOTEIO && (bio_op(bio) == REQ_OP_WRITE_SAME) &&
- !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
- disable_write_same(md);
+ if (unlikely(r == -EREMOTEIO)) {
+ if (bio_op(bio) == REQ_OP_WRITE_SAME &&
+ !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
+ disable_write_same(md);
+ if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+ !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+ disable_write_zeroes(md);
+ }
free_tio(tio);
dec_pending(io, error);
@@ -1202,6 +1214,11 @@ static unsigned get_num_write_same_bios(struct dm_target *ti)
return ti->num_write_same_bios;
}
+static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
+{
+ return ti->num_write_zeroes_bios;
+}
+
typedef bool (*is_split_required_fn)(struct dm_target *ti);
static bool is_split_required_for_discard(struct dm_target *ti)
@@ -1256,6 +1273,11 @@ static int __send_write_same(struct clone_info *ci)
return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
}
+static int __send_write_zeroes(struct clone_info *ci)
+{
+ return __send_changing_extent_only(ci, get_num_write_zeroes_bios, NULL);
+}
+
/*
* Select the correct strategy for processing a non-flush bio.
*/
@@ -1270,6 +1292,8 @@ static int __split_and_process_non_flush(struct clone_info *ci)
return __send_discard(ci);
else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
return __send_write_same(ci);
+ else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
+ return __send_write_zeroes(ci);
ti = dm_table_find_target(ci->map, ci->sector);
if (!dm_target_is_valid(ti))
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 3e38e02..377a8a3 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -293,6 +293,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
split, disk_devt(mddev->gendisk),
bio_sector);
mddev_check_writesame(mddev, split);
+ mddev_check_write_zeroes(mddev, split);
generic_make_request(split);
}
} while (split != bio);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index dde8ecb..1e76d64 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -709,4 +709,11 @@ static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
!bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
mddev->queue->limits.max_write_same_sectors = 0;
}
+
+static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
+{
+ if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+ !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+ mddev->queue->limits.max_write_zeroes_sectors = 0;
+}
#endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 79a12b5..e95d521 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -139,6 +139,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
mp_bh->bio.bi_end_io = multipath_end_request;
mp_bh->bio.bi_private = mp_bh;
mddev_check_writesame(mddev, &mp_bh->bio);
+ mddev_check_write_zeroes(mddev, &mp_bh->bio);
generic_make_request(&mp_bh->bio);
return;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 93347ca..ce7a6a5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -383,6 +383,7 @@ static int raid0_run(struct mddev *mddev)
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
+ blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
@@ -504,6 +505,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
split, disk_devt(mddev->gendisk),
bio_sector);
mddev_check_writesame(mddev, split);
+ mddev_check_write_zeroes(mddev, split);
generic_make_request(split);
}
} while (split != bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a34f587..b59cc10 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3177,8 +3177,10 @@ static int raid1_run(struct mddev *mddev)
if (IS_ERR(conf))
return PTR_ERR(conf);
- if (mddev->queue)
+ if (mddev->queue) {
blk_queue_max_write_same_sectors(mddev->queue, 0);
+ blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
+ }
rdev_for_each(rdev, mddev) {
if (!mddev->gendisk)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e89a8d7..28ec3a9 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3749,6 +3749,7 @@ static int raid10_run(struct mddev *mddev)
blk_queue_max_discard_sectors(mddev->queue,
mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, 0);
+ blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
blk_queue_io_min(mddev->queue, chunk_size);
if (conf->geo.raid_disks % conf->geo.near_copies)
blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ed5cd70..2efdb0d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5031,8 +5031,6 @@ static void raid5_align_endio(struct bio *bi)
rdev_dec_pending(rdev, conf->mddev);
if (!error) {
- trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
- raid_bi, 0);
bio_endio(raid_bi);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_quiescent);
@@ -7229,7 +7227,6 @@ static int raid5_run(struct mddev *mddev)
if (mddev->queue) {
int chunk_size;
- bool discard_supported = true;
/* read-ahead size must cover two whole stripes, which
* is 2 * (datadisks) * chunksize where 'n' is the
* number of raid devices
@@ -7265,48 +7262,32 @@ static int raid5_run(struct mddev *mddev)
blk_queue_max_discard_sectors(mddev->queue,
0xfffe * STRIPE_SECTORS);
- /*
- * unaligned part of discard request will be ignored, so can't
- * guarantee discard_zeroes_data
- */
- mddev->queue->limits.discard_zeroes_data = 0;
-
blk_queue_max_write_same_sectors(mddev->queue, 0);
+ blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
rdev_for_each(rdev, mddev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->new_data_offset << 9);
- /*
- * discard_zeroes_data is required, otherwise data
- * could be lost. Consider a scenario: discard a stripe
- * (the stripe could be inconsistent if
- * discard_zeroes_data is 0); write one disk of the
- * stripe (the stripe could be inconsistent again
- * depending on which disks are used to calculate
- * parity); the disk is broken; The stripe data of this
- * disk is lost.
- */
- if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) ||
- !bdev_get_queue(rdev->bdev)->
- limits.discard_zeroes_data)
- discard_supported = false;
- /* Unfortunately, discard_zeroes_data is not currently
- * a guarantee - just a hint. So we only allow DISCARD
- * if the sysadmin has confirmed that only safe devices
- * are in use by setting a module parameter.
- */
- if (!devices_handle_discard_safely) {
- if (discard_supported) {
- pr_info("md/raid456: discard support disabled due to uncertainty.\n");
- pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n");
- }
- discard_supported = false;
- }
}
- if (discard_supported &&
+ /*
+ * zeroing is required, otherwise data
+ * could be lost. Consider a scenario: discard a stripe
+ * (the stripe could be inconsistent if
+ * discard_zeroes_data is 0); write one disk of the
+ * stripe (the stripe could be inconsistent again
+ * depending on which disks are used to calculate
+ * parity); the disk is broken; The stripe data of this
+ * disk is lost.
+ *
+ * We only allow DISCARD if the sysadmin has confirmed that
+ * only safe devices are in use by setting a module parameter.
+ * A better idea might be to turn DISCARD into WRITE_ZEROES
+ * requests, as that is required to be safe.
+ */
+ if (devices_handle_discard_safely &&
mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
mddev->queue->limits.discard_granularity >= stripe)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
OpenPOWER on IntegriCloud