From 93e6442c76a0d26ad028c5df9b4a1e3096d9c36b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 16 Jan 2017 16:05:59 -0500 Subject: dm: add basic support for using the select or poll function Add the ability to poll on the /dev/mapper/control device. The select or poll function waits until an event has happened on any dm device since the /dev/mapper/control device was opened. When select or poll returns the device as readable, we must close and reopen the device to wait for new dm events. Usage: 1. open the /dev/mapper/control device 2. scan the event numbers of all devices we are interested in and process them 3. call select, poll or epoll on the handle (it waits until some new event has happened since the device was opened) 4. close the /dev/mapper/control handle 5. go to step 1 The next commit allows re-arming the polling without closing and reopening the device. Signed-off-by: Mikulas Patocka Signed-off-by: Andy Grover Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fbd06b9..ff22aa2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -58,6 +58,9 @@ static DECLARE_WORK(deferred_remove_work, do_deferred_remove); static struct workqueue_struct *deferred_remove_workqueue; +atomic_t dm_global_event_nr = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq); + /* * One of these is allocated per bio. */ @@ -1760,7 +1763,9 @@ static void event_callback(void *context) dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); + atomic_inc(&dm_global_event_nr); wake_up(&md->eventq); + wake_up(&dm_global_eventq); } /* -- cgit v1.1 From a4aa5e56e5189b88a2891cdcc350dac618810354 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:46 -0700 Subject: dm: fix REQ_OP_ZONE_RESET bio handling The REQ_OP_ZONE_RESET bio has no payload and zero sectors. 
Its position is the only information used to indicate the zone to reset on the device. Due to its zero length, this bio is not cloned and sent to the target through the non-flush case in __split_and_process_bio(). Add an additional case in that function to call __split_and_process_non_flush() without checking the clone info size. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ff22aa2..d85adec 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1384,6 +1384,10 @@ static void __split_and_process_bio(struct mapped_device *md, ci.sector_count = 0; error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ + } else if (bio_op(bio) == REQ_OP_ZONE_RESET) { + ci.bio = bio; + ci.sector_count = 0; + error = __split_and_process_non_flush(&ci); } else { ci.bio = bio; ci.sector_count = bio_sectors(bio); -- cgit v1.1 From 264c869d44dcff17e3b108dbb2fbea24bed08538 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:47 -0700 Subject: dm: fix REQ_OP_ZONE_REPORT bio handling A REQ_OP_ZONE_REPORT bio is not a medium access command. Its number of sectors indicates the maximum size allowed for the report reply and not an amount of sectors accessed from the device. REQ_OP_ZONE_REPORT bios should thus not be split depending on the target device maximum I/O length but passed as-is. Note that it is the responsibility of the target to remap and format the report reply. 
Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d85adec..e38d1d7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1152,7 +1152,8 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, return r; } - bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); + if (bio_op(bio) != REQ_OP_ZONE_REPORT) + bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); clone->bi_iter.bi_size = to_bytes(len); if (unlikely(bio_integrity(bio) != NULL)) @@ -1341,7 +1342,11 @@ static int __split_and_process_non_flush(struct clone_info *ci) if (!dm_target_is_valid(ti)) return -EIO; - len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); + if (bio_op(bio) == REQ_OP_ZONE_REPORT) + len = ci->sector_count; + else + len = min_t(sector_t, max_io_len(ci->sector, ti), + ci->sector_count); r = __clone_and_map_data_bio(ci, ti, ci->sector, &len); if (r < 0) -- cgit v1.1 From 10999307c14eac281fbec3ada73bee7a05bd41dc Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:48 -0700 Subject: dm: introduce dm_remap_zone_report() A target driver that supports zoned block devices and exposes itself as such may receive a REQ_OP_ZONE_REPORT request for the user to determine the mapped device zone configuration. To properly process such a request, the target driver may need to remap the zone descriptors provided in the report reply. The helper function dm_remap_zone_report() does this generically using only the target start offset and length and the start offset within the target device. dm_remap_zone_report() will remap the start sector of all zones reported. If the report includes sequential zones, the write pointer position of these zones will also be remapped. 
Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e38d1d7..96bd13e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1013,6 +1013,85 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) EXPORT_SYMBOL_GPL(dm_accept_partial_bio); /* + * The zone descriptors obtained with a zone report indicate + * zone positions within the target device. The zone descriptors + * must be remapped to match their position within the dm device. + * A target may call dm_remap_zone_report after completion of a + * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained + * from the target device mapping to the dm device. + * + * @ti: target whose begin and len are used to offset and bound the zones. + * @bio: clone bio embedded in a dm_target_io; the report bio that is walked + * is reached through tio->io->bio. + * @start: sector subtracted from each zone start (and write pointer) before + * ti->begin is added; a zone starting at or beyond start + ti->len ends + * the remapping. + * + * Returns early without touching the report if bio->bi_status is set. When + * CONFIG_BLK_DEV_ZONED is not defined, only sets bio->bi_status to + * BLK_STS_NOTSUPP. + */ +void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) +{ +#ifdef CONFIG_BLK_DEV_ZONED + struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); + struct bio *report_bio = tio->io->bio; + struct blk_zone_report_hdr *hdr = NULL; + struct blk_zone *zone; + unsigned int nr_rep = 0; + unsigned int ofst; + struct bio_vec bvec; + struct bvec_iter iter; + void *addr; + + if (bio->bi_status) + return; + + /* + * Remap the start sector of the reported zones. For sequential zones, + * also remap the write pointer position. 
+ */ + bio_for_each_segment(bvec, report_bio, iter) { + addr = kmap_atomic(bvec.bv_page); /* NOTE(review): addr is the page base; bvec.bv_offset is not added here - confirm report buffers always start at offset 0 in the page */ + + /* Remember the report header in the first page */ + if (!hdr) { + hdr = addr; + ofst = sizeof(struct blk_zone_report_hdr); + } else + ofst = 0; + + /* Set zones start sector */ + while (hdr->nr_zones && ofst < bvec.bv_len) { + zone = addr + ofst; + if (zone->start >= start + ti->len) { /* zone starts at or beyond start + ti->len: stop; zeroing nr_zones also ends the outer loop */ + hdr->nr_zones = 0; + break; + } + zone->start = zone->start + ti->begin - start; + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { + if (zone->cond == BLK_ZONE_COND_FULL) + zone->wp = zone->start + zone->len; + else if (zone->cond == BLK_ZONE_COND_EMPTY) + zone->wp = zone->start; + else + zone->wp = zone->wp + ti->begin - start; + } + ofst += sizeof(struct blk_zone); + hdr->nr_zones--; + nr_rep++; + } + + if (addr != hdr) /* the first (header) page stays mapped; hdr is still dereferenced below */ + kunmap_atomic(addr); + + if (!hdr->nr_zones) + break; + } + + if (hdr) { + hdr->nr_zones = nr_rep; /* nr_zones was counted down while walking; store the number of zones remapped */ + kunmap_atomic(hdr); + } + + bio_advance(report_bio, report_bio->bi_iter.bi_size); /* advance the report bio by its entire remaining size */ + +#else /* !CONFIG_BLK_DEV_ZONED */ + bio->bi_status = BLK_STS_NOTSUPP; +#endif +} +EXPORT_SYMBOL_GPL(dm_remap_zone_report); + +/* * Flush current->bio_list when the target map method blocks. * This fixes deadlocks in snapshot and possibly in other targets. */ -- cgit v1.1