From 4246a0b63bd8f56a1469b12eafeb875b1041a451 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Jul 2015 15:29:37 +0200
Subject: block: add a bi_error field to struct bio

Currently we have two different ways to signal an I/O error on a BIO:

 (1) by clearing the BIO_UPTODATE flag
 (2) by returning a Linux errno value to the bi_end_io callback

The first one has the drawback of only communicating a single possible
error (-EIO), and the second one has the drawback of not beeing persistent
when bios are queued up, and are not passed along from child to parent
bio in the ever more popular chaining scenario.  Having both mechanisms
available has the additional drawback of utterly confusing driver authors
and introducing bugs where various I/O submitters only deal with one of
them, and the others have to add boilerplate code to deal with both kinds
of error returns.

So add a new bi_error field to store an errno value directly in struct
bio and remove the existing mechanisms to clean all this up.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/raid5.c | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

(limited to 'drivers/md/raid5.c')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 59e44e9..84d6eec 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -233,7 +233,7 @@ static void return_io(struct bio *return_bi)
 		bi->bi_iter.bi_size = 0;
 		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
 					 bi, 0);
-		bio_endio(bi, 0);
+		bio_endio(bi);
 		bi = return_bi;
 	}
 }
@@ -887,9 +887,9 @@ static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
 }
 
 static void
-raid5_end_read_request(struct bio *bi, int error);
+raid5_end_read_request(struct bio *bi);
 static void
-raid5_end_write_request(struct bio *bi, int error);
+raid5_end_write_request(struct bio *bi);
 
 static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 {
@@ -2277,12 +2277,11 @@ static void shrink_stripes(struct r5conf *conf)
 	conf->slab_cache = NULL;
 }
 
-static void raid5_end_read_request(struct bio * bi, int error)
+static void raid5_end_read_request(struct bio * bi)
 {
 	struct stripe_head *sh = bi->bi_private;
 	struct r5conf *conf = sh->raid_conf;
 	int disks = sh->disks, i;
-	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 	char b[BDEVNAME_SIZE];
 	struct md_rdev *rdev = NULL;
 	sector_t s;
@@ -2291,9 +2290,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
 		if (bi == &sh->dev[i].req)
 			break;
 
-	pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
+	pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
-		uptodate);
+		bi->bi_error);
 	if (i == disks) {
 		BUG();
 		return;
@@ -2312,7 +2311,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 		s = sh->sector + rdev->new_data_offset;
 	else
 		s = sh->sector + rdev->data_offset;
-	if (uptodate) {
+	if (!bi->bi_error) {
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
 			/* Note that this cannot happen on a
@@ -2400,13 +2399,12 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	release_stripe(sh);
 }
 
-static void raid5_end_write_request(struct bio *bi, int error)
+static void raid5_end_write_request(struct bio *bi)
 {
 	struct stripe_head *sh = bi->bi_private;
 	struct r5conf *conf = sh->raid_conf;
 	int disks = sh->disks, i;
 	struct md_rdev *uninitialized_var(rdev);
-	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 	sector_t first_bad;
 	int bad_sectors;
 	int replacement = 0;
@@ -2429,23 +2427,23 @@ static void raid5_end_write_request(struct bio *bi, int error)
 			break;
 		}
 	}
-	pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
+	pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
-		uptodate);
+		bi->bi_error);
 	if (i == disks) {
 		BUG();
 		return;
 	}
 
 	if (replacement) {
-		if (!uptodate)
+		if (bi->bi_error)
 			md_error(conf->mddev, rdev);
 		else if (is_badblock(rdev, sh->sector,
 				     STRIPE_SECTORS,
 				     &first_bad, &bad_sectors))
 			set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
 	} else {
-		if (!uptodate) {
+		if (bi->bi_error) {
 			set_bit(STRIPE_DEGRADED, &sh->state);
 			set_bit(WriteErrorSeen, &rdev->flags);
 			set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2466,7 +2464,7 @@ static void raid5_end_write_request(struct bio *bi, int error)
 	}
 	rdev_dec_pending(rdev, conf->mddev);
 
-	if (sh->batch_head && !uptodate && !replacement)
+	if (sh->batch_head && bi->bi_error && !replacement)
 		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
 	if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
@@ -3107,7 +3105,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		while (bi && bi->bi_iter.bi_sector <
 			sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
-			clear_bit(BIO_UPTODATE, &bi->bi_flags);
+
+			bi->bi_error = -EIO;
 			if (!raid5_dec_bi_active_stripes(bi)) {
 				md_write_end(conf->mddev);
 				bi->bi_next = *return_bi;
@@ -3131,7 +3130,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		while (bi && bi->bi_iter.bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
-			clear_bit(BIO_UPTODATE, &bi->bi_flags);
+
+			bi->bi_error = -EIO;
 			if (!raid5_dec_bi_active_stripes(bi)) {
 				md_write_end(conf->mddev);
 				bi->bi_next = *return_bi;
@@ -3156,7 +3156,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 			       sh->dev[i].sector + STRIPE_SECTORS) {
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
-				clear_bit(BIO_UPTODATE, &bi->bi_flags);
+
+				bi->bi_error = -EIO;
 				if (!raid5_dec_bi_active_stripes(bi)) {
 					bi->bi_next = *return_bi;
 					*return_bi = bi;
@@ -4749,12 +4750,11 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf)
  *  first).
  *  If the read failed..
  */
-static void raid5_align_endio(struct bio *bi, int error)
+static void raid5_align_endio(struct bio *bi)
 {
 	struct bio* raid_bi  = bi->bi_private;
 	struct mddev *mddev;
 	struct r5conf *conf;
-	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 	struct md_rdev *rdev;
 
 	bio_put(bi);
@@ -4766,10 +4766,10 @@ static void raid5_align_endio(struct bio *bi, int error)
 
 	rdev_dec_pending(rdev, conf->mddev);
 
-	if (!error && uptodate) {
+	if (!bi->bi_error) {
 		trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
 					 raid_bi, 0);
-		bio_endio(raid_bi, 0);
+		bio_endio(raid_bi);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_quiescent);
 		return;
@@ -5133,7 +5133,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 	remaining = raid5_dec_bi_active_stripes(bi);
 	if (remaining == 0) {
 		md_write_end(mddev);
-		bio_endio(bi, 0);
+		bio_endio(bi);
 	}
 }
 
@@ -5297,7 +5297,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			release_stripe_plug(mddev, sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
-			clear_bit(BIO_UPTODATE, &bi->bi_flags);
+			bi->bi_error = -EIO;
 			break;
 		}
 	}
@@ -5311,7 +5311,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 
 		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
 					 bi, 0);
-		bio_endio(bi, 0);
+		bio_endio(bi);
 	}
 }
 
@@ -5707,7 +5707,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 	if (remaining == 0) {
 		trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
 					 raid_bio, 0);
-		bio_endio(raid_bio, 0);
+		bio_endio(raid_bio);
 	}
 	if (atomic_dec_and_test(&conf->active_aligned_reads))
 		wake_up(&conf->wait_for_quiescent);
-- 
cgit v1.1


From b7c44ed9d2fc6b461378c65eaf144ccc80a47772 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 24 Jul 2015 12:37:59 -0600
Subject: block: manipulate bio->bi_flags through helpers

Some places use helpers now, others don't. We only have the 'is set'
helper, add helpers for setting and clearing flags too.

It was a bit of a mess of atomic vs non-atomic access. With
BIO_UPTODATE gone, we don't have any risk of concurrent access to the
flags. So relax the restriction and don't make any of them atomic. The
flags that do have serialization issues (reffed and chained), we
already handle those separately.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/raid5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md/raid5.c')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 84d6eec..e3d4877 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4850,7 +4850,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		rcu_read_unlock();
 		raid_bio->bi_next = (void*)rdev;
 		align_bi->bi_bdev =  rdev->bdev;
-		__clear_bit(BIO_SEG_VALID, &align_bi->bi_flags);
+		bio_clear_flag(align_bi, BIO_SEG_VALID);
 
 		if (!bio_fits_rdev(align_bi) ||
 		    is_badblock(rdev, align_bi->bi_iter.bi_sector,
-- 
cgit v1.1


From 9b81c842355ac96097ba32ad5632e9ef0ff59f92 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Mon, 10 Aug 2015 19:05:18 -0400
Subject: block: don't access bio->bi_error after bio_put()

Commit 4246a0b6 ("block: add a bi_error field to struct bio") has added a few
dereferences of 'bio' after a call to bio_put(). This causes use-after-frees
such as:

[521120.719695] BUG: KASan: use after free in dio_bio_complete+0x2b3/0x320 at addr ffff880f36b38714
[521120.720638] Read of size 4 by task mount.ocfs2/9644
[521120.721212] =============================================================================
[521120.722056] BUG kmalloc-256 (Not tainted): kasan: bad access detected
[521120.722968] -----------------------------------------------------------------------------
[521120.722968]
[521120.723915] Disabling lock debugging due to kernel taint
[521120.724539] INFO: Slab 0xffffea003cdace00 objects=32 used=25 fp=0xffff880f36b38600 flags=0x46fffff80004080
[521120.726037] INFO: Object 0xffff880f36b38700 @offset=1792 fp=0xffff880f36b38800
[521120.726037]
[521120.726974] Bytes b4 ffff880f36b386f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.727898] Object ffff880f36b38700: 00 88 b3 36 0f 88 ff ff 00 00 d8 de 0b 88 ff ff  ...6............
[521120.728822] Object ffff880f36b38710: 02 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.729705] Object ffff880f36b38720: 01 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00  ................
[521120.730623] Object ffff880f36b38730: 00 00 00 00 00 00 00 00 01 00 00 00 00 02 00 00  ................
[521120.731621] Object ffff880f36b38740: 00 02 00 00 01 00 00 00 d0 f7 87 ad ff ff ff ff  ................
[521120.732776] Object ffff880f36b38750: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.733640] Object ffff880f36b38760: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.734508] Object ffff880f36b38770: 01 00 03 00 01 00 00 00 88 87 b3 36 0f 88 ff ff  ...........6....
[521120.735385] Object ffff880f36b38780: 00 73 22 ad 02 88 ff ff 40 13 e0 3c 00 ea ff ff  .s".....@..<....
[521120.736667] Object ffff880f36b38790: 00 02 00 00 00 04 00 00 00 00 00 00 00 00 00 00  ................
[521120.737596] Object ffff880f36b387a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.738524] Object ffff880f36b387b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.739388] Object ffff880f36b387c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.740277] Object ffff880f36b387d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.741187] Object ffff880f36b387e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.742233] Object ffff880f36b387f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[521120.743229] CPU: 41 PID: 9644 Comm: mount.ocfs2 Tainted: G    B           4.2.0-rc6-next-20150810-sasha-00039-gf909086 #2420
[521120.744274]  ffff880f36b38000 ffff880d89c8f638 ffffffffb6e9ba8a ffff880101c0e5c0
[521120.745025]  ffff880d89c8f668 ffffffffad76a313 ffff880101c0e5c0 ffffea003cdace00
[521120.745908]  ffff880f36b38700 ffff880f36b38798 ffff880d89c8f690 ffffffffad772854
[521120.747063] Call Trace:
[521120.747520] dump_stack (lib/dump_stack.c:52)
[521120.748053] print_trailer (mm/slub.c:653)
[521120.748582] object_err (mm/slub.c:660)
[521120.749079] kasan_report_error (include/linux/kasan.h:20 mm/kasan/report.c:152 mm/kasan/report.c:194)
[521120.750834] __asan_report_load4_noabort (mm/kasan/report.c:250)
[521120.753580] dio_bio_complete (fs/direct-io.c:478)
[521120.755752] do_blockdev_direct_IO (fs/direct-io.c:494 fs/direct-io.c:1291)
[521120.759765] __blockdev_direct_IO (fs/direct-io.c:1322)
[521120.761658] blkdev_direct_IO (fs/block_dev.c:162)
[521120.762993] generic_file_read_iter (mm/filemap.c:1738)
[521120.767405] blkdev_read_iter (fs/block_dev.c:1649)
[521120.768556] __vfs_read (fs/read_write.c:423 fs/read_write.c:434)
[521120.772126] vfs_read (fs/read_write.c:454)
[521120.773118] SyS_pread64 (fs/read_write.c:607 fs/read_write.c:594)
[521120.776062] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:186)
[521120.777375] Memory state around the buggy address:
[521120.778118]  ffff880f36b38600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[521120.779211]  ffff880f36b38680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[521120.780315] >ffff880f36b38700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[521120.781465]                          ^
[521120.782083]  ffff880f36b38780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[521120.783717]  ffff880f36b38800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[521120.784818] ==================================================================

This patch fixes a few of those places that I caught while auditing the patch, but the
original patch should be audited further for more occurences of this issue since I'm
not too familiar with the code.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/raid5.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/md/raid5.c')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e3d4877..085db93 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4756,6 +4756,7 @@ static void raid5_align_endio(struct bio *bi)
 	struct mddev *mddev;
 	struct r5conf *conf;
 	struct md_rdev *rdev;
+	int error = bi->bi_error;
 
 	bio_put(bi);
 
@@ -4766,7 +4767,7 @@ static void raid5_align_endio(struct bio *bi)
 
 	rdev_dec_pending(rdev, conf->mddev);
 
-	if (!bi->bi_error) {
+	if (!error) {
 		trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
 					 raid_bi, 0);
 		bio_endio(raid_bi);
-- 
cgit v1.1


From 7ef6b12a1966f273afb750e19e1e8129bea48fec Mon Sep 17 00:00:00 2001
From: Ming Lin <ming.l@ssi.samsung.com>
Date: Wed, 6 May 2015 22:51:24 -0700
Subject: md/raid5: split bio for chunk_aligned_read

If a read request fits entirely in a chunk, it will be passed directly to the
underlying device (providing it hasn't failed of course).  If it doesn't fit,
the slightly less efficient path that uses the stripe_cache is used.
Requests that get to the stripe cache are always completely split up as
necessary.

So with RAID5, ripping out the merge_bvec_fn doesn't cause it to stop work,
but could cause it to take the less efficient path more often.

All that is needed to manage this is for 'chunk_aligned_read' do some bio
splitting, much like the RAID0 code does.

Cc: Neil Brown <neilb@suse.de>
Cc: linux-raid@vger.kernel.org
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/raid5.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

(limited to 'drivers/md/raid5.c')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 085db93..256cc07 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4800,7 +4800,7 @@ static int bio_fits_rdev(struct bio *bi)
 	return 1;
 }
 
-static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
+static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 {
 	struct r5conf *conf = mddev->private;
 	int dd_idx;
@@ -4809,7 +4809,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 	sector_t end_sector;
 
 	if (!in_chunk_boundary(mddev, raid_bio)) {
-		pr_debug("chunk_aligned_read : non aligned\n");
+		pr_debug("%s: non aligned\n", __func__);
 		return 0;
 	}
 	/*
@@ -4886,6 +4886,31 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 	}
 }
 
+static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
+{
+	struct bio *split;
+
+	do {
+		sector_t sector = raid_bio->bi_iter.bi_sector;
+		unsigned chunk_sects = mddev->chunk_sectors;
+		unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
+
+		if (sectors < bio_sectors(raid_bio)) {
+			split = bio_split(raid_bio, sectors, GFP_NOIO, fs_bio_set);
+			bio_chain(split, raid_bio);
+		} else
+			split = raid_bio;
+
+		if (!raid5_read_one_chunk(mddev, split)) {
+			if (split != raid_bio)
+				generic_make_request(raid_bio);
+			return split;
+		}
+	} while (split != raid_bio);
+
+	return NULL;
+}
+
 /* __get_priority_stripe - get the next stripe to process
  *
  * Full stripe writes are allowed to pass preread active stripes up until
@@ -5163,9 +5188,11 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	 * data on failed drives.
 	 */
 	if (rw == READ && mddev->degraded == 0 &&
-	     mddev->reshape_position == MaxSector &&
-	     chunk_aligned_read(mddev,bi))
-		return;
+	    mddev->reshape_position == MaxSector) {
+		bi = chunk_aligned_read(mddev, bi);
+		if (!bi)
+			return;
+	}
 
 	if (unlikely(bi->bi_rw & REQ_DISCARD)) {
 		make_discard_request(mddev, bi);
-- 
cgit v1.1


From 7140aafce2fc14c5af02fdb7859b6bea0108be3d Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Wed, 25 Sep 2013 13:37:01 -0700
Subject: md/raid5: get rid of bio_fits_rdev()

Remove bio_fits_rdev() as sufficient merge_bvec_fn() handling is now
performed by blk_queue_split() in md_make_request().

Cc: Neil Brown <neilb@suse.de>
Cc: linux-raid@vger.kernel.org
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
[dpark: add more description in commit message]
Signed-off-by: Dongsu Park <dpark@posteo.net>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/raid5.c | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

(limited to 'drivers/md/raid5.c')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 256cc07..7823295 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4781,25 +4781,6 @@ static void raid5_align_endio(struct bio *bi)
 	add_bio_to_retry(raid_bi, conf);
 }
 
-static int bio_fits_rdev(struct bio *bi)
-{
-	struct request_queue *q = bdev_get_queue(bi->bi_bdev);
-
-	if (bio_sectors(bi) > queue_max_sectors(q))
-		return 0;
-	blk_recount_segments(q, bi);
-	if (bi->bi_phys_segments > queue_max_segments(q))
-		return 0;
-
-	if (q->merge_bvec_fn)
-		/* it's too hard to apply the merge_bvec_fn at this stage,
-		 * just just give up
-		 */
-		return 0;
-
-	return 1;
-}
-
 static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 {
 	struct r5conf *conf = mddev->private;
@@ -4853,11 +4834,9 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 		align_bi->bi_bdev =  rdev->bdev;
 		bio_clear_flag(align_bi, BIO_SEG_VALID);
 
-		if (!bio_fits_rdev(align_bi) ||
-		    is_badblock(rdev, align_bi->bi_iter.bi_sector,
+		if (is_badblock(rdev, align_bi->bi_iter.bi_sector,
 				bio_sectors(align_bi),
 				&first_bad, &bad_sectors)) {
-			/* too big in some way, or has a known bad block */
 			bio_put(align_bi);
 			rdev_dec_pending(rdev, mddev);
 			return 0;
-- 
cgit v1.1


From 8ae126660fddbeebb9251a174e6fa45b6ad8f932 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Mon, 27 Apr 2015 23:48:34 -0700
Subject: block: kill merge_bvec_fn() completely

As generic_make_request() is now able to handle arbitrarily sized bios,
it's no longer necessary for each individual block driver to define its
own ->merge_bvec_fn() callback. Remove every invocation completely.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Lars Ellenberg <drbd-dev@lists.linbit.com>
Cc: drbd-user@lists.linbit.com
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Yehuda Sadeh <yehuda@inktank.com>
Cc: Sage Weil <sage@inktank.com>
Cc: Alex Elder <elder@kernel.org>
Cc: ceph-devel@vger.kernel.org
Cc: Alasdair Kergon <agk@redhat.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: dm-devel@redhat.com
Cc: Neil Brown <neilb@suse.de>
Cc: linux-raid@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Acked-by: NeilBrown <neilb@suse.de> (for the 'md' bits)
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
[dpark: also remove ->merge_bvec_fn() in dm-thin as well as
 dm-era-target, and resolve merge conflicts]
Signed-off-by: Dongsu Park <dpark@posteo.net>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/raid5.c | 32 --------------------------------
 1 file changed, 32 deletions(-)

(limited to 'drivers/md/raid5.c')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7823295..6d20692 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4663,35 +4663,6 @@ static int raid5_congested(struct mddev *mddev, int bits)
 	return 0;
 }
 
-/* We want read requests to align with chunks where possible,
- * but write requests don't need to.
- */
-static int raid5_mergeable_bvec(struct mddev *mddev,
-				struct bvec_merge_data *bvm,
-				struct bio_vec *biovec)
-{
-	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-	int max;
-	unsigned int chunk_sectors = mddev->chunk_sectors;
-	unsigned int bio_sectors = bvm->bi_size >> 9;
-
-	/*
-	 * always allow writes to be mergeable, read as well if array
-	 * is degraded as we'll go through stripe cache anyway.
-	 */
-	if ((bvm->bi_rw & 1) == WRITE || mddev->degraded)
-		return biovec->bv_len;
-
-	if (mddev->new_chunk_sectors < mddev->chunk_sectors)
-		chunk_sectors = mddev->new_chunk_sectors;
-	max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
-	if (max < 0) max = 0;
-	if (max <= biovec->bv_len && bio_sectors == 0)
-		return biovec->bv_len;
-	else
-		return max;
-}
-
 static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
 	sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
@@ -7764,7 +7735,6 @@ static struct md_personality raid6_personality =
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid6_takeover,
 	.congested	= raid5_congested,
-	.mergeable_bvec	= raid5_mergeable_bvec,
 };
 static struct md_personality raid5_personality =
 {
@@ -7788,7 +7758,6 @@ static struct md_personality raid5_personality =
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid5_takeover,
 	.congested	= raid5_congested,
-	.mergeable_bvec	= raid5_mergeable_bvec,
 };
 
 static struct md_personality raid4_personality =
@@ -7813,7 +7782,6 @@ static struct md_personality raid4_personality =
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid4_takeover,
 	.congested	= raid5_congested,
-	.mergeable_bvec	= raid5_mergeable_bvec,
 };
 
 static int __init raid5_init(void)
-- 
cgit v1.1