From 0a927c2f02a2437a57679527e42ab7cbfa14efb1 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Tue, 21 Jul 2015 13:20:46 -0400
Subject: dm thin: return -ENOSPC when erroring retry list due to out of data space

Otherwise -EIO would be returned when -ENOSPC should be used
consistently.

Signed-off-by: Mike Snitzer
---
 drivers/md/dm-thin.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 1c50c58..d2bbe8c 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -666,16 +666,21 @@ static void requeue_io(struct thin_c *tc)
 	requeue_deferred_cells(tc);
 }
 
-static void error_retry_list(struct pool *pool)
+static void error_retry_list_with_code(struct pool *pool, int error)
 {
 	struct thin_c *tc;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(tc, &pool->active_thins, list)
-		error_thin_bio_list(tc, &tc->retry_on_resume_list, -EIO);
+		error_thin_bio_list(tc, &tc->retry_on_resume_list, error);
 	rcu_read_unlock();
 }
 
+static void error_retry_list(struct pool *pool)
+{
+	return error_retry_list_with_code(pool, -EIO);
+}
+
 /*
  * This section of code contains the logic for processing a thin device's IO.
  * Much of the code depends on pool object resources (lists, workqueues, etc)
@@ -2297,7 +2302,7 @@ static void do_no_space_timeout(struct work_struct *ws)
 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 		pool->pf.error_if_no_space = true;
 		notify_of_pool_mode_change_to_oods(pool);
-		error_retry_list(pool);
+		error_retry_list_with_code(pool, -ENOSPC);
 	}
 }

--
cgit v1.1


From 134bf30c06f057d6b8d90132e8f8b3cd2be79572 Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Thu, 23 Jul 2015 16:47:59 +0100
Subject: dm cache policy smq: fix alloc_bitset check that always evaluates as false

Static analysis by cppcheck has found a check on alloc_bitset that
always evaluates as false and hence never finds an allocation failure:

  [drivers/md/dm-cache-policy-smq.c:1689]: (warning) Logical conjunction
  always evaluates to false: !EXPR && EXPR.

Fix this by removing the incorrect mq->cache_hit_bits check.

Signed-off-by: Colin Ian King
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-policy-smq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index b6f2265..48a4a82 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1686,7 +1686,7 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
 
 	if (from_cblock(cache_size)) {
 		mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
-		if (!mq->cache_hit_bits && mq->cache_hit_bits) {
+		if (!mq->cache_hit_bits) {
 			DMERR("couldn't allocate cache hit bitset");
 			goto bad_cache_hit_bits;
 		}

--
cgit v1.1


From 6ed443c07f1e7f819983422b16598facb152250d Mon Sep 17 00:00:00 2001
From: Baruch Siach
Date: Sun, 5 Jul 2015 09:55:44 +0300
Subject: dm crypt: update wiki page URL

Cryptsetup moved to GitLab.  This is a leftover from commit e44f23b32dc7
(dm crypt: update URLs to new cryptsetup project page, 2015-04-05).

Signed-off-by: Baruch Siach
Signed-off-by: Mike Snitzer
---
 drivers/md/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b597273..bfec3bd 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -259,7 +259,7 @@ config DM_CRYPT
 	  the ciphers you're going to use in the cryptoapi configuration.
 
 	  For further information on dm-crypt and userspace tools see:
-	  <http://code.google.com/p/cryptsetup/wiki/DMCrypt>
+	  <https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt>
 
 	  To compile this code as a module, choose M here: the module
 	  will be called dm-crypt.

--
cgit v1.1


From 3508e6590d4729ac07f01f7ae2256c2f9dc738b8 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Wed, 29 Jul 2015 14:11:28 -0400
Subject: Revert "dm cache: do not wake_worker() in free_migration()"

This reverts commit 386cb7cdeeef97e0bf082a8d6bbfc07a2ccce07b.

Taking the wake_worker() out of free_migration() will slow writeback
dramatically, and hence the cache's adaptability.  Say we have 10k
blocks that need writing back, but are only able to issue 5
concurrently due to the migration bandwidth: it's imperative that we
wake_worker() immediately after migration completion; waiting for the
next 1 second wake up (via do_waker) means it'll take a long time to
write that all back.

Reported-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-target.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index b680da5..64e96a2 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -424,6 +424,7 @@ static void free_migration(struct dm_cache_migration *mg)
 		wake_up(&cache->migration_wait);
 
 	mempool_free(mg, cache->migration_pool);
+	wake_worker(cache);
 }
 
 static int prealloc_data_structs(struct cache *cache, struct prealloc *p)

--
cgit v1.1


From 795e633a2dc6cbbeac68bc7f6006082150d38bb7 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Wed, 29 Jul 2015 13:48:23 -0400
Subject: dm cache: fix device destroy hang due to improper prealloc_used accounting

Commit 665022d72f9 ("dm cache: avoid calls to prealloc_free_structs() if
possible") introduced a regression that caused the removal of a DM cache
device to hang in cache_postsuspend()'s call to wait_for_migrations()
with the following stack trace:

  [] schedule+0x37/0x80
  [] cache_postsuspend+0xbb/0x470 [dm_cache]
  [] ? prepare_to_wait_event+0xf0/0xf0
  [] dm_table_postsuspend_targets+0x47/0x60 [dm_mod]
  [] __dm_destroy+0x215/0x250 [dm_mod]
  [] dm_destroy+0x13/0x20 [dm_mod]
  [] dev_remove+0x10d/0x170 [dm_mod]
  [] ? dev_suspend+0x240/0x240 [dm_mod]
  [] ctl_ioctl+0x255/0x4d0 [dm_mod]
  [] ? SYSC_semtimedop+0x280/0xe10
  [] dm_ctl_ioctl+0x13/0x20 [dm_mod]
  [] do_vfs_ioctl+0x2d2/0x4b0
  [] ? __audit_syscall_entry+0xaf/0x100
  [] ? do_audit_syscall_entry+0x66/0x70
  [] SyS_ioctl+0x79/0x90
  [] ? syscall_trace_leave+0xb8/0x110
  [] entry_SYSCALL_64_fastpath+0x12/0x71

Fix this by accounting for the call to prealloc_data_structs()
immediately _before_ the call as opposed to after.  This is needed
because it is possible to break out of the control loop after the call
to prealloc_data_structs() but before prealloc_used was set to true.

Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-target.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 64e96a2..1fe93cf 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1967,6 +1967,7 @@ static void process_deferred_bios(struct cache *cache)
 		 * this bio might require one, we pause until there are some
 		 * prepared mappings to process.
 		 */
+		prealloc_used = true;
 		if (prealloc_data_structs(cache, &structs)) {
 			spin_lock_irqsave(&cache->lock, flags);
 			bio_list_merge(&cache->deferred_bios, &bios);
@@ -1982,7 +1983,6 @@ static void process_deferred_bios(struct cache *cache)
 			process_discard_bio(cache, &structs, bio);
 		else
 			process_bio(cache, &structs, bio);
-		prealloc_used = true;
 	}
 
 	if (prealloc_used)
@@ -2011,6 +2011,7 @@ static void process_deferred_cells(struct cache *cache)
 		 * this bio might require one, we pause until there are some
 		 * prepared mappings to process.
 		 */
+		prealloc_used = true;
 		if (prealloc_data_structs(cache, &structs)) {
 			spin_lock_irqsave(&cache->lock, flags);
 			list_splice(&cells, &cache->deferred_cells);
@@ -2019,7 +2020,6 @@
 		}
 
 		process_cell(cache, &structs, cell);
-		prealloc_used = true;
 	}
 
 	if (prealloc_used)
@@ -2081,6 +2081,7 @@ static void writeback_some_dirty_blocks(struct cache *cache)
 		if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
 			break; /* no work to do */
 
+		prealloc_used = true;
 		if (prealloc_data_structs(cache, &structs) ||
 		    get_cell(cache, oblock, &structs, &old_ocell)) {
 			policy_set_dirty(cache->policy, oblock);
@@ -2088,7 +2089,6 @@
 		}
 
 		writeback(cache, &structs, oblock, cblock, old_ocell);
-		prealloc_used = true;
 	}
 
 	if (prealloc_used)

--
cgit v1.1


From 423f04d63cf421ea436bcc5be02543d549ce4b28 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Mon, 27 Jul 2015 11:48:52 +1000
Subject: md/raid1: extend spinlock to protect raid1_end_read_request against inconsistencies

raid1_end_read_request() assumes that the In_sync bits are consistent
with the ->degraded count.

raid1_spare_active() updates the In_sync bit before the ->degraded
count, and so exposes an inconsistency, as does error().

So extend the spinlock in raid1_spare_active() and error() to hide those
inconsistencies.

This should probably be part of commit 34cab6f42003 ("md/raid1: fix test
for 'was read error from last working device'.") as it addresses the
same issue.  It fixes the same bug and should go to -stable for the same
reasons.

Fixes: 76073054c95b ("md/raid1: clean up read_balance.")
Cc: stable@vger.kernel.org (v3.0+)
Signed-off-by: NeilBrown
---
 drivers/md/raid1.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 94f5b55..967a4ed 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1476,6 +1476,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r1conf *conf = mddev->private;
+	unsigned long flags;
 
 	/*
 	 * If it is not operational, then we have already marked it as dead
@@ -1495,14 +1496,13 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 		return;
 	}
 	set_bit(Blocked, &rdev->flags);
+	spin_lock_irqsave(&conf->device_lock, flags);
 	if (test_and_clear_bit(In_sync, &rdev->flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(&conf->device_lock, flags);
 		mddev->degraded++;
 		set_bit(Faulty, &rdev->flags);
-		spin_unlock_irqrestore(&conf->device_lock, flags);
 	} else
 		set_bit(Faulty, &rdev->flags);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 	/*
 	 * if recovery is running, make sure it aborts.
 	 */
@@ -1568,7 +1568,10 @@ static int raid1_spare_active(struct mddev *mddev)
 	 * Find all failed disks within the RAID1 configuration
 	 * and mark them readable.
 	 * Called under mddev lock, so rcu protection not needed.
+	 * device_lock used to avoid races with raid1_end_read_request
+	 * which expects 'In_sync' flags and ->degraded to be consistent.
 	 */
+	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i = 0; i < conf->raid_disks; i++) {
 		struct md_rdev *rdev = conf->mirrors[i].rdev;
 		struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
@@ -1599,7 +1602,6 @@ static int raid1_spare_active(struct mddev *mddev)
 			sysfs_notify_dirent_safe(rdev->sysfs_state);
 		}
 	}
-	spin_lock_irqsave(&conf->device_lock, flags);
 	mddev->degraded -= count;
 	spin_unlock_irqrestore(&conf->device_lock, flags);

--
cgit v1.1


From b6878d9e03043695dbf3fa1caa6dfc09db225b16 Mon Sep 17 00:00:00 2001
From: Benjamin Randazzo
Date: Sat, 25 Jul 2015 16:36:50 +0200
Subject: md: use kzalloc() when bitmap is disabled

In drivers/md/md.c get_bitmap_file() uses kmalloc() for creating a
mdu_bitmap_file_t called "file".

  5769         file = kmalloc(sizeof(*file), GFP_NOIO);
  5770         if (!file)
  5771                 return -ENOMEM;

This structure is copied to user space at the end of the function.

  5786         if (err == 0 &&
  5787             copy_to_user(arg, file, sizeof(*file)))
  5788                 err = -EFAULT;

But if the bitmap is disabled, only the first byte of "file" is
initialized with zero, so it's possible to read some bytes (up to 4095)
of kernel space memory from user space.  This is an information leak.

  5775         /* bitmap disabled, zero the first byte and copy out */
  5776         if (!mddev->bitmap_info.file)
  5777                 file->pathname[0] = '\0';

Signed-off-by: Benjamin Randazzo
Signed-off-by: NeilBrown
---
 drivers/md/md.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0c2a4e8..e25f00f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5759,7 +5759,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg)
 	char *ptr;
 	int err;
 
-	file = kmalloc(sizeof(*file), GFP_NOIO);
+	file = kzalloc(sizeof(*file), GFP_NOIO);
 	if (!file)
 		return -ENOMEM;

--
cgit v1.1


From 49895bcc7e566ba455eb2996607d6fbd3447ce16 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Mon, 3 Aug 2015 17:09:57 +1000
Subject: md/raid5: don't let shrink_slab shrink too far.

I have a report of drop_one_stripe() called from raid5_cache_scan()
apparently finding ->max_nr_stripes == 0.  This should not be allowed,
so add a test to keep max_nr_stripes above min_nr_stripes.

Also use a 'mask' rather than a 'mod' in drop_one_stripe to ensure
'hash' is valid even if max_nr_stripes does reach zero.
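[Aside, not part of NeilBrown's patch: the mask form is safe because C's
signed '%' truncates toward zero, so with max_nr_stripes == 0 the old
expression computed (0 - 1) % NR_STRIPE_HASH_LOCKS == -1, an out-of-range
lock index, while (0 - 1) & STRIPE_HASH_LOCKS_MASK wraps to the top
bucket.  A minimal userspace sketch; the value 8 for NR_STRIPE_HASH_LOCKS
(a power of two) is an assumption matching raid5.h of this era:

  #include <stdio.h>

  #define NR_STRIPE_HASH_LOCKS 8
  #define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1)

  int main(void)
  {
          int max_nr_stripes = 0; /* the reported pathological state */

          /* signed '%' preserves the sign: yields -1, an invalid index */
          int hash_mod = (max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;

          /* '&' with a power-of-two mask always lands in 0..7 */
          int hash_mask = (max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;

          printf("mod: %d, mask: %d\n", hash_mod, hash_mask); /* mod: -1, mask: 7 */
          return 0;
  }
]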
Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.")
Cc: stable@vger.kernel.org (4.1 - please release with 2d5b569b665)
Reported-by: Tomas Papan
Signed-off-by: NeilBrown
---
 drivers/md/raid5.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 643d217..f757023 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2256,7 +2256,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 static int drop_one_stripe(struct r5conf *conf)
 {
 	struct stripe_head *sh;
-	int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
+	int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
 
 	spin_lock_irq(conf->hash_locks + hash);
 	sh = get_free_stripe(conf, hash);
@@ -6388,7 +6388,8 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink,
 
 	if (mutex_trylock(&conf->cache_size_mutex)) {
 		ret= 0;
-		while (ret < sc->nr_to_scan) {
+		while (ret < sc->nr_to_scan &&
+		       conf->max_nr_stripes > conf->min_nr_stripes) {
 			if (drop_one_stripe(conf) == 0) {
 				ret = SHRINK_STOP;
 				break;

--
cgit v1.1


From bd4aaf8f9b85d6b2df3231fd62b219ebb75d3568 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Mon, 3 Aug 2015 09:54:58 -0400
Subject: dm: fix dm_merge_bvec regression on 32 bit systems

A DM regression on 32 bit systems was reported against v4.2-rc3 here:
https://lkml.org/lkml/2015/7/29/401

Fix this by reverting both commit 1c220c69 ("dm: fix casting bug in
dm_merge_bvec()") and commit 148e51ba ("dm: improve documentation and
code clarity in dm_merge_bvec").

This combined revert is done to eliminate the possibility of a partial
revert in stable@ kernels.

In hindsight the correct fix, at the time 1c220c69 was applied to fix
the regression that 148e51ba introduced, should've been to simply revert
148e51ba.

Reported-by: Josh Boyer
Tested-by: Adam Williamson
Acked-by: Joe Thornber
Signed-off-by: Mike Snitzer
Cc: stable@vger.kernel.org # 3.19+
---
 drivers/md/dm.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ab37ae1..0d7ab20 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1729,7 +1729,8 @@ static int dm_merge_bvec(struct request_queue *q,
 	struct mapped_device *md = q->queuedata;
 	struct dm_table *map = dm_get_live_table_fast(md);
 	struct dm_target *ti;
-	sector_t max_sectors, max_size = 0;
+	sector_t max_sectors;
+	int max_size = 0;
 
 	if (unlikely(!map))
 		goto out;
@@ -1742,18 +1743,10 @@ static int dm_merge_bvec(struct request_queue *q,
 	 * Find maximum amount of I/O that won't need splitting
 	 */
 	max_sectors = min(max_io_len(bvm->bi_sector, ti),
-			  (sector_t) queue_max_sectors(q));
+			  (sector_t) BIO_MAX_SECTORS);
 	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-
-	/*
-	 * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
-	 * to the targets' merge function since it holds sectors not bytes).
-	 * Just doing this as an interim fix for stable@ because the more
-	 * comprehensive cleanup of switching to sector_t will impact every
-	 * DM target that implements a ->merge hook.
-	 */
-	if (max_size > INT_MAX)
-		max_size = INT_MAX;
+	if (max_size < 0)
+		max_size = 0;
 
 	/*
 	 * merge_bvec_fn() returns number of bytes
 	 * max is precomputed maximal io size
 	 */
 	if (max_size && ti->type->merge)
-		max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
+		max_size = ti->type->merge(ti, bvm, biovec, max_size);
 	/*
 	 * If the target doesn't support merge method and some of the devices
-	 * provided their merge_bvec method (we know this by looking for the
-	 * max_hw_sectors that dm_set_device_limits may set), then we can't
-	 * allow bios with multiple vector entries.  So always set max_size
-	 * to 0, and the code below allows just one page.
+	 * provided their merge_bvec method (we know this by looking at
+	 * queue_max_hw_sectors), then we can't allow bios with multiple vector
+	 * entries.  So always set max_size to 0, and the code below allows
+	 * just one page.
 	 */
 	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
 		max_size = 0;

--
cgit v1.1


From aa0cd28d057fd4cb686fbdd2475a6a3f609dd581 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Fri, 7 Aug 2015 16:33:01 +0100
Subject: dm btree remove: fix bug in remove_one()

remove_one() was not incrementing the key for the beginning of the
range, so not all entries were being removed.  This resulted in
discards that were not unmapping all blocks.

Fixes: 4ec331c3ea ("dm btree: add dm_btree_remove_leaves()")
Signed-off-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/persistent-data/dm-btree-remove.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/md')

diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index 9836c0a..9ca9ecc 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -689,6 +689,7 @@ static int remove_one(struct dm_btree_info *info, dm_block_t root,
 					     value_ptr(n, index));
 
 		delete_at(n, index);
+		keys[last_level] = k + 1ull;
 	} else
 		r = -ENODATA;

--
cgit v1.1
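[Aside, not part of Joe Thornber's patch: the one-line fix turns
remove_one()'s repeated single-entry removal into a proper cursor-style
range delete -- after each successful delete_at(), the search key is
bumped past the entry just removed, which is what keys[last_level] =
k + 1ull does.  A standalone sketch of that pattern against a flat
sorted array (a hypothetical stand-in, not the dm-btree API):

  #include <stdio.h>

  #define END_OF_RANGE 14 /* delete keys in [begin, END_OF_RANGE) */

  static unsigned keys[] = { 10, 11, 12, 13, 20 };
  static unsigned nkeys = sizeof(keys) / sizeof(keys[0]);

  /* Remove the first key k with *begin <= k < end; returns 0 on
   * success, -1 when nothing is left in the range (the -ENODATA case). */
  static int remove_next(unsigned *begin, unsigned end)
  {
          for (unsigned i = 0; i < nkeys; i++) {
                  if (keys[i] >= *begin && keys[i] < end) {
                          unsigned k = keys[i];

                          for (unsigned j = i; j + 1 < nkeys; j++)
                                  keys[j] = keys[j + 1];
                          nkeys--;

                          /* the step remove_one() was missing: advance
                           * the cursor past the entry just deleted */
                          *begin = k + 1;
                          return 0;
                  }
          }
          return -1;
  }

  int main(void)
  {
          unsigned begin = 10;

          while (remove_next(&begin, END_OF_RANGE) == 0)
                  ;

          for (unsigned i = 0; i < nkeys; i++)
                  printf("%u ", keys[i]); /* prints: 20 */
          printf("\n");
          return 0;
  }

In this flat-array stand-in the advance is merely the natural cursor
step; per the commit message, in the real btree a stale start key meant
not all entries in the range were removed, leaving blocks mapped after a
discard.]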