From 0a927c2f02a2437a57679527e42ab7cbfa14efb1 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Tue, 21 Jul 2015 13:20:46 -0400
Subject: dm thin: return -ENOSPC when erroring retry list due to out of data space

Otherwise -EIO would be returned when -ENOSPC should be used
consistently.

Signed-off-by: Mike Snitzer
---
 drivers/md/dm-thin.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 1c50c58..d2bbe8c 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -666,16 +666,21 @@ static void requeue_io(struct thin_c *tc)
 	requeue_deferred_cells(tc);
 }
 
-static void error_retry_list(struct pool *pool)
+static void error_retry_list_with_code(struct pool *pool, int error)
 {
 	struct thin_c *tc;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(tc, &pool->active_thins, list)
-		error_thin_bio_list(tc, &tc->retry_on_resume_list, -EIO);
+		error_thin_bio_list(tc, &tc->retry_on_resume_list, error);
 	rcu_read_unlock();
 }
 
+static void error_retry_list(struct pool *pool)
+{
+	return error_retry_list_with_code(pool, -EIO);
+}
+
 /*
  * This section of code contains the logic for processing a thin device's IO.
  * Much of the code depends on pool object resources (lists, workqueues, etc)
@@ -2297,7 +2302,7 @@ static void do_no_space_timeout(struct work_struct *ws)
 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 		pool->pf.error_if_no_space = true;
 		notify_of_pool_mode_change_to_oods(pool);
-		error_retry_list(pool);
+		error_retry_list_with_code(pool, -ENOSPC);
 	}
 }

--
cgit v1.1


From 134bf30c06f057d6b8d90132e8f8b3cd2be79572 Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Thu, 23 Jul 2015 16:47:59 +0100
Subject: dm cache policy smq: fix alloc_bitset check that always evaluates as false

Static analysis by cppcheck has found a check on alloc_bitset that
always evaluates as false and hence never finds an allocation failure:

  [drivers/md/dm-cache-policy-smq.c:1689]: (warning) Logical conjunction
  always evaluates to false: !EXPR && EXPR.

Fix this by removing the incorrect mq->cache_hit_bits check.

Signed-off-by: Colin Ian King
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-policy-smq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index b6f2265..48a4a82 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1686,7 +1686,7 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
 
 	if (from_cblock(cache_size)) {
 		mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
-		if (!mq->cache_hit_bits && mq->cache_hit_bits) {
+		if (!mq->cache_hit_bits) {
 			DMERR("couldn't allocate cache hit bitset");
 			goto bad_cache_hit_bits;
 		}

--
cgit v1.1


From 6ed443c07f1e7f819983422b16598facb152250d Mon Sep 17 00:00:00 2001
From: Baruch Siach
Date: Sun, 5 Jul 2015 09:55:44 +0300
Subject: dm crypt: update wiki page URL

Cryptsetup moved to GitLab.  This is a leftover from commit e44f23b32dc7
(dm crypt: update URLs to new cryptsetup project page, 2015-04-05).

Signed-off-by: Baruch Siach
Signed-off-by: Mike Snitzer
---
 drivers/md/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b597273..bfec3bd 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -259,7 +259,7 @@ config DM_CRYPT
 	  the ciphers you're going to use in the cryptoapi configuration.
 
 	  For further information on dm-crypt and userspace tools see:
-	  <http://code.google.com/p/cryptsetup/wiki/DMCrypt>
+	  <https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt>
 
 	  To compile this code as a module, choose M here: the module
 	  will be called dm-crypt.

--
cgit v1.1


From 3508e6590d4729ac07f01f7ae2256c2f9dc738b8 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Wed, 29 Jul 2015 14:11:28 -0400
Subject: Revert "dm cache: do not wake_worker() in free_migration()"

This reverts commit 386cb7cdeeef97e0bf082a8d6bbfc07a2ccce07b.

Taking the wake_worker() out of free_migration() will slow writeback
dramatically, and hence the cache's adaptability.  Say we have 10k
blocks that need writing back, but are only able to issue 5
concurrently due to the migration bandwidth: it's imperative that we
wake_worker() immediately after migration completion; waiting for the
next 1 second wake up (via do_waker) means it'll take a long time to
write that all back.

Reported-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-target.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index b680da5..64e96a2 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -424,6 +424,7 @@ static void free_migration(struct dm_cache_migration *mg)
 		wake_up(&cache->migration_wait);
 
 	mempool_free(mg, cache->migration_pool);
+	wake_worker(cache);
 }
 
 static int prealloc_data_structs(struct cache *cache, struct prealloc *p)

--
cgit v1.1


From 795e633a2dc6cbbeac68bc7f6006082150d38bb7 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Wed, 29 Jul 2015 13:48:23 -0400
Subject: dm cache: fix device destroy hang due to improper prealloc_used accounting

Commit 665022d72f9 ("dm cache: avoid calls to prealloc_free_structs() if
possible") introduced a regression that caused the removal of a DM cache
device to hang in cache_postsuspend()'s call to wait_for_migrations()
with the following stack trace:

  [] schedule+0x37/0x80
  [] cache_postsuspend+0xbb/0x470 [dm_cache]
  [] ? prepare_to_wait_event+0xf0/0xf0
  [] dm_table_postsuspend_targets+0x47/0x60 [dm_mod]
  [] __dm_destroy+0x215/0x250 [dm_mod]
  [] dm_destroy+0x13/0x20 [dm_mod]
  [] dev_remove+0x10d/0x170 [dm_mod]
  [] ? dev_suspend+0x240/0x240 [dm_mod]
  [] ctl_ioctl+0x255/0x4d0 [dm_mod]
  [] ? SYSC_semtimedop+0x280/0xe10
  [] dm_ctl_ioctl+0x13/0x20 [dm_mod]
  [] do_vfs_ioctl+0x2d2/0x4b0
  [] ? __audit_syscall_entry+0xaf/0x100
  [] ? do_audit_syscall_entry+0x66/0x70
  [] SyS_ioctl+0x79/0x90
  [] ? syscall_trace_leave+0xb8/0x110
  [] entry_SYSCALL_64_fastpath+0x12/0x71

Fix this by accounting for the call to prealloc_data_structs()
immediately _before_ the call as opposed to after.  This is needed
because it is possible to break out of the control loop after the call
to prealloc_data_structs() but before prealloc_used was set to true.

Signed-off-by: Mike Snitzer
---
 drivers/md/dm-cache-target.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 64e96a2..1fe93cf 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1967,6 +1967,7 @@ static void process_deferred_bios(struct cache *cache)
 		 * this bio might require one, we pause until there are some
 		 * prepared mappings to process.
 		 */
+		prealloc_used = true;
 		if (prealloc_data_structs(cache, &structs)) {
 			spin_lock_irqsave(&cache->lock, flags);
 			bio_list_merge(&cache->deferred_bios, &bios);
@@ -1982,7 +1983,6 @@ static void process_deferred_bios(struct cache *cache)
 			process_discard_bio(cache, &structs, bio);
 		else
 			process_bio(cache, &structs, bio);
-		prealloc_used = true;
 	}
 
 	if (prealloc_used)
@@ -2011,6 +2011,7 @@ static void process_deferred_cells(struct cache *cache)
 		 * this bio might require one, we pause until there are some
 		 * prepared mappings to process.
 		 */
+		prealloc_used = true;
 		if (prealloc_data_structs(cache, &structs)) {
 			spin_lock_irqsave(&cache->lock, flags);
 			list_splice(&cells, &cache->deferred_cells);
@@ -2019,7 +2020,6 @@
 		}
 
 		process_cell(cache, &structs, cell);
-		prealloc_used = true;
 	}
 
 	if (prealloc_used)
@@ -2081,6 +2081,7 @@ static void writeback_some_dirty_blocks(struct cache *cache)
 		if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
 			break; /* no work to do */
 
+		prealloc_used = true;
 		if (prealloc_data_structs(cache, &structs) ||
 		    get_cell(cache, oblock, &structs, &old_ocell)) {
 			policy_set_dirty(cache->policy, oblock);
@@ -2088,7 +2089,6 @@
 		}
 
 		writeback(cache, &structs, oblock, cblock, old_ocell);
-		prealloc_used = true;
 	}
 
 	if (prealloc_used)

--
cgit v1.1


From 423f04d63cf421ea436bcc5be02543d549ce4b28 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Mon, 27 Jul 2015 11:48:52 +1000
Subject: md/raid1: extend spinlock to protect raid1_end_read_request against inconsistencies

raid1_end_read_request() assumes that the In_sync bits are consistent
with the ->degraded count.

raid1_spare_active() updates the In_sync bit before the ->degraded
count, and so exposes an inconsistency, as does error().

So extend the spinlock in raid1_spare_active() and error() to hide those
inconsistencies.

This should probably be part of commit 34cab6f42003 ("md/raid1: fix test
for 'was read error from last working device'.") as it addresses the
same issue.  It fixes the same bug and should go to -stable for the same
reasons.

Fixes: 76073054c95b ("md/raid1: clean up read_balance.")
Cc: stable@vger.kernel.org (v3.0+)
Signed-off-by: NeilBrown
---
 drivers/md/raid1.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 94f5b55..967a4ed 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1476,6 +1476,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r1conf *conf = mddev->private;
+	unsigned long flags;
 
 	/*
 	 * If it is not operational, then we have already marked it as dead
@@ -1495,14 +1496,13 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 		return;
 	}
 	set_bit(Blocked, &rdev->flags);
+	spin_lock_irqsave(&conf->device_lock, flags);
 	if (test_and_clear_bit(In_sync, &rdev->flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(&conf->device_lock, flags);
 		mddev->degraded++;
 		set_bit(Faulty, &rdev->flags);
-		spin_unlock_irqrestore(&conf->device_lock, flags);
 	} else
 		set_bit(Faulty, &rdev->flags);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 	/*
 	 * if recovery is running, make sure it aborts.
 	 */
@@ -1568,7 +1568,10 @@ static int raid1_spare_active(struct mddev *mddev)
 	 * Find all failed disks within the RAID1 configuration
 	 * and mark them readable.
 	 * Called under mddev lock, so rcu protection not needed.
+	 * device_lock used to avoid races with raid1_end_read_request
+	 * which expects 'In_sync' flags and ->degraded to be consistent.
 	 */
+	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i = 0; i < conf->raid_disks; i++) {
 		struct md_rdev *rdev = conf->mirrors[i].rdev;
 		struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
@@ -1599,7 +1602,6 @@ static int raid1_spare_active(struct mddev *mddev)
 			sysfs_notify_dirent_safe(rdev->sysfs_state);
 		}
 	}
-	spin_lock_irqsave(&conf->device_lock, flags);
 	mddev->degraded -= count;
 	spin_unlock_irqrestore(&conf->device_lock, flags);

--
cgit v1.1


From b6878d9e03043695dbf3fa1caa6dfc09db225b16 Mon Sep 17 00:00:00 2001
From: Benjamin Randazzo
Date: Sat, 25 Jul 2015 16:36:50 +0200
Subject: md: use kzalloc() when bitmap is disabled

In drivers/md/md.c get_bitmap_file() uses kmalloc() for creating a
mdu_bitmap_file_t called "file".

  5769         file = kmalloc(sizeof(*file), GFP_NOIO);
  5770         if (!file)
  5771                 return -ENOMEM;

This structure is copied to user space at the end of the function.

  5786         if (err == 0 &&
  5787             copy_to_user(arg, file, sizeof(*file)))
  5788                 err = -EFAULT;

But if the bitmap is disabled, only the first byte of "file" is
initialized with zero, so it's possible to read some bytes (up to 4095)
of kernel space memory from user space.  This is an information leak.

  5775         /* bitmap disabled, zero the first byte and copy out */
  5776         if (!mddev->bitmap_info.file)
  5777                 file->pathname[0] = '\0';

Signed-off-by: Benjamin Randazzo
Signed-off-by: NeilBrown
---
 drivers/md/md.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0c2a4e8..e25f00f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5759,7 +5759,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg)
 	char *ptr;
 	int err;
 
-	file = kmalloc(sizeof(*file), GFP_NOIO);
+	file = kzalloc(sizeof(*file), GFP_NOIO);
 	if (!file)
 		return -ENOMEM;

--
cgit v1.1


From 49895bcc7e566ba455eb2996607d6fbd3447ce16 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Mon, 3 Aug 2015 17:09:57 +1000
Subject: md/raid5: don't let shrink_slab shrink too far.

I have a report of drop_one_stripe() called from raid5_cache_scan()
apparently finding ->max_nr_stripes == 0.  This should not be allowed,
so add a test to keep max_nr_stripes above min_nr_stripes.

Also use a 'mask' rather than a 'mod' in drop_one_stripe to ensure
'hash' is valid even if max_nr_stripes does reach zero.
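[Aside, not part of NeilBrown's patch: the mask form is safe because C's
signed '%' truncates toward zero, so with max_nr_stripes == 0 the old
expression computed (0 - 1) % NR_STRIPE_HASH_LOCKS == -1, an out-of-range
lock index, while (0 - 1) & STRIPE_HASH_LOCKS_MASK wraps to the top
bucket.  A minimal userspace sketch; the value 8 for NR_STRIPE_HASH_LOCKS
(a power of two) is an assumption matching raid5.h of this era:

  #include <stdio.h>

  #define NR_STRIPE_HASH_LOCKS 8
  #define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1)

  int main(void)
  {
          int max_nr_stripes = 0; /* the reported pathological state */

          /* signed '%' preserves the sign: yields -1, an invalid index */
          int hash_mod = (max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;

          /* '&' with a power-of-two mask always lands in 0..7 */
          int hash_mask = (max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;

          printf("mod: %d, mask: %d\n", hash_mod, hash_mask); /* mod: -1, mask: 7 */
          return 0;
  }
]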
Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.")
Cc: stable@vger.kernel.org (4.1 - please release with 2d5b569b665)
Reported-by: Tomas Papan
Signed-off-by: NeilBrown
---
 drivers/md/raid5.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 643d217..f757023 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2256,7 +2256,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 static int drop_one_stripe(struct r5conf *conf)
 {
 	struct stripe_head *sh;
-	int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
+	int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
 
 	spin_lock_irq(conf->hash_locks + hash);
 	sh = get_free_stripe(conf, hash);
@@ -6388,7 +6388,8 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink,
 
 	if (mutex_trylock(&conf->cache_size_mutex)) {
 		ret= 0;
-		while (ret < sc->nr_to_scan) {
+		while (ret < sc->nr_to_scan &&
+		       conf->max_nr_stripes > conf->min_nr_stripes) {
 			if (drop_one_stripe(conf) == 0) {
 				ret = SHRINK_STOP;
 				break;

--
cgit v1.1


From bd4aaf8f9b85d6b2df3231fd62b219ebb75d3568 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Mon, 3 Aug 2015 09:54:58 -0400
Subject: dm: fix dm_merge_bvec regression on 32 bit systems

A DM regression on 32 bit systems was reported against v4.2-rc3 here:
https://lkml.org/lkml/2015/7/29/401

Fix this by reverting both commit 1c220c69 ("dm: fix casting bug in
dm_merge_bvec()") and commit 148e51ba ("dm: improve documentation and
code clarity in dm_merge_bvec").

This combined revert is done to eliminate the possibility of a partial
revert in stable@ kernels.

In hindsight the correct fix, at the time 1c220c69 was applied to fix
the regression that 148e51ba introduced, should've been to simply revert
148e51ba.

Reported-by: Josh Boyer
Tested-by: Adam Williamson
Acked-by: Joe Thornber
Signed-off-by: Mike Snitzer
Cc: stable@vger.kernel.org # 3.19+
---
 drivers/md/dm.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ab37ae1..0d7ab20 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1729,7 +1729,8 @@ static int dm_merge_bvec(struct request_queue *q,
 	struct mapped_device *md = q->queuedata;
 	struct dm_table *map = dm_get_live_table_fast(md);
 	struct dm_target *ti;
-	sector_t max_sectors, max_size = 0;
+	sector_t max_sectors;
+	int max_size = 0;
 
 	if (unlikely(!map))
 		goto out;
@@ -1742,18 +1743,10 @@ static int dm_merge_bvec(struct request_queue *q,
 	 * Find maximum amount of I/O that won't need splitting
 	 */
 	max_sectors = min(max_io_len(bvm->bi_sector, ti),
-			  (sector_t) queue_max_sectors(q));
+			  (sector_t) BIO_MAX_SECTORS);
 	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-
-	/*
-	 * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
-	 * to the targets' merge function since it holds sectors not bytes).
-	 * Just doing this as an interim fix for stable@ because the more
-	 * comprehensive cleanup of switching to sector_t will impact every
-	 * DM target that implements a ->merge hook.
-	 */
-	if (max_size > INT_MAX)
-		max_size = INT_MAX;
+	if (max_size < 0)
+		max_size = 0;
 
 	/*
 	 * merge_bvec_fn() returns number of bytes
 	 * max is precomputed maximal io size
 	 */
 	if (max_size && ti->type->merge)
-		max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
+		max_size = ti->type->merge(ti, bvm, biovec, max_size);
 	/*
 	 * If the target doesn't support merge method and some of the devices
-	 * provided their merge_bvec method (we know this by looking for the
-	 * max_hw_sectors that dm_set_device_limits may set), then we can't
-	 * allow bios with multiple vector entries.  So always set max_size
-	 * to 0, and the code below allows just one page.
+	 * provided their merge_bvec method (we know this by looking at
+	 * queue_max_hw_sectors), then we can't allow bios with multiple vector
+	 * entries.  So always set max_size to 0, and the code below allows
+	 * just one page.
 	 */
 	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
 		max_size = 0;

--
cgit v1.1


From aa0cd28d057fd4cb686fbdd2475a6a3f609dd581 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Fri, 7 Aug 2015 16:33:01 +0100
Subject: dm btree remove: fix bug in remove_one()

remove_one() was not incrementing the key for the beginning of the
range, so not all entries were being removed.  This resulted in
discards that were not unmapping all blocks.

Fixes: 4ec331c3ea ("dm btree: add dm_btree_remove_leaves()")
Signed-off-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/persistent-data/dm-btree-remove.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/md')

diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index 9836c0a..9ca9ecc 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -689,6 +689,7 @@ static int remove_one(struct dm_btree_info *info, dm_block_t root,
 					     value_ptr(n, index));
 
 		delete_at(n, index);
+		keys[last_level] = k + 1ull;
 	} else
 		r = -ENODATA;

--
cgit v1.1
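[Aside, not part of Joe Thornber's patch: the one-line fix turns
remove_one()'s repeated single-entry removal into a proper cursor-style
range delete -- after each successful delete_at(), the search key is
bumped past the entry just removed, which is what keys[last_level] =
k + 1ull does.  A standalone sketch of that pattern against a flat
sorted array (a hypothetical stand-in, not the dm-btree API):

  #include <stdio.h>

  #define END_OF_RANGE 14 /* delete keys in [begin, END_OF_RANGE) */

  static unsigned keys[] = { 10, 11, 12, 13, 20 };
  static unsigned nkeys = sizeof(keys) / sizeof(keys[0]);

  /* Remove the first key k with *begin <= k < end; returns 0 on
   * success, -1 when nothing is left in the range (the -ENODATA case). */
  static int remove_next(unsigned *begin, unsigned end)
  {
          for (unsigned i = 0; i < nkeys; i++) {
                  if (keys[i] >= *begin && keys[i] < end) {
                          unsigned k = keys[i];

                          for (unsigned j = i; j + 1 < nkeys; j++)
                                  keys[j] = keys[j + 1];
                          nkeys--;

                          /* the step remove_one() was missing: advance
                           * the cursor past the entry just deleted */
                          *begin = k + 1;
                          return 0;
                  }
          }
          return -1;
  }

  int main(void)
  {
          unsigned begin = 10;

          while (remove_next(&begin, END_OF_RANGE) == 0)
                  ;

          for (unsigned i = 0; i < nkeys; i++)
                  printf("%u ", keys[i]); /* prints: 20 */
          printf("\n");
          return 0;
  }

In this flat-array stand-in the advance is merely the natural cursor
step; per the commit message, in the real btree a stale start key meant
not all entries in the range were removed, leaving blocks mapped after a
discard.]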