diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/Kconfig | 10 | ||||
-rw-r--r-- | drivers/md/dm-bufio.c | 28 | ||||
-rw-r--r-- | drivers/md/dm-cache-metadata.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-cache-policy-smq.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 214 | ||||
-rw-r--r-- | drivers/md/dm-flakey.c | 53 | ||||
-rw-r--r-- | drivers/md/dm-io.c | 34 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 42 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 82 | ||||
-rw-r--r-- | drivers/md/dm-rq.c | 18 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 43 | ||||
-rw-r--r-- | drivers/md/dm-verity-target.c | 2 | ||||
-rw-r--r-- | drivers/md/dm.c | 4 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-array.c | 2 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 19 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 4 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 14 |
19 files changed, 406 insertions, 173 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 02a5345..b7767da 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -240,9 +240,17 @@ config DM_BUFIO as a cache, holding recently-read blocks in memory and performing delayed writes. +config DM_DEBUG_BLOCK_MANAGER_LOCKING + bool "Block manager locking" + depends on DM_BUFIO + ---help--- + Block manager locking can catch various metadata corruption issues. + + If unsure, say N. + config DM_DEBUG_BLOCK_STACK_TRACING bool "Keep stack trace of persistent data block lock holders" - depends on STACKTRACE_SUPPORT && DM_BUFIO + depends on STACKTRACE_SUPPORT && DM_DEBUG_BLOCK_MANAGER_LOCKING select STACKTRACE ---help--- Enable this for messages that may help debug problems with the diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 262e753..84d2f0e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -820,12 +820,14 @@ enum new_flag { static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf) { struct dm_buffer *b; + bool tried_noio_alloc = false; /* * dm-bufio is resistant to allocation failures (it just keeps * one buffer reserved in cases all the allocations fail). * So set flags to not try too hard: - * GFP_NOIO: don't recurse into the I/O layer + * GFP_NOWAIT: don't wait; if we need to sleep we'll release our + * mutex and wait ourselves. * __GFP_NORETRY: don't retry and rather return failure * __GFP_NOMEMALLOC: don't use emergency reserves * __GFP_NOWARN: don't print a warning in case of failure @@ -835,7 +837,7 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client */ while (1) { if (dm_bufio_cache_size_latch != 1) { - b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); if (b) return b; } @@ -843,6 +845,15 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client if (nf == NF_PREFETCH) return NULL; + if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) { + dm_bufio_unlock(c); + b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + dm_bufio_lock(c); + if (b) + return b; + tried_noio_alloc = true; + } + if (!list_empty(&c->reserved_buffers)) { b = list_entry(c->reserved_buffers.next, struct dm_buffer, lru_list); @@ -1585,18 +1596,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - struct dm_bufio_client *c; - unsigned long count; - - c = container_of(shrink, struct dm_bufio_client, shrinker); - if (sc->gfp_mask & __GFP_FS) - dm_bufio_lock(c); - else if (!dm_bufio_trylock(c)) - return 0; + struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); - count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; - dm_bufio_unlock(c); - return count; + return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]); } /* diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 6955778..624fe43 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -383,7 +383,6 @@ static int __format_metadata(struct dm_cache_metadata *cmd) goto bad; dm_disk_bitset_init(cmd->tm, &cmd->discard_info); - r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); if (r < 0) goto bad; @@ -789,7 +788,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) { if (cmd->data_block_size != data_block_size) { - DMERR("data_block_size (%llu) different from that in metadata (%llu)\n", + DMERR("data_block_size (%llu) different from that in metadata (%llu)", (unsigned long long) data_block_size, (unsigned long long) cmd->data_block_size); return false; diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index c33f4a6..f19c6930 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1361,7 +1361,7 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) static unsigned random_level(dm_cblock_t cblock) { - return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); + return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); } static int smq_load_mapping(struct dm_cache_policy *p, diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 59b2c50..e04c61e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -989,7 +989,8 @@ static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mod enum cache_metadata_mode old_mode = get_cache_mode(cache); if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) { - DMERR("unable to read needs_check flag, setting failure mode"); + DMERR("%s: unable to read needs_check flag, setting failure mode.", + cache_device_name(cache)); new_mode = CM_FAIL; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 68a9eb4..7c6c572 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/key.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/mempool.h> @@ -23,12 +24,14 @@ #include <linux/atomic.h> #include <linux/scatterlist.h> #include <linux/rbtree.h> +#include <linux/ctype.h> #include <asm/page.h> #include <asm/unaligned.h> #include <crypto/hash.h> #include <crypto/md5.h> #include <crypto/algapi.h> #include <crypto/skcipher.h> +#include <keys/user-type.h> #include <linux/device-mapper.h> @@ -140,8 +143,9 @@ struct crypt_config { char *cipher; char *cipher_string; + char *key_string; - struct crypt_iv_operations *iv_gen_ops; + const struct crypt_iv_operations *iv_gen_ops; union { struct iv_essiv_private essiv; struct iv_benbi_private benbi; @@ -758,15 +762,15 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, return r; } -static struct crypt_iv_operations crypt_iv_plain_ops = { +static const struct crypt_iv_operations crypt_iv_plain_ops = { .generator = crypt_iv_plain_gen }; -static struct crypt_iv_operations crypt_iv_plain64_ops = { +static const struct crypt_iv_operations crypt_iv_plain64_ops = { .generator = crypt_iv_plain64_gen }; -static struct crypt_iv_operations crypt_iv_essiv_ops = { +static const struct crypt_iv_operations crypt_iv_essiv_ops = { .ctr = crypt_iv_essiv_ctr, .dtr = crypt_iv_essiv_dtr, .init = crypt_iv_essiv_init, @@ -774,17 +778,17 @@ static struct crypt_iv_operations crypt_iv_essiv_ops = { .generator = crypt_iv_essiv_gen }; -static struct crypt_iv_operations crypt_iv_benbi_ops = { +static const struct crypt_iv_operations crypt_iv_benbi_ops = { .ctr = crypt_iv_benbi_ctr, .dtr = crypt_iv_benbi_dtr, .generator = crypt_iv_benbi_gen }; -static struct crypt_iv_operations crypt_iv_null_ops = { +static const struct crypt_iv_operations crypt_iv_null_ops = { .generator = crypt_iv_null_gen }; -static struct crypt_iv_operations crypt_iv_lmk_ops = { +static const struct crypt_iv_operations crypt_iv_lmk_ops = { .ctr = crypt_iv_lmk_ctr, .dtr = crypt_iv_lmk_dtr, .init = crypt_iv_lmk_init, @@ -793,7 +797,7 @@ static struct crypt_iv_operations crypt_iv_lmk_ops = { .post = crypt_iv_lmk_post }; -static struct crypt_iv_operations crypt_iv_tcw_ops = { +static const struct crypt_iv_operations crypt_iv_tcw_ops = { .ctr = crypt_iv_tcw_ctr, .dtr = crypt_iv_tcw_dtr, .init = crypt_iv_tcw_init, @@ -994,7 +998,6 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; unsigned i, len, remaining_size; struct page *page; - struct bio_vec *bvec; retry: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) @@ -1019,12 +1022,7 @@ retry: len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size; - bvec = &clone->bi_io_vec[clone->bi_vcnt++]; - bvec->bv_page = page; - bvec->bv_len = len; - bvec->bv_offset = 0; - - clone->bi_iter.bi_size += len; + bio_add_page(clone, page, len, 0); remaining_size -= len; } @@ -1471,7 +1469,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) return 0; } -static int crypt_setkey_allcpus(struct crypt_config *cc) +static int crypt_setkey(struct crypt_config *cc) { unsigned subkey_size; int err = 0, i, r; @@ -1490,25 +1488,157 @@ static int crypt_setkey_allcpus(struct crypt_config *cc) return err; } +#ifdef CONFIG_KEYS + +static bool contains_whitespace(const char *str) +{ + while (*str) + if (isspace(*str++)) + return true; + return false; +} + +static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string) +{ + char *new_key_string, *key_desc; + int ret; + struct key *key; + const struct user_key_payload *ukp; + + /* + * Reject key_string with whitespace. dm core currently lacks code for + * proper whitespace escaping in arguments on DM_TABLE_STATUS path. + */ + if (contains_whitespace(key_string)) { + DMERR("whitespace chars not allowed in key string"); + return -EINVAL; + } + + /* look for next ':' separating key_type from key_description */ + key_desc = strpbrk(key_string, ":"); + if (!key_desc || key_desc == key_string || !strlen(key_desc + 1)) + return -EINVAL; + + if (strncmp(key_string, "logon:", key_desc - key_string + 1) && + strncmp(key_string, "user:", key_desc - key_string + 1)) + return -EINVAL; + + new_key_string = kstrdup(key_string, GFP_KERNEL); + if (!new_key_string) + return -ENOMEM; + + key = request_key(key_string[0] == 'l' ? &key_type_logon : &key_type_user, + key_desc + 1, NULL); + if (IS_ERR(key)) { + kzfree(new_key_string); + return PTR_ERR(key); + } + + rcu_read_lock(); + + ukp = user_key_payload(key); + if (!ukp) { + rcu_read_unlock(); + key_put(key); + kzfree(new_key_string); + return -EKEYREVOKED; + } + + if (cc->key_size != ukp->datalen) { + rcu_read_unlock(); + key_put(key); + kzfree(new_key_string); + return -EINVAL; + } + + memcpy(cc->key, ukp->data, cc->key_size); + + rcu_read_unlock(); + key_put(key); + + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + + ret = crypt_setkey(cc); + + /* wipe the kernel key payload copy in each case */ + memset(cc->key, 0, cc->key_size * sizeof(u8)); + + if (!ret) { + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); + kzfree(cc->key_string); + cc->key_string = new_key_string; + } else + kzfree(new_key_string); + + return ret; +} + +static int get_key_size(char **key_string) +{ + char *colon, dummy; + int ret; + + if (*key_string[0] != ':') + return strlen(*key_string) >> 1; + + /* look for next ':' in key string */ + colon = strpbrk(*key_string + 1, ":"); + if (!colon) + return -EINVAL; + + if (sscanf(*key_string + 1, "%u%c", &ret, &dummy) != 2 || dummy != ':') + return -EINVAL; + + *key_string = colon; + + /* remaining key string should be :<logon|user>:<key_desc> */ + + return ret; +} + +#else + +static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string) +{ + return -EINVAL; +} + +static int get_key_size(char **key_string) +{ + return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1; +} + +#endif + static int crypt_set_key(struct crypt_config *cc, char *key) { int r = -EINVAL; int key_string_len = strlen(key); - /* The key size may not be changed. */ - if (cc->key_size != (key_string_len >> 1)) - goto out; - /* Hyphen (which gives a key_size of zero) means there is no key. */ if (!cc->key_size && strcmp(key, "-")) goto out; - if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) + /* ':' means the key is in kernel keyring, short-circuit normal key processing */ + if (key[0] == ':') { + r = crypt_set_keyring_key(cc, key + 1); goto out; + } - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); - r = crypt_setkey_allcpus(cc); + /* wipe references to any kernel keyring key */ + kzfree(cc->key_string); + cc->key_string = NULL; + + if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) + goto out; + + r = crypt_setkey(cc); + if (!r) + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); out: /* Hex key string not needed after here, so wipe it. */ @@ -1521,8 +1651,10 @@ static int crypt_wipe_key(struct crypt_config *cc) { clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); memset(&cc->key, 0, cc->key_size * sizeof(u8)); + kzfree(cc->key_string); + cc->key_string = NULL; - return crypt_setkey_allcpus(cc); + return crypt_setkey(cc); } static void crypt_dtr(struct dm_target *ti) @@ -1558,6 +1690,7 @@ static void crypt_dtr(struct dm_target *ti) kzfree(cc->cipher); kzfree(cc->cipher_string); + kzfree(cc->key_string); /* Must zero key material before freeing */ kzfree(cc); @@ -1726,12 +1859,13 @@ bad_mem: /* * Construct an encryption mapping: - * <cipher> <key> <iv_offset> <dev_path> <start> + * <cipher> [<key>|:<key_size>:<user|logon>:<key_description>] <iv_offset> <dev_path> <start> */ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; - unsigned int key_size, opt_params; + int key_size; + unsigned int opt_params; unsigned long long tmpll; int ret; size_t iv_size_padding; @@ -1748,7 +1882,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - key_size = strlen(argv[1]) >> 1; + key_size = get_key_size(&argv[1]); + if (key_size < 0) { + ti->error = "Cannot parse key size"; + return -EINVAL; + } cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL); if (!cc) { @@ -1955,10 +2093,13 @@ static void crypt_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: DMEMIT("%s ", cc->cipher_string); - if (cc->key_size > 0) - for (i = 0; i < cc->key_size; i++) - DMEMIT("%02x", cc->key[i]); - else + if (cc->key_size > 0) { + if (cc->key_string) + DMEMIT(":%u:%s", cc->key_size, cc->key_string); + else + for (i = 0; i < cc->key_size; i++) + DMEMIT("%02x", cc->key[i]); + } else DMEMIT("-"); DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, @@ -2014,7 +2155,7 @@ static void crypt_resume(struct dm_target *ti) static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) { struct crypt_config *cc = ti->private; - int ret = -EINVAL; + int key_size, ret = -EINVAL; if (argc < 2) goto error; @@ -2025,6 +2166,13 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) return -EINVAL; } if (argc == 3 && !strcasecmp(argv[1], "set")) { + /* The key size may not be changed. */ + key_size = get_key_size(&argv[2]); + if (key_size < 0 || cc->key_size != key_size) { + memset(argv[2], '0', strlen(argv[2])); + return -EINVAL; + } + ret = crypt_set_key(cc, argv[2]); if (ret) return ret; @@ -2068,7 +2216,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 14, 1}, + .version = {1, 15, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 6a2e8dd..13305a1 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -36,7 +36,8 @@ struct flakey_c { }; enum feature_flag_bits { - DROP_WRITES + DROP_WRITES, + ERROR_WRITES }; struct per_bio_data { @@ -76,6 +77,25 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, if (test_and_set_bit(DROP_WRITES, &fc->flags)) { ti->error = "Feature drop_writes duplicated"; return -EINVAL; + } else if (test_bit(ERROR_WRITES, &fc->flags)) { + ti->error = "Feature drop_writes conflicts with feature error_writes"; + return -EINVAL; + } + + continue; + } + + /* + * error_writes + */ + if (!strcasecmp(arg_name, "error_writes")) { + if (test_and_set_bit(ERROR_WRITES, &fc->flags)) { + ti->error = "Feature error_writes duplicated"; + return -EINVAL; + + } else if (test_bit(DROP_WRITES, &fc->flags)) { + ti->error = "Feature error_writes conflicts with feature drop_writes"; + return -EINVAL; } continue; @@ -135,6 +155,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; return -EINVAL; + + } else if (test_bit(ERROR_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { + ti->error = "error_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; + return -EINVAL; } return 0; @@ -200,11 +224,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!(fc->up_interval + fc->down_interval)) { ti->error = "Total (up + down) interval is zero"; + r = -EINVAL; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { ti->error = "Interval overflow"; + r = -EINVAL; goto bad; } @@ -289,22 +315,27 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) pb->bio_submitted = true; /* - * Error reads if neither corrupt_bio_byte or drop_writes are set. + * Error reads if neither corrupt_bio_byte or drop_writes or error_writes are set. * Otherwise, flakey_end_io() will decide if the reads should be modified. */ if (bio_data_dir(bio) == READ) { - if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags)) + if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) && + !test_bit(ERROR_WRITES, &fc->flags)) return -EIO; goto map_bio; } /* - * Drop writes? + * Drop or error writes? */ if (test_bit(DROP_WRITES, &fc->flags)) { bio_endio(bio); return DM_MAPIO_SUBMITTED; } + else if (test_bit(ERROR_WRITES, &fc->flags)) { + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } /* * Corrupt matching writes. @@ -340,10 +371,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) */ corrupt_bio_data(bio, fc); - } else if (!test_bit(DROP_WRITES, &fc->flags)) { + } else if (!test_bit(DROP_WRITES, &fc->flags) && + !test_bit(ERROR_WRITES, &fc->flags)) { /* * Error read during the down_interval if drop_writes - * wasn't configured. + * and error_writes were not configured. */ return -EIO; } @@ -357,7 +389,7 @@ static void flakey_status(struct dm_target *ti, status_type_t type, { unsigned sz = 0; struct flakey_c *fc = ti->private; - unsigned drop_writes; + unsigned drop_writes, error_writes; switch (type) { case STATUSTYPE_INFO: @@ -370,10 +402,13 @@ static void flakey_status(struct dm_target *ti, status_type_t type, fc->down_interval); drop_writes = test_bit(DROP_WRITES, &fc->flags); - DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); + error_writes = test_bit(ERROR_WRITES, &fc->flags); + DMEMIT("%u ", drop_writes + error_writes + (fc->corrupt_bio_byte > 0) * 5); if (drop_writes) DMEMIT("drop_writes "); + else if (error_writes) + DMEMIT("error_writes "); if (fc->corrupt_bio_byte) DMEMIT("corrupt_bio_byte %u %c %u %u ", @@ -410,7 +445,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 3, 1}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 0bf1a12..03940bf 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -162,7 +162,10 @@ struct dpages { struct page **p, unsigned long *len, unsigned *offset); void (*next_page)(struct dpages *dp); - unsigned context_u; + union { + unsigned context_u; + struct bvec_iter context_bi; + }; void *context_ptr; void *vma_invalidate_address; @@ -204,25 +207,36 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse static void bio_get_page(struct dpages *dp, struct page **p, unsigned long *len, unsigned *offset) { - struct bio_vec *bvec = dp->context_ptr; - *p = bvec->bv_page; - *len = bvec->bv_len - dp->context_u; - *offset = bvec->bv_offset + dp->context_u; + struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr, + dp->context_bi); + + *p = bvec.bv_page; + *len = bvec.bv_len; + *offset = bvec.bv_offset; + + /* avoid figuring it out again in bio_next_page() */ + dp->context_bi.bi_sector = (sector_t)bvec.bv_len; } static void bio_next_page(struct dpages *dp) { - struct bio_vec *bvec = dp->context_ptr; - dp->context_ptr = bvec + 1; - dp->context_u = 0; + unsigned int len = (unsigned int)dp->context_bi.bi_sector; + + bvec_iter_advance((struct bio_vec *)dp->context_ptr, + &dp->context_bi, len); } static void bio_dp_init(struct dpages *dp, struct bio *bio) { dp->get_page = bio_get_page; dp->next_page = bio_next_page; - dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); - dp->context_u = bio->bi_iter.bi_bvec_done; + + /* + * We just use bvec iterator to retrieve pages, so it is ok to + * access the bvec table directly here + */ + dp->context_ptr = bio->bi_io_vec; + dp->context_bi = bio->bi_iter; } /* diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 966eb4b..c72a770 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1697,7 +1697,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern { struct dm_ioctl *dmi; int secure_data; - const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data); + const size_t minimum_data_size = offsetof(struct dm_ioctl, data); if (copy_from_user(param_kernel, user, minimum_data_size)) return -EFAULT; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e477af8..6400cff 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -372,16 +372,13 @@ static int __pg_init_all_paths(struct multipath *m) return atomic_read(&m->pg_init_in_progress); } -static int pg_init_all_paths(struct multipath *m) +static void pg_init_all_paths(struct multipath *m) { - int r; unsigned long flags; spin_lock_irqsave(&m->lock, flags); - r = __pg_init_all_paths(m); + __pg_init_all_paths(m); spin_unlock_irqrestore(&m->lock, flags); - - return r; } static void __switch_pg(struct multipath *m, struct priority_group *pg) @@ -583,16 +580,17 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, * .request_fn stacked on blk-mq path(s) and * blk-mq stacked on blk-mq path(s). */ - *__clone = blk_mq_alloc_request(bdev_get_queue(bdev), - rq_data_dir(rq), BLK_MQ_REQ_NOWAIT); - if (IS_ERR(*__clone)) { - /* ENOMEM, requeue */ + clone = blk_mq_alloc_request(bdev_get_queue(bdev), + rq_data_dir(rq), BLK_MQ_REQ_NOWAIT); + if (IS_ERR(clone)) { + /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ clear_request_fn_mpio(m, map_context); return r; } - (*__clone)->bio = (*__clone)->biotail = NULL; - (*__clone)->rq_disk = bdev->bd_disk; - (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT; + clone->bio = clone->biotail = NULL; + clone->rq_disk = bdev->bd_disk; + clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; + *__clone = clone; } if (pgpath->pg->ps.type->start_io) @@ -852,18 +850,22 @@ retain: attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); if (attached_handler_name) { /* + * Clear any hw_handler_params associated with a + * handler that isn't already attached. + */ + if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) { + kfree(m->hw_handler_params); + m->hw_handler_params = NULL; + } + + /* * Reset hw_handler_name to match the attached handler - * and clear any hw_handler_params associated with the - * ignored handler. * * NB. This modifies the table line to show the actual * handler instead of the original table passed in. */ kfree(m->hw_handler_name); m->hw_handler_name = attached_handler_name; - - kfree(m->hw_handler_params); - m->hw_handler_params = NULL; } } @@ -1002,6 +1004,8 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) } m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); + if (!m->hw_handler_name) + return -EINVAL; if (hw_argc > 1) { char *p; @@ -1362,7 +1366,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr) char dummy; if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || - (pgnum > m->nr_priority_groups)) { + !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { DMWARN("invalid PG number supplied to switch_pg_num"); return -EINVAL; } @@ -1394,7 +1398,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed) char dummy; if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || - (pgnum > m->nr_priority_groups)) { + !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { DMWARN("invalid PG number supplied to bypass_pg"); return -EINVAL; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 953159d..b8f978e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -160,7 +160,6 @@ struct raid_dev { CTR_FLAG_DAEMON_SLEEP | \ CTR_FLAG_MIN_RECOVERY_RATE | \ CTR_FLAG_MAX_RECOVERY_RATE | \ - CTR_FLAG_MAX_WRITE_BEHIND | \ CTR_FLAG_STRIPE_CACHE | \ CTR_FLAG_REGION_SIZE | \ CTR_FLAG_DELTA_DISKS | \ @@ -171,7 +170,6 @@ struct raid_dev { CTR_FLAG_DAEMON_SLEEP | \ CTR_FLAG_MIN_RECOVERY_RATE | \ CTR_FLAG_MAX_RECOVERY_RATE | \ - CTR_FLAG_MAX_WRITE_BEHIND | \ CTR_FLAG_STRIPE_CACHE | \ CTR_FLAG_REGION_SIZE | \ CTR_FLAG_DELTA_DISKS | \ @@ -2050,16 +2048,17 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) mddev->reshape_position = MaxSector; + mddev->raid_disks = le32_to_cpu(sb->num_devices); + mddev->level = le32_to_cpu(sb->level); + mddev->layout = le32_to_cpu(sb->layout); + mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors); + /* * Reshaping is supported, e.g. reshape_position is valid * in superblock and superblock content is authoritative. */ if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) { /* Superblock is authoritative wrt given raid set layout! */ - mddev->raid_disks = le32_to_cpu(sb->num_devices); - mddev->level = le32_to_cpu(sb->level); - mddev->layout = le32_to_cpu(sb->layout); - mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors); mddev->new_level = le32_to_cpu(sb->new_level); mddev->new_layout = le32_to_cpu(sb->new_layout); mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors); @@ -2087,38 +2086,44 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) /* * No takeover/reshaping, because we don't have the extended v1.9.0 metadata */ - if (le32_to_cpu(sb->level) != mddev->new_level) { - DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)"); - return -EINVAL; - } - if (le32_to_cpu(sb->layout) != mddev->new_layout) { - DMERR("Reshaping raid sets not yet supported. (raid layout change)"); - DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout); - DMERR(" Old layout: %s w/ %d copies", - raid10_md_layout_to_format(le32_to_cpu(sb->layout)), - raid10_md_layout_to_copies(le32_to_cpu(sb->layout))); - DMERR(" New layout: %s w/ %d copies", - raid10_md_layout_to_format(mddev->layout), - raid10_md_layout_to_copies(mddev->layout)); - return -EINVAL; - } - if (le32_to_cpu(sb->stripe_sectors) != mddev->new_chunk_sectors) { - DMERR("Reshaping raid sets not yet supported. (stripe sectors change)"); - return -EINVAL; - } + struct raid_type *rt_cur = get_raid_type_by_ll(mddev->level, mddev->layout); + struct raid_type *rt_new = get_raid_type_by_ll(mddev->new_level, mddev->new_layout); - /* We can only change the number of devices in raid1 with old (i.e. pre 1.0.7) metadata */ - if (!rt_is_raid1(rs->raid_type) && - (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { - DMERR("Reshaping raid sets not yet supported. (device count change from %u to %u)", - sb->num_devices, mddev->raid_disks); + if (rs_takeover_requested(rs)) { + if (rt_cur && rt_new) + DMERR("Takeover raid sets from %s to %s not yet supported by metadata. (raid level change)", + rt_cur->name, rt_new->name); + else + DMERR("Takeover raid sets not yet supported by metadata. (raid level change)"); + return -EINVAL; + } else if (rs_reshape_requested(rs)) { + DMERR("Reshaping raid sets not yet supported by metadata. (raid layout change keeping level)"); + if (mddev->layout != mddev->new_layout) { + if (rt_cur && rt_new) + DMERR(" current layout %s vs new layout %s", + rt_cur->name, rt_new->name); + else + DMERR(" current layout 0x%X vs new layout 0x%X", + le32_to_cpu(sb->layout), mddev->new_layout); + } + if (mddev->chunk_sectors != mddev->new_chunk_sectors) + DMERR(" current stripe sectors %u vs new stripe sectors %u", + mddev->chunk_sectors, mddev->new_chunk_sectors); + if (rs->delta_disks) + DMERR(" current %u disks vs new %u disks", + mddev->raid_disks, mddev->raid_disks + rs->delta_disks); + if (rs_is_raid10(rs)) { + DMERR(" Old layout: %s w/ %u copies", + raid10_md_layout_to_format(mddev->layout), + raid10_md_layout_to_copies(mddev->layout)); + DMERR(" New layout: %s w/ %u copies", + raid10_md_layout_to_format(mddev->new_layout), + raid10_md_layout_to_copies(mddev->new_layout)); + } return -EINVAL; } DMINFO("Discovered old metadata format; upgrading to extended metadata format"); - - /* Table line is checked vs. authoritative superblock */ - rs_set_new(rs); } if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) @@ -2211,7 +2216,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) continue; if (role != r->raid_disk) { - if (__is_raid10_near(mddev->layout)) { + if (rs_is_raid10(rs) && __is_raid10_near(mddev->layout)) { if (mddev->raid_disks % __raid10_near_copies(mddev->layout) || rs->raid_disks % rs->raid10_copies) { rs->ti->error = @@ -2994,6 +2999,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) } } + /* Disable/enable discard support on raid set. */ + configure_discard_support(rs); + mddev_unlock(&rs->md); return 0; @@ -3580,12 +3588,6 @@ static int raid_preresume(struct dm_target *ti) if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) rs_update_sbs(rs); - /* - * Disable/enable discard support on raid set after any - * conversion, because devices can have been added - */ - configure_discard_support(rs); - /* Load the bitmap from disk unless raid0 */ r = __load_dirty_region_bitmap(rs); if (r) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index b2a9e2d..9d7275f 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -23,11 +23,7 @@ static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH; #define RESERVED_REQUEST_BASED_IOS 256 static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; -#ifdef CONFIG_DM_MQ_DEFAULT -static bool use_blk_mq = true; -#else -static bool use_blk_mq = false; -#endif +static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT); bool dm_use_blk_mq_default(void) { @@ -210,6 +206,9 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { + struct request_queue *q = md->queue; + unsigned long flags; + atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ @@ -222,8 +221,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) * back into ->request_fn() could deadlock attempting to grab the * queue lock again. */ - if (!md->queue->mq_ops && run_queue) - blk_run_queue_async(md->queue); + if (!q->mq_ops && run_queue) { + spin_lock_irqsave(q->queue_lock, flags); + blk_run_queue_async(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } /* * dm_put() must be at the end of this function. See the comment above @@ -798,7 +800,7 @@ static void dm_old_request_fn(struct request_queue *q) pos = blk_rq_pos(rq); if ((dm_old_request_peeked_before_merge_deadline(md) && - md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 && + md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) && md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) || (ti->type->busy && ti->type->busy(ti))) { blk_delay_queue(q, 10); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c4b53b3..0a427de 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -871,7 +871,7 @@ static int dm_table_determine_type(struct dm_table *t) { unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; - bool verify_blk_mq = false; + unsigned sq_count = 0, mq_count = 0; struct dm_target *tgt; struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); @@ -924,12 +924,6 @@ static int dm_table_determine_type(struct dm_table *t) BUG_ON(!request_based); /* No targets in this table */ - if (list_empty(devices) && __table_type_request_based(live_md_type)) { - /* inherit live MD type */ - t->type = live_md_type; - return 0; - } - /* * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by * having a compatible target use dm_table_set_type. @@ -948,6 +942,19 @@ verify_rq_based: return -EINVAL; } + if (list_empty(devices)) { + int srcu_idx; + struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx); + + /* inherit live table's type and all_blk_mq */ + if (live_table) { + t->type = live_table->type; + t->all_blk_mq = live_table->all_blk_mq; + } + dm_put_live_table(t->md, srcu_idx); + return 0; + } + /* Non-request-stackable devices can't be used for request-based dm */ list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); @@ -959,19 +966,19 @@ verify_rq_based: } if (q->mq_ops) - verify_blk_mq = true; + mq_count++; + else + sq_count++; } + if (sq_count && mq_count) { + DMERR("table load rejected: not all devices are blk-mq request-stackable"); + return -EINVAL; + } + t->all_blk_mq = mq_count > 0; - if (verify_blk_mq) { - /* verify _all_ devices in the table are blk-mq devices */ - list_for_each_entry(dd, devices, list) - if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) { - DMERR("table load rejected: not all devices" - " are blk-mq request-stackable"); - return -EINVAL; - } - - t->all_blk_mq = true; + if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) { + DMERR("table load rejected: all devices are not blk-mq request-stackable"); + return -EINVAL; } return 0; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 0aba34a..7335d8a 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -868,7 +868,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev); if (r) { - ti->error = "Data device lookup failed"; + ti->error = "Hash device lookup failed"; goto bad; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ffa97b7..3086da5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1886,9 +1886,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); - spin_lock_irq(q->queue_lock); - queue_flag_set(QUEUE_FLAG_DYING, q); - spin_unlock_irq(q->queue_lock); + blk_set_queue_dying(q); if (dm_request_based(md) && md->kworker_task) kthread_flush_worker(&md->kworker); diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index e83047c..7938cd2 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -700,13 +700,11 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_ { int r; unsigned i; - uint32_t nr_entries; struct dm_btree_value_type *vt = &info->value_type; BUG_ON(le32_to_cpu(ab->nr_entries)); BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); - nr_entries = le32_to_cpu(ab->nr_entries); for (i = 0; i < new_nr; i++) { r = fn(base + i, element_at(info, ab, i), context); if (r) diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 1e33dd5..a6dde7c 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -18,6 +18,8 @@ /*----------------------------------------------------------------*/ +#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING + /* * This is a read/write semaphore with a couple of differences. * @@ -302,6 +304,18 @@ static void report_recursive_bug(dm_block_t b, int r) (unsigned long long) b); } +#else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ + +#define bl_init(x) do { } while (0) +#define bl_down_read(x) 0 +#define bl_down_read_nonblock(x) 0 +#define bl_up_read(x) do { } while (0) +#define bl_down_write(x) 0 +#define bl_up_write(x) do { } while (0) +#define report_recursive_bug(x, y) do { } while (0) + +#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ + /*----------------------------------------------------------------*/ /* @@ -330,8 +344,11 @@ EXPORT_SYMBOL_GPL(dm_block_data); struct buffer_aux { struct dm_block_validator *validator; - struct block_lock lock; int write_locked; + +#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING + struct block_lock lock; +#endif }; static void dm_block_manager_alloc_callback(struct dm_buffer *buf) diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 306d2e4..4c28608 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -464,7 +464,8 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, ll->nr_allocated--; le32_add_cpu(&ie_disk.nr_free, 1); ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); - } + } else + *ev = SM_NONE; return ll->save_ie(ll, index, &ie_disk); } @@ -547,7 +548,6 @@ static int metadata_ll_init_index(struct ll_disk *ll) if (r < 0) return r; - memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le)); ll->bitmap_root = dm_block_location(b); dm_tm_unlock(ll->tm, b); diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 7e44005..20557e2 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -775,17 +775,15 @@ int dm_sm_metadata_create(struct dm_space_map *sm, memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); r = sm_ll_new_metadata(&smm->ll, tm); + if (!r) { + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; + r = sm_ll_extend(&smm->ll, nr_blocks); + } + memcpy(&smm->sm, &ops, sizeof(smm->sm)); if (r) return r; - if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) - nr_blocks = DM_SM_METADATA_MAX_BLOCKS; - r = sm_ll_extend(&smm->ll, nr_blocks); - if (r) - return r; - - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - /* * Now we need to update the newly created data structures with the * allocated blocks that they were built from. |