summaryrefslogtreecommitdiffstats
path: root/sys/cddl
diff options
context:
space:
mode:
authormav <mav@FreeBSD.org>2018-04-16 03:48:37 +0000
committermav <mav@FreeBSD.org>2018-04-16 03:48:37 +0000
commit026ce2e11f8cd3116e11814a92ca0fec88be801c (patch)
tree1559e1d4f243a5eac2cd67234c02e01dc96dd6d6 /sys/cddl
parent32253781878f276b91c064399ca60806a1a885c7 (diff)
downloadFreeBSD-src-026ce2e11f8cd3116e11814a92ca0fec88be801c.zip
FreeBSD-src-026ce2e11f8cd3116e11814a92ca0fec88be801c.tar.gz
MFC r329805: MFV r329803:
9080 recursive enter of vdev_indirect_rwlock from vdev_indirect_remap() illumos/illumos-gate@bdfded42e66b9fc1395ff2401aa2952f7c44ae34 A scenario came up where a callback executed by vdev_indirect_remap() on a vdev, calls vdev_indirect_remap() on the same vdev and tries to reacquire vdev_indirect_rwlock that was already acquired from the first call to vdev_indirect_remap(). The specific scenario, is that we want to remap a block pointer that is snapshoted but its dataset's remap_deadlist is not cached. So in order to add it we issue a read through a vdev_indirect_remap() on the same vdev, which brings up the aforementioned issue. Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Approved by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> Author: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Diffstat (limited to 'sys/cddl')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c108
1 files changed, 90 insertions, 18 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c
index ee5c231..8e5a676 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c
@@ -14,7 +14,7 @@
*/
/*
- * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -854,6 +854,57 @@ rs_alloc(vdev_t *vd, uint64_t offset, uint64_t asize, uint64_t split_offset)
}
/*
+ * Given an indirect vdev and an extent on that vdev, it duplicates the
+ * physical entries of the indirect mapping that correspond to the extent
+ * to a new array and returns a pointer to it. In addition, copied_entries
+ * is populated with the number of mapping entries that were duplicated.
+ *
+ * Note that the function assumes that the caller holds vdev_indirect_rwlock.
+ * This ensures that the mapping won't change due to condensing as we
+ * copy over its contents.
+ *
+ * Finally, since we are doing an allocation, it is up to the caller to
+ * free the array allocated in this function.
+ */
+vdev_indirect_mapping_entry_phys_t *
+vdev_indirect_mapping_duplicate_adjacent_entries(vdev_t *vd, uint64_t offset,
+ uint64_t asize, uint64_t *copied_entries)
+{
+ vdev_indirect_mapping_entry_phys_t *duplicate_mappings = NULL;
+ vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
+ uint64_t entries = 0;
+
+ ASSERT(RW_READ_HELD(&vd->vdev_indirect_rwlock));
+
+ vdev_indirect_mapping_entry_phys_t *first_mapping =
+ vdev_indirect_mapping_entry_for_offset(vim, offset);
+ ASSERT3P(first_mapping, !=, NULL);
+
+ vdev_indirect_mapping_entry_phys_t *m = first_mapping;
+ while (asize > 0) {
+ uint64_t size = DVA_GET_ASIZE(&m->vimep_dst);
+
+ ASSERT3U(offset, >=, DVA_MAPPING_GET_SRC_OFFSET(m));
+ ASSERT3U(offset, <, DVA_MAPPING_GET_SRC_OFFSET(m) + size);
+
+ uint64_t inner_offset = offset - DVA_MAPPING_GET_SRC_OFFSET(m);
+ uint64_t inner_size = MIN(asize, size - inner_offset);
+
+ offset += inner_size;
+ asize -= inner_size;
+ entries++;
+ m++;
+ }
+
+ size_t copy_length = entries * sizeof (*first_mapping);
+ duplicate_mappings = kmem_alloc(copy_length, KM_SLEEP);
+ bcopy(first_mapping, duplicate_mappings, copy_length);
+ *copied_entries = entries;
+
+ return (duplicate_mappings);
+}
+
+/*
* Goes through the relevant indirect mappings until it hits a concrete vdev
* and issues the callback. On the way to the concrete vdev, if any other
* indirect vdevs are encountered, then the callback will also be called on
@@ -893,24 +944,42 @@ vdev_indirect_remap(vdev_t *vd, uint64_t offset, uint64_t asize,
for (remap_segment_t *rs = rs_alloc(vd, offset, asize, 0);
rs != NULL; rs = list_remove_head(&stack)) {
vdev_t *v = rs->rs_vd;
+ uint64_t num_entries = 0;
+
+ ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
+ ASSERT(rs->rs_asize > 0);
/*
- * Note: this can be called from open context
- * (eg. zio_read()), so we need the rwlock to prevent
- * the mapping from being changed by condensing.
+ * Note: As this function can be called from open context
+ * (e.g. zio_read()), we need the following rwlock to
+ * prevent the mapping from being changed by condensing.
+ *
+ * So we grab the lock and we make a copy of the entries
+ * that are relevant to the extent that we are working on.
+ * Once that is done, we drop the lock and iterate over
+ * our copy of the mapping. Once we are done with the with
+ * the remap segment and we free it, we also free our copy
+ * of the indirect mapping entries that are relevant to it.
+ *
+ * This way we don't need to wait until the function is
+ * finished with a segment, to condense it. In addition, we
+ * don't need a recursive rwlock for the case that a call to
+ * vdev_indirect_remap() needs to call itself (through the
+ * codepath of its callback) for the same vdev in the middle
+ * of its execution.
*/
rw_enter(&v->vdev_indirect_rwlock, RW_READER);
vdev_indirect_mapping_t *vim = v->vdev_indirect_mapping;
ASSERT3P(vim, !=, NULL);
- ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
- ASSERT(rs->rs_asize > 0);
-
vdev_indirect_mapping_entry_phys_t *mapping =
- vdev_indirect_mapping_entry_for_offset(vim, rs->rs_offset);
+ vdev_indirect_mapping_duplicate_adjacent_entries(v,
+ rs->rs_offset, rs->rs_asize, &num_entries);
ASSERT3P(mapping, !=, NULL);
+ ASSERT3U(num_entries, >, 0);
+ rw_exit(&v->vdev_indirect_rwlock);
- while (rs->rs_asize > 0) {
+ for (uint64_t i = 0; i < num_entries; i++) {
/*
* Note: the vdev_indirect_mapping can not change
* while we are running. It only changes while the
@@ -919,20 +988,23 @@ vdev_indirect_remap(vdev_t *vd, uint64_t offset, uint64_t asize,
* function is only called for frees, which also only
* happen from syncing context.
*/
+ vdev_indirect_mapping_entry_phys_t *m = &mapping[i];
+
+ ASSERT3P(m, !=, NULL);
+ ASSERT3U(rs->rs_asize, >, 0);
- uint64_t size = DVA_GET_ASIZE(&mapping->vimep_dst);
- uint64_t dst_offset =
- DVA_GET_OFFSET(&mapping->vimep_dst);
- uint64_t dst_vdev = DVA_GET_VDEV(&mapping->vimep_dst);
+ uint64_t size = DVA_GET_ASIZE(&m->vimep_dst);
+ uint64_t dst_offset = DVA_GET_OFFSET(&m->vimep_dst);
+ uint64_t dst_vdev = DVA_GET_VDEV(&m->vimep_dst);
ASSERT3U(rs->rs_offset, >=,
- DVA_MAPPING_GET_SRC_OFFSET(mapping));
+ DVA_MAPPING_GET_SRC_OFFSET(m));
ASSERT3U(rs->rs_offset, <,
- DVA_MAPPING_GET_SRC_OFFSET(mapping) + size);
+ DVA_MAPPING_GET_SRC_OFFSET(m) + size);
ASSERT3U(dst_vdev, !=, v->vdev_id);
uint64_t inner_offset = rs->rs_offset -
- DVA_MAPPING_GET_SRC_OFFSET(mapping);
+ DVA_MAPPING_GET_SRC_OFFSET(m);
uint64_t inner_size =
MIN(rs->rs_asize, size - inner_offset);
@@ -973,10 +1045,10 @@ vdev_indirect_remap(vdev_t *vd, uint64_t offset, uint64_t asize,
rs->rs_offset += inner_size;
rs->rs_asize -= inner_size;
rs->rs_split_offset += inner_size;
- mapping++;
}
+ VERIFY0(rs->rs_asize);
- rw_exit(&v->vdev_indirect_rwlock);
+ kmem_free(mapping, num_entries * sizeof (*mapping));
kmem_free(rs, sizeof (remap_segment_t));
}
list_destroy(&stack);
OpenPOWER on IntegriCloud