diff options
author | asomers <asomers@FreeBSD.org> | 2017-11-28 00:19:04 +0000 |
---|---|---|
committer | asomers <asomers@FreeBSD.org> | 2017-11-28 00:19:04 +0000 |
commit | 6c64aedd118d7d79a496e4e3c43334c1bc272617 (patch) | |
tree | 4e9ad9a1f673c6b786dab08fe606d78cdc2227c1 /cddl/contrib | |
parent | c1cebba7cf78d2638aec53a5c28621d871672663 (diff) | |
download | FreeBSD-src-6c64aedd118d7d79a496e4e3c43334c1bc272617.zip FreeBSD-src-6c64aedd118d7d79a496e4e3c43334c1bc272617.tar.gz |
MFC r322854, r323995, r324568, r324991
r322854:
zfsd(8): Close a race condition when onlining a disk partition
When inserting a partitioned disk, devfs and geom will announce the whole
disk before they announce the partition. If the partition containing ZFS
extends to one of the disk's extents, then zfsd will see a ZFS label on the
whole disk and attempt to online it. ZFS is smart enough to activate the
partition instead of the whole disk, but only if GEOM has already created
the partition's provider.
cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
Add a zpool_read_all_labels method. It's similar to
zpool_read_label, but it will return the number of labels found.
cddl/usr.sbin/zfsd/zfsd_event.cc
When processing a DevFS CREATE event, only online a VDEV if we can
read all four ZFS labels.
Reviewed by: mav
Sponsored by: Spectra Logic Corp
Differential Revision: https://reviews.freebsd.org/D11920
r323995:
Close a memory leak when using zpool_read_all_labels
X-MFC-With: 322854
Sponsored by: Spectra Logic Corp
r324568:
Optimize zpool_read_all_labels with AIO
Read all labels in parallel instead of sequentially
X-MFC-With: 322854
Sponsored by: Spectra Logic Corp
Differential Revision: https://reviews.freebsd.org/D12495
r324991:
Fix zpool_read_all_labels when vfs.aio.enable_unsafe=0
Previously, zpool_read_all_labels was trying to do 256KB reads, which are
greater than the default MAXPHYS and therefore must go through the slow,
unsafe AIO path. Shrink these reads to 112KB so they can use the safe, fast
AIO path instead.
X-MFC-With: 324568
Sponsored by: Spectra Logic Corp
Diffstat (limited to 'cddl/contrib')
-rw-r--r-- | cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h | 1 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c | 85 |
2 files changed, 86 insertions, 0 deletions
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index f2a9ebf..9133238 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -774,6 +774,7 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, * Label manipulation. */ extern int zpool_read_label(int, nvlist_t **); +extern int zpool_read_all_labels(int, nvlist_t **); extern int zpool_clear_label(int); /* is this zvol valid for use as a dump device? */ diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c index 1bbb8c5..a8da5b4 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c @@ -42,6 +42,7 @@ * using our derived config, and record the results. */ +#include <aio.h> #include <ctype.h> #include <devid.h> #include <dirent.h> @@ -915,6 +916,90 @@ zpool_read_label(int fd, nvlist_t **config) return (-1); } +/* + * Given a file descriptor, read the label information and return an nvlist + * describing the configuration, if there is one. + * returns the number of valid labels found + * If a label is found, returns it via config. The caller is responsible for + * freeing it. 
+ */ +int +zpool_read_all_labels(int fd, nvlist_t **config) +{ + struct stat64 statbuf; + struct aiocb aiocbs[VDEV_LABELS]; + struct aiocb *aiocbps[VDEV_LABELS]; + int l; + vdev_phys_t *labels; + uint64_t state, txg, size; + int nlabels = 0; + + *config = NULL; + + if (fstat64(fd, &statbuf) == -1) + return (0); + size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); + + if ((labels = calloc(VDEV_LABELS, sizeof (vdev_phys_t))) == NULL) + return (0); + + memset(aiocbs, 0, sizeof(aiocbs)); + for (l = 0; l < VDEV_LABELS; l++) { + aiocbs[l].aio_fildes = fd; + aiocbs[l].aio_offset = label_offset(size, l) + VDEV_SKIP_SIZE; + aiocbs[l].aio_buf = &labels[l]; + aiocbs[l].aio_nbytes = sizeof(vdev_phys_t); + aiocbs[l].aio_lio_opcode = LIO_READ; + aiocbps[l] = &aiocbs[l]; + } + + if (lio_listio(LIO_WAIT, aiocbps, VDEV_LABELS, NULL) != 0) { + if (errno == EAGAIN || errno == EINTR || errno == EIO) { + for (l = 0; l < VDEV_LABELS; l++) { + errno = 0; + int r = aio_error(&aiocbs[l]); + if (r != EINVAL) + (void)aio_return(&aiocbs[l]); + } + } + free(labels); + return (0); + } + + for (l = 0; l < VDEV_LABELS; l++) { + nvlist_t *temp = NULL; + + if (aio_return(&aiocbs[l]) != sizeof(vdev_phys_t)) + continue; + + if (nvlist_unpack(labels[l].vp_nvlist, + sizeof (labels[l].vp_nvlist), &temp, 0) != 0) + continue; + + if (nvlist_lookup_uint64(temp, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || state > POOL_STATE_L2CACHE) { + nvlist_free(temp); + temp = NULL; + continue; + } + + if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && + (nvlist_lookup_uint64(temp, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0)) { + nvlist_free(temp); + temp = NULL; + continue; + } + if (temp) + *config = temp; + + nlabels++; + } + + free(labels); + return (nlabels); +} + typedef struct rdsk_node { char *rn_name; int rn_dfd; |