summaryrefslogtreecommitdiffstats
path: root/cddl/contrib/opensolaris/cmd
diff options
context:
space:
mode:
authormav <mav@FreeBSD.org>2017-07-27 10:25:18 +0000
committermav <mav@FreeBSD.org>2017-07-27 10:25:18 +0000
commita0dddc24c905013363838bb04e79443d81a2d765 (patch)
tree6919eef26445eb9c9ccb3c760b18432e9220f6f0 /cddl/contrib/opensolaris/cmd
parent2ac0a2ca26121c0aa142f78767aeb8c6350e58ad (diff)
downloadFreeBSD-src-a0dddc24c905013363838bb04e79443d81a2d765.zip
FreeBSD-src-a0dddc24c905013363838bb04e79443d81a2d765.tar.gz
MFC r320156, r320185, r320186, r320262, r320452, r321111:
MFV r318946: 8021 ARC buf data scatter-ization illumos/illumos-gate@770499e185d15678ccb0be57ebc626ad18d93383 https://github.com/illumos/illumos-gate/commit/770499e185d15678ccb0be57ebc626ad1 8d93383 https://www.illumos.org/issues/8021 The ARC buf data project (known simply as "ABD" since its genesis in the ZoL community) changes the way the ARC allocates `b_pdata` memory from using linea r `void *` buffers to using scatter/gather lists of fixed-size 1KB chunks. This improves ZFS's performance by helping to defragment the address space occupied by the ARC, in particular for cases where compressed ARC is enabled. It could also ease future work to allocate pages directly from `segkpm` for minimal- overhead memory allocations, bypassing the `kmem` subsystem. This is essentially the same change as the one which recently landed in ZFS on Linux, although they made some platform-specific changes while adapting this work to their codebase: 1. Implemented the equivalent of the `segkpm` suggestion for future work mentioned above to bypass issues that they've had with the Linux kernel memory allocator. 2. Changed the internal representation of the ABD's scatter/gather list so it could be used to pass I/O directly into Linux block device drivers. (This feature is not available in the illumos block device interface yet.) FreeBSD notes: - the actual (default) chunk size is 4KB (despite the text above saying 1KB) - we can try to reimplement ABDs, so that they are not permanently mapped into the KVA unless explicitly requested, especially on platforms with scarce KVA - we can try to use unmapped I/O and avoid intermediate allocation of a linear, virtual memory mapped buffer - we can try to avoid extra data copying by referring to chunks / pages in the original ABD Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Paul Dagnelie <pcd@delphix.com> Reviewed by: John Kennedy <john.kennedy@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Prashanth Sreenivasa <pks@delphix.com> Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com> Reviewed by: Chris Williamson <chris.williamson@delphix.com> Approved by: Richard Lowe <richlowe@richlowe.net> Author: Dan Kimmel <dan.kimmel@delphix.com>
Diffstat (limited to 'cddl/contrib/opensolaris/cmd')
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb.c47
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb_il.c48
-rw-r--r--cddl/contrib/opensolaris/cmd/ztest/ztest.c18
3 files changed, 71 insertions, 42 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index ac9b06d..711377a 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -59,6 +59,7 @@
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
+#include <sys/abd.h>
#include <zfs_comutil.h>
#undef verify
#include <libzfs.h>
@@ -2410,7 +2411,7 @@ zdb_blkptr_done(zio_t *zio)
zdb_cb_t *zcb = zio->io_private;
zbookmark_phys_t *zb = &zio->io_bookmark;
- zio_data_buf_free(zio->io_data, zio->io_size);
+ abd_free(zio->io_abd);
mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
@@ -2477,7 +2478,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (!BP_IS_EMBEDDED(bp) &&
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
size_t size = BP_GET_PSIZE(bp);
- void *data = zio_data_buf_alloc(size);
+ abd_t *abd = abd_alloc(size, B_FALSE);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
/* If it's an intent log block, failure is expected. */
@@ -2490,7 +2491,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);
- zio_nowait(zio_read(NULL, spa, bp, data, size,
+ zio_nowait(zio_read(NULL, spa, bp, abd, size,
zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
}
@@ -3270,6 +3271,13 @@ name:
return (NULL);
}
+/* ARGSUSED */
+static int
+random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
+{
+ return (random_get_pseudo_bytes(buf, len));
+}
+
/*
* Read a block from a pool and print it out. The syntax of the
* block descriptor is:
@@ -3301,7 +3309,8 @@ zdb_read_block(char *thing, spa_t *spa)
uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
zio_t *zio;
vdev_t *vd;
- void *pbuf, *lbuf, *buf;
+ abd_t *pabd;
+ void *lbuf, *buf;
char *s, *p, *dup, *vdev, *flagstr;
int i, error;
@@ -3373,7 +3382,7 @@ zdb_read_block(char *thing, spa_t *spa)
psize = size;
lsize = size;
- pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
BP_ZERO(bp);
@@ -3401,15 +3410,15 @@ zdb_read_block(char *thing, spa_t *spa)
/*
* Treat this as a normal block read.
*/
- zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
+ zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
} else {
/*
* Treat this as a vdev child I/O.
*/
- zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
- ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
+ zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
+ psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
@@ -3432,21 +3441,21 @@ zdb_read_block(char *thing, spa_t *spa)
void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
- bcopy(pbuf, pbuf2, psize);
+ abd_copy_to_buf(pbuf2, pabd, psize);
- VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
- SPA_MAXBLOCKSIZE - psize) == 0);
+ VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
+ random_get_pseudo_bytes_cb, NULL));
- VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
- SPA_MAXBLOCKSIZE - psize) == 0);
+ VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
+ SPA_MAXBLOCKSIZE - psize));
for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
lsize -= SPA_MINBLOCKSIZE) {
for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
- if (zio_decompress_data(c, pbuf, lbuf,
- psize, lsize) == 0 &&
- zio_decompress_data(c, pbuf2, lbuf2,
- psize, lsize) == 0 &&
+ if (zio_decompress_data(c, pabd,
+ lbuf, psize, lsize) == 0 &&
+ zio_decompress_data_buf(c, pbuf2,
+ lbuf2, psize, lsize) == 0 &&
bcmp(lbuf, lbuf2, lsize) == 0)
break;
}
@@ -3465,7 +3474,7 @@ zdb_read_block(char *thing, spa_t *spa)
buf = lbuf;
size = lsize;
} else {
- buf = pbuf;
+ buf = abd_to_buf(pabd);
size = psize;
}
@@ -3483,7 +3492,7 @@ zdb_read_block(char *thing, spa_t *spa)
zdb_dump_block(thing, buf, size, flags);
out:
- umem_free(pbuf, SPA_MAXBLOCKSIZE);
+ abd_free(pabd);
umem_free(lbuf, SPA_MAXBLOCKSIZE);
free(dup);
}
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
index 583e422..bc02b1b 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
/*
@@ -41,6 +41,7 @@
#include <sys/resource.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
+#include <sys/abd.h>
extern uint8_t dump_opt[256];
@@ -117,13 +118,27 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
}
/* ARGSUSED */
+static int
+zil_prt_rec_write_cb(void *data, size_t len, void *unused)
+{
+ char *cdata = data;
+ for (int i = 0; i < len; i++) {
+ if (isprint(*cdata))
+ (void) printf("%c ", *cdata);
+ else
+ (void) printf("%2X", *cdata);
+ cdata++;
+ }
+ return (0);
+}
+
+/* ARGSUSED */
static void
zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
{
- char *data, *dlimit;
+ abd_t *data;
blkptr_t *bp = &lr->lr_blkptr;
zbookmark_phys_t zb;
- char buf[SPA_MAXBLOCKSIZE];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int error;
@@ -144,7 +159,6 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
if (BP_IS_HOLE(bp)) {
(void) printf("\t\t\tLSIZE 0x%llx\n",
(u_longlong_t)BP_GET_LSIZE(bp));
- bzero(buf, sizeof (buf));
(void) printf("%s<hole>\n", prefix);
return;
}
@@ -157,28 +171,26 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));
+ data = abd_alloc(BP_GET_LSIZE(bp), B_FALSE);
error = zio_wait(zio_read(NULL, zilog->zl_spa,
- bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
+ bp, data, BP_GET_LSIZE(bp), NULL, NULL,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
if (error)
- return;
- data = buf;
+ goto out;
} else {
- data = (char *)(lr + 1);
+ /* data is stored after the end of the lr_write record */
+ data = abd_alloc(lr->lr_length, B_FALSE);
+ abd_copy_from_buf(data, lr + 1, lr->lr_length);
}
- dlimit = data + MIN(lr->lr_length,
- (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
-
(void) printf("%s", prefix);
- while (data < dlimit) {
- if (isprint(*data))
- (void) printf("%c ", *data);
- else
- (void) printf("%2X", *data);
- data++;
- }
+ (void) abd_iterate_func(data,
+ 0, MIN(lr->lr_length, (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE)),
+ zil_prt_rec_write_cb, NULL);
(void) printf("\n");
+
+out:
+ abd_free(data);
}
/* ARGSUSED */
diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
index 491bd6b..a0b6858 100644
--- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c
+++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
@@ -112,6 +112,7 @@
#include <sys/refcount.h>
#include <sys/zfeature.h>
#include <sys/dsl_userhold.h>
+#include <sys/abd.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
@@ -190,6 +191,7 @@ extern uint64_t metaslab_df_alloc_threshold;
extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
+extern boolean_t zfs_abd_scatter_enabled;
static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
@@ -5042,7 +5044,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
enum zio_checksum checksum = spa_dedup_checksum(spa);
dmu_buf_t *db;
dmu_tx_t *tx;
- void *buf;
+ abd_t *abd;
blkptr_t blk;
int copies = 2 * ZIO_DEDUPDITTO_MIN;
@@ -5122,14 +5124,14 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
* Damage the block. Dedup-ditto will save us when we read it later.
*/
psize = BP_GET_PSIZE(&blk);
- buf = zio_buf_alloc(psize);
- ztest_pattern_set(buf, psize, ~pattern);
+ abd = abd_alloc_linear(psize, B_TRUE);
+ ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);
(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
- buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
+ abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));
- zio_buf_free(buf, psize);
+ abd_free(abd);
(void) rw_unlock(&ztest_name_lock);
}
@@ -5413,6 +5415,12 @@ ztest_resume_thread(void *arg)
*/
if (ztest_random(10) == 0)
zfs_compressed_arc_enabled = ztest_random(2);
+
+ /*
+ * Periodically change the zfs_abd_scatter_enabled setting.
+ */
+ if (ztest_random(10) == 0)
+ zfs_abd_scatter_enabled = ztest_random(2);
}
return (NULL);
}
OpenPOWER on IntegriCloud