diff options
author | delphij <delphij@FreeBSD.org> | 2014-12-22 20:58:51 +0000 |
---|---|---|
committer | delphij <delphij@FreeBSD.org> | 2014-12-22 20:58:51 +0000 |
commit | 1ad38ed4f01c38401f2c15151edbcfab81168db1 (patch) | |
tree | bf517de79b70790f455a71edd81bb6303f96f90e /cddl/contrib/opensolaris/lib/libzfs/common | |
parent | af2ee162da52913fdefa6d0ffae3644e39a3b369 (diff) | |
download | FreeBSD-src-1ad38ed4f01c38401f2c15151edbcfab81168db1.zip FreeBSD-src-1ad38ed4f01c38401f2c15151edbcfab81168db1.tar.gz |
MFC r274337,r274673,274681,r275515:
ZFS large block support. The default recordsize remains at 128KB.
A new tunable/sysctl variable, vfs.zfs.max_recordsize is added to
allow adjusting the permitted maximum record size, or
zfs_max_recordsize, with a default of 1MB. ZFS will not allow
setting recordsize greater than zfs_max_recordsize as a safety
belt, because larger recordsize means greater read and write
latency and more memory usage.
Please note that booting from datasets that have recordsize greater
than 128KB is not supported (but it's Okay to enable the feature on
the pool).
Limited safety belt is provided for mounted root filesystem but use
caution when using a larger value.
Illumos issue:
5027 zfs large block support
Diffstat (limited to 'cddl/contrib/opensolaris/lib/libzfs/common')
3 files changed, 39 insertions, 18 deletions
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index ef18b45..8a707d1 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -609,6 +609,9 @@ typedef struct sendflags { /* show progress (ie. -v) */ boolean_t progress; + /* large blocks (>128K) are permitted */ + boolean_t largeblock; + /* WRITE_EMBEDDED records of type DATA are permitted */ boolean_t embed_data; } sendflags_t; diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c index d7126bf..bf6bfd4 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c @@ -1080,21 +1080,36 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, break; } - case ZFS_PROP_RECORDSIZE: case ZFS_PROP_VOLBLOCKSIZE: - /* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */ + case ZFS_PROP_RECORDSIZE: + { + int maxbs = SPA_MAXBLOCKSIZE; + if (zhp != NULL) { + maxbs = zpool_get_prop_int(zhp->zpool_hdl, + ZPOOL_PROP_MAXBLOCKSIZE, NULL); + } + /* + * Volumes are limited to a volblocksize of 128KB, + * because they typically service workloads with + * small random writes, which incur a large performance + * penalty with large blocks. + */ + if (prop == ZFS_PROP_VOLBLOCKSIZE) + maxbs = SPA_OLD_MAXBLOCKSIZE; + /* + * The value must be a power of two between + * SPA_MINBLOCKSIZE and maxbs. + */ if (intval < SPA_MINBLOCKSIZE || - intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) { + intval > maxbs || !ISP2(intval)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be power of 2 from %u " - "to %uk"), propname, - (uint_t)SPA_MINBLOCKSIZE, - (uint_t)SPA_MAXBLOCKSIZE >> 10); + "'%s' must be power of 2 from 512B " + "to %uKB"), propname, maxbs >> 10); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; - + } case ZFS_PROP_MLSLABEL: { #ifdef sun @@ -1465,7 +1480,9 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, break; case ERANGE: - if (prop == ZFS_PROP_COMPRESSION) { + case EDOM: + if (prop == ZFS_PROP_COMPRESSION || + prop == ZFS_PROP_RECORDSIZE) { (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property setting is not allowed on " "bootable datasets")); @@ -3191,9 +3208,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, case EDOM: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume block size must be power of 2 from " - "%u to %uk"), - (uint_t)SPA_MINBLOCKSIZE, - (uint_t)SPA_MAXBLOCKSIZE >> 10); + "512B to 128KB")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c index 97f18d7..91857b6 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c @@ -215,7 +215,7 @@ static void * cksummer(void *arg) { dedup_arg_t *dda = arg; - char *buf = malloc(1<<20); + char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE); dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; @@ -280,9 +280,9 @@ cksummer(void *arg) DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { int sz = drr->drr_payloadlen; - if (sz > 1<<20) { - free(buf); - buf = malloc(sz); + if (sz > SPA_MAXBLOCKSIZE) { + buf = zfs_realloc(dda->dedup_hdl, buf, + SPA_MAXBLOCKSIZE, sz); } (void) ssread(buf, sz, ofp); if (ferror(stdin)) @@ -815,7 +815,7 @@ typedef struct send_dump_data { char prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose, dryrun, parsable, progress, embed_data; + boolean_t verbose, dryrun, parsable, progress, embed_data, large_block; int outfd; boolean_t err; nvlist_t *fss; @@ -1163,6 +1163,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) } enum lzc_send_flags flags = 0; + if (sdd->large_block) + flags |= LZC_SEND_FLAG_LARGE_BLOCK; if (sdd->embed_data) flags |= LZC_SEND_FLAG_EMBED_DATA; @@ -1511,6 +1513,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.parsable = flags->parsable; sdd.progress = flags->progress; sdd.dryrun = flags->dryrun; + sdd.large_block = flags->largeblock; sdd.embed_data = flags->embed_data; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; @@ -2545,7 +2548,7 @@ static int recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; - void *buf = malloc(1<<20); + void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE); char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, |