diff options
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c | 228 |
1 files changed, 197 insertions, 31 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c index d3efa55..1b246c8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c @@ -49,7 +49,14 @@ #include <sys/zfs_onexit.h> #include <sys/dmu_send.h> #include <sys/dsl_destroy.h> +#include <sys/blkptr.h> #include <sys/dsl_bookmark.h> +#include <sys/zfeature.h> + +#ifdef __FreeBSD__ +#undef dump_write +#define dump_write dmu_dump_write +#endif /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ int zfs_send_corrupt_data = B_FALSE; @@ -184,7 +191,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, } static int -dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, +dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) { struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); @@ -219,13 +226,22 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, drrw->drr_offset = offset; drrw->drr_length = blksz; drrw->drr_toguid = dsp->dsa_toguid; - drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); - if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) - drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; - DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); - DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); - DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); - drrw->drr_key.ddk_cksum = bp->blk_cksum; + if (BP_IS_EMBEDDED(bp)) { + /* + * There's no pre-computed checksum of embedded BP's, so + * (like fletcher4-checkummed blocks) userland will have + * to compute a dedup-capable checksum itself. + */ + drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; + } else { + drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); + if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) + drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; + DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); + DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); + DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); + drrw->drr_key.ddk_cksum = bp->blk_cksum; + } if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (SET_ERROR(EINTR)); @@ -235,6 +251,43 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, } static int +dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, + int blksz, const blkptr_t *bp) +{ + char buf[BPE_PAYLOAD_SIZE]; + struct drr_write_embedded *drrw = + &(dsp->dsa_drr->drr_u.drr_write_embedded); + + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + dsp->dsa_pending_op = PENDING_NONE; + } + + ASSERT(BP_IS_EMBEDDED(bp)); + + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED; + drrw->drr_object = object; + drrw->drr_offset = offset; + drrw->drr_length = blksz; + drrw->drr_toguid = dsp->dsa_toguid; + drrw->drr_compression = BP_GET_COMPRESS(bp); + drrw->drr_etype = BPE_GET_ETYPE(bp); + drrw->drr_lsize = BPE_GET_LSIZE(bp); + drrw->drr_psize = BPE_GET_PSIZE(bp); + + decode_embedded_bp_compressed(bp, buf); + + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) + return (EINTR); + if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) + return (EINTR); + return (0); +} + +static int dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) { struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); @@ -354,6 +407,33 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) return (0); } +static boolean_t +backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp) +{ + if (!BP_IS_EMBEDDED(bp)) + return (B_FALSE); + + /* + * Compression function must be legacy, or explicitly enabled. + */ + if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS && + !(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4))) + return (B_FALSE); + + /* + * Embed type must be explicitly enabled. + */ + switch (BPE_GET_ETYPE(bp)) { + case BP_EMBEDDED_TYPE_DATA: + if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) + return (B_TRUE); + break; + default: + return (B_FALSE); + } + return (B_FALSE); +} + #define BP_SPAN(dnp, level) \ (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) @@ -422,11 +502,17 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); + } else if (backup_do_embed(dsp, bp)) { + /* it's an embedded level-0 block of a regular object */ + int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; + err = dump_write_embedded(dsp, zb->zb_object, + zb->zb_blkid * blksz, blksz, bp); } else { /* it's a level-0 block of a regular object */ uint32_t aflags = ARC_WAIT; arc_buf_t *abuf; int blksz = BP_GET_LSIZE(bp); + ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); ASSERT0(zb->zb_level); if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, @@ -445,7 +531,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, } } - err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz, + err = dump_write(dsp, type, zb->zb_object, zb->zb_blkid * blksz, blksz, bp, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } @@ -459,12 +545,11 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, */ static int dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, + zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok, #ifdef illumos - zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd, - vnode_t *vp, offset_t *off) + int outfd, vnode_t *vp, offset_t *off) #else - zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd, - struct file *fp, offset_t *off) + int outfd, struct file *fp, offset_t *off) #endif { objset_t *os; @@ -472,6 +557,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, dmu_sendarg_t *dsp; int err; uint64_t fromtxg = 0; + uint64_t featureflags = 0; err = dmu_objset_from_ds(ds, &os); if (err != 0) { @@ -494,13 +580,23 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, return (SET_ERROR(EINVAL)); } if (version >= ZPL_VERSION_SA) { - DMU_SET_FEATUREFLAGS( - drr->drr_u.drr_begin.drr_versioninfo, - DMU_BACKUP_FEATURE_SA_SPILL); + featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; } } #endif + if (embedok && + spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) { + featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA; + if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) + featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4; + } else { + embedok = B_FALSE; + } + + DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo, + featureflags); + drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); @@ -533,6 +629,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); dsp->dsa_pending_op = PENDING_NONE; dsp->dsa_incremental = (fromzb != NULL); + dsp->dsa_featureflags = featureflags; mutex_enter(&ds->ds_sendstream_lock); list_insert_head(&ds->ds_sendstreams, dsp); @@ -585,9 +682,9 @@ out: int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, #ifdef illumos - int outfd, vnode_t *vp, offset_t *off) + boolean_t embedok, int outfd, vnode_t *vp, offset_t *off) #else - int outfd, struct file *fp, offset_t *off) + boolean_t embedok, int outfd, struct file *fp, offset_t *off) #endif { dsl_pool_t *dp; @@ -622,10 +719,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, zb.zbm_guid = fromds->ds_phys->ds_guid; is_clone = (fromds->ds_dir != ds->ds_dir); dsl_dataset_rele(fromds, FTAG); - err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, + err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok, outfd, fp, off); } else { - err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, + err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok, outfd, fp, off); } dsl_dataset_rele(ds, FTAG); @@ -633,7 +730,7 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, } int -dmu_send(const char *tosnap, const char *fromsnap, +dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, #ifdef illumos int outfd, vnode_t *vp, offset_t *off) #else @@ -704,10 +801,10 @@ dmu_send(const char *tosnap, const char *fromsnap, dsl_pool_rele(dp, FTAG); return (err); } - err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, + err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok, outfd, fp, off); } else { - err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, + err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok, outfd, fp, off); } if (owned) @@ -877,6 +974,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) uint64_t fromguid = drrb->drr_fromguid; int flags = drrb->drr_flags; int error; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; const char *tofs = drba->drba_cookie->drc_tofs; @@ -890,11 +988,22 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) return (SET_ERROR(EINVAL)); /* Verify pool version supports SA if SA_SPILL feature set */ - if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_SA_SPILL) && - spa_version(dp->dp_spa) < SPA_VERSION_SA) { + if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && + spa_version(dp->dp_spa) < SPA_VERSION_SA) + return (SET_ERROR(ENOTSUP)); + + /* + * The receiving code doesn't know how to translate a WRITE_EMBEDDED + * record to a plan WRITE record, so the pool must have the + * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED + * records. Same with WRITE_EMBEDDED records that use LZ4 compression. + */ + if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) && + !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) + return (SET_ERROR(ENOTSUP)); + if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) && + !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) return (SET_ERROR(ENOTSUP)); - } error = dsl_dataset_hold(dp, tofs, FTAG, &ds); if (error == 0) { @@ -1214,7 +1323,6 @@ backup_byteswap(dmu_replay_record_t *drr) break; case DRR_OBJECT: DO64(drr_object.drr_object); - /* DO64(drr_object.drr_allocation_txg); */ DO32(drr_object.drr_type); DO32(drr_object.drr_bonustype); DO32(drr_object.drr_blksz); @@ -1252,6 +1360,14 @@ backup_byteswap(dmu_replay_record_t *drr) DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); DO64(drr_write_byref.drr_key.ddk_prop); break; + case DRR_WRITE_EMBEDDED: + DO64(drr_write_embedded.drr_object); + DO64(drr_write_embedded.drr_offset); + DO64(drr_write_embedded.drr_length); + DO64(drr_write_embedded.drr_toguid); + DO32(drr_write_embedded.drr_lsize); + DO32(drr_write_embedded.drr_psize); + break; case DRR_FREE: DO64(drr_free.drr_object); DO64(drr_free.drr_offset); @@ -1439,7 +1555,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os, int err; guid_map_entry_t gmesrch; guid_map_entry_t *gmep; - avl_index_t where; + avl_index_t where; objset_t *ref_os = NULL; dmu_buf_t *dbp; @@ -1462,8 +1578,9 @@ restore_write_byref(struct restorearg *ra, objset_t *os, ref_os = os; } - if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, - drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH)) + err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, + drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); + if (err != 0) return (err); tx = dmu_tx_create(os); @@ -1483,6 +1600,48 @@ restore_write_byref(struct restorearg *ra, objset_t *os, } static int +restore_write_embedded(struct restorearg *ra, objset_t *os, + struct drr_write_embedded *drrwnp) +{ + dmu_tx_t *tx; + int err; + void *data; + + if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset) + return (EINVAL); + + if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE) + return (EINVAL); + + if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES) + return (EINVAL); + if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) + return (EINVAL); + + data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8)); + if (data == NULL) + return (ra->err); + + tx = dmu_tx_create(os); + + dmu_tx_hold_write(tx, drrwnp->drr_object, + drrwnp->drr_offset, drrwnp->drr_length); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err != 0) { + dmu_tx_abort(tx); + return (err); + } + + dmu_write_embedded(os, drrwnp->drr_object, + drrwnp->drr_offset, data, drrwnp->drr_etype, + drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize, + ra->byteswap ^ ZFS_HOST_BYTEORDER, tx); + + dmu_tx_commit(tx); + return (0); +} + +static int restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) { dmu_tx_t *tx; @@ -1677,6 +1836,13 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, ra.err = restore_write_byref(&ra, os, &drrwbr); break; } + case DRR_WRITE_EMBEDDED: + { + struct drr_write_embedded drrwe = + drr->drr_u.drr_write_embedded; + ra.err = restore_write_embedded(&ra, os, &drrwe); + break; + } case DRR_FREE: { struct drr_free drrf = drr->drr_u.drr_free; |