diff options
author | melifaro <melifaro@FreeBSD.org> | 2014-10-09 15:35:28 +0000 |
---|---|---|
committer | melifaro <melifaro@FreeBSD.org> | 2014-10-09 15:35:28 +0000 |
commit | d23efba7dd470e247cbcacf9ec6cf642d02d5b8c (patch) | |
tree | 6d2822f6a5ff6c5c487b859a557d076b53b03c46 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs | |
parent | cab1d703b61e2216f130c6d1a23ca70b2b322386 (diff) | |
parent | c47600c4ec062b8225ac1d48197a2a1de778760d (diff) | |
download | FreeBSD-src-d23efba7dd470e247cbcacf9ec6cf642d02d5b8c.zip FreeBSD-src-d23efba7dd470e247cbcacf9ec6cf642d02d5b8c.tar.gz |
Sync to HEAD@r272825.
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs')
6 files changed, 140 insertions, 75 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index 1a1d4d8..60d2ff9 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -5047,7 +5047,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, if (ab == NULL) ARCSTAT_BUMP(arcstat_l2_write_buffer_list_null_iter); - headroom = target_sz * l2arc_headroom; + headroom = target_sz * l2arc_headroom * 2 / ARC_BUFC_NUMLISTS; if (do_headroom_boost) headroom = (headroom * l2arc_headroom_boost) / 100; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c index 4e67bf6..c86d3a7 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1021,8 +1021,8 @@ xuio_stat_wbuf_nocopy() } #ifdef _KERNEL -int -dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) +static int +dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) { dmu_buf_t **dbp; int numbufs, i, err; @@ -1032,8 +1032,8 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) * NB: we could do this block-at-a-time, but it's nice * to be reading in parallel. */ - err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, - &numbufs, &dbp); + err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size, + TRUE, FTAG, &numbufs, &dbp, 0); if (err) return (err); @@ -1080,6 +1080,58 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) return (err); } +/* + * Read 'size' bytes into the uio buffer. + * From object zdb->db_object. + * Starting at offset uio->uio_loffset. + * + * If the caller already has a dbuf in the target object + * (e.g. its bonus buffer), this routine is faster than dmu_read_uio(), + * because we don't have to find the dnode_t for the object. + */ +int +dmu_read_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + err = dmu_read_uio_dnode(dn, uio, size); + DB_DNODE_EXIT(db); + + return (err); +} + +/* + * Read 'size' bytes into the uio buffer. + * From the specified object + * Starting at offset uio->uio_loffset. + */ +int +dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) +{ + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + + err = dmu_read_uio_dnode(dn, uio, size); + + dnode_rele(dn, FTAG); + + return (err); +} + static int dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) { @@ -1132,6 +1184,15 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) return (err); } +/* + * Write 'size' bytes from the uio buffer. + * To object zdb->db_object. + * Starting at offset uio->uio_loffset. + * + * If the caller already has a dbuf in the target object + * (e.g. its bonus buffer), this routine is faster than dmu_write_uio(), + * because we don't have to find the dnode_t for the object. + */ int dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, dmu_tx_t *tx) @@ -1151,6 +1212,11 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, return (err); } +/* + * Write 'size' bytes from the uio buffer. + * To the specified object. + * Starting at offset uio->uio_loffset. + */ int dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, dmu_tx_t *tx) @@ -1852,6 +1918,7 @@ dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) doi->doi_indirection = dn->dn_nlevels; doi->doi_checksum = dn->dn_checksum; doi->doi_compress = dn->dn_compress; + doi->doi_nblkptr = dn->dn_nblkptr; doi->doi_physical_blocks_512 = (DN_USED_BYTES(dnp) + 256) >> 9; doi->doi_max_offset = (dn->dn_maxblkid + 1) * dn->dn_datablksz; doi->doi_fill_count = 0; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c index 5b512ad..808864a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c @@ -20,7 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright 2014 HybridCluster. All rights reserved. */ #include <sys/dmu.h> @@ -107,11 +108,9 @@ dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, - int blocksize, dmu_object_type_t bonustype, int bonuslen) + int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { dnode_t *dn; - dmu_tx_t *tx; - int nblkptr; int err; if (object == DMU_META_DNODE_OBJECT) @@ -122,44 +121,9 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, if (err) return (err); - if (dn->dn_type == ot && dn->dn_datablksz == blocksize && - dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) { - /* nothing is changing, this is a noop */ - dnode_rele(dn, FTAG); - return (0); - } - - if (bonustype == DMU_OT_SA) { - nblkptr = 1; - } else { - nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); - } - - /* - * If we are losing blkptrs or changing the block size this must - * be a new file instance. We must clear out the previous file - * contents before we can change this type of metadata in the dnode. - */ - if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) { - err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END); - if (err) - goto out; - } - - tx = dmu_tx_create(os); - dmu_tx_hold_bonus(tx, object); - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - goto out; - } - dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx); - dmu_tx_commit(tx); -out: dnode_rele(dn, FTAG); - return (err); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c index 314e73c..1a0cab5 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c @@ -24,6 +24,7 @@ * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved. + * Copyright 2014 HybridCluster. All rights reserved. */ #include <sys/dmu.h> @@ -1392,12 +1393,25 @@ backup_byteswap(dmu_replay_record_t *drr) #undef DO32 } +static inline uint8_t +deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size) +{ + if (bonus_type == DMU_OT_SA) { + return (1); + } else { + return (1 + + ((DN_MAX_BONUSLEN - bonus_size) >> SPA_BLKPTRSHIFT)); + } +} + static int restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) { - int err; + dmu_object_info_t doi; dmu_tx_t *tx; void *data = NULL; + uint64_t object; + int err; if (drro->drr_type == DMU_OT_NONE || !DMU_OT_IS_VALID(drro->drr_type) || @@ -1411,10 +1425,11 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) return (SET_ERROR(EINVAL)); } - err = dmu_object_info(os, drro->drr_object, NULL); + err = dmu_object_info(os, drro->drr_object, &doi); if (err != 0 && err != ENOENT) return (SET_ERROR(EINVAL)); + object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT; if (drro->drr_bonuslen) { data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL); @@ -1422,37 +1437,53 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) return (ra->err); } - if (err == ENOENT) { - /* currently free, want to be allocated */ - tx = dmu_tx_create(os); - dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - err = dmu_tx_assign(tx, TXG_WAIT); - if (err != 0) { - dmu_tx_abort(tx); - return (err); + /* + * If we are losing blkptrs or changing the block size this must + * be a new file instance. We must clear out the previous file + * contents before we can change this type of metadata in the dnode. + */ + if (err == 0) { + int nblkptr; + + nblkptr = deduce_nblkptr(drro->drr_bonustype, + drro->drr_bonuslen); + + if (drro->drr_blksz != doi.doi_data_block_size || + nblkptr < doi.doi_nblkptr) { + err = dmu_free_long_range(os, drro->drr_object, + 0, DMU_OBJECT_END); + if (err != 0) + return (SET_ERROR(EINVAL)); } + } + + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, object); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err != 0) { + dmu_tx_abort(tx); + return (err); + } + + if (object == DMU_NEW_OBJECT) { + /* currently free, want to be allocated */ err = dmu_object_claim(os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, tx); - dmu_tx_commit(tx); - } else { - /* currently allocated, want to be allocated */ + } else if (drro->drr_type != doi.doi_type || + drro->drr_blksz != doi.doi_data_block_size || + drro->drr_bonustype != doi.doi_bonus_type || + drro->drr_bonuslen != doi.doi_bonus_size) { + /* currently allocated, but with different properties */ err = dmu_object_reclaim(os, drro->drr_object, drro->drr_type, drro->drr_blksz, - drro->drr_bonustype, drro->drr_bonuslen); + drro->drr_bonustype, drro->drr_bonuslen, tx); } if (err != 0) { + dmu_tx_commit(tx); return (SET_ERROR(EINVAL)); } - tx = dmu_tx_create(os); - dmu_tx_hold_bonus(tx, drro->drr_object); - err = dmu_tx_assign(tx, TXG_WAIT); - if (err != 0) { - dmu_tx_abort(tx); - return (err); - } - dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, tx); dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h index 2c0b5ae..ad19266 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h @@ -25,6 +25,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2013 DEY Storage Systems, Inc. + * Copyright 2014 HybridCluster. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -341,7 +342,7 @@ uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, - int blocksize, dmu_object_type_t bonustype, int bonuslen); + int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp); /* * Free an object from this objset. @@ -616,6 +617,7 @@ void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); +int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size); int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, dmu_tx_t *tx); int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size, @@ -661,7 +663,8 @@ typedef struct dmu_object_info { uint8_t doi_indirection; /* 2 = dnode->indirect->data */ uint8_t doi_checksum; uint8_t doi_compress; - uint8_t doi_pad[5]; + uint8_t doi_nblkptr; + uint8_t doi_pad[4]; uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */ uint64_t doi_max_offset; uint64_t doi_fill_count; /* number of non-empty blocks */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 109a339..5fec709 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -582,7 +582,6 @@ static int mappedread(vnode_t *vp, int nbytes, uio_t *uio) { znode_t *zp = VTOZ(vp); - objset_t *os = zp->z_zfsvfs->z_os; vm_object_t obj; int64_t start; caddr_t va; @@ -613,7 +612,8 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) page_unhold(pp); } else { zfs_vmobject_wunlock(obj); - error = dmu_read_uio(os, zp->z_id, uio, bytes); + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, bytes); zfs_vmobject_wlock(obj); } len -= bytes; @@ -650,7 +650,6 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; - objset_t *os; ssize_t n, nbytes; int error = 0; rl_t *rl; @@ -658,7 +657,6 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); - os = zfsvfs->z_os; if (zp->z_pflags & ZFS_AV_QUARANTINED) { ZFS_EXIT(zfsvfs); @@ -756,10 +754,12 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) error = mappedread_sf(vp, nbytes, uio); else #endif /* __FreeBSD__ */ - if (vn_has_cached_data(vp)) + if (vn_has_cached_data(vp)) { error = mappedread(vp, nbytes, uio); - else - error = dmu_read_uio(os, zp->z_id, uio, nbytes); + } else { + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, nbytes); + } if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) |