summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
diff options
context:
space:
mode:
author mav <mav@FreeBSD.org> 2016-10-14 07:40:20 +0000
committer mav <mav@FreeBSD.org> 2016-10-14 07:40:20 +0000
commit 1f5debbf28788b08460cdb78b99f5a4f0d8bd448 (patch)
tree 0cf07e3b365925875e4014646fffa08b9893cd13 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
parent 6ddf83c30a58a45a62eb5ec40328af32f370b376 (diff)
download FreeBSD-src-1f5debbf28788b08460cdb78b99f5a4f0d8bd448.zip
FreeBSD-src-1f5debbf28788b08460cdb78b99f5a4f0d8bd448.tar.gz
MFC r305340: MFC r305337:
7004 dmu_tx_hold_zap() does dnode_hold() 7x on same object

Using a benchmark which has 32 threads creating 2 million files in the same directory, on a machine with 16 CPU cores, I observed poor performance. I noticed that dmu_tx_hold_zap() was using about 30% of all CPU, and doing dnode_hold() 7 times on the same object (the ZAP object that is being held).

dmu_tx_hold_zap() keeps a hold on the dnode_t the entire time it is running, in dmu_tx_hold_t:txh_dnode, so it would be nice to use the dnode_t that we already have in hand, rather than repeatedly calling dnode_hold(). To do this, we need to pass the dnode_t down through all the intermediate calls that dmu_tx_hold_zap() makes, making these routines take the dnode_t* rather than an objset_t* and a uint64_t object number. In particular, the following routines will need to have analogous *_by_dnode() variants created:

    dmu_buf_hold_noread()
    dmu_buf_hold()
    zap_lookup()
    zap_lookup_norm()
    zap_count_write()
    zap_lockdir()

This can improve performance on the benchmark described above by 100%, from 30,000 file creations per second to 60,000. (This improvement is on top of that provided by working around the object allocation issue. Peak performance of ~90,000 creations per second was observed with 8 CPUs; adding CPUs past that decreased performance due to lock contention.) The CPU used by dmu_tx_hold_zap() was reduced by 88%, from 340 CPU-seconds to 40 CPU-seconds.

Sponsored by: Intel Corp.
Closes #109
Reviewed by: Steve Gonczi <steve.gonczi@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Ned Bass <bass6@llnl.gov>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Author: Matthew Ahrens <mahrens@delphix.com>

openzfs/openzfs@d3e523d489a169ab36f9ec1b2a111a60a5563a9f
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c')
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c | 18
1 files changed, 15 insertions, 3 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
index cab8c92..bf68c9f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
@@ -270,6 +270,7 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
uint64_t blk, off;
int err;
dmu_buf_t *db;
+ dnode_t *dn;
int bs = FZAP_BLOCK_SHIFT(zap);
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
@@ -277,8 +278,15 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
blk = idx >> (bs-3);
off = idx & ((1<<(bs-3))-1);
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
+ /*
+ * Note: this is equivalent to dmu_buf_hold(), but we use
+ * _dnode_enter / _by_dnode because it's faster because we don't
+ * have to hold the dnode.
+ */
+ dn = dmu_buf_dnode_enter(zap->zap_dbuf);
+ err = dmu_buf_hold_by_dnode(dn,
(tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
+ dmu_buf_dnode_exit(zap->zap_dbuf);
if (err)
return (err);
*valp = ((uint64_t *)db->db_data)[off];
@@ -292,9 +300,11 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
*/
blk = (idx*2) >> (bs-3);
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
+ dn = dmu_buf_dnode_enter(zap->zap_dbuf);
+ err = dmu_buf_hold_by_dnode(dn,
(tbl->zt_nextblk + blk) << bs, FTAG, &db,
DMU_READ_NO_PREFETCH);
+ dmu_buf_dnode_exit(zap->zap_dbuf);
if (err == 0)
dmu_buf_rele(db, FTAG);
}
@@ -505,8 +515,10 @@ zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
+ dnode_t *dn = dmu_buf_dnode_enter(zap->zap_dbuf);
+ err = dmu_buf_hold_by_dnode(dn,
blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
+ dmu_buf_dnode_exit(zap->zap_dbuf);
if (err)
return (err);
OpenPOWER on IntegriCloud