summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
diff options
context:
space:
mode:
author mav <mav@FreeBSD.org> 2016-10-14 07:40:20 +0000
committer mav <mav@FreeBSD.org> 2016-10-14 07:40:20 +0000
commit 1f5debbf28788b08460cdb78b99f5a4f0d8bd448 (patch)
tree 0cf07e3b365925875e4014646fffa08b9893cd13 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
parent 6ddf83c30a58a45a62eb5ec40328af32f370b376 (diff)
download FreeBSD-src-1f5debbf28788b08460cdb78b99f5a4f0d8bd448.zip
FreeBSD-src-1f5debbf28788b08460cdb78b99f5a4f0d8bd448.tar.gz
MFC r305340: MFC r305337:
7004 dmu_tx_hold_zap() does dnode_hold() 7x on same object

Using a benchmark which has 32 threads creating 2 million files in the same directory, on a machine with 16 CPU cores, I observed poor performance. I noticed that dmu_tx_hold_zap() was using about 30% of all CPU, and doing dnode_hold() 7 times on the same object (the ZAP object that is being held).

dmu_tx_hold_zap() keeps a hold on the dnode_t the entire time it is running, in dmu_tx_hold_t:txh_dnode, so it would be nice to use the dnode_t that we already have in hand, rather than repeatedly calling dnode_hold(). To do this, we need to pass the dnode_t down through all the intermediate calls that dmu_tx_hold_zap() makes, making these routines take the dnode_t* rather than an objset_t* and a uint64_t object number. In particular, the following routines will need to have analogous *_by_dnode() variants created:

    dmu_buf_hold_noread()
    dmu_buf_hold()
    zap_lookup()
    zap_lookup_norm()
    zap_count_write()
    zap_lockdir()

This can improve performance on the benchmark described above by 100%, from 30,000 file creations per second to 60,000. (This improvement is on top of that provided by working around the object allocation issue. Peak performance of ~90,000 creations per second was observed with 8 CPUs; adding CPUs past that decreased performance due to lock contention.) The CPU used by dmu_tx_hold_zap() was reduced by 88%, from 340 CPU-seconds to 40 CPU-seconds.

Sponsored by: Intel Corp.
Closes #109
Reviewed by: Steve Gonczi <steve.gonczi@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Ned Bass <bass6@llnl.gov>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Author: Matthew Ahrens <mahrens@delphix.com>

openzfs/openzfs@d3e523d489a169ab36f9ec1b2a111a60a5563a9f
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c')
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c | 49
1 files changed, 47 insertions, 2 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
index 1634b62..12a0e75 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
@@ -533,6 +533,24 @@ zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx,
return (0);
}
+static int
+zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
+ krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
+{
+ dmu_buf_t *db;
+ int err;
+
+ err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
+ if (err != 0) {
+ return (err);
+ }
+ err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
+ if (err != 0) {
+ dmu_buf_rele(db, tag);
+ }
+ return (err);
+}
+
int
zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
@@ -858,6 +876,33 @@ zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
}
int
+zap_lookup_by_dnode(dnode_t *dn, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *buf)
+{
+ return (zap_lookup_norm_by_dnode(dn, name, integer_size,
+ num_integers, buf, MT_EXACT, NULL, 0, NULL));
+}
+
+int
+zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *buf,
+ matchtype_t mt, char *realname, int rn_len,
+ boolean_t *ncp)
+{
+ zap_t *zap;
+ int err;
+
+ err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
+ FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_lookup_impl(zap, name, integer_size,
+ num_integers, buf, mt, realname, rn_len, ncp);
+ zap_unlockdir(zap, FTAG);
+ return (err);
+}
+
+int
zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints)
{
@@ -1428,7 +1473,7 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
}
int
-zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
+zap_count_write_by_dnode(dnode_t *dn, const char *name, int add,
refcount_t *towrite, refcount_t *tooverwrite)
{
zap_t *zap;
@@ -1457,7 +1502,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
* At present we are just evaluating the possibility of this operation
* and hence we do not want to trigger an upgrade.
*/
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE,
+ err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
FTAG, &zap);
if (err != 0)
return (err);
OpenPOWER on IntegriCloud