summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
diff options
context:
space:
mode:
authormav <mav@FreeBSD.org>2015-10-03 11:35:18 +0000
committermav <mav@FreeBSD.org>2015-10-03 11:35:18 +0000
commit3f77fc2885eaae036939a434b28be8c3f8b5ee87 (patch)
treef4daa96d6a365dec1c22cfb553fe3711918fc51c /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
parent2ceef49597891c2bd9ea4a774bafbf2091be51a6 (diff)
downloadFreeBSD-src-3f77fc2885eaae036939a434b28be8c3f8b5ee87.zip
FreeBSD-src-3f77fc2885eaae036939a434b28be8c3f8b5ee87.tar.gz
MFC r287702: 5987 zfs prefetch code needs work
Rewrite the ZFS prefetch code to detect only forward, sequential streams. The following kstats have been added: kstat.zfs.misc.arcstats.sync_wait_for_async How many sync reads have waited for an async read to complete. (less is better) kstat.zfs.misc.arcstats.demand_hit_predictive_prefetch How many demand reads didn't have to wait for I/O because of predictive prefetch. (more is better) zfetch kstats have been simplified to hits, misses, and max_streams, with max_streams representing times when we were not able to create a new stream because we already have the maximum number of sequences for a file. The sysctl variable/loader tunable vfs.zfs.zfetch.block_cap has been replaced by vfs.zfs.zfetch.max_distance, which controls the maximum bytes to prefetch per stream. illumos/illumos-gate@cf6106c8a0d6598b045811f9650d66e07eb332af Illumos ZFS issues: 5987 zfs prefetch code needs work https://www.illumos.org/issues/5987
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c32
1 files changed, 20 insertions, 12 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index 2b61311..bdb85d8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
/* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */
/* Copyright (c) 2013, Joyent, Inc. All rights reserved. */
@@ -390,7 +390,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
*/
static int
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
- int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
+ boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
@@ -400,15 +400,19 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
ASSERT(length <= DMU_MAX_ACCESS);
- dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
- if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
- dbuf_flags |= DB_RF_NOPREFETCH;
+ /*
+ * Note: We directly notify the prefetch code of this read, so that
+ * we can tell it about the multi-block read. dbuf_read() only knows
+ * about the one block it is accessing.
+ */
+ dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT |
+ DB_RF_NOPREFETCH;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
int blkshift = dn->dn_datablkshift;
- nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) -
- P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
+ nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) -
+ P2ALIGN(offset, 1ULL << blkshift)) >> blkshift;
} else {
if (offset + length > dn->dn_datablksz) {
zfs_panic_recover("zfs: accessing past end of object "
@@ -427,13 +431,14 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
blkid = dbuf_whichblock(dn, 0, offset);
for (i = 0; i < nblks; i++) {
- dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
+ dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
if (db == NULL) {
rw_exit(&dn->dn_struct_rwlock);
dmu_buf_rele_array(dbp, nblks, tag);
zio_nowait(zio);
return (SET_ERROR(EIO));
}
+
/* initiate async i/o */
if (read)
(void) dbuf_read(db, zio, dbuf_flags);
@@ -443,6 +448,11 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
#endif
dbp[i] = &db->db;
}
+
+ if ((flags & DMU_READ_NO_PREFETCH) == 0 && read &&
+ length < zfetch_array_rd_sz) {
+ dmu_zfetch(&dn->dn_zfetch, blkid, nblks);
+ }
rw_exit(&dn->dn_struct_rwlock);
/* wait for async i/o */
@@ -496,7 +506,8 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
int
dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
+ uint64_t length, boolean_t read, void *tag, int *numbufsp,
+ dmu_buf_t ***dbpp)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
dnode_t *dn;
@@ -544,9 +555,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
uint64_t blkid;
int nblks, err;
- if (zfs_prefetch_disable)
- return;
-
if (len == 0) { /* they're interested in the bonus buffer */
dn = DMU_META_DNODE(os);
OpenPOWER on IntegriCloud