summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author delphij <delphij@FreeBSD.org> 2014-08-02 03:59:35 +0000
committer delphij <delphij@FreeBSD.org> 2014-08-02 03:59:35 +0000
commit b798cd9bfe5eed52bd7d48badb239999a0d81389 (patch)
tree 01383cd9af81395a0eb20cc45196540ea6bcb0db
parent 6a949e106dc93d9fa177f78a454fab1d92b5c5e7 (diff)
downloadFreeBSD-src-b798cd9bfe5eed52bd7d48badb239999a0d81389.zip
FreeBSD-src-b798cd9bfe5eed52bd7d48badb239999a0d81389.tar.gz
MFC r268858: MFV r268850:
Change the interaction between the DMU and ARC so that when the DMU is shutting down an objset, we do not evict the data from the ARC. Instead we simply coordinate the destruction of the DMU's data with the ARC. The only case where we actually need to explicitly evict from the ARC is when dbuf_rele_and_unlock() determines that the administrator has requested that it not be kept in memory, via the primarycache/secondarycache properties. In this case, we evict the data from the ARC by its blkptr_t, just as we explicitly evict a block from the ARC when it is freed.
Illumos issue: 4631 zvol_get_stats triggering too many reads
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 104
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c 38
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h 3
3 files changed, 64 insertions, 81 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index 7978bdb..edc13f3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -104,7 +104,7 @@
* with the buffer may be evicted prior to the callback. The callback
* must be made with *no locks held* (to prevent deadlock). Additionally,
* the users of callbacks must ensure that their private data is
- * protected from simultaneous callbacks from arc_buf_evict()
+ * protected from simultaneous callbacks from arc_clear_callback()
* and arc_do_user_evicts().
*
* Note that the majority of the performance stats are manipulated
@@ -1647,8 +1647,12 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
}
}
+/*
+ * Free up buf->b_data and if 'remove' is set, then pull the
+ * arc_buf_t off of the arc_buf_hdr_t's list and free it.
+ */
static void
-arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
+arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
{
arc_buf_t **bufp;
@@ -1701,7 +1705,7 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
}
/* only remove the buf if requested */
- if (!all)
+ if (!remove)
return;
/* remove the buf from the hdr list */
@@ -2355,7 +2359,7 @@ restart:
mutex_exit(&buf->b_evict_lock);
if (buf->b_efunc != NULL)
- VERIFY(buf->b_efunc(buf) == 0);
+ VERIFY0(buf->b_efunc(buf->b_private));
buf->b_efunc = NULL;
buf->b_private = NULL;
@@ -3490,16 +3494,25 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
}
/*
- * This is used by the DMU to let the ARC know that a buffer is
- * being evicted, so the ARC should clean up. If this arc buf
- * is not yet in the evicted state, it will be put there.
+ * Clear the user eviction callback set by arc_set_callback(), first calling
+ * it if it exists. Because the presence of a callback keeps an arc_buf cached,
+ * clearing the callback may result in the arc_buf being destroyed. However,
+ * it will not result in the *last* arc_buf being destroyed, hence the data
+ * will remain cached in the ARC. We make a copy of the arc buffer here so
+ * that we can process the callback without holding any locks.
+ *
+ * It's possible that the callback is already in the process of being cleared
+ * by another thread. In this case we can not clear the callback.
+ *
+ * Returns B_TRUE if the callback was successfully called and cleared.
*/
-int
-arc_buf_evict(arc_buf_t *buf)
+boolean_t
+arc_clear_callback(arc_buf_t *buf)
{
arc_buf_hdr_t *hdr;
kmutex_t *hash_lock;
- arc_buf_t **bufp;
+ arc_evict_func_t *efunc = buf->b_efunc;
+ void *private = buf->b_private;
list_t *list, *evicted_list;
kmutex_t *lock, *evicted_lock;
@@ -3511,17 +3524,16 @@ arc_buf_evict(arc_buf_t *buf)
*/
ASSERT(buf->b_data == NULL);
mutex_exit(&buf->b_evict_lock);
- return (0);
+ return (B_FALSE);
} else if (buf->b_data == NULL) {
- arc_buf_t copy = *buf; /* structure assignment */
/*
* We are on the eviction list; process this buffer now
* but let arc_do_user_evicts() do the reaping.
*/
buf->b_efunc = NULL;
mutex_exit(&buf->b_evict_lock);
- VERIFY(copy.b_efunc(&copy) == 0);
- return (1);
+ VERIFY0(efunc(private));
+ return (B_TRUE);
}
hash_lock = HDR_LOCK(hdr);
mutex_enter(hash_lock);
@@ -3531,50 +3543,21 @@ arc_buf_evict(arc_buf_t *buf)
ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt);
ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
- /*
- * Pull this buffer off of the hdr
- */
- bufp = &hdr->b_buf;
- while (*bufp != buf)
- bufp = &(*bufp)->b_next;
- *bufp = buf->b_next;
-
- ASSERT(buf->b_data != NULL);
- arc_buf_destroy(buf, FALSE, FALSE);
-
- if (hdr->b_datacnt == 0) {
- arc_state_t *old_state = hdr->b_state;
- arc_state_t *evicted_state;
-
- ASSERT(hdr->b_buf == NULL);
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
-
- evicted_state =
- (old_state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
-
- get_buf_info(hdr, old_state, &list, &lock);
- get_buf_info(hdr, evicted_state, &evicted_list, &evicted_lock);
- mutex_enter(lock);
- mutex_enter(evicted_lock);
-
- arc_change_state(evicted_state, hdr, hash_lock);
- ASSERT(HDR_IN_HASH_TABLE(hdr));
- hdr->b_flags |= ARC_IN_HASH_TABLE;
- hdr->b_flags &= ~ARC_BUF_AVAILABLE;
+ buf->b_efunc = NULL;
+ buf->b_private = NULL;
- mutex_exit(evicted_lock);
- mutex_exit(lock);
+ if (hdr->b_datacnt > 1) {
+ mutex_exit(&buf->b_evict_lock);
+ arc_buf_destroy(buf, FALSE, TRUE);
+ } else {
+ ASSERT(buf == hdr->b_buf);
+ hdr->b_flags |= ARC_BUF_AVAILABLE;
+ mutex_exit(&buf->b_evict_lock);
}
- mutex_exit(hash_lock);
- mutex_exit(&buf->b_evict_lock);
- VERIFY(buf->b_efunc(buf) == 0);
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_hdr = NULL;
- buf->b_next = NULL;
- kmem_cache_free(buf_cache, buf);
- return (1);
+ mutex_exit(hash_lock);
+ VERIFY0(efunc(private));
+ return (B_TRUE);
}
/*
@@ -3724,17 +3707,6 @@ arc_released(arc_buf_t *buf)
return (released);
}
-int
-arc_has_callback(arc_buf_t *buf)
-{
- int callback;
-
- mutex_enter(&buf->b_evict_lock);
- callback = (buf->b_efunc != NULL);
- mutex_exit(&buf->b_evict_lock);
- return (callback);
-}
-
#ifdef ZFS_DEBUG
int
arc_referenced(arc_buf_t *buf)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index 5ab6a65..b681ea4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -181,8 +181,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
}
/*
- * Remove an entry from the hash table. This operation will
- * fail if there are any existing holds on the db.
+ * Remove an entry from the hash table. It must be in the EVICTING state.
*/
static void
dbuf_hash_remove(dmu_buf_impl_t *db)
@@ -194,7 +193,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
dmu_buf_impl_t *dbf, **dbp;
/*
- * We musn't hold db_mtx to maintin lock ordering:
+ * We mustn't hold db_mtx to maintain lock ordering:
* DBUF_HASH_MUTEX > db_mtx.
*/
ASSERT(refcount_is_zero(&db->db_holds));
@@ -431,7 +430,6 @@ static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
db->db_buf = buf;
if (buf != NULL) {
ASSERT(buf->b_data != NULL);
@@ -1544,12 +1542,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
* when we are not holding the dn_dbufs_mtx, we can't clear the
* entry in the dn_dbufs list. We have to wait until dbuf_destroy()
* in this case. For callers from the DMU we will usually see:
- * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy()
+ * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy()
* For the arc callback, we will usually see:
* dbuf_do_evict()->dbuf_clear();dbuf_destroy()
* Sometimes, though, we will get a mix of these two:
- * DMU: dbuf_clear()->arc_buf_evict()
+ * DMU: dbuf_clear()->arc_clear_callback()
* ARC: dbuf_do_evict()->dbuf_destroy()
+ *
+ * This routine will dissociate the dbuf from the arc, by calling
+ * arc_clear_callback(), but will not evict the data from the ARC.
*/
void
dbuf_clear(dmu_buf_impl_t *db)
@@ -1557,7 +1558,7 @@ dbuf_clear(dmu_buf_impl_t *db)
dnode_t *dn;
dmu_buf_impl_t *parent = db->db_parent;
dmu_buf_impl_t *dndb;
- int dbuf_gone = FALSE;
+ boolean_t dbuf_gone = B_FALSE;
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(refcount_is_zero(&db->db_holds));
@@ -1603,7 +1604,7 @@ dbuf_clear(dmu_buf_impl_t *db)
}
if (db->db_buf)
- dbuf_gone = arc_buf_evict(db->db_buf);
+ dbuf_gone = arc_clear_callback(db->db_buf);
if (!dbuf_gone)
mutex_exit(&db->db_mtx);
@@ -1771,8 +1772,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
static int
dbuf_do_evict(void *private)
{
- arc_buf_t *buf = private;
- dmu_buf_impl_t *db = buf->b_private;
+ dmu_buf_impl_t *db = private;
if (!MUTEX_HELD(&db->db_mtx))
mutex_enter(&db->db_mtx);
@@ -2135,11 +2135,23 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* block on-disk. If so, then we simply evict
* ourselves.
*/
- if (!DBUF_IS_CACHEABLE(db) ||
- arc_buf_eviction_needed(db->db_buf))
+ if (!DBUF_IS_CACHEABLE(db)) {
+ if (db->db_blkptr != NULL &&
+ !BP_IS_HOLE(db->db_blkptr) &&
+ !BP_IS_EMBEDDED(db->db_blkptr)) {
+ spa_t *spa =
+ dmu_objset_spa(db->db_objset);
+ blkptr_t bp = *db->db_blkptr;
+ dbuf_clear(db);
+ arc_freed(spa, &bp);
+ } else {
+ dbuf_clear(db);
+ }
+ } else if (arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
- else
+ } else {
mutex_exit(&db->db_mtx);
+ }
}
} else {
mutex_exit(&db->db_mtx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
index c34db6d..8bf07c3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
@@ -95,7 +95,6 @@ boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
-int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
@@ -114,7 +113,7 @@ zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
void arc_freed(spa_t *spa, const blkptr_t *bp);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *priv);
-int arc_buf_evict(arc_buf_t *buf);
+boolean_t arc_clear_callback(arc_buf_t *buf);
void arc_flush(spa_t *spa);
void arc_tempreserve_clear(uint64_t reserve);
OpenPOWER on IntegriCloud