summary | refs | log | tree | commit | diff | stats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
diff options
context:
space:
mode:
author mav <mav@FreeBSD.org> 2015-10-03 07:29:56 +0000
committer mav <mav@FreeBSD.org> 2015-10-03 07:29:56 +0000
commit 4a52577541ebbe34a9fa8abe3948a8514c63258c (patch)
tree 8ed8ab1f80ef30fc8881b08dd4e4e6ea57f4f7c3 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
parent 3529a708dddb3cca632de7b55ab67db8aa2f543d (diff)
download FreeBSD-src-4a52577541ebbe34a9fa8abe3948a8514c63258c.zip
download FreeBSD-src-4a52577541ebbe34a9fa8abe3948a8514c63258c.tar.gz
MFC r286575: 5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
Author: Justin Gibbs <justing@spectralogic.com>

illumos/illumos-gate@bc9014e6a81272073b9854d9f65dd59e18d18c35
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c')
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c 81
1 files changed, 52 insertions, 29 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
index 8848677..4af50f5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -405,8 +406,9 @@ static dnode_t *
dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
uint64_t object, dnode_handle_t *dnh)
{
- dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
+ dnode_t *dn;
+ dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
ASSERT(!POINTER_IS_VALID(dn->dn_objset));
dn->dn_moved = 0;
@@ -443,13 +445,31 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
mutex_enter(&os->os_lock);
- list_insert_head(&os->os_dnodes, dn);
+ if (dnh->dnh_dnode != NULL) {
+ /* Lost the allocation race. */
+ mutex_exit(&os->os_lock);
+ kmem_cache_free(dnode_cache, dn);
+ return (dnh->dnh_dnode);
+ }
+
+ /*
+ * Exclude special dnodes from os_dnodes so an empty os_dnodes
+ * signifies that the special dnodes have no references from
+ * their children (the entries in os_dnodes). This allows
+ * dnode_destroy() to easily determine if the last child has
+ * been removed and then complete eviction of the objset.
+ */
+ if (!DMU_OBJECT_IS_SPECIAL(object))
+ list_insert_head(&os->os_dnodes, dn);
membar_producer();
+
/*
- * Everything else must be valid before assigning dn_objset makes the
- * dnode eligible for dnode_move().
+ * Everything else must be valid before assigning dn_objset
+ * makes the dnode eligible for dnode_move().
*/
dn->dn_objset = os;
+
+ dnh->dnh_dnode = dn;
mutex_exit(&os->os_lock);
arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
@@ -463,12 +483,18 @@ static void
dnode_destroy(dnode_t *dn)
{
objset_t *os = dn->dn_objset;
+ boolean_t complete_os_eviction = B_FALSE;
ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
mutex_enter(&os->os_lock);
POINTER_INVALIDATE(&dn->dn_objset);
- list_remove(&os->os_dnodes, dn);
+ if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
+ list_remove(&os->os_dnodes, dn);
+ complete_os_eviction =
+ list_is_empty(&os->os_dnodes) &&
+ list_link_active(&os->os_evicting_node);
+ }
mutex_exit(&os->os_lock);
/* the dnode can no longer move, so we can release the handle */
@@ -503,6 +529,9 @@ dnode_destroy(dnode_t *dn)
dmu_zfetch_rele(&dn->dn_zfetch);
kmem_cache_free(dnode_cache, dn);
arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
+
+ if (complete_os_eviction)
+ dmu_objset_evict_done(os);
}
void
@@ -971,33 +1000,32 @@ dnode_special_close(dnode_handle_t *dnh)
*/
while (refcount_count(&dn->dn_holds) > 0)
delay(1);
+ ASSERT(dn->dn_dbuf == NULL ||
+ dmu_buf_get_user(&dn->dn_dbuf->db) == NULL);
zrl_add(&dnh->dnh_zrlock);
dnode_destroy(dn); /* implicit zrl_remove() */
zrl_destroy(&dnh->dnh_zrlock);
dnh->dnh_dnode = NULL;
}
-dnode_t *
+void
dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
dnode_handle_t *dnh)
{
- dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh);
- dnh->dnh_dnode = dn;
+ dnode_t *dn;
+
+ dn = dnode_create(os, dnp, NULL, object, dnh);
zrl_init(&dnh->dnh_zrlock);
DNODE_VERIFY(dn);
- return (dn);
}
static void
-dnode_buf_pageout(dmu_buf_t *db, void *arg)
+dnode_buf_pageout(void *dbu)
{
- dnode_children_t *children_dnodes = arg;
+ dnode_children_t *children_dnodes = dbu;
int i;
- int epb = db->db_size >> DNODE_SHIFT;
- ASSERT(epb == children_dnodes->dnc_count);
-
- for (i = 0; i < epb; i++) {
+ for (i = 0; i < children_dnodes->dnc_count; i++) {
dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
dnode_t *dn;
@@ -1027,7 +1055,7 @@ dnode_buf_pageout(dmu_buf_t *db, void *arg)
dnh->dnh_dnode = NULL;
}
kmem_free(children_dnodes, sizeof (dnode_children_t) +
- epb * sizeof (dnode_handle_t));
+ children_dnodes->dnc_count * sizeof (dnode_handle_t));
}
/*
@@ -1117,10 +1145,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
dnh = &children_dnodes->dnc_children[0];
for (i = 0; i < epb; i++) {
zrl_init(&dnh[i].dnh_zrlock);
- dnh[i].dnh_dnode = NULL;
}
- if (winner = dmu_buf_set_user(&db->db, children_dnodes,
- dnode_buf_pageout)) {
+ dmu_buf_init_user(&children_dnodes->dnc_dbu,
+ dnode_buf_pageout, NULL);
+ winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
+ if (winner != NULL) {
for (i = 0; i < epb; i++) {
zrl_destroy(&dnh[i].dnh_zrlock);
@@ -1135,17 +1164,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
dnh = &children_dnodes->dnc_children[idx];
zrl_add(&dnh->dnh_zrlock);
- if ((dn = dnh->dnh_dnode) == NULL) {
+ dn = dnh->dnh_dnode;
+ if (dn == NULL) {
dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
- dnode_t *winner;
dn = dnode_create(os, phys, db, object, dnh);
- winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn);
- if (winner != NULL) {
- zrl_add(&dnh->dnh_zrlock);
- dnode_destroy(dn); /* implicit zrl_remove() */
- dn = winner;
- }
}
mutex_enter(&dn->dn_mtx);
@@ -1159,10 +1182,10 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
dbuf_rele(db, FTAG);
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
}
- mutex_exit(&dn->dn_mtx);
-
if (refcount_add(&dn->dn_holds, tag) == 1)
dbuf_add_ref(db, dnh);
+ mutex_exit(&dn->dn_mtx);
+
/* Now we can rely on the hold to prevent the dnode from moving. */
zrl_remove(&dnh->dnh_zrlock);
OpenPOWER on IntegriCloud