author     avg <avg@FreeBSD.org>    2012-11-25 15:33:26 +0000
committer  avg <avg@FreeBSD.org>    2012-11-25 15:33:26 +0000
commit     084a0f18984bed2d19aa53235076dfa52b421c19 (patch)
tree       1331b512bc501052d2dc40eb74864f828d92d199 /sys/cddl
parent     45cac5be8a3800b819ee829ace85084735f04f63 (diff)
zfs: overhaul zfs-vfs glue for vnode life-cycle management
* There is no need for the delayed destruction of znodes via taskqueue,
now that we do not need to fear recursion from getnewvnode into
zfs_inactive and zfs_freebsd_reclaim, thus making the znode/vnode state
machine a bit simpler.
* Port zfs_inactive more completely from the Solaris VFS model to the
FreeBSD vop_inactive and vop_reclaim model. All destructive actions are
now done in zfs_freebsd_reclaim.
This allows the zfs_zget logic to be simplified.
* Allow zfs_zget to return a doomed vnode if the current thread already
has an exclusive lock on the vnode (outlined just after this list).
* Clean up Solaris-isms such as bailing out of reclaim/inactive on certain
values of v_usecount (aka v_count), or directly manipulating this counter.
* Do not clear z_vnode while the znode is still accessible.
z_vnode should be cleared only after zfs_znode_dmu_fini.
Otherwise zfs_zget may get an effectively half-destructed znode.
This allows the zfs_zget logic to be simplified further.
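In outline, the reworked tail of zfs_zget (shown in full in the zfs_znode.c
hunk below) holds the vnode before dropping the object lock, and retries only
when the vnode is doomed and the caller does not hold its exclusive lock:

	/* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */
	if (err == 0)
		VN_HOLD(vp);

	mutex_exit(&zp->z_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);

	if (err == 0) {
		locked = VOP_ISLOCKED(vp);
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_DOOMED) != 0 &&
		    locked != LK_EXCLUSIVE) {
			/* Being reclaimed by another thread; retry. */
			VI_UNLOCK(vp);
			VN_RELE(vp);
			goto again;
		}
		VI_UNLOCK(vp);
	}

Taking the hold before ZFS_OBJ_HOLD_EXIT is what keeps the vnode from being
freed between the doomed-vnode check and the retry.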
The above changes fix at least two known/reported problems:
o An indefinite wait in the following code path:
vgone -> VOP_RECLAIM -> zfs_freebsd_reclaim -> vnode_destroy_vobject ->
put_pages -> zfs_write -> zil_commit -> zfs_zget
This happened because vgone marks a vnode as VI_DOOMED before calling
VOP_RECLAIM, but zfs_zget would not return a doomed vnode under any
circumstances.
The fix in this change is not complete: it won't fix a deadlock between
two threads doing VOP_RECLAIM, where one thread is in zil_commit trying to
zfs_zget a znode/vnode being reclaimed by the other thread, which in turn
is blocked trying to enter zil_commit. This type of deadlock has not been
reported as of now.
o An indefinite wait in the unmount path caused by a znode "falling through
the cracks" in inactive+reclaim. This would happen if the znode is unlinked
while its vnode is still active.
To Do: pass a locking flags parameter to zfs_zget, so that the zfs-vfs
glue code doesn't have to re-lock a vnode but can ask for proper locking
from the very start. This would also allow the higher level code to
obtain a doomed vnode when that is expected/requested, or to avoid blocking
when blocking is not allowed (see the zil_commit example above).
ffs_vgetf seems like a good source of inspiration.
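For illustration only, such a flags-taking variant might look roughly like
the sketch below; the zfs_zgetf name, the lkflags parameter, and the
vn_lock() usage are assumptions modeled on ffs_vgetf, not part of this
change:

	/*
	 * Hypothetical sketch: acquire the vnode lock with caller-supplied
	 * flags from the start instead of re-locking in the glue code.
	 */
	static int
	zfs_zgetf(zfsvfs_t *zfsvfs, uint64_t obj_num, int lkflags,
	    znode_t **zpp)
	{
		znode_t *zp;
		vnode_t *vp;
		int err;

		err = zfs_zget(zfsvfs, obj_num, &zp);
		if (err != 0)
			return (err);
		vp = ZTOV(zp);
		/*
		 * With LK_RETRY a caller could accept a doomed vnode when
		 * that is expected; without it (or with LK_NOWAIT) a
		 * zil_commit-style caller could avoid blocking.
		 */
		err = vn_lock(vp, lkflags);
		if (err != 0) {
			VN_RELE(vp);
			*zpp = NULL;
			return (err);
		}
		*zpp = zp;
		return (0);
	}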
Tested by: Willem Jan Withagen <wjw@digiware.nl>
MFC after: 6 weeks
Diffstat (limited to 'sys/cddl')
4 files changed, 58 insertions(+), 137 deletions(-)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
index d3955d7..6281f8e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
@@ -207,8 +207,6 @@ typedef struct znode {
 	list_node_t	z_link_node;	/* all znodes in fs link */
 	sa_handle_t	*z_sa_hdl;	/* handle to sa data */
 	boolean_t	z_is_sa;	/* are we native sa? */
-	/* FreeBSD-specific field. */
-	struct task	z_task;
 } znode_t;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index b8f6774..72c22dc 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -1847,18 +1847,6 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 	zfsvfs->z_unmounted = B_TRUE;
 	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
-
-#ifdef __FreeBSD__
-	/*
-	 * Some znodes might not be fully reclaimed, wait for them.
-	 */
-	mutex_enter(&zfsvfs->z_znodes_lock);
-	while (list_head(&zfsvfs->z_all_znodes) != NULL) {
-		msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
-		    "zteardown", 0);
-	}
-	mutex_exit(&zfsvfs->z_znodes_lock);
-#endif
 }
 
 /*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index ba45a55..99c0c29 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -4566,14 +4566,22 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
 		 * The fs has been unmounted, or we did a
 		 * suspend/resume and this file no longer exists.
 		 */
-		VI_LOCK(vp);
-		ASSERT(vp->v_count <= 1);
-		vp->v_count = 0;
-		VI_UNLOCK(vp);
+		rw_exit(&zfsvfs->z_teardown_inactive_lock);
 		vrecycle(vp);
+		return;
+	}
+
+	mutex_enter(&zp->z_lock);
+	if (zp->z_unlinked) {
+		/*
+		 * Fast path to recycle a vnode of a removed file.
+		 */
+		mutex_exit(&zp->z_lock);
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
+		vrecycle(vp);
 		return;
 	}
+	mutex_exit(&zp->z_lock);
 
 	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
@@ -4592,8 +4600,6 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
 			dmu_tx_commit(tx);
 		}
 	}
-
-	zfs_zinactive(zp);
 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
 }
@@ -6196,28 +6202,6 @@ zfs_freebsd_inactive(ap)
 	return (0);
 }
 
-static void
-zfs_reclaim_complete(void *arg, int pending)
-{
-	znode_t	*zp = arg;
-	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-
-	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
-	if (zp->z_sa_hdl != NULL) {
-		ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id);
-		zfs_znode_dmu_fini(zp);
-		ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
-	}
-	zfs_znode_free(zp);
-	rw_exit(&zfsvfs->z_teardown_inactive_lock);
-	/*
-	 * If the file system is being unmounted, there is a process waiting
-	 * for us, wake it up.
-	 */
-	if (zfsvfs->z_unmounted)
-		wakeup_one(zfsvfs);
-}
-
 static int
 zfs_freebsd_reclaim(ap)
 	struct vop_reclaim_args /* {
@@ -6228,53 +6212,26 @@ zfs_freebsd_reclaim(ap)
 	vnode_t	*vp = ap->a_vp;
 	znode_t	*zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-	boolean_t rlocked;
-
-	rlocked = rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+	int refcnt;
 
 	ASSERT(zp != NULL);
 
-	/*
-	 * Destroy the vm object and flush associated pages.
-	 */
+	/* Destroy the vm object and flush associated pages. */
 	vnode_destroy_vobject(vp);
 
-	mutex_enter(&zp->z_lock);
-	zp->z_vnode = NULL;
-	mutex_exit(&zp->z_lock);
-
-	if (zp->z_unlinked) {
-		;	/* Do nothing. */
-	} else if (!rlocked) {
-		TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp);
-		taskqueue_enqueue(taskqueue_thread, &zp->z_task);
-	} else if (zp->z_sa_hdl == NULL) {
+	/*
+	 * z_teardown_inactive_lock protects from a race with
+	 * zfs_znode_dmu_fini in zfsvfs_teardown during
+	 * force unmount.
+	 */
+	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+	if (zp->z_sa_hdl == NULL)
 		zfs_znode_free(zp);
-	} else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ {
-		int locked;
+	else
+		zfs_zinactive(zp);
+	rw_exit(&zfsvfs->z_teardown_inactive_lock);
 
-		locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 :
-		    ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id);
-		if (locked == 0) {
-			/*
-			 * Lock can't be obtained due to deadlock possibility,
-			 * so defer znode destruction.
-			 */
-			TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp);
-			taskqueue_enqueue(taskqueue_thread, &zp->z_task);
-		} else {
-			zfs_znode_dmu_fini(zp);
-			if (locked == 1)
-				ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
-			zfs_znode_free(zp);
-		}
-	}
-	VI_LOCK(vp);
 	vp->v_data = NULL;
-	ASSERT(vp->v_holdcnt >= 1);
-	VI_UNLOCK(vp);
-	if (rlocked)
-		rw_exit(&zfsvfs->z_teardown_inactive_lock);
 	return (0);
 }
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
index c2ed964..18e54d2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
@@ -1147,14 +1147,16 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
 	dmu_object_info_t doi;
 	dmu_buf_t	*db;
 	znode_t		*zp;
-	int err;
+	vnode_t		*vp;
 	sa_handle_t	*hdl;
-	int first = 1;
-
-	*zpp = NULL;
+	struct thread	*td;
+	int locked;
+	int err;
 
+	td = curthread;
 	getnewvnode_reserve(1);
again:
+	*zpp = NULL;
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
 
 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
@@ -1193,48 +1195,38 @@ again:
 	if (zp->z_unlinked) {
 		err = ENOENT;
 	} else {
-		vnode_t *vp;
-		int dying = 0;
-
 		vp = ZTOV(zp);
-		if (vp == NULL)
-			dying = 1;
-		else {
-			VN_HOLD(vp);
-			if ((vp->v_iflag & VI_DOOMED) != 0) {
-				dying = 1;
-				/*
-				 * Don't VN_RELE() vnode here, because
-				 * it can call vn_lock() which creates
-				 * LOR between vnode lock and znode
-				 * lock. We will VN_RELE() the vnode
-				 * after droping znode lock.
-				 */
-			}
-		}
-		if (dying) {
-			if (first) {
-				ZFS_LOG(1, "dying znode detected (zp=%p)", zp);
-				first = 0;
-			}
-			/*
-			 * znode is dying so we can't reuse it, we must
-			 * wait until destruction is completed.
-			 */
-			sa_buf_rele(db, NULL);
-			mutex_exit(&zp->z_lock);
-			ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
-			if (vp != NULL)
-				VN_RELE(vp);
-			tsleep(zp, 0, "zcollide", 1);
-			goto again;
-		}
 		*zpp = zp;
 		err = 0;
 	}
 	sa_buf_rele(db, NULL);
+
+	/* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */
+	if (err == 0)
+		VN_HOLD(vp);
+
 	mutex_exit(&zp->z_lock);
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+
+	if (err == 0) {
+		locked = VOP_ISLOCKED(vp);
+		VI_LOCK(vp);
+		if ((vp->v_iflag & VI_DOOMED) != 0 &&
+		    locked != LK_EXCLUSIVE) {
+			/*
+			 * The vnode is doomed and this thread doesn't
+			 * hold the exclusive lock on it, so the vnode
+			 * must be being reclaimed by another thread.
+			 * Otherwise the doomed vnode is being reclaimed
+			 * by this thread and zfs_zget is called from
+			 * ZIL internals.
+			 */
+			VI_UNLOCK(vp);
+			VN_RELE(vp);
+			goto again;
+		}
+		VI_UNLOCK(vp);
+	}
 	getnewvnode_drop_reserve();
 	return (err);
 }
@@ -1400,7 +1392,6 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
 void
 zfs_zinactive(znode_t *zp)
 {
-	vnode_t	*vp = ZTOV(zp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	uint64_t z_id = zp->z_id;
 
@@ -1412,19 +1403,6 @@ zfs_zinactive(znode_t *zp)
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
 
 	mutex_enter(&zp->z_lock);
-	VI_LOCK(vp);
-	if (vp->v_count > 0) {
-		/*
-		 * If the hold count is greater than zero, somebody has
-		 * obtained a new reference on this znode while we were
-		 * processing it here, so we are done.
-		 */
-		VI_UNLOCK(vp);
-		mutex_exit(&zp->z_lock);
-		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
-		return;
-	}
-	VI_UNLOCK(vp);
 
 	/*
 	 * If this was the last reference to a file with no links,
@@ -1433,14 +1411,14 @@ zfs_zinactive(znode_t *zp)
 	if (zp->z_unlinked) {
 		mutex_exit(&zp->z_lock);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
-		ASSERT(vp->v_count == 0);
-		vrecycle(vp);
 		zfs_rmnode(zp);
 		return;
 	}
 
 	mutex_exit(&zp->z_lock);
+	zfs_znode_dmu_fini(zp);
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
+	zfs_znode_free(zp);
 }
 
 void
@@ -1448,8 +1426,8 @@ zfs_znode_free(znode_t *zp)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 
-	ASSERT(ZTOV(zp) == NULL);
 	ASSERT(zp->z_sa_hdl == NULL);
+	zp->z_vnode = NULL;
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	POINTER_INVALIDATE(&zp->z_zfsvfs);
 	list_remove(&zfsvfs->z_all_znodes, zp);