diff options
5 files changed, 196 insertions, 4 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
index 00a10aa..bf613e5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
@@ -41,6 +41,7 @@
 
 #include <sys/types.h>
 #include <sys/param.h>
+#include <sys/proc.h>
 #include <sys/vnode.h>
 
 /* Extensible attribute (xva) routines. */
@@ -72,3 +73,170 @@ xva_getxoptattr(xvattr_t *xvap)
 	xoap = &xvap->xva_xoptattrs;
 	return (xoap);
 }
+
+static STAILQ_HEAD(, vnode) vn_rele_async_list;
+static struct mtx vn_rele_async_lock;
+static struct cv vn_rele_async_cv;
+static int vn_rele_list_length;
+
+/*
+ * Cleaner shutdown state, kept in vn_rele_async_thread_exit.  It only ever
+ * advances (RUNNING -> STOPPING -> STOPPED), so a release issued after
+ * vn_rele_async_fini() is done synchronously instead of being queued for a
+ * cleaner that no longer exists.
+ */
+#define VN_RELE_ASYNC_RUNNING 0
+#define VN_RELE_ASYNC_STOPPING 1
+#define VN_RELE_ASYNC_STOPPED 2
+static int vn_rele_async_thread_exit;
+
+/* Overlay used to chain queued vnodes through their v_cstart field. */
+typedef struct {
+	struct vnode *stqe_next;
+} vnode_link_t;
+
+/*
+ * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
+ * asynchronously by handing the vnode to the "vaclean" kernel process.  This
+ * can avoid deadlocks caused by re-entering the file system as a result of
+ * releasing the vnode.  Note, file systems already have to handle the race
+ * where the vnode is incremented before the inactive routine is called and
+ * does its locking.
+ *
+ * The taskq argument is accepted but unused.
+ *
+ * Warning: a queued vnode keeps its reference until the cleaner runs; the
+ * cleaner is woken on every 100th queued release and otherwise polls every
+ * hz/10 ticks.
+ */
+void
+vn_rele_async(vnode_t *vp, taskq_t *taskq /* unused */)
+{
+
+	KASSERT(vp != NULL, ("vrele: null vp"));
+	VFS_ASSERT_GIANT(vp->v_mount);
+	VI_LOCK(vp);
+
+	if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) &&
+	    vp->v_usecount == 1)) {
+		vp->v_usecount--;
+		vdropl(vp);
+		return;
+	}
+	if (vp->v_usecount != 1) {
+#ifdef DIAGNOSTIC
+		vprint("vrele: negative ref count", vp);
+#endif
+		VI_UNLOCK(vp);
+		panic("vrele: negative ref cnt");
+	}
+	/*
+	 * The cleaner is shutting down or gone, so release synchronously.
+	 * vrele() acquires the vnode interlock itself; drop ours first.
+	 */
+	if (vn_rele_async_thread_exit != VN_RELE_ASYNC_RUNNING) {
+		VI_UNLOCK(vp);
+		vrele(vp);
+		return;
+	}
+
+	mtx_lock(&vn_rele_async_lock);
+
+	/* STAILQ_INSERT_TAIL */
+	(*(vnode_link_t *)&vp->v_cstart).stqe_next = NULL;
+	*vn_rele_async_list.stqh_last = vp;
+	vn_rele_async_list.stqh_last =
+	    &((vnode_link_t *)&vp->v_cstart)->stqe_next;
+
+	/****************************************/
+	vn_rele_list_length++;
+	if ((vn_rele_list_length % 100) == 0)
+		cv_signal(&vn_rele_async_cv);
+	mtx_unlock(&vn_rele_async_lock);
+	VI_UNLOCK(vp);
+}
+
+/* SYSINIT hook: set up the lock, queue and condvar used by the cleaner. */
+static void
+vn_rele_async_init(void *arg)
+{
+
+	mtx_init(&vn_rele_async_lock, "valock", NULL, MTX_DEF);
+	STAILQ_INIT(&vn_rele_async_list);
+
+	/* cv_init(&vn_rele_async_cv, "vacv"); */
+	vn_rele_async_cv.cv_description = "vacv";
+	vn_rele_async_cv.cv_waiters = 0;
+}
+
+/*
+ * Ask the cleaner to exit and wait until it has drained the queue and
+ * acknowledged, then destroy the queue lock.
+ */
+void
+vn_rele_async_fini(void)
+{
+
+	mtx_lock(&vn_rele_async_lock);
+	vn_rele_async_thread_exit = VN_RELE_ASYNC_STOPPING;
+	cv_signal(&vn_rele_async_cv);
+	while (vn_rele_async_thread_exit != VN_RELE_ASYNC_STOPPED)
+		cv_wait(&vn_rele_async_cv, &vn_rele_async_lock);
+	mtx_unlock(&vn_rele_async_lock);
+	mtx_destroy(&vn_rele_async_lock);
+}
+
+/*
+ * Body of the "vaclean" kernel process: repeatedly take everything queued
+ * by vn_rele_async() and vrele() it outside the queue lock.  On shutdown
+ * the queue is drained before the exit is acknowledged.
+ */
+static void
+vn_rele_async_cleaner(void)
+{
+	STAILQ_HEAD(, vnode) vn_tmp_list;
+	struct vnode *curvnode;
+
+	STAILQ_INIT(&vn_tmp_list);
+	mtx_lock(&vn_rele_async_lock);
+	for (;;) {
+		STAILQ_CONCAT(&vn_tmp_list, &vn_rele_async_list);
+		vn_rele_list_length = 0;
+		mtx_unlock(&vn_rele_async_lock);
+
+		while (!STAILQ_EMPTY(&vn_tmp_list)) {
+			curvnode = STAILQ_FIRST(&vn_tmp_list);
+
+			/* STAILQ_REMOVE_HEAD */
+			STAILQ_FIRST(&vn_tmp_list) =
+			    ((vnode_link_t *)&curvnode->v_cstart)->stqe_next;
+			if (STAILQ_FIRST(&vn_tmp_list) == NULL)
+				vn_tmp_list.stqh_last = &STAILQ_FIRST(&vn_tmp_list);
+			/***********************/
+			vrele(curvnode);
+		}
+		mtx_lock(&vn_rele_async_lock);
+		/* More work arrived while we were releasing: go again. */
+		if (vn_rele_list_length != 0)
+			continue;
+		/* Only exit once the queue is empty, so nothing is leaked. */
+		if (vn_rele_async_thread_exit != VN_RELE_ASYNC_RUNNING)
+			break;
+		cv_timedwait(&vn_rele_async_cv, &vn_rele_async_lock,
+		    hz/10);
+	}
+
+	vn_rele_async_thread_exit = VN_RELE_ASYNC_STOPPED;
+	cv_broadcast(&vn_rele_async_cv);
+	mtx_unlock(&vn_rele_async_lock);
+	thread_exit();
+}
+
+static struct proc *vn_rele_async_proc;
+static struct kproc_desc up_kp = {
+	"vaclean",
+	vn_rele_async_cleaner,
+	&vn_rele_async_proc
+};
+SYSINIT(vaclean, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp);
+SYSINIT(vn_rele_async_setup, SI_SUB_VFS, SI_ORDER_FIRST, vn_rele_async_init, NULL);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index 377efb9..1fb1c6f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -1199,6 +1199,7 @@ dmu_init(void)
 void
 dmu_fini(void)
 {
+	vn_rele_async_fini();
 	arc_fini();
 	dnode_fini();
 	dbuf_fini();
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index 7792f6e..458d5bc 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -93,6 +93,7 @@
  * pushing cached pages (which acquires range locks) and syncing out
  * cached atime changes. Third, zfs_zinactive() may require a new tx,
  * which could deadlock the system if you were already holding one.
+ * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
  *
  * (3) All range locks must be grabbed before calling dmu_tx_assign(),
  * as they can span dmu_tx_assign() calls.
@@ -928,7 +929,11 @@ zfs_get_done(dmu_buf_t *db, void *vzgd)
 	vfslocked = VFS_LOCK_GIANT(vp->v_vfsp);
 	dmu_buf_rele(db, vzgd);
 	zfs_range_unlock(rl);
-	VN_RELE(vp);
+	/*
+	 * Release the vnode asynchronously as we currently have the
+	 * txg stopped from syncing.
+	 */
+	VN_RELE_ASYNC(vp, NULL);
 	zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
 	kmem_free(zgd, sizeof (zgd_t));
 	VFS_UNLOCK_GIANT(vfslocked);
@@ -959,7 +964,12 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 	if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0)
 		return (ENOENT);
 	if (zp->z_unlinked) {
-		VN_RELE(ZTOV(zp));
+		/*
+		 * Release the vnode asynchronously as we currently have the
+		 * txg stopped from syncing.
+		 */
+		VN_RELE_ASYNC(ZTOV(zp), NULL);
+
 		return (ENOENT);
 	}
 
@@ -1031,7 +1041,11 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 	}
 out:
 	zfs_range_unlock(rl);
-	VN_RELE(ZTOV(zp));
+	/*
+	 * Release the vnode asynchronously as we currently have the
+	 * txg stopped from syncing.
+	 */
+	VN_RELE_ASYNC(ZTOV(zp), NULL);
 	return (error);
 }
 
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
index 1f6fa0d..a7c2b37 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
@@ -1041,7 +1041,7 @@ zil_clean(zilog_t *zilog)
 	if ((itx != NULL) &&
 	    (itx->itx_lr.lrc_txg <= spa_last_synced_txg(zilog->zl_spa))) {
 		(void) taskq_dispatch(zilog->zl_clean_taskq,
-		    (void (*)(void *))zil_itx_clean, zilog, TQ_NOSLEEP);
+		    (task_func_t *)zil_itx_clean, zilog, TQ_SLEEP);
 	}
 	mutex_exit(&zilog->zl_lock);
 }
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h b/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h
index c0e5b1b..dca3715 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h
@@ -377,6 +377,15 @@ typedef struct caller_context {
 void xva_init(xvattr_t *);
 xoptattr_t *xva_getxoptattr(xvattr_t *); /* Get ptr to xoptattr_t */
 
+struct taskq;
+void vn_rele_async(struct vnode *vp, struct taskq *taskq);
+void vn_rele_async_fini(void);
+
+
+#define VN_RELE_ASYNC(vp, taskq) { \
+	vn_rele_async(vp, taskq); \
+}
+
 /*
  * Flags to VOP_SETATTR/VOP_GETATTR.
  */