diff options
7 files changed, 130 insertions, 17 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c index 8e80166..e3aa01a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c @@ -286,3 +286,91 @@ rrw_tsd_destroy(void *arg) (void *)curthread, (void *)rn->rn_rrl); } } + +/* + * A reader-mostly lock implementation, tuning above reader-writer locks + * for hightly parallel read acquisitions, while pessimizing writes. + * + * The idea is to split single busy lock into array of locks, so that + * each reader can lock only one of them for read, depending on result + * of simple hash function. That proportionally reduces lock congestion. + * Writer same time has to sequentially aquire write on all the locks. + * That makes write aquisition proportionally slower, but in places where + * it is used (filesystem unmount) performance is not critical. + * + * All the functions below are direct wrappers around functions above. + */ +void +rrm_init(rrmlock_t *rrl, boolean_t track_all) +{ + int i; + + for (i = 0; i < RRM_NUM_LOCKS; i++) + rrw_init(&rrl->locks[i], track_all); +} + +void +rrm_destroy(rrmlock_t *rrl) +{ + int i; + + for (i = 0; i < RRM_NUM_LOCKS; i++) + rrw_destroy(&rrl->locks[i]); +} + +void +rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag) +{ + if (rw == RW_READER) + rrm_enter_read(rrl, tag); + else + rrm_enter_write(rrl); +} + +/* + * This maps the current thread to a specific lock. Note that the lock + * must be released by the same thread that acquired it. We do this + * mapping by taking the thread pointer mod a prime number. We examine + * only the low 32 bits of the thread pointer, because 32-bit division + * is faster than 64-bit division, and the high 32 bits have little + * entropy anyway. + */ +#define RRM_TD_LOCK() (((uint32_t)(uintptr_t)(curthread)) % RRM_NUM_LOCKS) + +void +rrm_enter_read(rrmlock_t *rrl, void *tag) +{ + rrw_enter_read(&rrl->locks[RRM_TD_LOCK()], tag); +} + +void +rrm_enter_write(rrmlock_t *rrl) +{ + int i; + + for (i = 0; i < RRM_NUM_LOCKS; i++) + rrw_enter_write(&rrl->locks[i]); +} + +void +rrm_exit(rrmlock_t *rrl, void *tag) +{ + int i; + + if (rrl->locks[0].rr_writer == curthread) { + for (i = 0; i < RRM_NUM_LOCKS; i++) + rrw_exit(&rrl->locks[i], tag); + } else { + rrw_exit(&rrl->locks[RRM_TD_LOCK()], tag); + } +} + +boolean_t +rrm_held(rrmlock_t *rrl, krw_t rw) +{ + if (rw == RW_WRITER) { + return (rrw_held(&rrl->locks[0], rw)); + } else { + return (rrw_held(&rrl->locks[RRM_TD_LOCK()], rw)); + } +} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h index fb44fbe..5ac2794 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h @@ -79,6 +79,31 @@ void rrw_tsd_destroy(void *arg); #define RRW_LOCK_HELD(x) \ (rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER)) +/* + * A reader-mostly lock implementation, tuning above reader-writer locks + * for hightly parallel read acquisitions, pessimizing write acquisitions. + * + * This should be a prime number. See comment in rrwlock.c near + * RRM_TD_LOCK() for details. + */ +#define RRM_NUM_LOCKS 17 +typedef struct rrmlock { + rrwlock_t locks[RRM_NUM_LOCKS]; +} rrmlock_t; + +void rrm_init(rrmlock_t *rrl, boolean_t track_all); +void rrm_destroy(rrmlock_t *rrl); +void rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag); +void rrm_enter_read(rrmlock_t *rrl, void *tag); +void rrm_enter_write(rrmlock_t *rrl); +void rrm_exit(rrmlock_t *rrl, void *tag); +boolean_t rrm_held(rrmlock_t *rrl, krw_t rw); + +#define RRM_READ_HELD(x) rrm_held(x, RW_READER) +#define RRM_WRITE_HELD(x) rrm_held(x, RW_WRITER) +#define RRM_LOCK_HELD(x) \ + (rrm_held(x, RW_WRITER) || rrm_held(x, RW_READER)) + #ifdef __cplusplus } #endif diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h index 65da239..4120883 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h @@ -64,7 +64,7 @@ struct zfsvfs { int z_norm; /* normalization flags */ boolean_t z_atime; /* enable atimes mount option */ boolean_t z_unmounted; /* unmounted */ - rrwlock_t z_teardown_lock; + rrmlock_t z_teardown_lock; krwlock_t z_teardown_inactive_lock; list_t z_all_znodes; /* all vnodes in the fs */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h index 3439641..dae099b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h @@ -256,7 +256,7 @@ VTOZ(vnode_t *vp) /* Called on entry to each ZFS vnode and vfs operation */ #define ZFS_ENTER(zfsvfs) \ { \ - rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \ + rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \ if ((zfsvfs)->z_unmounted) { \ ZFS_EXIT(zfsvfs); \ return (EIO); \ @@ -265,10 +265,10 @@ VTOZ(vnode_t *vp) /* Called on entry to each ZFS vnode and vfs operation that can not return EIO */ #define ZFS_ENTER_NOERROR(zfsvfs) \ - rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG) + rrm_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG) /* Must be called before exiting the vop */ -#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG) +#define ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG) /* Verifies the znode is valid */ #define ZFS_VERIFY_ZP(zp) \ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index 40522e9..680c3e6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -1466,7 +1466,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer) if (getzfsvfs(name, zfvp) != 0) error = zfsvfs_create(name, zfvp); if (error == 0) { - rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER : + rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER : RW_READER, tag); if ((*zfvp)->z_unmounted) { /* @@ -1474,7 +1474,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer) * thread should be just about to disassociate the * objset from the zfsvfs. */ - rrw_exit(&(*zfvp)->z_teardown_lock, tag); + rrm_exit(&(*zfvp)->z_teardown_lock, tag); return (SET_ERROR(EBUSY)); } } @@ -1484,7 +1484,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer) static void zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) { - rrw_exit(&zfsvfs->z_teardown_lock, tag); + rrm_exit(&zfsvfs->z_teardown_lock, tag); if (zfsvfs->z_vfs) { VFS_RELE(zfsvfs->z_vfs); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c index 8eb8953..0c3ffdf 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c @@ -988,7 +988,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); - rrw_init(&zfsvfs->z_teardown_lock, B_FALSE); + rrm_init(&zfsvfs->z_teardown_lock, B_FALSE); rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) @@ -1104,7 +1104,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs) mutex_destroy(&zfsvfs->z_znodes_lock); mutex_destroy(&zfsvfs->z_lock); list_destroy(&zfsvfs->z_all_znodes); - rrw_destroy(&zfsvfs->z_teardown_lock); + rrm_destroy(&zfsvfs->z_teardown_lock); rw_destroy(&zfsvfs->z_teardown_inactive_lock); rw_destroy(&zfsvfs->z_fuid_lock); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) @@ -1833,7 +1833,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) { znode_t *zp; - rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); + rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* @@ -1866,7 +1866,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) */ if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { rw_exit(&zfsvfs->z_teardown_inactive_lock); - rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); return (SET_ERROR(EIO)); } @@ -1893,7 +1893,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) */ if (unmounting) { zfsvfs->z_unmounted = B_TRUE; - rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); rw_exit(&zfsvfs->z_teardown_inactive_lock); } @@ -1970,9 +1970,9 @@ zfs_umount(vfs_t *vfsp, int fflag) * vflush(FORCECLOSE). This way we ensure no future vnops * will be called and risk operating on DOOMED vnodes. */ - rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); + rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); zfsvfs->z_unmounted = B_TRUE; - rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); } /* @@ -2240,7 +2240,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) znode_t *zp; uint64_t sa_obj = 0; - ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); + ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); /* @@ -2296,7 +2296,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) bail: /* release the VOPs */ rw_exit(&zfsvfs->z_teardown_inactive_lock); - rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); if (err) { /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index 6945397..ac55996 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -276,7 +276,7 @@ zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) * can safely ensure that the filesystem is not and will not be * unmounted. The next statement is equivalent to ZFS_ENTER(). */ - rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); + rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); if (zfsvfs->z_unmounted) { ZFS_EXIT(zfsvfs); rw_exit(&zfsvfs_lock); |