summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorjeff <jeff@FreeBSD.org>2002-03-12 04:00:11 +0000
committerjeff <jeff@FreeBSD.org>2002-03-12 04:00:11 +0000
commite6d26e8880ff741e6a936a20fbcab7c5df6443c0 (patch)
treedb3ad14ad172419e9392e38ccefc609f619092d3 /sys
parentb6412800f5692591eb5568403f073bc55cc762ff (diff)
downloadFreeBSD-src-e6d26e8880ff741e6a936a20fbcab7c5df6443c0.zip
FreeBSD-src-e6d26e8880ff741e6a936a20fbcab7c5df6443c0.tar.gz
This patch adds the "LOCKSHARED" option to namei, which causes it to acquire only shared locks on leaf vnodes.
The stat() and open() calls have been changed to make use of this new functionality. Using shared locks in these cases is sufficient and can significantly reduce their latency if I/O is pending on these vnodes. This also reduces the number of exclusive locks held throughout the system, which helps reduce the number of deadlocks that occur. A new kernel option, "LOOKUP_SHARED", has been added. It defaults to off so that this patch can be turned on for testing, and it should eventually go away once the change is proven to be stable. I have personally been running this patch for over a year now, so it is believed to be fully stable. Reviewed by: jake, obrien. Approved by: jake.
Diffstat (limited to 'sys')
-rw-r--r--sys/conf/options1
-rw-r--r--sys/kern/vfs_cache.c63
-rw-r--r--sys/kern/vfs_extattr.c5
-rw-r--r--sys/kern/vfs_syscalls.c5
-rw-r--r--sys/kern/vfs_vnops.c34
-rw-r--r--sys/sys/namei.h7
6 files changed, 112 insertions, 3 deletions
diff --git a/sys/conf/options b/sys/conf/options
index c3101df..fede7fd 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -383,6 +383,7 @@ BLKDEV_IOSIZE opt_global.h
DEBUG opt_global.h
DEBUG_LOCKS opt_global.h
DEBUG_VFS_LOCKS opt_global.h
+LOOKUP_SHARED opt_global.h
DIAGNOSTIC opt_global.h
ENABLE_VFS_IOOPT opt_global.h
INVARIANT_SUPPORT opt_global.h
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 08e0397..529efd1 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -570,8 +570,35 @@ vfs_cache_lookup(ap)
error = cache_lookup(dvp, vpp, cnp);
+#ifdef LOOKUP_SHARED
+ if (!error) {
+ /* We do this because the rest of the system now expects to get
+ * a shared lock, which is later upgraded if LOCKSHARED is not
+ * set. We have so many cases here because of bugs that yield
+ * inconsistent lock states. This all badly needs to be fixed.
+ */
+ error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
+ if (!error) {
+ int flock;
+
+ flock = VOP_ISLOCKED(*vpp, td);
+ if (flock != LK_EXCLUSIVE) {
+ if (flock == 0) {
+ if ((flags & ISLASTCN) &&
+ (flags & LOCKSHARED))
+ VOP_LOCK(*vpp, LK_SHARED, td);
+ else
+ VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
+ }
+ } else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
+ VOP_LOCK(*vpp, LK_DOWNGRADE, td);
+ }
+ return (error);
+ }
+#else
if (!error)
return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
+#endif
if (error == ENOENT)
return (error);
@@ -585,13 +612,28 @@ vfs_cache_lookup(ap)
} else if (flags & ISDOTDOT) {
VOP_UNLOCK(dvp, 0, td);
cnp->cn_flags |= PDIRUNLOCK;
+#ifdef LOOKUP_SHARED
+ if ((flags & ISLASTCN) && (flags & LOCKSHARED))
+ error = vget(vp, LK_SHARED, td);
+ else
+ error = vget(vp, LK_EXCLUSIVE, td);
+#else
error = vget(vp, LK_EXCLUSIVE, td);
+#endif
+
if (!error && lockparent && (flags & ISLASTCN)) {
if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
cnp->cn_flags &= ~PDIRUNLOCK;
}
} else {
+#ifdef LOOKUP_SHARED
+ if ((flags & ISLASTCN) && (flags & LOCKSHARED))
+ error = vget(vp, LK_SHARED, td);
+ else
+ error = vget(vp, LK_EXCLUSIVE, td);
+#else
error = vget(vp, LK_EXCLUSIVE, td);
+#endif
if (!lockparent || error || !(flags & ISLASTCN)) {
VOP_UNLOCK(dvp, 0, td);
cnp->cn_flags |= PDIRUNLOCK;
@@ -616,7 +658,28 @@ vfs_cache_lookup(ap)
return (error);
cnp->cn_flags &= ~PDIRUNLOCK;
}
+#ifdef LOOKUP_SHARED
+ error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
+
+ if (!error) {
+ int flock = 0;
+
+ flock = VOP_ISLOCKED(*vpp, td);
+ if (flock != LK_EXCLUSIVE) {
+ if (flock == 0) {
+ if ((flags & ISLASTCN) && (flags & LOCKSHARED))
+ VOP_LOCK(*vpp, LK_SHARED, td);
+ else
+ VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
+ }
+ } else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
+ VOP_LOCK(*vpp, LK_DOWNGRADE, td);
+ }
+
+ return (error);
+#else
return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
+#endif
}
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index fd22602..31b1244 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -2029,8 +2029,13 @@ stat(td, uap)
int error;
struct nameidata nd;
+#ifdef LOOKUP_SHARED
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
+ UIO_USERSPACE, SCARG(uap, path), td);
+#else
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
+#endif
if ((error = namei(&nd)) != 0)
return (error);
error = vn_stat(nd.ni_vp, &sb, td);
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index fd22602..31b1244 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2029,8 +2029,13 @@ stat(td, uap)
int error;
struct nameidata nd;
+#ifdef LOOKUP_SHARED
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
+ UIO_USERSPACE, SCARG(uap, path), td);
+#else
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
+#endif
if ((error = namei(&nd)) != 0)
return (error);
error = vn_stat(nd.ni_vp, &sb, td);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 461cb1e..336086c 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -105,6 +105,11 @@ vn_open_cred(ndp, flagp, cmode, cred)
struct vattr vat;
struct vattr *vap = &vat;
int mode, fmode, error;
+#ifdef LOOKUP_SHARED
+ int exclusive; /* The current intended lock state */
+
+ exclusive = 0;
+#endif
restart:
fmode = *flagp;
@@ -143,6 +148,9 @@ restart:
ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
fmode &= ~O_TRUNC;
vp = ndp->ni_vp;
+#ifdef LOOKUP_SHARED
+ exclusive = 1;
+#endif
} else {
if (ndp->ni_dvp == ndp->ni_vp)
vrele(ndp->ni_dvp);
@@ -158,8 +166,14 @@ restart:
}
} else {
ndp->ni_cnd.cn_nameiop = LOOKUP;
+#ifdef LOOKUP_SHARED
+ ndp->ni_cnd.cn_flags =
+ ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
+ LOCKSHARED | LOCKLEAF;
+#else
ndp->ni_cnd.cn_flags =
((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
+#endif
if ((error = namei(ndp)) != 0)
return (error);
vp = ndp->ni_vp;
@@ -198,6 +212,21 @@ restart:
* Make sure that a VM object is created for VMIO support.
*/
if (vn_canvmio(vp) == TRUE) {
+#ifdef LOOKUP_SHARED
+ int flock;
+
+ if (!exclusive && vp->v_object == NULL)
+ VOP_LOCK(vp, LK_UPGRADE, td);
+ /*
+ * In cases where the object is marked as dead object_create
+ * will unlock and relock exclusive. It is safe to call in
+ * here with a shared lock because we only examine fields that
+ * the shared lock guarantees will be stable. In the UPGRADE
+ * case it is not likely that anyone has used this vnode yet
+ * so there will be no contention. The logic after this call
+ * restores the requested locking state.
+ */
+#endif
if ((error = vfs_object_create(vp, td, cred)) != 0) {
VOP_UNLOCK(vp, 0, td);
VOP_CLOSE(vp, fmode, cred, td);
@@ -206,6 +235,11 @@ restart:
*flagp = fmode;
return (error);
}
+#ifdef LOOKUP_SHARED
+ flock = VOP_ISLOCKED(vp, td);
+ if (!exclusive && flock == LK_EXCLUSIVE)
+ VOP_LOCK(vp, LK_DOWNGRADE, td);
+#endif
}
if (fmode & FWRITE)
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index 5b133cc..1ae6120 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -113,8 +113,9 @@ struct nameidata {
#define NOCACHE 0x0020 /* name must not be left in cache */
#define FOLLOW 0x0040 /* follow symbolic links */
#define NOOBJ 0x0080 /* don't create object */
+#define LOCKSHARED 0x0100 /* Shared lock leaf */
#define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */
-#define MODMASK 0x00fc /* mask of operational modifiers */
+#define MODMASK 0x01fc /* mask of operational modifiers */
/*
* Namei parameter descriptors.
*
@@ -129,7 +130,6 @@ struct nameidata {
* name being sought. The caller is responsible for releasing the
* buffer and for vrele'ing ni_startdir.
*/
-#define NOCROSSMOUNT 0x000100 /* do not cross mount points */
#define RDONLY 0x000200 /* lookup with read-only semantics */
#define HASBUF 0x000400 /* has allocated pathname buffer */
#define SAVENAME 0x000800 /* save pathname buffer */
@@ -143,7 +143,8 @@ struct nameidata {
#define WILLBEDIR 0x080000 /* new files will be dirs; allow trailing / */
#define ISUNICODE 0x100000 /* current component name is unicode*/
#define PDIRUNLOCK 0x200000 /* file system lookup() unlocked parent dir */
-#define PARAMASK 0x1fff00 /* mask of parameter descriptors */
+#define NOCROSSMOUNT 0x400000 /* do not cross mount points */
+#define PARAMASK 0x3ffe00 /* mask of parameter descriptors */
/*
* Initialization of an nameidata structure.
OpenPOWER on IntegriCloud