summary | refs | log | tree | commit | diff | stats
path: root/sys
diff options
context:
space:
mode:
author: mohans <mohans@FreeBSD.org> 2006-09-13 18:39:09 +0000
committer: mohans <mohans@FreeBSD.org> 2006-09-13 18:39:09 +0000
commit: 21daa650a97ea27eb3fdf3915832cb2c1e49aa4f (patch)
tree: ecb7983045b3f7e892ee6bce4b4f36f4080332f4 /sys
parent: e751eb98323de041fcb2da0c3b6973368e75da67 (diff)
download: FreeBSD-src-21daa650a97ea27eb3fdf3915832cb2c1e49aa4f.zip
          FreeBSD-src-21daa650a97ea27eb3fdf3915832cb2c1e49aa4f.tar.gz
Fixes up the handling of shared vnode lock lookups in the NFS client,
adds a FS type specific flag indicating that the FS supports shared
vnode lock lookups, and adds some logic in vfs_lookup.c to test this
flag and set lock flags appropriately.

- amd on 6.x is a non-starter (without this change). Using amd under
  heavy load results in a deadlock (with cascading vnode locks all the
  way to the root) very quickly.
- This change should also fix the more general problem of cascading
  vnode deadlocks when an NFS server goes down.

Ideally, we wouldn't need these changes, as enabling shared vnode lock
lookups globally would work. Unfortunately, UFS, for example, isn't
ready for shared vnode lock lookups, crashing pretty quickly.

This change is the result of discussions with Stephan Uphoff (ups@).

Reviewed by: ups@
Diffstat (limited to 'sys')
-rw-r--r--  sys/kern/vfs_lookup.c         30
-rw-r--r--  sys/nfs4client/nfs4_vfsops.c   4
-rw-r--r--  sys/nfs4client/nfs4_vnops.c   12
-rw-r--r--  sys/nfsclient/nfs_node.c       6
-rw-r--r--  sys/nfsclient/nfs_subs.c       2
-rw-r--r--  sys/nfsclient/nfs_vfsops.c     8
-rw-r--r--  sys/nfsclient/nfs_vnops.c     10
-rw-r--r--  sys/nfsclient/nfsnode.h        2
-rw-r--r--  sys/sys/mount.h                1
9 files changed, 46 insertions, 29 deletions
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 137e368..a149796 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -303,6 +303,16 @@ namei(struct nameidata *ndp)
return (error);
}
+static int	/* Returns lkflags, adjusted for what mount mp supports. */
+compute_cn_lkflags(struct mount *mp, int lkflags)
+{
+ if ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED)) {	/* shared requested, but mp lacks MNTK_LOOKUP_SHARED */
+ lkflags &= ~LK_SHARED;
+ lkflags |= LK_EXCLUSIVE;	/* fall back to an exclusive lock */
+ }
+ return lkflags;	/* no other bits of lkflags are modified here */
+}
+
/*
* Search a pathname.
* This is a very central and rather complicated routine.
@@ -359,7 +369,8 @@ lookup(struct nameidata *ndp)
int vfslocked; /* VFS Giant state for child */
int dvfslocked; /* VFS Giant state for parent */
int tvfslocked;
-
+ int lkflags_save;
+
/*
* Setup: break out flag bits into variables.
*/
@@ -387,7 +398,7 @@ lookup(struct nameidata *ndp)
cnp->cn_lkflags = LK_EXCLUSIVE;
dp = ndp->ni_startdir;
ndp->ni_startdir = NULLVP;
- vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
+ vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
dirloop:
/*
@@ -524,7 +535,7 @@ dirloop:
VREF(dp);
vput(tdp);
VFS_UNLOCK_GIANT(tvfslocked);
- vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
+ vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
}
}
@@ -560,7 +571,10 @@ unionlookup:
#ifdef NAMEI_DIAGNOSTIC
vprint("lookup in", dp);
#endif
+ lkflags_save = cnp->cn_lkflags;
+ cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags);
if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
+ cnp->cn_lkflags = lkflags_save;
KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
#ifdef NAMEI_DIAGNOSTIC
printf("not found\n");
@@ -575,7 +589,7 @@ unionlookup:
VREF(dp);
vput(tdp);
VFS_UNLOCK_GIANT(tvfslocked);
- vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
+ vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
goto unionlookup;
}
@@ -612,7 +626,8 @@ unionlookup:
VREF(ndp->ni_startdir);
}
goto success;
- }
+ } else
+ cnp->cn_lkflags = lkflags_save;
#ifdef NAMEI_DIAGNOSTIC
printf("found\n");
#endif
@@ -643,9 +658,9 @@ unionlookup:
vfslocked = VFS_LOCK_GIANT(mp);
if (dp != ndp->ni_dvp)
VOP_UNLOCK(ndp->ni_dvp, 0, td);
- error = VFS_ROOT(mp, cnp->cn_lkflags, &tdp, td);
+ error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), &tdp, td);
vfs_unbusy(mp, td);
- vn_lock(ndp->ni_dvp, cnp->cn_lkflags | LK_RETRY, td);
+ vn_lock(ndp->ni_dvp, compute_cn_lkflags(mp, cnp->cn_lkflags | LK_RETRY), td);
if (error) {
dpunlocked = 1;
goto bad2;
@@ -859,6 +874,7 @@ relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
*/
return (0);
}
+
dp = *vpp;
/*
diff --git a/sys/nfs4client/nfs4_vfsops.c b/sys/nfs4client/nfs4_vfsops.c
index 0eb113b..6bb1b9c 100644
--- a/sys/nfs4client/nfs4_vfsops.c
+++ b/sys/nfs4client/nfs4_vfsops.c
@@ -200,7 +200,7 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
#ifndef nolint
sfp = NULL;
#endif
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
if (error)
return (error);
vp = NFSTOV(np);
@@ -724,7 +724,7 @@ nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
int error;
nmp = VFSTONFS(mp);
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
if (error)
return (error);
vp = NFSTOV(np);
diff --git a/sys/nfs4client/nfs4_vnops.c b/sys/nfs4client/nfs4_vnops.c
index f0e920e..5867471 100644
--- a/sys/nfs4client/nfs4_vnops.c
+++ b/sys/nfs4client/nfs4_vnops.c
@@ -497,7 +497,7 @@ nfs4_openrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
if (vp == NULL) {
/* New file */
error = nfs_nget(dvp->v_mount, &getfh.fh_val,
- getfh.fh_len, &np);
+ getfh.fh_len, &np, LK_EXCLUSIVE);
if (error != 0)
goto nfsmout;
@@ -1031,7 +1031,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
if (NFS_CMPFH(np, fhp, fhsize))
return (EISDIR);
- error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
if (error)
return (error);
@@ -1047,7 +1047,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
if (flags & ISDOTDOT) {
VOP_UNLOCK(dvp, 0, td);
- error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
if (error)
return (error);
@@ -1058,7 +1058,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
VREF(dvp);
newvp = dvp;
} else {
- error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
if (error)
return (error);
newvp = NFSTOV(np);
@@ -1431,7 +1431,7 @@ nfs4_createrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
nfsm_v4dissect_getattr(&cp, &ga);
nfsm_v4dissect_getfh(&cp, &gfh);
- error = nfs_nget(dvp->v_mount, &gfh.fh_val, gfh.fh_len, &np);
+ error = nfs_nget(dvp->v_mount, &gfh.fh_val, gfh.fh_len, &np, LK_EXCLUSIVE);
if (error != 0)
goto nfsmout;
@@ -2336,7 +2336,7 @@ nfs4_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
VREF(dvp);
newvp = dvp;
} else {
- error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
+ error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
if (error) {
m_freem(mrep);
return (error);
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
index 7edf2dd..ebf453f 100644
--- a/sys/nfsclient/nfs_node.c
+++ b/sys/nfsclient/nfs_node.c
@@ -99,7 +99,7 @@ nfs_vncmpf(struct vnode *vp, void *arg)
* nfsnode structure is returned.
*/
int
-nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
+nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp, int flags)
{
struct thread *td = curthread; /* XXX */
struct nfsnode *np;
@@ -117,7 +117,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
ncmp.fhsize = fhsize;
ncmp.fh = fhp;
- error = vfs_hash_get(mntp, hash, LK_EXCLUSIVE,
+ error = vfs_hash_get(mntp, hash, flags,
td, &nvp, nfs_vncmpf, &ncmp);
if (error)
return (error);
@@ -153,7 +153,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
*/
vp->v_vnlock->lk_flags |= LK_CANRECURSE;
vp->v_vnlock->lk_flags &= ~LK_NOSHARE;
- error = vfs_hash_insert(vp, hash, LK_EXCLUSIVE,
+ error = vfs_hash_insert(vp, hash, flags,
td, &nvp, nfs_vncmpf, &ncmp);
if (error)
return (error);
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
index 8556b3c..92dd32e 100644
--- a/sys/nfsclient/nfs_subs.c
+++ b/sys/nfsclient/nfs_subs.c
@@ -930,7 +930,7 @@ nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f,
t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos);
if (t1 != 0)
return t1;
- t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp);
+ t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE);
if (t1 != 0)
return t1;
*v = NFSTOV(ttnp);
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index 6a29da8..480a5d6 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -254,7 +254,7 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
error = vfs_busy(mp, LK_NOWAIT, NULL, td);
if (error)
return (error);
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
if (error) {
vfs_unbusy(mp, td);
return (error);
@@ -785,7 +785,7 @@ nfs_mount(struct mount *mp, struct thread *td)
error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
out:
if (!error)
- mp->mnt_kern_flag |= MNTK_MPSAFE;
+ mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
return (error);
}
@@ -913,7 +913,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
* this problem, because one can identify root inodes by their
* number == ROOTINO (2).
*/
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
if (error)
goto bad;
*vpp = NFSTOV(np);
@@ -995,7 +995,7 @@ nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
int error;
nmp = VFSTONFS(mp);
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
if (error)
return error;
vp = NFSTOV(np);
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index b6ad271..e073de9 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -899,7 +899,7 @@ nfs_lookup(struct vop_lookup_args *ap)
m_freem(mrep);
return (EISDIR);
}
- error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
if (error) {
m_freem(mrep);
return (error);
@@ -918,7 +918,7 @@ nfs_lookup(struct vop_lookup_args *ap)
if (flags & ISDOTDOT) {
VOP_UNLOCK(dvp, 0, td);
- error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
if (error)
return (error);
@@ -927,7 +927,7 @@ nfs_lookup(struct vop_lookup_args *ap)
VREF(dvp);
newvp = dvp;
} else {
- error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
+ error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
if (error) {
m_freem(mrep);
return (error);
@@ -2410,7 +2410,7 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
np = dnp;
} else {
error = nfs_nget(vp->v_mount, fhp,
- fhsize, &np);
+ fhsize, &np, LK_EXCLUSIVE);
if (error)
doit = 0;
else
@@ -2604,7 +2604,7 @@ nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
VREF(dvp);
newvp = dvp;
} else {
- error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
+ error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
if (error) {
m_freem(mrep);
return (error);
diff --git a/sys/nfsclient/nfsnode.h b/sys/nfsclient/nfsnode.h
index e03e97c..2287235 100644
--- a/sys/nfsclient/nfsnode.h
+++ b/sys/nfsclient/nfsnode.h
@@ -189,7 +189,7 @@ int nfs_reclaim(struct vop_reclaim_args *);
/* other stuff */
int nfs_removeit(struct sillyrename *);
int nfs4_removeit(struct sillyrename *);
-int nfs_nget(struct mount *, nfsfh_t *, int, struct nfsnode **);
+int nfs_nget(struct mount *, nfsfh_t *, int, struct nfsnode **, int flags);
nfsuint64 *nfs_getcookie(struct nfsnode *, off_t, int);
uint64_t *nfs4_getcookie(struct nfsnode *, off_t, int);
void nfs_invaldir(struct vnode *);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 0bf0701..66e3ea6 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -310,6 +310,7 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
#define MNTK_SUSPENDED 0x10000000 /* write operations are suspended */
#define MNTK_MPSAFE 0x20000000 /* Filesystem is MPSAFE. */
#define MNTK_NOKNOTE 0x80000000 /* Don't send KNOTEs from VOP hooks */
+#define MNTK_LOOKUP_SHARED 0x40000000 /* FS supports shared lock lookups */
/*
* Sysctl CTL_VFS definitions.
OpenPOWER on IntegriCloud