summaryrefslogtreecommitdiffstats
path: root/sys/kern/vfs_lookup.c
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2007-01-22 11:25:22 +0000
committerkib <kib@FreeBSD.org>2007-01-22 11:25:22 +0000
commit79752b63e1e72d72a46513db9b95df486877fdd2 (patch)
tree5c7c4a1283308fecd2e3cd335a281f350ff29125 /sys/kern/vfs_lookup.c
parentbbae4f99499054b3fc06acf2da55f260fd8d3974 (diff)
downloadFreeBSD-src-79752b63e1e72d72a46513db9b95df486877fdd2.zip
FreeBSD-src-79752b63e1e72d72a46513db9b95df486877fdd2.tar.gz
Below is slightly edited description of the LOR by Tor Egge:
-------------------------- [Deadlock] is caused by a lock order reversal in vfs_lookup(), where [some] process is trying to lock a directory vnode, that is the parent directory of covered vnode) while holding an exclusive vnode lock on covering vnode. A simplified scenario: root fs var fs / A / (/var) D /var B /log (/var/log) E vfs lock C vfs lock F Within each file system, the lock order is clear: C->A->B and F->D->E When traversing across mounts, the system can choose between two lock orders, but everything must then follow that lock order: L1: C->A->B | +->F->D->E L2: F->D->E | +->C->A->B The lookup() process for namei("/var") mixes those two lock orders: VOP_LOOKUP() obtains B while A is held vfs_busy() obtains a shared lock on F while A and B are held (follows L1, violates L2) vput() releases lock on B VOP_UNLOCK() releases lock on A VFS_ROOT() obtains lock on D while shared lock on F is held vfs_unbusy() releases shared lock on F vn_lock() obtains lock on A while D is held (violates L1, follows L2) dounmount() follows L1 (B is locked while F is drained). Without unmount activity, vfs_busy() will always succeed without blocking and the deadlock isn't triggered (the system behaves as if L2 is followed). With unmount, you can get 4 processes in a deadlock: p1: holds D, want A (in lookup()) p2: holds shared lock on F, want D (in VFS_ROOT()) p3: holds B, want drain lock on F (in dounmount()) p4: holds A, want B (in VOP_LOOKUP()) You can have more than one instance of p2. The reversal was introduced in revision 1.81 of src/sys/kern/vfs_lookup.c and MFCed to revision 1.80.2.1, probably to avoid a cascade of vnode locks when nfs servers are dead (VFS_ROOT() just hangs) spreading to the root fs root vnode. - Tor Egge To fix the LOR, ups@ noted that when crossing the mount point, ni_dvp is actually not used by the callers of namei. Thus, placeholder deadfs vnode vp_crossmp is introduced that is filled into ni_dvp. Idea by: ups Reviewed by: tegge, ups, jeff, rwatson (mac interaction) Tested by: Peter Holm MFC after: 2 weeks
Diffstat (limited to 'sys/kern/vfs_lookup.c')
-rw-r--r--sys/kern/vfs_lookup.c21
1 files changed, 17 insertions, 4 deletions
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index a0cbc83..bf624d8 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -69,13 +69,18 @@ __FBSDID("$FreeBSD$");
* Allocation zone for namei
*/
uma_zone_t namei_zone;
+/*
+ * Placeholder vnode for mp traversal
+ */
+static struct vnode *vp_crossmp;
static void
nameiinit(void *dummy __unused)
{
namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
-
+ getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp);
+ vp_crossmp->v_vnlock->lk_flags &= ~LK_NOSHARE;
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL)
@@ -559,7 +564,8 @@ unionlookup:
* If we have a shared lock we may need to upgrade the lock for the
* last operation.
*/
- if (VOP_ISLOCKED(dp, td) == LK_SHARED &&
+ if (dp != vp_crossmp &&
+ VOP_ISLOCKED(dp, td) == LK_SHARED &&
(cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT))
vn_lock(dp, LK_UPGRADE|LK_RETRY, td);
/*
@@ -658,10 +664,17 @@ unionlookup:
VFS_UNLOCK_GIANT(vfslocked);
vfslocked = VFS_LOCK_GIANT(mp);
if (dp != ndp->ni_dvp)
- VOP_UNLOCK(ndp->ni_dvp, 0, td);
+ vput(ndp->ni_dvp);
+ else
+ vrele(ndp->ni_dvp);
+ VFS_UNLOCK_GIANT(dvfslocked);
+ dvfslocked = 0;
+ vref(vp_crossmp);
+ ndp->ni_dvp = vp_crossmp;
error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), &tdp, td);
vfs_unbusy(mp, td);
- vn_lock(ndp->ni_dvp, compute_cn_lkflags(mp, cnp->cn_lkflags | LK_RETRY), td);
+ if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT, td))
+ panic("vp_crossmp exclusively locked or reclaimed");
if (error) {
dpunlocked = 1;
goto bad2;
OpenPOWER on IntegriCloud