summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2006-10-10 09:20:54 +0000
committerkib <kib@FreeBSD.org>2006-10-10 09:20:54 +0000
commitdc1147adb06ee01012984702aebef6c27b92c7e0 (patch)
treeb21ac1bbb7acef29dace7ac99cf8f176bcc4b0fc /sys
parentc23de2fec88f3af5873cf8856e1e480b353f63cd (diff)
downloadFreeBSD-src-dc1147adb06ee01012984702aebef6c27b92c7e0.zip
FreeBSD-src-dc1147adb06ee01012984702aebef6c27b92c7e0.tar.gz
Do not translate the IN_ACCESS inode flag into the IN_MODIFIED while filesystem
is suspending/suspended. Doing so may result in deadlock. Instead, set the (new) IN_LAZYACCESS flag, that becomes IN_MODIFIED when suspend is lifted. Change the locking protocol in order to set the IN_ACCESS and timestamps without upgrading shared vnode lock to exclusive (see comments in the inode.h). Before that, inode was modified while holding only shared lock. Tested by: Peter Holm Reviewed by: tegge, bde Approved by: pjd (mentor) MFC after: 3 weeks
Diffstat (limited to 'sys')
-rw-r--r--sys/ufs/ffs/ffs_inode.c12
-rw-r--r--sys/ufs/ffs/ffs_snapshot.c24
-rw-r--r--sys/ufs/ffs/ffs_vnops.c10
-rw-r--r--sys/ufs/ufs/inode.h9
-rw-r--r--sys/ufs/ufs/ufs_vnops.c68
5 files changed, 89 insertions, 34 deletions
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 33b65bf..ad6bff9 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -66,9 +66,11 @@ static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
* IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode
* to disk if the IN_MODIFIED flag is set (it may be set initially, or by
* the timestamp update). The IN_LAZYMOD flag is set to force a write
- * later if not now. If we write now, then clear both IN_MODIFIED and
- * IN_LAZYMOD to reflect the presumably successful write, and if waitfor is
- * set, then wait for the write to complete.
+ * later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs
+ * is currently being suspended (or is suspended) and vnode has been accessed.
+ * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to
+ * reflect the presumably successful write, and if waitfor is set, then wait
+ * for the write to complete.
*/
int
ffs_update(vp, waitfor)
@@ -80,12 +82,12 @@ ffs_update(vp, waitfor)
struct inode *ip;
int error;
- ASSERT_VOP_LOCKED(vp, "ffs_update");
+ ASSERT_VOP_ELOCKED(vp, "ffs_update");
ufs_itimes(vp);
ip = VTOI(vp);
if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
return (0);
- ip->i_flag &= ~(IN_LAZYMOD | IN_MODIFIED);
+ ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
fs = ip->i_fs;
if (fs->fs_ronly)
return (0);
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 473dbfd..6ab9af2 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -2309,15 +2309,16 @@ readblock(vp, bp, lbn)
return (bp->b_error);
}
-
/*
* Process file deletes that were deferred by ufs_inactive() due to
- * the file system being suspended.
+ * the file system being suspended. Transfer IN_LAZYACCESS into
+ * IN_MODIFIED for vnodes that were accessed during suspension.
*/
static void
process_deferred_inactive(struct mount *mp)
{
struct vnode *vp, *mvp;
+ struct inode *ip;
struct thread *td;
int error;
@@ -2327,9 +2328,15 @@ process_deferred_inactive(struct mount *mp)
loop:
MNT_VNODE_FOREACH(vp, mp, mvp) {
VI_LOCK(vp);
- if ((vp->v_iflag & (VI_DOOMED | VI_OWEINACT)) != VI_OWEINACT ||
- vp->v_usecount > 0 ||
- vp->v_type == VNON) {
+ /*
+ * IN_LAZYACCESS is checked here without holding any
+ * vnode lock, but this flag is set only while holding
+ * vnode interlock.
+ */
+ if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED) != 0 ||
+ ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 &&
+ ((vp->v_iflag & VI_OWEINACT) == 0 ||
+ vp->v_usecount > 0))) {
VI_UNLOCK(vp);
continue;
}
@@ -2344,8 +2351,13 @@ process_deferred_inactive(struct mount *mp)
MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
goto loop;
}
+ ip = VTOI(vp);
+ if ((ip->i_flag & IN_LAZYACCESS) != 0) {
+ ip->i_flag &= ~IN_LAZYACCESS;
+ ip->i_flag |= IN_MODIFIED;
+ }
VI_LOCK(vp);
- if ((vp->v_iflag & VI_OWEINACT) == 0) {
+ if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) {
VI_UNLOCK(vp);
VOP_UNLOCK(vp, 0, td);
vdrop(vp);
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 91d2c01..4d8d9ef 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -593,8 +593,11 @@ ffs_read(ap)
}
if ((error == 0 || uio->uio_resid != orig_resid) &&
- (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
+ (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
+ VI_LOCK(vp);
ip->i_flag |= IN_ACCESS;
+ VI_UNLOCK(vp);
+ }
return (error);
}
@@ -989,8 +992,11 @@ ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
}
if ((error == 0 || uio->uio_resid != orig_resid) &&
- (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
+ (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
+ VI_LOCK(vp);
ip->i_flag |= IN_ACCESS;
+ VI_UNLOCK(vp);
+ }
return (error);
}
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 38e2c30..2fc3336 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -55,6 +55,13 @@
* is the permanent meta-data associated with the file which is read in
* from the permanent dinode from long term storage when the file becomes
* active, and is put back when the file is no longer being used.
+ *
+ * An inode may only be changed while holding either the exclusive
+ * vnode lock or the shared vnode lock and the vnode interlock. We use
+ * the latter only for "read" and "get" operations that require
+ * changing i_flag, or a timestamp. This locking protocol allows executing
+ * those operations without having to upgrade the vnode lock from shared to
+ * exclusive.
*/
struct inode {
TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */
@@ -119,6 +126,8 @@ struct inode {
#define IN_RENAME 0x0010 /* Inode is being renamed. */
#define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */
#define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */
+#define IN_LAZYACCESS 0x0100 /* Process IN_ACCESS after the
+ suspension finished */
#define i_devvp i_ump->um_devvp
#define i_umbufobj i_ump->um_bo
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 768860a..687dc70 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -128,31 +128,49 @@ ufs_itimes(vp)
{
struct inode *ip;
struct timespec ts;
+ int mnt_locked;
ip = VTOI(vp);
+ mnt_locked = 0;
+ if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
+ VI_LOCK(vp);
+ goto out;
+ }
+ MNT_ILOCK(vp->v_mount); /* For reading of mnt_kern_flags. */
+ mnt_locked = 1;
+ VI_LOCK(vp);
if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
- return;
+ goto out_unl;
+
if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
ip->i_flag |= IN_LAZYMOD;
- else
+ else if (((vp->v_mount->mnt_kern_flag &
+ (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
+ (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
ip->i_flag |= IN_MODIFIED;
- if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
- vfs_timestamp(&ts);
- if (ip->i_flag & IN_ACCESS) {
- DIP_SET(ip, i_atime, ts.tv_sec);
- DIP_SET(ip, i_atimensec, ts.tv_nsec);
- }
- if (ip->i_flag & IN_UPDATE) {
- DIP_SET(ip, i_mtime, ts.tv_sec);
- DIP_SET(ip, i_mtimensec, ts.tv_nsec);
- ip->i_modrev++;
- }
- if (ip->i_flag & IN_CHANGE) {
- DIP_SET(ip, i_ctime, ts.tv_sec);
- DIP_SET(ip, i_ctimensec, ts.tv_nsec);
- }
+ else if (ip->i_flag & IN_ACCESS)
+ ip->i_flag |= IN_LAZYACCESS;
+ vfs_timestamp(&ts);
+ if (ip->i_flag & IN_ACCESS) {
+ DIP_SET(ip, i_atime, ts.tv_sec);
+ DIP_SET(ip, i_atimensec, ts.tv_nsec);
}
+ if (ip->i_flag & IN_UPDATE) {
+ DIP_SET(ip, i_mtime, ts.tv_sec);
+ DIP_SET(ip, i_mtimensec, ts.tv_nsec);
+ ip->i_modrev++;
+ }
+ if (ip->i_flag & IN_CHANGE) {
+ DIP_SET(ip, i_ctime, ts.tv_sec);
+ DIP_SET(ip, i_ctimensec, ts.tv_nsec);
+ }
+
+ out:
ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
+ out_unl:
+ VI_UNLOCK(vp);
+ if (mnt_locked)
+ MNT_IUNLOCK(vp->v_mount);
}
/*
@@ -266,11 +284,13 @@ ufs_close(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
+ int usecount;
VI_LOCK(vp);
- if (vp->v_usecount > 1)
- ufs_itimes(vp);
+ usecount = vp->v_usecount;
VI_UNLOCK(vp);
+ if (usecount > 1)
+ ufs_itimes(vp);
return (0);
}
@@ -378,8 +398,10 @@ ufs_getattr(ap)
if (ip->i_ump->um_fstype == UFS1) {
vap->va_rdev = ip->i_din1->di_rdev;
vap->va_size = ip->i_din1->di_size;
+ VI_LOCK(vp);
vap->va_atime.tv_sec = ip->i_din1->di_atime;
vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
+ VI_UNLOCK(vp);
vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
@@ -390,8 +412,10 @@ ufs_getattr(ap)
} else {
vap->va_rdev = ip->i_din2->di_rdev;
vap->va_size = ip->i_din2->di_size;
+ VI_LOCK(vp);
vap->va_atime.tv_sec = ip->i_din2->di_atime;
vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
+ VI_UNLOCK(vp);
vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
@@ -1992,11 +2016,13 @@ ufsfifo_close(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
+ int usecount;
VI_LOCK(vp);
- if (vp->v_usecount > 1)
- ufs_itimes(vp);
+ usecount = vp->v_usecount;
VI_UNLOCK(vp);
+ if (usecount > 1)
+ ufs_itimes(vp);
return (fifo_specops.vop_close(ap));
}
OpenPOWER on IntegriCloud