diff options
author | tegge <tegge@FreeBSD.org> | 2006-03-08 23:43:39 +0000 |
---|---|---|
committer | tegge <tegge@FreeBSD.org> | 2006-03-08 23:43:39 +0000 |
commit | 2e0e03c06ff6c78e7d6f269c98e20d8e2eeb58dc (patch) | |
tree | 8446efea303e7dd86046218298bbbd2e8b1e8a1b /sys/ufs | |
parent | b33c92af90c5ffdd40c36b6a8094d3ae01b92331 (diff) | |
download | FreeBSD-src-2e0e03c06ff6c78e7d6f269c98e20d8e2eeb58dc.zip FreeBSD-src-2e0e03c06ff6c78e7d6f269c98e20d8e2eeb58dc.tar.gz |
Use vn_start_secondary_write() and vn_finished_secondary_write() as a
replacement for vn_write_suspend_wait() to better account for secondary write
processing.
Close a race where secondary writes could be started after ffs_sync() returned
but before the file system was marked as suspended.
Detect if secondary writes or softdep processing occurred during the vnode sync
loop in ffs_sync() and retry the loop if needed.
Diffstat (limited to 'sys/ufs')
-rw-r--r-- | sys/ufs/ffs/ffs_extern.h | 4 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 159 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vfsops.c | 55 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_inode.c | 14 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_quota.c | 6 | ||||
-rw-r--r-- | sys/ufs/ufs/ufsmount.h | 1 |
6 files changed, 227 insertions, 12 deletions
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 8045704..21704bd 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -94,6 +94,10 @@ extern struct vop_vector ffs_fifoops2; /* * Soft update function prototypes. */ + +int softdep_check_suspend(struct mount *, struct vnode *, + int, int, int, int); +void softdep_get_depcounts(struct mount *, int *, int *); void softdep_initialize(void); void softdep_uninitialize(void); int softdep_mount(struct vnode *, struct mount *, struct fs *, diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 600203d..53853cc 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -346,6 +346,68 @@ softdep_request_cleanup(fs, vp) return (0); } +int +softdep_check_suspend(struct mount *mp, + struct vnode *devvp, + int softdep_deps, + int softdep_accdeps, + int secondary_writes, + int secondary_accwrites) +{ + struct bufobj *bo; + int error; + + (void) softdep_deps, + (void) softdep_accdeps; + + ASSERT_VI_LOCKED(devvp, "softdep_check_suspend"); + bo = &devvp->v_bufobj; + + for (;;) { + if (!MNT_ITRYLOCK(mp)) { + VI_UNLOCK(devvp); + MNT_ILOCK(mp); + MNT_IUNLOCK(mp); + VI_LOCK(devvp); + continue; + } + if (mp->mnt_secondary_writes != 0) { + VI_UNLOCK(devvp); + msleep(&mp->mnt_secondary_writes, + MNT_MTX(mp), + (PUSER - 1) | PDROP, "secwr", 0); + VI_LOCK(devvp); + continue; + } + break; + } + + /* + * Reasons for needing more work before suspend: + * - Dirty buffers on devvp. 
+ * - Secondary writes occurred after start of vnode sync loop + */ + error = 0; + if (bo->bo_numoutput > 0 || + bo->bo_dirty.bv_cnt > 0 || + secondary_writes != 0 || + mp->mnt_secondary_writes != 0 || + secondary_accwrites != mp->mnt_secondary_accwrites) + error = EAGAIN; + VI_UNLOCK(devvp); + return (error); +} + +void +softdep_get_depcounts(struct mount *mp, + int *softdepactivep, + int *softdepactiveaccp) +{ + (void) mp; + *softdepactivep = 0; + *softdepactiveaccp = 0; +} + #else /* * These definitions need to be adapted to the system to which @@ -500,6 +562,7 @@ static int softdep_count_dependencies(struct buf *bp, int); static struct mtx lk; MTX_SYSINIT(softdep_lock, &lk, "Softdep Lock", MTX_DEF); +#define TRY_ACQUIRE_LOCK(lk) mtx_trylock(lk) #define ACQUIRE_LOCK(lk) mtx_lock(lk) #define FREE_LOCK(lk) mtx_unlock(lk) @@ -588,6 +651,7 @@ workitem_alloc(item, type, mp) item->wk_state = 0; ACQUIRE_LOCK(&lk); VFSTOUFS(mp)->softdep_deps++; + VFSTOUFS(mp)->softdep_accdeps++; FREE_LOCK(&lk); } @@ -873,7 +937,7 @@ process_worklist_item(mp, flags) } ump->softdep_on_worklist -= 1; FREE_LOCK(&lk); - if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) panic("process_worklist_item: suspended filesystem"); matchcnt++; switch (wk->wk_type) { @@ -903,6 +967,7 @@ process_worklist_item(mp, flags) "softdep", TYPENAME(wk->wk_type)); /* NOTREACHED */ } + vn_finished_secondary_write(mp); ACQUIRE_LOCK(&lk); return (matchcnt); } @@ -6000,6 +6065,98 @@ getdirtybuf(bp, mtx, waitfor) return (bp); } + +/* + * Check if it is safe to suspend the file system now. On entry, + * the vnode interlock for devvp should be held. Return 0 with + * the mount interlock held if the file system can be suspended now, + * otherwise return EAGAIN with the mount interlock held. 
+ */ +int +softdep_check_suspend(struct mount *mp, + struct vnode *devvp, + int softdep_deps, + int softdep_accdeps, + int secondary_writes, + int secondary_accwrites) +{ + struct bufobj *bo; + struct ufsmount *ump; + int error; + + ASSERT_VI_LOCKED(devvp, "softdep_check_suspend"); + ump = VFSTOUFS(mp); + bo = &devvp->v_bufobj; + + for (;;) { + if (!TRY_ACQUIRE_LOCK(&lk)) { + VI_UNLOCK(devvp); + ACQUIRE_LOCK(&lk); + FREE_LOCK(&lk); + VI_LOCK(devvp); + continue; + } + if (!MNT_ITRYLOCK(mp)) { + FREE_LOCK(&lk); + VI_UNLOCK(devvp); + MNT_ILOCK(mp); + MNT_IUNLOCK(mp); + VI_LOCK(devvp); + continue; + } + if (mp->mnt_secondary_writes != 0) { + FREE_LOCK(&lk); + VI_UNLOCK(devvp); + msleep(&mp->mnt_secondary_writes, + MNT_MTX(mp), + (PUSER - 1) | PDROP, "secwr", 0); + VI_LOCK(devvp); + continue; + } + break; + } + + /* + * Reasons for needing more work before suspend: + * - Dirty buffers on devvp. + * - Softdep activity occurred after start of vnode sync loop + * - Secondary writes occurred after start of vnode sync loop + */ + error = 0; + if (bo->bo_numoutput > 0 || + bo->bo_dirty.bv_cnt > 0 || + softdep_deps != 0 || + ump->softdep_deps != 0 || + softdep_accdeps != ump->softdep_accdeps || + secondary_writes != 0 || + mp->mnt_secondary_writes != 0 || + secondary_accwrites != mp->mnt_secondary_accwrites) + error = EAGAIN; + FREE_LOCK(&lk); + VI_UNLOCK(devvp); + return (error); +} + + +/* + * Get the number of dependency structures for the file system, both + * the current number and the total number allocated. These will + * later be used to detect that softdep processing has occurred. + */ +void +softdep_get_depcounts(struct mount *mp, + int *softdep_depsp, + int *softdep_accdepsp) +{ + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + ACQUIRE_LOCK(&lk); + *softdep_depsp = ump->softdep_deps; + *softdep_accdepsp = ump->softdep_accdeps; + FREE_LOCK(&lk); +} + /* * Wait for pending output on a vnode to complete. * Must be called with vnode lock and interlock locked. 
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index c6c6510..f4245db 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -70,7 +70,7 @@ __FBSDID("$FreeBSD$"); static uma_zone_t uma_inode, uma_ufs1, uma_ufs2; -static int ffs_sbupdate(struct ufsmount *, int); +static int ffs_sbupdate(struct ufsmount *, int, int); static int ffs_reload(struct mount *, struct thread *); static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, @@ -234,7 +234,7 @@ ffs_mount(struct mount *mp, struct thread *td) } if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) fs->fs_clean = 1; - if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) { + if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { fs->fs_ronly = 0; fs->fs_clean = 0; vn_finished_write(mp); @@ -301,7 +301,7 @@ ffs_mount(struct mount *mp, struct thread *td) fs->fs_ronly = 0; mp->mnt_flag &= ~MNT_RDONLY; fs->fs_clean = 0; - if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) { + if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { vn_finished_write(mp); return (error); } @@ -806,7 +806,7 @@ ffs_mountfs(devvp, mp, td) ffs_snapshot_mount(mp); fs->fs_fmod = 1; fs->fs_clean = 0; - (void) ffs_sbupdate(ump, MNT_WAIT); + (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * Initialize filesystem stat information in mount struct. @@ -988,7 +988,7 @@ ffs_unmount(mp, mntflags, td) UFS_UNLOCK(ump); if (fs->fs_ronly == 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 
0 : 1; - error = ffs_sbupdate(ump, MNT_WAIT); + error = ffs_sbupdate(ump, MNT_WAIT, 0); if (error) { fs->fs_clean = 0; return (error); @@ -1113,6 +1113,12 @@ ffs_sync(mp, waitfor, td) struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, count, wait, lockreq, allerror = 0; + int suspend; + int suspended; + int secondary_writes; + int secondary_accwrites; + int softdep_deps; + int softdep_accdeps; struct bufobj *bo; fs = ump->um_fs; @@ -1124,14 +1130,30 @@ ffs_sync(mp, waitfor, td) * Write back each (modified) inode. */ wait = 0; + suspend = 0; + suspended = 0; lockreq = LK_EXCLUSIVE | LK_NOWAIT; + if (waitfor == MNT_SUSPEND) { + suspend = 1; + waitfor = MNT_WAIT; + } if (waitfor == MNT_WAIT) { wait = 1; lockreq = LK_EXCLUSIVE; } lockreq |= LK_INTERLOCK | LK_SLEEPFAIL; MNT_ILOCK(mp); + MNT_REF(mp); loop: + /* Grab snapshot of secondary write counts */ + secondary_writes = mp->mnt_secondary_writes; + secondary_accwrites = mp->mnt_secondary_accwrites; + + /* Grab snapshot of softdep dependency counts */ + MNT_IUNLOCK(mp); + softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps); + MNT_ILOCK(mp); + MNT_VNODE_FOREACH(vp, mp, mvp) { /* * Depend on the mntvnode_slock to keep things stable enough @@ -1165,6 +1187,7 @@ loop: vput(vp); MNT_ILOCK(mp); } + MNT_REL(mp); MNT_IUNLOCK(mp); /* * Force stale filesystem control information to be flushed. @@ -1194,12 +1217,25 @@ loop: MNT_ILOCK(mp); goto loop; } + } else if (suspend != 0) { + if (softdep_check_suspend(mp, + devvp, + softdep_deps, + softdep_accdeps, + secondary_writes, + secondary_accwrites) != 0) + goto loop; /* More work needed */ + mtx_assert(MNT_MTX(mp), MA_OWNED); + mp->mnt_kern_flag |= MNTK_SUSPENDED; + MNT_IUNLOCK(mp); + suspended = 1; } else VI_UNLOCK(devvp); /* * Write back modified superblock. 
*/ - if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) + if (fs->fs_fmod != 0 && + (error = ffs_sbupdate(ump, waitfor, suspended)) != 0) allerror = error; return (allerror); } @@ -1449,9 +1485,10 @@ ffs_uninit(vfsp) * Write a superblock and associated information back to disk. */ static int -ffs_sbupdate(mp, waitfor) +ffs_sbupdate(mp, waitfor, suspended) struct ufsmount *mp; int waitfor; + int suspended; { struct fs *fs = mp->um_fs; struct buf *sbbp; @@ -1482,6 +1519,8 @@ ffs_sbupdate(mp, waitfor) size, 0, 0, 0); bcopy(space, bp->b_data, (u_int)size); space = (char *)space + size; + if (suspended) + bp->b_flags |= B_VALIDSUSPWRT; if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) @@ -1513,6 +1552,8 @@ ffs_sbupdate(mp, waitfor) fs->fs_time = time_second; bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); ffs_oldfscompat_write((struct fs *)bp->b_data, mp); + if (suspended) + bp->b_flags |= B_VALIDSUSPWRT; if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index b7214d2..091756c 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -73,7 +73,9 @@ ufs_inactive(ap) struct thread *td = ap->a_td; mode_t mode; int error = 0; + struct mount *mp; + mp = NULL; if (prtactive && vp->v_usecount != 0) vprint("ufs_inactive: pushing active", vp); /* @@ -84,7 +86,7 @@ ufs_inactive(ap) if (ip->i_effnlink == 0 && DOINGSOFTDEP(vp)) softdep_releasefile(ip); if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - (void) vn_write_suspend_wait(vp, NULL, V_WAIT); + (void) vn_start_secondary_write(vp, &mp, V_WAIT); #ifdef QUOTA if (!getinoquota(ip)) (void)chkiq(ip, -1, NOCRED, FORCE); @@ -111,10 +113,14 @@ ufs_inactive(ap) } if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && - vn_write_suspend_wait(vp, NULL, V_NOWAIT)) { + mp == NULL && + 
vn_start_secondary_write(vp, &mp, V_NOWAIT)) { + mp = NULL; ip->i_flag &= ~IN_ACCESS; } else { - (void) vn_write_suspend_wait(vp, NULL, V_WAIT); + if (mp == NULL) + (void) vn_start_secondary_write(vp, &mp, + V_WAIT); UFS_UPDATE(vp, 0); } } @@ -125,6 +131,8 @@ out: */ if (ip->i_mode == 0) vrecycle(vp, td); + if (mp != NULL) + vn_finished_secondary_write(mp); return (error); } diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 3f6bd60..f856e02 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -980,14 +980,16 @@ dqsync(vp, dq) struct iovec aiov; struct uio auio; int error; + struct mount *mp; + mp = NULL; if (dq == NODQUOT) panic("dqsync: dquot"); if ((dq->dq_flags & DQ_MOD) == 0) return (0); if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) panic("dqsync: file"); - (void) vn_write_suspend_wait(dqvp, NULL, V_WAIT); + (void) vn_start_secondary_write(dqvp, &mp, V_WAIT); if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, td); while (dq->dq_flags & DQ_LOCK) { @@ -996,6 +998,7 @@ dqsync(vp, dq) if ((dq->dq_flags & DQ_MOD) == 0) { if (vp != dqvp) VOP_UNLOCK(dqvp, 0, td); + vn_finished_secondary_write(mp); return (0); } } @@ -1017,6 +1020,7 @@ dqsync(vp, dq) dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); if (vp != dqvp) VOP_UNLOCK(dqvp, 0, td); + vn_finished_secondary_write(mp); return (error); } diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index 193e0c7..4703a9b 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -77,6 +77,7 @@ struct ufsmount { struct worklist *softdep_worklist_tail; /* Tail pointer for above */ int softdep_on_worklist; /* Items on the worklist */ int softdep_deps; /* Total dependency count */ + int softdep_accdeps; /* accumulated dep count */ int softdep_req; /* Wakeup when deps hits 0. */ struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */ |