diff options
author | jeff <jeff@FreeBSD.org> | 2008-03-22 09:15:16 +0000 |
---|---|---|
committer | jeff <jeff@FreeBSD.org> | 2008-03-22 09:15:16 +0000 |
commit | a9d123c3ab34baa9fe2c8c25bd9acfbfb31b381e (patch) | |
tree | 5fedc50643363d96cefce7e3cd6edbdbf2d7fb2b /sys/kern | |
parent | b283b3e59a3e18ec4e7cf225a3a9922139733a73 (diff) | |
download | FreeBSD-src-a9d123c3ab34baa9fe2c8c25bd9acfbfb31b381e.zip FreeBSD-src-a9d123c3ab34baa9fe2c8c25bd9acfbfb31b381e.tar.gz |
- Complete part of the unfinished bufobj work by consistently using
BO_LOCK/UNLOCK/MTX when manipulating the bufobj.
- Create a new lock in the bufobj to lock bufobj fields independently.
This leaves the vnode interlock as an 'identity' lock while the bufobj
is an I/O lock. The bufobj lock is ordered before the vnode interlock
and also before the mnt ilock.
- Exploit this new lock order to simplify softdep_check_suspend().
- A few sync-related functions are marked with a new XXX to note that
we may not properly interlock against a non-zero bv_cnt when
attempting to sync all vnodes on a mountlist. I do not believe this
race is important. If I'm wrong this will make these locations easier
to find.
Reviewed by: kib (earlier diff)
Tested by: kris, pho (earlier diff)
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/vfs_bio.c | 9 | ||||
-rw-r--r-- | sys/kern/vfs_cluster.c | 31 | ||||
-rw-r--r-- | sys/kern/vfs_default.c | 27 | ||||
-rw-r--r-- | sys/kern/vfs_subr.c | 54 |
4 files changed, 64 insertions, 57 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 1f6c942..cc04f37 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1608,6 +1608,7 @@ done: int vfs_bio_awrite(struct buf *bp) { + struct bufobj *bo; int i; int j; daddr_t lblkno = bp->b_lblkno; @@ -1617,6 +1618,7 @@ vfs_bio_awrite(struct buf *bp) int size; int maxcl; + bo = &vp->v_bufobj; /* * right now we support clustered writing only to regular files. If * we find a clusterable block we could be in the middle of a cluster @@ -1629,7 +1631,7 @@ vfs_bio_awrite(struct buf *bp) size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; - VI_LOCK(vp); + BO_LOCK(bo); for (i = 1; i < maxcl; i++) if (vfs_bio_clcheck(vp, size, lblkno + i, bp->b_blkno + ((i * size) >> DEV_BSHIFT)) == 0) @@ -1639,8 +1641,7 @@ vfs_bio_awrite(struct buf *bp) if (vfs_bio_clcheck(vp, size, lblkno - j, bp->b_blkno - ((j * size) >> DEV_BSHIFT)) == 0) break; - - VI_UNLOCK(vp); + BO_UNLOCK(bo); --j; ncl = i + j; /* @@ -2454,7 +2455,7 @@ loop: lockflags |= LK_NOWAIT; error = BUF_TIMELOCK(bp, lockflags, - VI_MTX(vp), "getblk", slpflag, slptimeo); + BO_MTX(bo), "getblk", slpflag, slptimeo); /* * If we slept and got the lock we have to restart in case diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 7770bc4..a74c272 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -94,12 +94,14 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) struct buf **bpp; { struct buf *bp, *rbp, *reqbp; + struct bufobj *bo; daddr_t blkno, origblkno; int maxra, racluster; int error, ncontig; int i; error = 0; + bo = &vp->v_bufobj; /* * Try to limit the amount of read-ahead by a few @@ -130,7 +132,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) return 0; } else { bp->b_flags &= ~B_RAM; - VI_LOCK(vp); + BO_LOCK(bo); for (i = 1; i < maxra; i++) { /* * Stop if the buffer does not exist or it @@ -153,7 +155,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, 
seqcount, bpp) BUF_UNLOCK(rbp); } } - VI_UNLOCK(vp); + BO_UNLOCK(bo); if (i >= maxra) { return 0; } @@ -305,6 +307,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) int run; struct buf *fbp; { + struct bufobj *bo; struct buf *bp, *tbp; daddr_t bn; int i, inc, j; @@ -330,7 +333,6 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) tbp->b_flags |= B_ASYNC | B_RAM; tbp->b_iocmd = BIO_READ; } - tbp->b_blkno = blkno; if( (tbp->b_flags & B_MALLOC) || ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) ) @@ -364,6 +366,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) bp->b_npages = 0; inc = btodb(size); + bo = &vp->v_bufobj; for (bn = blkno, i = 0; i < run; ++i, bn += inc) { if (i != 0) { if ((bp->b_npages * PAGE_SIZE) + @@ -384,15 +387,15 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) * VMIO backed. The clustering code can only deal * with VMIO-backed buffers. */ - VI_LOCK(vp); + BO_LOCK(bo); if ((tbp->b_vflags & BV_BKGRDINPROG) || (tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { - VI_UNLOCK(vp); + BO_UNLOCK(bo); bqrelse(tbp); break; } - VI_UNLOCK(vp); + BO_UNLOCK(bo); /* * The buffer must be completely invalid in order to @@ -740,26 +743,28 @@ cluster_wbuild(vp, size, start_lbn, len) int len; { struct buf *bp, *tbp; + struct bufobj *bo; int i, j; int totalwritten = 0; int dbsize = btodb(size); + bo = &vp->v_bufobj; while (len > 0) { /* * If the buffer is not delayed-write (i.e. dirty), or it * is delayed-write but either locked or inval, it cannot * partake in the clustered write. 
*/ - VI_LOCK(vp); + BO_LOCK(bo); if ((tbp = gbincore(&vp->v_bufobj, start_lbn)) == NULL || (tbp->b_vflags & BV_BKGRDINPROG)) { - VI_UNLOCK(vp); + BO_UNLOCK(bo); ++start_lbn; --len; continue; } if (BUF_LOCK(tbp, - LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, VI_MTX(vp))) { + LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, BO_MTX(bo))) { ++start_lbn; --len; continue; @@ -838,10 +843,10 @@ cluster_wbuild(vp, size, start_lbn, len) * If the adjacent data is not even in core it * can't need to be written. */ - VI_LOCK(vp); - if ((tbp = gbincore(&vp->v_bufobj, start_lbn)) == NULL || + BO_LOCK(bo); + if ((tbp = gbincore(bo, start_lbn)) == NULL || (tbp->b_vflags & BV_BKGRDINPROG)) { - VI_UNLOCK(vp); + BO_UNLOCK(bo); break; } @@ -854,7 +859,7 @@ cluster_wbuild(vp, size, start_lbn, len) */ if (BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, - VI_MTX(vp))) + BO_MTX(bo))) break; if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK | diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 5422530..8b4170f 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -405,12 +405,13 @@ vop_stdfsync(ap) int error = 0; int maxretry = 1000; /* large, arbitrarily chosen */ - VI_LOCK(vp); + bo = &vp->v_bufobj; + BO_LOCK(bo); loop1: /* * MARK/SCAN initialization to avoid infinite loops. */ - TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) { + TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { bp->b_vflags &= ~BV_SCANNED; bp->b_error = 0; } @@ -419,16 +420,16 @@ loop1: * Flush all dirty buffers associated with a vnode. 
*/ loop2: - TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if ((bp->b_vflags & BV_SCANNED) != 0) continue; bp->b_vflags |= BV_SCANNED; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) continue; - VI_UNLOCK(vp); - KASSERT(bp->b_bufobj == &vp->v_bufobj, + BO_UNLOCK(bo); + KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", - bp, bp->b_bufobj, &vp->v_bufobj)); + bp, bp->b_bufobj, bo)); if ((bp->b_flags & B_DELWRI) == 0) panic("fsync: not dirty"); if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) { @@ -437,7 +438,7 @@ loop2: bremfree(bp); bawrite(bp); } - VI_LOCK(vp); + BO_LOCK(bo); goto loop2; } @@ -448,7 +449,6 @@ loop2: * retry if dirty blocks still exist. */ if (ap->a_waitfor == MNT_WAIT) { - bo = &vp->v_bufobj; bufobj_wwait(bo, 0, 0); if (bo->bo_dirty.bv_cnt > 0) { /* @@ -464,7 +464,7 @@ loop2: error = EAGAIN; } } - VI_UNLOCK(vp); + BO_UNLOCK(bo); if (error == EAGAIN) vprint("fsync: giving up on dirty", vp); @@ -571,14 +571,11 @@ vfs_stdsync(mp, waitfor, td) MNT_ILOCK(mp); loop: MNT_VNODE_FOREACH(vp, mp, mvp) { - - VI_LOCK(vp); - if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { - VI_UNLOCK(vp); + /* bv_cnt is an acceptable race here. 
*/ + if (vp->v_bufobj.bo_dirty.bv_cnt == 0) continue; - } + VI_LOCK(vp); MNT_IUNLOCK(mp); - if ((error = vget(vp, lockreq, td)) != 0) { MNT_ILOCK(mp); if (error == ENOENT) { diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index a0df9e6..0fcff5f 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -936,7 +936,7 @@ alloc: */ bo = &vp->v_bufobj; bo->__bo_vnode = vp; - bo->bo_mtx = &vp->v_interlock; + mtx_init(BO_MTX(bo), "bufobj interlock", NULL, MTX_DEF); bo->bo_ops = &buf_ops_bio; bo->bo_private = vp; TAILQ_INIT(&bo->bo_clean.bv_hd); @@ -1236,8 +1236,8 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td, ASSERT_VOP_LOCKED(vp, "vtruncbuf"); restart: - VI_LOCK(vp); bo = &vp->v_bufobj; + BO_LOCK(bo); anyfreed = 1; for (;anyfreed;) { anyfreed = 0; @@ -1246,7 +1246,7 @@ restart: continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, - VI_MTX(vp)) == ENOLCK) + BO_MTX(bo)) == ENOLCK) goto restart; bremfree(bp); @@ -1261,7 +1261,7 @@ restart: (nbp->b_flags & B_DELWRI))) { goto restart; } - VI_LOCK(vp); + BO_LOCK(bo); } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { @@ -1269,7 +1269,7 @@ restart: continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, - VI_MTX(vp)) == ENOLCK) + BO_MTX(bo)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); @@ -1282,7 +1282,7 @@ restart: (nbp->b_flags & B_DELWRI) == 0)) { goto restart; } - VI_LOCK(vp); + BO_LOCK(bo); } } @@ -1305,13 +1305,13 @@ restartsync: bremfree(bp); bawrite(bp); - VI_LOCK(vp); + BO_LOCK(bo); goto restartsync; } } bufobj_wwait(bo, 0, 0); - VI_UNLOCK(vp); + BO_UNLOCK(bo); vnode_pager_setsize(vp, length); return (0); @@ -1503,24 +1503,25 @@ gbincore(struct bufobj *bo, daddr_t lblkno) void bgetvp(struct vnode *vp, struct buf *bp) { + struct bufobj *bo; + bo = &vp->v_bufobj; + ASSERT_BO_LOCKED(bo); VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); 
VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); - ASSERT_VI_LOCKED(vp, "bgetvp"); - vholdl(vp); - if (VFS_NEEDSGIANT(vp->v_mount) || - vp->v_bufobj.bo_flag & BO_NEEDSGIANT) + vhold(vp); + if (VFS_NEEDSGIANT(vp->v_mount) || bo->bo_flag & BO_NEEDSGIANT) bp->b_flags |= B_NEEDSGIANT; bp->b_vp = vp; - bp->b_bufobj = &vp->v_bufobj; + bp->b_bufobj = bo; /* * Insert onto list for new vnode. */ - buf_vlist_add(bp, &vp->v_bufobj, BX_VNCLEAN); + buf_vlist_add(bp, bo, BX_VNCLEAN); } /* @@ -1557,7 +1558,8 @@ brelvp(struct buf *bp) bp->b_vp = NULL; bp->b_bufobj = NULL; waiters = bp->b_waiters; - vdropl(vp); + BO_UNLOCK(bo); + vdrop(vp); return (waiters); } @@ -1668,7 +1670,7 @@ restart: (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); - VI_LOCK(vp); + BO_LOCK(*bo); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist @@ -1678,7 +1680,8 @@ restart: */ vn_syncer_add_to_worklist(*bo, syncdelay); } - vdropl(vp); + BO_UNLOCK(*bo); + vdrop(vp); VFS_UNLOCK_GIANT(vfslocked); mtx_lock(&sync_mtx); return (0); @@ -1886,7 +1889,7 @@ reassignbuf(struct buf *bp) /* * Delete from old vnode list, if on one. */ - VI_LOCK(vp); + BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else @@ -1937,7 +1940,7 @@ reassignbuf(struct buf *bp) KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif - VI_UNLOCK(vp); + BO_UNLOCK(bo); } /* @@ -3127,6 +3130,7 @@ int vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; + struct bufobj *bo; static long start, incr, next; int error; @@ -3155,14 +3159,14 @@ vfs_allocate_syncvnode(struct mount *mp) } next = start; } - VI_LOCK(vp); - vn_syncer_add_to_worklist(&vp->v_bufobj, - syncdelay > 0 ? next % syncdelay : 0); + bo = &vp->v_bufobj; + BO_LOCK(bo); + vn_syncer_add_to_worklist(bo, syncdelay > 0 ? 
next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; mtx_unlock(&sync_mtx); - VI_UNLOCK(vp); + BO_UNLOCK(bo); mp->mnt_syncer = vp; return (0); } @@ -3244,8 +3248,8 @@ sync_reclaim(struct vop_reclaim_args *ap) struct vnode *vp = ap->a_vp; struct bufobj *bo; - VI_LOCK(vp); bo = &vp->v_bufobj; + BO_LOCK(bo); vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { mtx_lock(&sync_mtx); @@ -3255,7 +3259,7 @@ sync_reclaim(struct vop_reclaim_args *ap) mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } - VI_UNLOCK(vp); + BO_UNLOCK(bo); return (0); } |