From 87e306ad712d6a7daef1073b7fb94c0fa90908d2 Mon Sep 17 00:00:00 2001 From: jeff Date: Sun, 9 Feb 2003 11:28:35 +0000 Subject: - Cleanup unlocked accesses to buf flags by introducing a new b_vflag member that is protected by the vnode lock. - Move B_SCANNED into b_vflags and call it BV_SCANNED. - Create a vop_stdfsync() modeled after spec's sync. - Replace spec_fsync, msdos_fsync, and hpfs_fsync with the stdfsync and some fs specific processing. This gives all of these filesystems proper behavior wrt MNT_WAIT/NOWAIT and the use of the B_SCANNED flag. - Annotate the locking in buf.h --- sys/fs/hpfs/hpfs_vnops.c | 39 ++---------------- sys/fs/msdosfs/msdosfs_vnops.c | 41 ++----------------- sys/fs/specfs/spec_vnops.c | 83 +-------------------------------------- sys/kern/vfs_bio.c | 6 ++- sys/kern/vfs_default.c | 89 ++++++++++++++++++++++++++++++++++++++++++ sys/sys/buf.h | 24 ++++++++---- sys/sys/vnode.h | 1 + sys/ufs/ffs/ffs_vnops.c | 13 +++--- 8 files changed, 125 insertions(+), 171 deletions(-) (limited to 'sys') diff --git a/sys/fs/hpfs/hpfs_vnops.c b/sys/fs/hpfs/hpfs_vnops.c index d207c52..6578095 100644 --- a/sys/fs/hpfs/hpfs_vnops.c +++ b/sys/fs/hpfs/hpfs_vnops.c @@ -87,48 +87,15 @@ hpfs_fsync(ap) struct thread *a_td; } */ *ap; { - struct vnode *vp = ap->a_vp; - int s; - struct buf *bp, *nbp; - /* - * Flush all dirty buffers associated with a vnode. + * Flush our dirty buffers. 
*/ -loop: - VI_LOCK(vp); - s = splbio(); - for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { - nbp = TAILQ_NEXT(bp, b_vnbufs); - VI_UNLOCK(vp); - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { - VI_LOCK(vp); - continue; - } - if ((bp->b_flags & B_DELWRI) == 0) - panic("hpfs_fsync: not dirty"); - bremfree(bp); - splx(s); - (void) bwrite(bp); - goto loop; - } - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp), PRIBIO + 1, - "hpfsn", 0); - } -#ifdef DIAGNOSTIC - if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { - vprint("hpfs_fsync: dirty", vp); - goto loop; - } -#endif - VI_UNLOCK(vp); - splx(s); + vop_stdfsync(ap); /* * Write out the on-disc version of the vnode. */ - return hpfs_update(VTOHP(vp)); + return hpfs_update(VTOHP(ap->a_vp)); } static int diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 0137920..d368775 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -806,45 +806,12 @@ msdosfs_fsync(ap) struct thread *a_td; } */ *ap; { - struct vnode *vp = ap->a_vp; - int s; - struct buf *bp, *nbp; - /* - * Flush all dirty buffers associated with a vnode. + * Flush our dirty buffers. 
*/ -loop: - s = splbio(); - VI_LOCK(vp); - for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { - nbp = TAILQ_NEXT(bp, b_vnbufs); - VI_UNLOCK(vp); - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { - VI_LOCK(vp); - continue; - } - if ((bp->b_flags & B_DELWRI) == 0) - panic("msdosfs_fsync: not dirty"); - bremfree(bp); - splx(s); - /* XXX Could do bawrite */ - (void) bwrite(bp); - goto loop; - } - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - (void) msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp), - PRIBIO + 1, "msdosfsn", 0); - } -#ifdef DIAGNOSTIC - if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { - vprint("msdosfs_fsync: dirty", vp); - goto loop; - } -#endif - VI_UNLOCK(vp); - splx(s); - return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT)); + vop_stdfsync(ap); + + return (deupdat(VTODE(ap->a_vp), ap->a_waitfor == MNT_WAIT)); } static int diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c index 5c8d73f..1b34ac8 100644 --- a/sys/fs/specfs/spec_vnops.c +++ b/sys/fs/specfs/spec_vnops.c @@ -416,89 +416,10 @@ spec_fsync(ap) struct thread *a_td; } */ *ap; { - struct vnode *vp = ap->a_vp; - struct buf *bp; - struct buf *nbp; - int s, error = 0; - int maxretry = 100; /* large, arbitrarily chosen */ - - if (!vn_isdisk(vp, NULL)) + if (!vn_isdisk(ap->a_vp, NULL)) return (0); - VI_LOCK(vp); -loop1: - /* - * MARK/SCAN initialization to avoid infinite loops. - */ - s = splbio(); - TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { - bp->b_flags &= ~B_SCANNED; - bp->b_error = 0; - } - splx(s); - - /* - * Flush all dirty buffers associated with a block device. 
- */ -loop2: - s = splbio(); - for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) { - nbp = TAILQ_NEXT(bp, b_vnbufs); - if ((bp->b_flags & B_SCANNED) != 0) - continue; - VI_UNLOCK(vp); - bp->b_flags |= B_SCANNED; - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { - VI_LOCK(vp); - continue; - } - if ((bp->b_flags & B_DELWRI) == 0) - panic("spec_fsync: not dirty"); - if ((vp->v_vflag & VV_OBJBUF) && (bp->b_flags & B_CLUSTEROK)) { - BUF_UNLOCK(bp); - vfs_bio_awrite(bp); - splx(s); - } else { - bremfree(bp); - splx(s); - bawrite(bp); - } - VI_LOCK(vp); - goto loop2; - } - - /* - * If synchronous the caller expects us to completely resolve all - * dirty buffers in the system. Wait for in-progress I/O to - * complete (which could include background bitmap writes), then - * retry if dirty blocks still exist. - */ - if (ap->a_waitfor == MNT_WAIT) { - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp), - PRIBIO + 1, "spfsyn", 0); - } - if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { - /* - * If we are unable to write any of these buffers - * then we fail now rather than trying endlessly - * to write them out. 
- */ - TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) - if ((error = bp->b_error) == 0) - continue; - if (error == 0 && --maxretry >= 0) { - splx(s); - goto loop1; - } - vprint("spec_fsync: giving up on dirty", vp); - error = EAGAIN; - } - } - VI_UNLOCK(vp); - splx(s); - return (error); + return (vop_stdfsync(ap)); } /* diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 4c4e14f..2f65898 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -542,6 +542,7 @@ bufinit(void) bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_EMPTY; + bp->b_vflags = 0; bp->b_xflags = 0; LIST_INIT(&bp->b_dep); BUF_LOCKINIT(bp); @@ -1909,6 +1910,7 @@ restart: bp->b_flags = 0; bp->b_ioflags = 0; bp->b_xflags = 0; + bp->b_vflags = 0; bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; @@ -3216,12 +3218,12 @@ bufdone(struct buf *bp) (int) m->pindex, (int)(foff >> 32), (int) foff & 0xffffffff, resid, i); if (!vn_isdisk(vp, NULL)) - printf(" iosize: %ld, lblkno: %jd, flags: 0x%lx, npages: %d\n", + printf(" iosize: %ld, lblkno: %jd, flags: 0x%x, npages: %d\n", bp->b_vp->v_mount->mnt_stat.f_iosize, (intmax_t) bp->b_lblkno, bp->b_flags, bp->b_npages); else - printf(" VDEV, lblkno: %jd, flags: 0x%lx, npages: %d\n", + printf(" VDEV, lblkno: %jd, flags: 0x%x, npages: %d\n", (intmax_t) bp->b_lblkno, bp->b_flags, bp->b_npages); printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n", diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index f629b19..feecc3d 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -708,6 +708,95 @@ vop_stdbmap(ap) return (0); } +int +vop_stdfsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct buf *bp; + struct buf *nbp; + int s, error = 0; + int maxretry = 100; /* large, arbitrarily chosen */ + + VI_LOCK(vp); +loop1: + /* + * MARK/SCAN initialization to avoid infinite loops. 
+ */ + s = splbio(); + TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { + bp->b_vflags &= ~BV_SCANNED; + bp->b_error = 0; + } + splx(s); + + /* + * Flush all dirty buffers associated with the vnode. + */ +loop2: + s = splbio(); + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) { + nbp = TAILQ_NEXT(bp, b_vnbufs); + if ((bp->b_vflags & BV_SCANNED) != 0) + continue; + bp->b_vflags |= BV_SCANNED; + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) + continue; + VI_UNLOCK(vp); + if ((bp->b_flags & B_DELWRI) == 0) + panic("spec_fsync: not dirty"); + if ((vp->v_vflag & VV_OBJBUF) && (bp->b_flags & B_CLUSTEROK)) { + BUF_UNLOCK(bp); + vfs_bio_awrite(bp); + splx(s); + } else { + bremfree(bp); + splx(s); + bawrite(bp); + } + VI_LOCK(vp); + goto loop2; + } + + /* + * If synchronous the caller expects us to completely resolve all + * dirty buffers in the system. Wait for in-progress I/O to + * complete (which could include background bitmap writes), then + * retry if dirty blocks still exist. + */ + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_iflag |= VI_BWAIT; + msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp), + PRIBIO + 1, "spfsyn", 0); + } + if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { + /* + * If we are unable to write any of these buffers + * then we fail now rather than trying endlessly + * to write them out. + */ + TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) + if ((error = bp->b_error) == 0) + continue; + if (error == 0 && --maxretry >= 0) { + splx(s); + goto loop1; + } + vprint("fsync: giving up on dirty", vp); + error = EAGAIN; + } + } + VI_UNLOCK(vp); + splx(s); + + return (error); +} /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ int vop_stdgetpages(ap) diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 9a8ab3a..ac60adf 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -95,6 +95,11 @@ typedef unsigned char b_xflags_t; * * b_resid. Number of bytes remaining in I/O. 
After an I/O operation * completes, b_resid is usually 0 indicating 100% success. + * + * All fields are protected by the buffer lock except those marked: + * V - Protected by owning vnode lock + * Q - Protected by the buf queue lock + * D - Protected by a dependency implementation specific lock */ struct buf { /* XXX: b_io must be the first element of struct buf for now /phk */ @@ -122,12 +127,13 @@ struct buf { #ifdef USE_BUFHASH LIST_ENTRY(buf) b_hash; /* Hash chain. */ #endif - TAILQ_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ - struct buf *b_left; /* splay tree link (V) */ - struct buf *b_right; /* splay tree link (V) */ - TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ - long b_flags; /* B_* flags. */ - unsigned short b_qindex; /* buffer queue index */ + TAILQ_ENTRY(buf) b_vnbufs; /* (V) Buffer's associated vnode. */ + struct buf *b_left; /* (V) splay tree link */ + struct buf *b_right; /* (V) splay tree link */ + uint32_t b_vflags; /* (V) BV_* flags */ + TAILQ_ENTRY(buf) b_freelist; /* (Q) Free list position inactive. */ + unsigned short b_qindex; /* (Q) buffer queue index */ + uint32_t b_flags; /* B_* flags. */ b_xflags_t b_xflags; /* extra flags */ struct lock b_lock; /* Buffer lock */ long b_bufsize; /* Allocated buffer size. */ @@ -152,7 +158,7 @@ struct buf { } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; - struct workhead b_dep; /* List of filesystem dependencies. */ + struct workhead b_dep; /* (D) List of filesystem dependencies. */ }; #define b_spc b_pager.pg_spc @@ -221,7 +227,7 @@ struct buf { #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_NOWDRAIN 0x00000800 /* Avoid wdrain deadlock */ -#define B_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */ +#define B_00001000 0x00001000 /* Available flag. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). 
*/ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ @@ -261,6 +267,8 @@ struct buf { #define NOOFFSET (-1LL) /* No buffer offset calculated yet */ +#define BV_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */ + #ifdef _KERNEL /* * Buffer locking diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 1f8cd78..497188b 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -688,6 +688,7 @@ void vfs_timestamp(struct timespec *); void vfs_write_resume(struct mount *mp); int vfs_write_suspend(struct mount *mp); int vop_stdbmap(struct vop_bmap_args *); +int vop_stdfsync(struct vop_fsync_args *); int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 6213a49..4cba12f 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -184,7 +184,7 @@ ffs_fsync(ap) VI_LOCK(vp); loop: TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) - bp->b_flags &= ~B_SCANNED; + bp->b_vflags &= ~BV_SCANNED; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); /* @@ -195,22 +195,21 @@ loop: * it to be redirtied and it has not already been deferred, * or it is already being written. */ - if ((bp->b_flags & B_SCANNED) != 0) + if ((bp->b_vflags & BV_SCANNED) != 0) continue; - bp->b_flags |= B_SCANNED; + bp->b_vflags |= BV_SCANNED; if ((skipmeta == 1 && bp->b_lblkno < 0)) continue; + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) + continue; if (!wait && LIST_FIRST(&bp->b_dep) != NULL && (bp->b_flags & B_DEFERRED) == 0 && buf_countdeps(bp, 0)) { bp->b_flags |= B_DEFERRED; + BUF_UNLOCK(bp); continue; } VI_UNLOCK(vp); - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { - VI_LOCK(vp); - continue; - } if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); if (vp != bp->b_vp) -- cgit v1.1