diff options
-rw-r--r-- | sys/fs/cd9660/cd9660_vfsops.c | 3 | ||||
-rw-r--r-- | sys/fs/specfs/spec_vnops.c | 6 | ||||
-rw-r--r-- | sys/gnu/ext2fs/ext2_bmap.c | 24 | ||||
-rw-r--r-- | sys/gnu/fs/ext2fs/ext2_bmap.c | 24 | ||||
-rw-r--r-- | sys/isofs/cd9660/cd9660_vfsops.c | 3 | ||||
-rw-r--r-- | sys/kern/vfs_export.c | 36 | ||||
-rw-r--r-- | sys/kern/vfs_subr.c | 36 | ||||
-rw-r--r-- | sys/kern/vfs_vnops.c | 14 | ||||
-rw-r--r-- | sys/miscfs/specfs/spec_vnops.c | 6 | ||||
-rw-r--r-- | sys/nfs/nfs_common.c | 3 | ||||
-rw-r--r-- | sys/nfs/nfs_subs.c | 3 | ||||
-rw-r--r-- | sys/nfsclient/nfs_subs.c | 3 | ||||
-rw-r--r-- | sys/nfsserver/nfs_srvsubs.c | 3 | ||||
-rw-r--r-- | sys/sys/buf.h | 5 | ||||
-rw-r--r-- | sys/sys/vnode.h | 5 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_extern.h | 1 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_snapshot.c | 67 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 91 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vfsops.c | 31 | ||||
-rw-r--r-- | sys/ufs/mfs/mfs_vfsops.c | 2 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_bmap.c | 24 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_inode.c | 6 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_quota.c | 2 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_vnops.c | 3 |
24 files changed, 298 insertions, 103 deletions
diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c index 2b21ba9..50f3883 100644 --- a/sys/fs/cd9660/cd9660_vfsops.c +++ b/sys/fs/cd9660/cd9660_vfsops.c @@ -855,7 +855,8 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir) case VCHR: case VBLK: vp->v_op = cd9660_specop_p; - addaliasu(vp, ip->inode.iso_rdev); + vp = addaliasu(vp, ip->inode.iso_rdev); + ip->i_vnode = vp; break; default: break; diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c index baf40c3..2b1df9e 100644 --- a/sys/fs/specfs/spec_vnops.c +++ b/sys/fs/specfs/spec_vnops.c @@ -421,9 +421,11 @@ spec_strategy(ap) bp = ap->a_bp; vp = ap->a_vp; if ((bp->b_iocmd == BIO_WRITE)) { - if (vp->v_mount != NULL && - (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) + if ((bp->b_flags & B_VALIDSUSPWRT) == 0 && + bp->b_vp != NULL && bp->b_vp->v_mount != NULL && + (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) panic("spec_strategy: bad I/O"); + bp->b_flags &= ~B_VALIDSUSPWRT; if (LIST_FIRST(&bp->b_dep) != NULL) buf_start(bp); if ((vp->v_flag & VCOPYONWRITE) && diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c index ab4ac52..40fdd65 100644 --- a/sys/gnu/ext2fs/ext2_bmap.c +++ b/sys/gnu/ext2fs/ext2_bmap.c @@ -147,7 +147,18 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); - if (*bnp == 0) { + /* + * Since this is FFS independent code, we are out of + * scope for the definitions of BLK_NOCOPY and + * BLK_SNAP, but we do know that they will fall in + * the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts + * are made to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && + ip->i_db[bn] > 0 && ip->i_db[bn] < ump->um_seqinc) { + *bnp = -1; + } else if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) *bnp = blkptrtodb(ump, bn * ump->um_seqinc); else @@ -230,6 +241,17 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) if (bp) bqrelse(bp); + /* + * Since this is FFS independent code, we are out of scope for the + * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they + * will fall in the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts are made + * to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ + *bnp = -1; + return (0); + } *bnp = blkptrtodb(ump, daddr); if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c index ab4ac52..40fdd65 100644 --- a/sys/gnu/fs/ext2fs/ext2_bmap.c +++ b/sys/gnu/fs/ext2fs/ext2_bmap.c @@ -147,7 +147,18 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); - if (*bnp == 0) { + /* + * Since this is FFS independent code, we are out of + * scope for the definitions of BLK_NOCOPY and + * BLK_SNAP, but we do know that they will fall in + * the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts + * are made to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && + ip->i_db[bn] > 0 && ip->i_db[bn] < ump->um_seqinc) { + *bnp = -1; + } else if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) *bnp = blkptrtodb(ump, bn * ump->um_seqinc); else @@ -230,6 +241,17 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) if (bp) bqrelse(bp); + /* + * Since this is FFS independent code, we are out of scope for the + * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they + * will fall in the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts are made + * to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ + *bnp = -1; + return (0); + } *bnp = blkptrtodb(ump, daddr); if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c index 2b21ba9..50f3883 100644 --- a/sys/isofs/cd9660/cd9660_vfsops.c +++ b/sys/isofs/cd9660/cd9660_vfsops.c @@ -855,7 +855,8 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir) case VCHR: case VBLK: vp->v_op = cd9660_specop_p; - addaliasu(vp, ip->inode.iso_rdev); + vp = addaliasu(vp, ip->inode.iso_rdev); + ip->i_vnode = vp; break; default: break; diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 0e5ec3f..db16d9f 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -1296,15 +1296,45 @@ bdevvp(dev, vpp) * how many users there are is inadequate; the v_usecount for * the vnodes need to be accumulated. vcount() does that. */ -void +struct vnode * addaliasu(nvp, nvp_rdev) struct vnode *nvp; udev_t nvp_rdev; { + struct vnode *ovp; + vop_t **ops; + dev_t dev; if (nvp->v_type != VBLK && nvp->v_type != VCHR) panic("addaliasu on non-special vnode"); - addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0)); + dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0); + /* + * Check to see if we have a bdevvp vnode with no associated + * filesystem. If so, we want to associate the filesystem of + * the new newly instigated vnode with the bdevvp vnode and + * discard the newly created vnode rather than leaving the + * bdevvp vnode lying around with no associated filesystem. + */ + if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) { + addalias(nvp, dev); + return (nvp); + } + /* + * Discard unneeded vnode, but save its node specific data. + * Note that if there is a lock, it is carried over in the + * node specific data to the replacement vnode. + */ + vref(ovp); + ovp->v_data = nvp->v_data; + ovp->v_tag = nvp->v_tag; + nvp->v_data = NULL; + ops = nvp->v_op; + nvp->v_op = ovp->v_op; + ovp->v_op = ops; + insmntque(ovp, nvp->v_mount); + vrele(nvp); + vgone(nvp); + return (ovp); } void @@ -1648,7 +1678,7 @@ vclean(vp, flags, p) */ if (flags & DOCLOSE) { if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL) - (void) vn_write_suspend_wait(vp, V_WAIT); + (void) vn_write_suspend_wait(vp, NULL, V_WAIT); if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0) vinvalbuf(vp, 0, NOCRED, p, 0, 0); } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 0e5ec3f..db16d9f 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1296,15 +1296,45 @@ bdevvp(dev, vpp) * how many users there are is inadequate; the v_usecount for * the vnodes need to be accumulated. vcount() does that. */ -void +struct vnode * addaliasu(nvp, nvp_rdev) struct vnode *nvp; udev_t nvp_rdev; { + struct vnode *ovp; + vop_t **ops; + dev_t dev; if (nvp->v_type != VBLK && nvp->v_type != VCHR) panic("addaliasu on non-special vnode"); - addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0)); + dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0); + /* + * Check to see if we have a bdevvp vnode with no associated + * filesystem. If so, we want to associate the filesystem of + * the new newly instigated vnode with the bdevvp vnode and + * discard the newly created vnode rather than leaving the + * bdevvp vnode lying around with no associated filesystem. + */ + if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) { + addalias(nvp, dev); + return (nvp); + } + /* + * Discard unneeded vnode, but save its node specific data. + * Note that if there is a lock, it is carried over in the + * node specific data to the replacement vnode. + */ + vref(ovp); + ovp->v_data = nvp->v_data; + ovp->v_tag = nvp->v_tag; + nvp->v_data = NULL; + ops = nvp->v_op; + nvp->v_op = ovp->v_op; + ovp->v_op = ops; + insmntque(ovp, nvp->v_mount); + vrele(nvp); + vgone(nvp); + return (ovp); } void @@ -1648,7 +1678,7 @@ vclean(vp, flags, p) */ if (flags & DOCLOSE) { if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL) - (void) vn_write_suspend_wait(vp, V_WAIT); + (void) vn_write_suspend_wait(vp, NULL, V_WAIT); if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0) vinvalbuf(vp, 0, NOCRED, p, 0, 0); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 0708f7c..0c4707b 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -729,17 +729,19 @@ vn_start_write(vp, mpp, flags) * time, these operations are halted until the suspension is over. */ int -vn_write_suspend_wait(vp, flags) +vn_write_suspend_wait(vp, mp, flags) struct vnode *vp; + struct mount *mp; int flags; { - struct mount *mp; int error; - if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) { - if (error != EOPNOTSUPP) - return (error); - return (0); + if (vp != NULL) { + if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) { + if (error != EOPNOTSUPP) + return (error); + return (0); + } } /* * If we are not suspended or have not yet reached suspended diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index baf40c3..2b1df9e 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -421,9 +421,11 @@ spec_strategy(ap) bp = ap->a_bp; vp = ap->a_vp; if ((bp->b_iocmd == BIO_WRITE)) { - if (vp->v_mount != NULL && - (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) + if ((bp->b_flags & B_VALIDSUSPWRT) == 0 && + bp->b_vp != NULL && bp->b_vp->v_mount != NULL && + (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) panic("spec_strategy: bad I/O"); + bp->b_flags &= ~B_VALIDSUSPWRT; if (LIST_FIRST(&bp->b_dep) != NULL) buf_start(bp); if ((vp->v_flag & VCOPYONWRITE) && diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c index 70e871f..5934465 100644 --- a/sys/nfs/nfs_common.c +++ b/sys/nfs/nfs_common.c @@ -1282,7 +1282,8 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; - addaliasu(vp, rdev); + vp = addaliasu(vp, rdev); + np->n_vnode = vp; } np->n_mtime = mtime.tv_sec; } diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index 70e871f..5934465 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ -1282,7 +1282,8 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; - addaliasu(vp, rdev); + vp = addaliasu(vp, rdev); + np->n_vnode = vp; } np->n_mtime = mtime.tv_sec; } diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c index 70e871f..5934465 100644 --- a/sys/nfsclient/nfs_subs.c +++ b/sys/nfsclient/nfs_subs.c @@ -1282,7 +1282,8 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; - addaliasu(vp, rdev); + vp = addaliasu(vp, rdev); + np->n_vnode = vp; } np->n_mtime = mtime.tv_sec; } diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c index 70e871f..5934465 100644 --- a/sys/nfsserver/nfs_srvsubs.c +++ b/sys/nfsserver/nfs_srvsubs.c @@ -1282,7 +1282,8 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; - addaliasu(vp, rdev); + vp = addaliasu(vp, rdev); + np->n_vnode = vp; } np->n_mtime = mtime.tv_sec; } diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 116e011..4cb2bba 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -183,7 +183,7 @@ struct buf { #define B_UNUSED0 0x00000008 /* Old B_BAD */ #define B_DEFERRED 0x00000010 /* Skipped over for cleaning */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ -#define B_UNUSED40 0x00000040 /* Old B_CALL */ +#define B_VALIDSUSPWRT 0x00000040 /* Valid write during suspension. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ @@ -237,7 +237,7 @@ extern char *buf_wmesg; /* Default buffer lock message */ * Initialize a lock. */ #define BUF_LOCKINIT(bp) \ - lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0) + lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, LK_CANRECURSE) /* * * Get a lock sleeping non-interruptably until it becomes available. @@ -467,6 +467,7 @@ buf_countdeps(struct buf *bp, int i) #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */ #define B_METAONLY 0x04 /* Return indirect block buffer. */ +#define B_NOWAIT 0x08 /* do not sleep to await lock */ #ifdef _KERNEL extern int nbuf; /* The number of buffer headers */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 3da7897..819681c 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -537,7 +537,7 @@ struct vop_bwrite_args; extern int (*lease_check_hook) __P((struct vop_lease_args *)); void addalias __P((struct vnode *vp, dev_t nvp_rdev)); -void addaliasu __P((struct vnode *vp, udev_t nvp_rdev)); +struct vnode *addaliasu __P((struct vnode *vp, udev_t nvp_rdev)); int bdevvp __P((dev_t dev, struct vnode **vpp)); /* cache_* may belong in namei.h. */ void cache_enter __P((struct vnode *dvp, struct vnode *vp, @@ -593,7 +593,8 @@ int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); int vn_start_write __P((struct vnode *vp, struct mount **mpp, int flags)); dev_t vn_todev __P((struct vnode *vp)); -int vn_write_suspend_wait __P((struct vnode *vp, int flags)); +int vn_write_suspend_wait __P((struct vnode *vp, struct mount *mp, + int flags)); int vn_writechk __P((struct vnode *vp)); int vfs_cache_lookup __P((struct vop_lookup_args *ap)); int vfs_object_create __P((struct vnode *vp, struct proc *p, diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 8e011bb..1d52ec7 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -116,6 +116,7 @@ extern vop_t **ffs_fifoop_p; void softdep_initialize __P((void)); int softdep_mount __P((struct vnode *, struct mount *, struct fs *, struct ucred *)); +int softdep_flushworklist __P((struct mount *, int *, struct proc *)); int softdep_flushfiles __P((struct mount *, int, struct proc *)); void softdep_update_inodeblock __P((struct inode *, struct buf *, int)); void softdep_load_inodeblock __P((struct inode *)); diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index d749abe..af03143 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)ffs_snapshot.c 8.10 (McKusick) 7/11/00 + * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 * $FreeBSD$ */ @@ -290,6 +290,7 @@ restart: if (fs->fs_cgsize < fs->fs_bsize) bzero(&nbp->b_data[fs->fs_cgsize], fs->fs_bsize - fs->fs_cgsize); + nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); base = cg * fs->fs_fpg / fs->fs_frag; if (base + len > numblks) @@ -311,6 +312,7 @@ restart: indiroff = (base + loc - NDADDR) % NINDIR(fs); for ( ; loc < len; loc++, indiroff++) { if (indiroff >= NINDIR(fs)) { + ibp->b_flags |= B_VALIDSUSPWRT; bawrite(ibp); error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), @@ -325,7 +327,8 @@ restart: continue; ((ufs_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY; } - brelse(bp); + bqrelse(bp); + ibp->b_flags |= B_VALIDSUSPWRT; bdwrite(ibp); } /* @@ -340,6 +343,7 @@ restart: if (fs->fs_sbsize < fs->fs_bsize) bzero(&nbp->b_data[fs->fs_sbsize], fs->fs_bsize - fs->fs_sbsize); + nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); blkno = fragstoblks(fs, fs->fs_csaddr); len = howmany(fs->fs_cssize, fs->fs_bsize) - 1; @@ -354,6 +358,7 @@ restart: size = fs->fs_cssize % fs->fs_bsize; } bcopy(fs->fs_csp[loc], nbp->b_data, size); + nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); } /* @@ -366,6 +371,7 @@ restart: if (error) goto out1; readblock(nbp, inoblks[loc]); + nbp->b_flags |= B_VALIDSUSPWRT; bdwrite(nbp); } /* @@ -410,6 +416,7 @@ restart: dip->di_blocks = 0; dip->di_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT); bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs_daddr_t)); + nbp->b_flags |= B_VALIDSUSPWRT; bdwrite(nbp); } /* @@ -422,7 +429,7 @@ restart: if (error) goto out1; copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno)); - brelse(ibp); + bqrelse(ibp); error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno), fs->fs_bsize, p->p_ucred, 0, &nbp); if (error) @@ -434,7 +441,8 @@ restart: goto out1; } bcopy(ibp->b_data, nbp->b_data, fs->fs_bsize); - brelse(ibp); + bqrelse(ibp); + nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); } /* @@ -518,7 +526,7 @@ indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir) } else { MALLOC(bap, ufs_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); - brelse(bp); + bqrelse(bp); } error = snapacct(snapvp, &bap[0], &bap[last]); if (error || level == 0) @@ -539,7 +547,7 @@ indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir) } out: if (snapvp != cancelvp) - brelse(bp); + bqrelse(bp); else FREE(bap, M_DEVBUF); return (error); @@ -578,8 +586,10 @@ snapacct(vp, oldblkp, lastblkp) if (*blkp != 0) panic("snapacct: bad block"); *blkp = BLK_SNAP; - if (lbn >= NDADDR) + if (lbn >= NDADDR) { + ibp->b_flags |= B_VALIDSUSPWRT; bdwrite(ibp); + } } return (0); } @@ -732,7 +742,7 @@ ffs_snapblkfree(freeip, bno, size) default: case BLK_NOCOPY: if (lbn >= NDADDR) - brelse(ibp); + bqrelse(ibp); continue; /* * No previous snapshot claimed the block, so it will be @@ -787,7 +797,7 @@ ffs_snapblkfree(freeip, bno, size) return (1); } if (lbn >= NDADDR) - brelse(ibp); + bqrelse(ibp); /* * Allocate the block into which to do the copy. Note that this * allocation will never require any additional allocations for @@ -933,40 +943,57 @@ ffs_copyonwrite(ap) if (bp->b_vp == vp) continue; /* - * Check to see if block needs to be copied. + * Check to see if block needs to be copied. We have to + * be able to do the VOP_BALLOC without blocking, otherwise + * we may get in a deadlock with another process also + * trying to allocate. If we find outselves unable to + * get the buffer lock, we unlock the snapshot vnode, + * sleep briefly, and try again. */ +retry: + vn_lock(vp, LK_SHARED | LK_RETRY, p); if (lbn < NDADDR) { blkno = ip->i_db[lbn]; } else { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); p->p_flag |= P_COWINPROGRESS; error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, B_METAONLY, &ibp); + fs->fs_bsize, KERNCRED, B_METAONLY | B_NOWAIT, &ibp); p->p_flag &= ~P_COWINPROGRESS; - VOP_UNLOCK(vp, 0, p); - if (error) - break; + if (error) { + VOP_UNLOCK(vp, 0, p); + if (error != EWOULDBLOCK) + break; + tsleep(vp, p->p_usrpri, "nap", 1); + goto retry; + } indiroff = (lbn - NDADDR) % NINDIR(fs); blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff]; - brelse(ibp); + bqrelse(ibp); } #ifdef DIAGNOSTIC if (blkno == BLK_SNAP && bp->b_lblkno >= 0) panic("ffs_copyonwrite: bad copy block"); #endif - if (blkno != 0) + if (blkno != 0) { + VOP_UNLOCK(vp, 0, p); continue; + } /* * Allocate the block into which to do the copy. Note that this * allocation will never require any additional allocations for * the snapshot inode. */ - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); p->p_flag |= P_COWINPROGRESS; error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, 0, &cbp); + fs->fs_bsize, KERNCRED, B_NOWAIT, &cbp); p->p_flag &= ~P_COWINPROGRESS; VOP_UNLOCK(vp, 0, p); + if (error) { + if (error != EWOULDBLOCK) + break; + tsleep(vp, p->p_usrpri, "nap", 1); + goto retry; + } #ifdef DEBUG if (snapdebug) { printf("Copyonwrite: snapino %d lbn %d for ", @@ -979,8 +1006,6 @@ ffs_copyonwrite(ap) cbp->b_blkno); } #endif - if (error) - break; /* * If we have already read the old block contents, then * simply copy them to the new block. diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index d9e6414..cbc37ad 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -548,41 +548,45 @@ softdep_process_worklist(matchmnt) case D_DIRREM: /* removal of a directory entry */ mp = WK_DIRREM(wk)->dm_mnt; + if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + panic("%s: dirrem on suspended filesystem", + "softdep_process_worklist"); if (mp == matchmnt) matchcnt += 1; - vn_start_write(NULL, &mp, V_WAIT); handle_workitem_remove(WK_DIRREM(wk)); - vn_finished_write(mp); break; case D_FREEBLKS: /* releasing blocks and/or fragments from a file */ mp = WK_FREEBLKS(wk)->fb_mnt; + if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + panic("%s: freeblks on suspended filesystem", + "softdep_process_worklist"); if (mp == matchmnt) matchcnt += 1; - vn_start_write(NULL, &mp, V_WAIT); handle_workitem_freeblocks(WK_FREEBLKS(wk)); - vn_finished_write(mp); break; case D_FREEFRAG: /* releasing a fragment when replaced as a file grows */ mp = WK_FREEFRAG(wk)->ff_mnt; + if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + panic("%s: freefrag on suspended filesystem", + "softdep_process_worklist"); if (mp == matchmnt) matchcnt += 1; - vn_start_write(NULL, &mp, V_WAIT); handle_workitem_freefrag(WK_FREEFRAG(wk)); - vn_finished_write(mp); break; case D_FREEFILE: /* releasing an inode when its link count drops to 0 */ mp = WK_FREEFILE(wk)->fx_mnt; + if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + panic("%s: freefile on suspended filesystem", + "softdep_process_worklist"); if (mp == matchmnt) matchcnt += 1; - vn_start_write(NULL, &mp, V_WAIT); handle_workitem_freefile(WK_FREEFILE(wk)); - vn_finished_write(mp); break; default: @@ -646,13 +650,13 @@ softdep_move_dependencies(oldbp, newbp) * Purge the work list of all items associated with a particular mount point. */ int -softdep_flushfiles(oldmnt, flags, p) +softdep_flushworklist(oldmnt, countp, p) struct mount *oldmnt; - int flags; + int *countp; struct proc *p; { struct vnode *devvp; - int error, loopcnt; + int count, error = 0; /* * Await our turn to clear out the queue. @@ -660,32 +664,16 @@ softdep_flushfiles(oldmnt, flags, p) while (softdep_worklist_busy) tsleep(&lbolt, PRIBIO, "softflush", 0); softdep_worklist_busy = 1; - if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) { - softdep_worklist_busy = 0; - return (error); - } /* * Alternately flush the block device associated with the mount * point and process any dependencies that the flushing - * creates. In theory, this loop can happen at most twice, - * but we give it a few extra just to be sure. + * creates. We continue until no more worklist dependencies + * are found. */ + *countp = 0; devvp = VFSTOUFS(oldmnt)->um_devvp; - for (loopcnt = 10; loopcnt > 0; ) { - if (softdep_process_worklist(oldmnt) == 0) { - loopcnt--; - /* - * Do another flush in case any vnodes were brought in - * as part of the cleanup operations. - */ - if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) - break; - /* - * If we still found nothing to do, we are really done. - */ - if (softdep_process_worklist(oldmnt) == 0) - break; - } + while ((count = softdep_process_worklist(oldmnt)) > 0) { + *countp += count; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p); VOP_UNLOCK(devvp, 0, p); @@ -693,6 +681,37 @@ softdep_flushfiles(oldmnt, flags, p) break; } softdep_worklist_busy = 0; + return (error); +} + +/* + * Flush all vnodes and worklist items associated with a specified mount point. + */ +int +softdep_flushfiles(oldmnt, flags, p) + struct mount *oldmnt; + int flags; + struct proc *p; +{ + int error, count, loopcnt; + + /* + * Alternately flush the vnodes associated with the mount + * point and process any dependencies that the flushing + * creates. In theory, this loop can happen at most twice, + * but we give it a few extra just to be sure. + */ + for (loopcnt = 10; loopcnt > 0; loopcnt--) { + /* + * Do another flush in case any vnodes were brought in + * as part of the cleanup operations. + */ + if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) + break; + if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 || + count == 0) + break; + } /* * If we are unmounting then it is an error to fail. If we * are simply trying to downgrade to read-only, then filesystem @@ -4432,8 +4451,8 @@ clear_remove(p) mp = pagedep->pd_mnt; ino = pagedep->pd_ino; FREE_LOCK(&lk); - if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0) - return; + if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) + continue; if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_remove: vget", error); vn_finished_write(mp); @@ -4503,8 +4522,8 @@ clear_inodedeps(p) if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) continue; FREE_LOCK(&lk); - if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0) - return; + if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) + continue; if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_inodedeps: vget", error); vn_finished_write(mp); diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index cf0e220..c40be45 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -908,7 +908,7 @@ ffs_sync(mp, waitfor, cred, p) struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; - int error, allerror = 0; + int error, count, wait, lockreq, allerror = 0; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ @@ -918,6 +918,12 @@ ffs_sync(mp, waitfor, cred, p) /* * Write back each (modified) inode. */ + wait = 0; + lockreq = LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK; + if (waitfor == MNT_WAIT) { + wait = 1; + lockreq = LK_EXCLUSIVE | LK_INTERLOCK; + } simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { @@ -938,9 +944,7 @@ loop: } if (vp->v_type != VCHR) { simple_unlock(&mntvnode_slock); - error = - vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); - if (error) { + if ((error = vget(vp, lockreq, p)) != 0) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; @@ -948,14 +952,12 @@ loop: } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; - VOP_UNLOCK(vp, 0, p); - vrele(vp); + vput(vp); simple_lock(&mntvnode_slock); } else { simple_unlock(&mntvnode_slock); simple_unlock(&vp->v_interlock); - /* UFS_UPDATE(vp, waitfor == MNT_WAIT); */ - UFS_UPDATE(vp, 0); + UFS_UPDATE(vp, wait); simple_lock(&mntvnode_slock); } } @@ -963,9 +965,16 @@ loop: /* * Force stale file system control information to be flushed. */ - if (waitfor != MNT_LAZY) { - if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) - waitfor = MNT_NOWAIT; + if (waitfor == MNT_WAIT) { + if ((error = softdep_flushworklist(ump->um_mountp, &count, p))) + allerror = error; + /* Flushed work items may create new vnodes to clean */ + if (count) { + simple_lock(&mntvnode_slock); + goto loop; + } + } + if (waitfor == MNT_NOWAIT) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) allerror = error; diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c index 5c5e010..429f029 100644 --- a/sys/ufs/mfs/mfs_vfsops.c +++ b/sys/ufs/mfs/mfs_vfsops.c @@ -248,7 +248,7 @@ mfs_mount(mp, path, data, ndp, p) /* It is not clear that these will get initialized otherwise */ dev->si_bsize_phys = DEV_BSIZE; dev->si_iosize_max = DFLTPHYS; - addaliasu(devvp, makeudev(253, mfs_minor++)); + devvp = addaliasu(devvp, makeudev(253, mfs_minor++)); devvp->v_data = mfsp; mfsp->mfs_baseoff = args.base; mfsp->mfs_size = args.size; diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index ab4ac52..40fdd65 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -147,7 +147,18 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); - if (*bnp == 0) { + /* + * Since this is FFS independent code, we are out of + * scope for the definitions of BLK_NOCOPY and + * BLK_SNAP, but we do know that they will fall in + * the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts + * are made to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && + ip->i_db[bn] > 0 && ip->i_db[bn] < ump->um_seqinc) { + *bnp = -1; + } else if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) *bnp = blkptrtodb(ump, bn * ump->um_seqinc); else @@ -230,6 +241,17 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) if (bp) bqrelse(bp); + /* + * Since this is FFS independent code, we are out of scope for the + * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they + * will fall in the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts are made + * to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ + *bnp = -1; + return (0); + } *bnp = blkptrtodb(ump, daddr); if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 485a6d2..b700fd3 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -77,7 +77,7 @@ ufs_inactive(ap) if (ip->i_mode == 0) goto out; if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - (void) vn_write_suspend_wait(vp, V_WAIT); + (void) vn_write_suspend_wait(vp, NULL, V_WAIT); #ifdef QUOTA if (!getinoquota(ip)) (void)chkiq(ip, -1, NOCRED, 0); @@ -94,10 +94,10 @@ ufs_inactive(ap) } if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && - vn_write_suspend_wait(vp, V_NOWAIT)) { + vn_write_suspend_wait(vp, NULL, V_NOWAIT)) { ip->i_flag &= ~IN_ACCESS; } else { - (void) vn_write_suspend_wait(vp, V_WAIT); + (void) vn_write_suspend_wait(vp, NULL, V_WAIT); UFS_UPDATE(vp, 0); } } diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 6396f67..19b3dad 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -898,7 +898,7 @@ dqsync(vp, dq) return (0); if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) panic("dqsync: file"); - (void) vn_write_suspend_wait(dqvp, V_WAIT); + (void) vn_write_suspend_wait(dqvp, NULL, V_WAIT); if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); while (dq->dq_flags & DQ_LOCK) { diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index d97568c..0fac626 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -2036,7 +2036,8 @@ ufs_vinit(mntp, specops, fifoops, vpp) case VCHR: case VBLK: vp->v_op = specops; - addaliasu(vp, ip->i_rdev); + vp = addaliasu(vp, ip->i_rdev); + ip->i_vnode = vp; break; case VFIFO: vp->v_op = fifoops; |