diff options
Diffstat (limited to 'sys/ufs')
-rw-r--r-- | sys/ufs/ffs/ffs_extern.h | 1 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_snapshot.c | 67 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 91 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vfsops.c | 31 | ||||
-rw-r--r-- | sys/ufs/mfs/mfs_vfsops.c | 2 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_bmap.c | 24 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_inode.c | 6 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_quota.c | 2 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_vnops.c | 3 |
9 files changed, 152 insertions, 75 deletions
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 8e011bb..1d52ec7 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -116,6 +116,7 @@ extern vop_t **ffs_fifoop_p; void softdep_initialize __P((void)); int softdep_mount __P((struct vnode *, struct mount *, struct fs *, struct ucred *)); +int softdep_flushworklist __P((struct mount *, int *, struct proc *)); int softdep_flushfiles __P((struct mount *, int, struct proc *)); void softdep_update_inodeblock __P((struct inode *, struct buf *, int)); void softdep_load_inodeblock __P((struct inode *)); diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index d749abe..af03143 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)ffs_snapshot.c 8.10 (McKusick) 7/11/00 + * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 * $FreeBSD$ */ @@ -290,6 +290,7 @@ restart: if (fs->fs_cgsize < fs->fs_bsize) bzero(&nbp->b_data[fs->fs_cgsize], fs->fs_bsize - fs->fs_cgsize); + nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); base = cg * fs->fs_fpg / fs->fs_frag; if (base + len > numblks) @@ -311,6 +312,7 @@ restart: indiroff = (base + loc - NDADDR) % NINDIR(fs); for ( ; loc < len; loc++, indiroff++) { if (indiroff >= NINDIR(fs)) { + ibp->b_flags |= B_VALIDSUSPWRT; bawrite(ibp); error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), @@ -325,7 +327,8 @@ restart: continue; ((ufs_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY; } - brelse(bp); + bqrelse(bp); + ibp->b_flags |= B_VALIDSUSPWRT; bdwrite(ibp); } /* @@ -340,6 +343,7 @@ restart: if (fs->fs_sbsize < fs->fs_bsize) bzero(&nbp->b_data[fs->fs_sbsize], fs->fs_bsize - fs->fs_sbsize); + nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); blkno = fragstoblks(fs, fs->fs_csaddr); len = howmany(fs->fs_cssize, fs->fs_bsize) - 1; @@ -354,6 +358,7 @@ restart: size = fs->fs_cssize % fs->fs_bsize; } 
bcopy(fs->fs_csp[loc], nbp->b_data, size); + nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); } /* @@ -366,6 +371,7 @@ restart: if (error) goto out1; readblock(nbp, inoblks[loc]); + nbp->b_flags |= B_VALIDSUSPWRT; bdwrite(nbp); } /* @@ -410,6 +416,7 @@ restart: dip->di_blocks = 0; dip->di_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT); bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs_daddr_t)); + nbp->b_flags |= B_VALIDSUSPWRT; bdwrite(nbp); } /* @@ -422,7 +429,7 @@ restart: if (error) goto out1; copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno)); - brelse(ibp); + bqrelse(ibp); error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno), fs->fs_bsize, p->p_ucred, 0, &nbp); if (error) @@ -434,7 +441,8 @@ restart: goto out1; } bcopy(ibp->b_data, nbp->b_data, fs->fs_bsize); - brelse(ibp); + bqrelse(ibp); + nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); } /* @@ -518,7 +526,7 @@ indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir) } else { MALLOC(bap, ufs_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); - brelse(bp); + bqrelse(bp); } error = snapacct(snapvp, &bap[0], &bap[last]); if (error || level == 0) @@ -539,7 +547,7 @@ indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir) } out: if (snapvp != cancelvp) - brelse(bp); + bqrelse(bp); else FREE(bap, M_DEVBUF); return (error); @@ -578,8 +586,10 @@ snapacct(vp, oldblkp, lastblkp) if (*blkp != 0) panic("snapacct: bad block"); *blkp = BLK_SNAP; - if (lbn >= NDADDR) + if (lbn >= NDADDR) { + ibp->b_flags |= B_VALIDSUSPWRT; bdwrite(ibp); + } } return (0); } @@ -732,7 +742,7 @@ ffs_snapblkfree(freeip, bno, size) default: case BLK_NOCOPY: if (lbn >= NDADDR) - brelse(ibp); + bqrelse(ibp); continue; /* * No previous snapshot claimed the block, so it will be @@ -787,7 +797,7 @@ ffs_snapblkfree(freeip, bno, size) return (1); } if (lbn >= NDADDR) - brelse(ibp); + bqrelse(ibp); /* * Allocate the block into which to do the copy. 
Note that this * allocation will never require any additional allocations for @@ -933,40 +943,57 @@ ffs_copyonwrite(ap) if (bp->b_vp == vp) continue; /* - * Check to see if block needs to be copied. + * Check to see if block needs to be copied. We have to + * be able to do the VOP_BALLOC without blocking, otherwise + * we may get in a deadlock with another process also + * trying to allocate. If we find ourselves unable to + * get the buffer lock, we unlock the snapshot vnode, + * sleep briefly, and try again. */ +retry: + vn_lock(vp, LK_SHARED | LK_RETRY, p); if (lbn < NDADDR) { blkno = ip->i_db[lbn]; } else { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); p->p_flag |= P_COWINPROGRESS; error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, B_METAONLY, &ibp); + fs->fs_bsize, KERNCRED, B_METAONLY | B_NOWAIT, &ibp); p->p_flag &= ~P_COWINPROGRESS; - VOP_UNLOCK(vp, 0, p); - if (error) - break; + if (error) { + VOP_UNLOCK(vp, 0, p); + if (error != EWOULDBLOCK) + break; + tsleep(vp, p->p_usrpri, "nap", 1); + goto retry; + } indiroff = (lbn - NDADDR) % NINDIR(fs); blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff]; - brelse(ibp); + bqrelse(ibp); } #ifdef DIAGNOSTIC if (blkno == BLK_SNAP && bp->b_lblkno >= 0) panic("ffs_copyonwrite: bad copy block"); #endif - if (blkno != 0) + if (blkno != 0) { + VOP_UNLOCK(vp, 0, p); continue; + } /* * Allocate the block into which to do the copy. Note that this * allocation will never require any additional allocations for * the snapshot inode. 
*/ - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); p->p_flag |= P_COWINPROGRESS; error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, 0, &cbp); + fs->fs_bsize, KERNCRED, B_NOWAIT, &cbp); p->p_flag &= ~P_COWINPROGRESS; VOP_UNLOCK(vp, 0, p); + if (error) { + if (error != EWOULDBLOCK) + break; + tsleep(vp, p->p_usrpri, "nap", 1); + goto retry; + } #ifdef DEBUG if (snapdebug) { printf("Copyonwrite: snapino %d lbn %d for ", @@ -979,8 +1006,6 @@ ffs_copyonwrite(ap) cbp->b_blkno); } #endif - if (error) - break; /* * If we have already read the old block contents, then * simply copy them to the new block. diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index d9e6414..cbc37ad 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -548,41 +548,45 @@ softdep_process_worklist(matchmnt) case D_DIRREM: /* removal of a directory entry */ mp = WK_DIRREM(wk)->dm_mnt; + if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + panic("%s: dirrem on suspended filesystem", + "softdep_process_worklist"); if (mp == matchmnt) matchcnt += 1; - vn_start_write(NULL, &mp, V_WAIT); handle_workitem_remove(WK_DIRREM(wk)); - vn_finished_write(mp); break; case D_FREEBLKS: /* releasing blocks and/or fragments from a file */ mp = WK_FREEBLKS(wk)->fb_mnt; + if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + panic("%s: freeblks on suspended filesystem", + "softdep_process_worklist"); if (mp == matchmnt) matchcnt += 1; - vn_start_write(NULL, &mp, V_WAIT); handle_workitem_freeblocks(WK_FREEBLKS(wk)); - vn_finished_write(mp); break; case D_FREEFRAG: /* releasing a fragment when replaced as a file grows */ mp = WK_FREEFRAG(wk)->ff_mnt; + if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + panic("%s: freefrag on suspended filesystem", + "softdep_process_worklist"); if (mp == matchmnt) matchcnt += 1; - vn_start_write(NULL, &mp, V_WAIT); handle_workitem_freefrag(WK_FREEFRAG(wk)); - vn_finished_write(mp); break; case D_FREEFILE: /* releasing an inode when its link 
count drops to 0 */ mp = WK_FREEFILE(wk)->fx_mnt; + if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) + panic("%s: freefile on suspended filesystem", + "softdep_process_worklist"); if (mp == matchmnt) matchcnt += 1; - vn_start_write(NULL, &mp, V_WAIT); handle_workitem_freefile(WK_FREEFILE(wk)); - vn_finished_write(mp); break; default: @@ -646,13 +650,13 @@ softdep_move_dependencies(oldbp, newbp) * Purge the work list of all items associated with a particular mount point. */ int -softdep_flushfiles(oldmnt, flags, p) +softdep_flushworklist(oldmnt, countp, p) struct mount *oldmnt; - int flags; + int *countp; struct proc *p; { struct vnode *devvp; - int error, loopcnt; + int count, error = 0; /* * Await our turn to clear out the queue. @@ -660,32 +664,16 @@ softdep_flushfiles(oldmnt, flags, p) while (softdep_worklist_busy) tsleep(&lbolt, PRIBIO, "softflush", 0); softdep_worklist_busy = 1; - if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) { - softdep_worklist_busy = 0; - return (error); - } /* * Alternately flush the block device associated with the mount * point and process any dependencies that the flushing - * creates. In theory, this loop can happen at most twice, - * but we give it a few extra just to be sure. + * creates. We continue until no more worklist dependencies + * are found. */ + *countp = 0; devvp = VFSTOUFS(oldmnt)->um_devvp; - for (loopcnt = 10; loopcnt > 0; ) { - if (softdep_process_worklist(oldmnt) == 0) { - loopcnt--; - /* - * Do another flush in case any vnodes were brought in - * as part of the cleanup operations. - */ - if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) - break; - /* - * If we still found nothing to do, we are really done. 
- */ - if (softdep_process_worklist(oldmnt) == 0) - break; - } + while ((count = softdep_process_worklist(oldmnt)) > 0) { + *countp += count; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p); VOP_UNLOCK(devvp, 0, p); @@ -693,6 +681,37 @@ softdep_flushfiles(oldmnt, flags, p) break; } softdep_worklist_busy = 0; + return (error); +} + +/* + * Flush all vnodes and worklist items associated with a specified mount point. + */ +int +softdep_flushfiles(oldmnt, flags, p) + struct mount *oldmnt; + int flags; + struct proc *p; +{ + int error, count, loopcnt; + + /* + * Alternately flush the vnodes associated with the mount + * point and process any dependencies that the flushing + * creates. In theory, this loop can happen at most twice, + * but we give it a few extra just to be sure. + */ + for (loopcnt = 10; loopcnt > 0; loopcnt--) { + /* + * Do another flush in case any vnodes were brought in + * as part of the cleanup operations. + */ + if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) + break; + if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 || + count == 0) + break; + } /* * If we are unmounting then it is an error to fail. 
If we * are simply trying to downgrade to read-only, then filesystem @@ -4432,8 +4451,8 @@ clear_remove(p) mp = pagedep->pd_mnt; ino = pagedep->pd_ino; FREE_LOCK(&lk); - if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0) - return; + if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) + continue; if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_remove: vget", error); vn_finished_write(mp); @@ -4503,8 +4522,8 @@ clear_inodedeps(p) if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) continue; FREE_LOCK(&lk); - if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0) - return; + if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) + continue; if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_inodedeps: vget", error); vn_finished_write(mp); diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index cf0e220..c40be45 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -908,7 +908,7 @@ ffs_sync(mp, waitfor, cred, p) struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; - int error, allerror = 0; + int error, count, wait, lockreq, allerror = 0; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ @@ -918,6 +918,12 @@ ffs_sync(mp, waitfor, cred, p) /* * Write back each (modified) inode. 
*/ + wait = 0; + lockreq = LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK; + if (waitfor == MNT_WAIT) { + wait = 1; + lockreq = LK_EXCLUSIVE | LK_INTERLOCK; + } simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { @@ -938,9 +944,7 @@ loop: } if (vp->v_type != VCHR) { simple_unlock(&mntvnode_slock); - error = - vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); - if (error) { + if ((error = vget(vp, lockreq, p)) != 0) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; @@ -948,14 +952,12 @@ loop: } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; - VOP_UNLOCK(vp, 0, p); - vrele(vp); + vput(vp); simple_lock(&mntvnode_slock); } else { simple_unlock(&mntvnode_slock); simple_unlock(&vp->v_interlock); - /* UFS_UPDATE(vp, waitfor == MNT_WAIT); */ - UFS_UPDATE(vp, 0); + UFS_UPDATE(vp, wait); simple_lock(&mntvnode_slock); } } @@ -963,9 +965,16 @@ loop: /* * Force stale file system control information to be flushed. */ - if (waitfor != MNT_LAZY) { - if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) - waitfor = MNT_NOWAIT; + if (waitfor == MNT_WAIT) { + if ((error = softdep_flushworklist(ump->um_mountp, &count, p))) + allerror = error; + /* Flushed work items may create new vnodes to clean */ + if (count) { + simple_lock(&mntvnode_slock); + goto loop; + } + } + if (waitfor == MNT_NOWAIT) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) allerror = error; diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c index 5c5e010..429f029 100644 --- a/sys/ufs/mfs/mfs_vfsops.c +++ b/sys/ufs/mfs/mfs_vfsops.c @@ -248,7 +248,7 @@ mfs_mount(mp, path, data, ndp, p) /* It is not clear that these will get initialized otherwise */ dev->si_bsize_phys = DEV_BSIZE; dev->si_iosize_max = DFLTPHYS; - addaliasu(devvp, makeudev(253, mfs_minor++)); + devvp = addaliasu(devvp, makeudev(253, mfs_minor++)); devvp->v_data = mfsp; mfsp->mfs_baseoff = args.base; 
mfsp->mfs_size = args.size; diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index ab4ac52..40fdd65 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -147,7 +147,18 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); - if (*bnp == 0) { + /* + * Since this is FFS independent code, we are out of + * scope for the definitions of BLK_NOCOPY and + * BLK_SNAP, but we do know that they will fall in + * the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts + * are made to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && + ip->i_db[bn] > 0 && ip->i_db[bn] < ump->um_seqinc) { + *bnp = -1; + } else if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) *bnp = blkptrtodb(ump, bn * ump->um_seqinc); else @@ -230,6 +241,17 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) if (bp) bqrelse(bp); + /* + * Since this is FFS independent code, we are out of scope for the + * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they + * will fall in the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts are made + * to read a BLK_NOCOPY or BLK_SNAP block. 
+ */ + if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ + *bnp = -1; + return (0); + } *bnp = blkptrtodb(ump, daddr); if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 485a6d2..b700fd3 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -77,7 +77,7 @@ ufs_inactive(ap) if (ip->i_mode == 0) goto out; if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - (void) vn_write_suspend_wait(vp, V_WAIT); + (void) vn_write_suspend_wait(vp, NULL, V_WAIT); #ifdef QUOTA if (!getinoquota(ip)) (void)chkiq(ip, -1, NOCRED, 0); @@ -94,10 +94,10 @@ ufs_inactive(ap) } if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && - vn_write_suspend_wait(vp, V_NOWAIT)) { + vn_write_suspend_wait(vp, NULL, V_NOWAIT)) { ip->i_flag &= ~IN_ACCESS; } else { - (void) vn_write_suspend_wait(vp, V_WAIT); + (void) vn_write_suspend_wait(vp, NULL, V_WAIT); UFS_UPDATE(vp, 0); } } diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 6396f67..19b3dad 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -898,7 +898,7 @@ dqsync(vp, dq) return (0); if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) panic("dqsync: file"); - (void) vn_write_suspend_wait(dqvp, V_WAIT); + (void) vn_write_suspend_wait(dqvp, NULL, V_WAIT); if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); while (dq->dq_flags & DQ_LOCK) { diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index d97568c..0fac626 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -2036,7 +2036,8 @@ ufs_vinit(mntp, specops, fifoops, vpp) case VCHR: case VBLK: vp->v_op = specops; - addaliasu(vp, ip->i_rdev); + vp = addaliasu(vp, ip->i_rdev); + ip->i_vnode = vp; break; case VFIFO: vp->v_op = fifoops; |