summary refs log tree commit diff stats
path: root/sys/ufs
diff options
context:
space:
mode:
author mckusick <mckusick@FreeBSD.org> 2001-04-25 08:11:18 +0000
committer mckusick <mckusick@FreeBSD.org> 2001-04-25 08:11:18 +0000
commit f863141979de490194a5cb124d97ed1e3a000632 (patch)
tree 0ea574f62c45c0ac1caa946e11d97f05e3fd2b80 /sys/ufs
parent fa10273007fa4451f879e6b720eb020c082d4453 (diff)
download FreeBSD-src-f863141979de490194a5cb124d97ed1e3a000632.zip
FreeBSD-src-f863141979de490194a5cb124d97ed1e3a000632.tar.gz
When closing the last reference to an unlinked file, it is freed by the inactive routine. Because the freeing causes the filesystem to be modified, the close must be held up during periods when the filesystem is suspended.

For snapshots to be consistent across crashes, they must write blocks that they copy and claim those written blocks in their on-disk block pointers before the old blocks that they referenced can be allowed to be written.

Close a loophole that allowed unwritten blocks to be skipped when doing ffs_sync with a request to wait for all I/O activity to be completed.
Diffstat (limited to 'sys/ufs')
-rw-r--r-- sys/ufs/ffs/ffs_snapshot.c 83
-rw-r--r-- sys/ufs/ffs/ffs_vfsops.c 21
-rw-r--r-- sys/ufs/ufs/ufs_vnops.c 26
3 files changed, 104 insertions, 26 deletions
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 2b09162..0e1e68f 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -198,10 +198,14 @@ restart:
}
/*
* Allocate shadow blocks to copy all of the other snapshot inodes
- * so that we will be able to expunge them from this snapshot.
+ * so that we will be able to expunge them from this snapshot. Also
+ * include a copy of ourselves so that we do not deadlock trying
+ * to copyonwrite ourselves when VOP_FSYNC'ing below.
*/
- for (loc = 0, inoblkcnt = 0; loc < snaploc; loc++) {
+ fs->fs_snapinum[snaploc] = ip->i_number;
+ for (loc = snaploc, inoblkcnt = 0; loc >= 0; loc--) {
blkno = fragstoblks(fs, ino_to_fsba(fs, fs->fs_snapinum[loc]));
+ fs->fs_snapinum[snaploc] = 0;
for (i = 0; i < inoblkcnt; i++)
if (inoblks[i] == blkno)
break;
@@ -652,14 +656,14 @@ ffs_snapremove(vp)
ip = VTOI(vp);
fs = ip->i_fs;
/*
- * Delete from incore list.
+ * If active, delete from incore list (this snapshot may
+ * already have been in the process of being deleted, so
+ * would not have been active).
+ *
* Clear copy-on-write flag if last snapshot.
*/
- devvp = ip->i_devvp;
- if (ip->i_nextsnap.tqe_prev == 0) {
- printf("ffs_snapremove: lost snapshot vnode %d\n",
- ip->i_number);
- } else {
+ if (ip->i_nextsnap.tqe_prev != 0) {
+ devvp = ip->i_devvp;
TAILQ_REMOVE(&devvp->v_rdev->si_snapshots, ip, i_nextsnap);
ip->i_nextsnap.tqe_prev = 0;
if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) == 0) {
@@ -832,9 +836,10 @@ ffs_snapblkfree(freeip, bno, size)
error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
fs->fs_bsize, KERNCRED, 0, &cbp);
p->p_flag &= ~P_COWINPROGRESS;
- VOP_UNLOCK(vp, 0, p);
- if (error)
+ if (error) {
+ VOP_UNLOCK(vp, 0, p);
break;
+ }
#ifdef DEBUG
if (snapdebug)
printf("%s%d lbn %d for inum %d size %ld to blkno %d\n",
@@ -843,22 +848,44 @@ ffs_snapblkfree(freeip, bno, size)
#endif
/*
* If we have already read the old block contents, then
- * simply copy them to the new block.
+ * simply copy them to the new block. Note that we need
+ * to synchronously write snapshots that have not been
+ * unlinked, and hence will be visible after a crash,
+ * to ensure their integrity.
*/
if (savedcbp != 0) {
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
bawrite(cbp);
+ if (ip->i_effnlink > 0)
+ (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+ VOP_UNLOCK(vp, 0, p);
continue;
}
/*
* Otherwise, read the old block contents into the buffer.
*/
- if ((error = readblock(cbp, lbn)) != 0)
+ if ((error = readblock(cbp, lbn)) != 0) {
+ bzero(cbp->b_data, fs->fs_bsize);
+ bawrite(cbp);
+ if (ip->i_effnlink > 0)
+ (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+ VOP_UNLOCK(vp, 0, p);
break;
+ }
savedcbp = cbp;
}
- if (savedcbp)
+ /*
+ * Note that we need to synchronously write snapshots that
+ * have not been unlinked, and hence will be visible after
+ * a crash, to ensure their integrity.
+ */
+ if (savedcbp) {
+ vp = savedcbp->b_vp;
bawrite(savedcbp);
+ if (VTOI(vp)->i_effnlink > 0)
+ (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+ VOP_UNLOCK(vp, 0, p);
+ }
/*
* If we have been unable to allocate a block in which to do
* the copy, then return non-zero so that the fragment will
@@ -1014,8 +1041,8 @@ retry:
error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
fs->fs_bsize, KERNCRED, B_NOWAIT, &cbp);
p->p_flag &= ~P_COWINPROGRESS;
- VOP_UNLOCK(vp, 0, p);
if (error) {
+ VOP_UNLOCK(vp, 0, p);
if (error != EWOULDBLOCK)
break;
tsleep(vp, p->p_pri.pri_user, "nap", 1);
@@ -1035,22 +1062,44 @@ retry:
#endif
/*
* If we have already read the old block contents, then
- * simply copy them to the new block.
+ * simply copy them to the new block. Note that we need
+ * to synchronously write snapshots that have not been
+ * unlinked, and hence will be visible after a crash,
+ * to ensure their integrity.
*/
if (savedcbp != 0) {
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
bawrite(cbp);
+ if (ip->i_effnlink > 0)
+ (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+ VOP_UNLOCK(vp, 0, p);
continue;
}
/*
* Otherwise, read the old block contents into the buffer.
*/
- if ((error = readblock(cbp, lbn)) != 0)
+ if ((error = readblock(cbp, lbn)) != 0) {
+ bzero(cbp->b_data, fs->fs_bsize);
+ bawrite(cbp);
+ if (ip->i_effnlink > 0)
+ (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+ VOP_UNLOCK(vp, 0, p);
break;
+ }
savedcbp = cbp;
}
- if (savedcbp)
+ /*
+ * Note that we need to synchronously write snapshots that
+ * have not been unlinked, and hence will be visible after
+ * a crash, to ensure their integrity.
+ */
+ if (savedcbp) {
+ vp = savedcbp->b_vp;
bawrite(savedcbp);
+ if (VTOI(vp)->i_effnlink > 0)
+ (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+ VOP_UNLOCK(vp, 0, p);
+ }
return (error);
}
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 7d48fdb..02c3eaf 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -952,7 +952,7 @@ ffs_sync(mp, waitfor, cred, p)
struct ucred *cred;
struct proc *p;
{
- struct vnode *nvp, *vp;
+ struct vnode *nvp, *vp, *devvp;
struct inode *ip;
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs;
@@ -1026,12 +1026,21 @@ loop:
#ifdef QUOTA
qsync(mp);
#endif
- if (waitfor != MNT_LAZY) {
- vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
- if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
+ devvp = ump->um_devvp;
+ mtx_lock(&devvp->v_interlock);
+ if (waitfor != MNT_LAZY &&
+ (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
+ mtx_unlock(&devvp->v_interlock);
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ if ((error = VOP_FSYNC(devvp, cred, waitfor, p)) != 0)
allerror = error;
- VOP_UNLOCK(ump->um_devvp, 0, p);
- }
+ VOP_UNLOCK(devvp, 0, p);
+ if (waitfor == MNT_WAIT) {
+ mtx_lock(&mntvnode_mtx);
+ goto loop;
+ }
+ } else
+ mtx_unlock(&devvp->v_interlock);
/*
* Write back modified superblock.
*/
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 7c36263..ad4e019 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -292,12 +292,32 @@ ufs_close(ap)
struct proc *a_p;
} */ *ap;
{
- register struct vnode *vp = ap->a_vp;
+ struct vnode *vp = ap->a_vp;
+ struct mount *mp;
mtx_lock(&vp->v_interlock);
- if (vp->v_usecount > 1)
+ if (vp->v_usecount > 1) {
ufs_itimes(vp);
- mtx_unlock(&vp->v_interlock);
+ mtx_unlock(&vp->v_interlock);
+ } else {
+ mtx_unlock(&vp->v_interlock);
+ /*
+ * If we are closing the last reference to an unlinked
+ * file, then it will be freed by the inactive routine.
+ * Because the freeing causes the filesystem to be
+ * modified, it must be held up during periods when the
+ * filesystem is suspended.
+ *
+ * XXX - EAGAIN is returned to prevent vn_close from
+ * repeating the vrele operation.
+ */
+ if (vp->v_type == VREG && VTOI(vp)->i_effnlink == 0) {
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vrele(vp);
+ vn_finished_write(mp);
+ return (EAGAIN);
+ }
+ }
return (0);
}
OpenPOWER on IntegriCloud