summaryrefslogtreecommitdiffstats
path: root/sys/ufs
diff options
context:
space:
mode:
authormckusick <mckusick@FreeBSD.org>2003-02-22 00:59:34 +0000
committermckusick <mckusick@FreeBSD.org>2003-02-22 00:59:34 +0000
commit9ed5a11f37a83d8330dd79757d41fe9b50a0fb42 (patch)
tree7591ade57520ea788cd8fbbeecae1717902e1df9 /sys/ufs
parent5340f28b154d4fa0a65fbe142b2d7dec10966348 (diff)
downloadFreeBSD-src-9ed5a11f37a83d8330dd79757d41fe9b50a0fb42.zip
FreeBSD-src-9ed5a11f37a83d8330dd79757d41fe9b50a0fb42.tar.gz
This patch fixes a deadlock between the bufdaemon and a process taking
a snapshot. As part of taking a snapshot of a filesystem, the kernel builds up a list of the filesystem metadata (such as the cylinder group bitmaps) that are contained in the snapshot. When doing a copy-on-write check, the list is first consulted. If the block being written is found on the list, then the full snapshot lookup can be avoided. Besides providing an important performance speedup this check also avoids a potential deadlock between the code creating the snapshot and the bufdaemon trying to cleanup snapshot related buffers. This fix creates a temporary list containing the key metadata blocks that can cause the deadlock. This temporary list is used between the time that the snapshot is first enabled and the time that the fully complete list is built. Reported by: Attila Nagy <bra@fsn.hu> Sponsored by: DARPA & NAI Labs.
Diffstat (limited to 'sys/ufs')
-rw-r--r--sys/ufs/ffs/ffs_snapshot.c50
1 files changed, 43 insertions, 7 deletions
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 3bfc1b9..afbea57 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -119,16 +119,15 @@ ffs_snapshot(mp, snapfile)
struct mount *mp;
char *snapfile;
{
- ufs2_daddr_t numblks, blkno;
+ ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
int error, cg, snaploc;
int i, size, len, loc;
int flag = mp->mnt_flag;
struct timespec starttime = {0, 0}, endtime;
char saved_nice = 0;
- long redo = 0, snaplistsize;
+ long redo = 0, snaplistsize = 0;
int32_t *lp;
void *space;
- daddr_t *snapblklist;
struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs;
struct snaphead *snaphead;
struct thread *td = curthread;
@@ -394,7 +393,11 @@ restart:
* spec_strategy about writing on a suspended filesystem.
* Note that we skip unlinked snapshot files as they will
* be handled separately below.
+ *
+ * We also calculate the needed size for the snapshot list.
*/
+ snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
+ FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
mtx_lock(&mntvnode_mtx);
loop:
@@ -438,6 +441,7 @@ loop:
DIP(xp, i_db[loc]) = 0;
}
}
+ snaplistsize += 1;
if (xp->i_ump->um_fstype == UFS1)
error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
BLK_NOCOPY);
@@ -487,6 +491,36 @@ loop:
transferlockers(&vp->v_lock, vp->v_vnlock);
lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
/*
+ * If this is the first snapshot on this filesystem, then we need
+ * to allocate the space for the list of preallocated snapshot blocks.
+ * This list will be refined below, but this preliminary one will
+ * keep us out of deadlock until the full one is ready.
+ */
+ if (xp == NULL) {
+ MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
+ M_UFSMNT, M_WAITOK);
+ blkp = &snapblklist[1];
+ *blkp++ = lblkno(fs, fs->fs_sblockloc);
+ blkno = fragstoblks(fs, fs->fs_csaddr);
+ for (cg = 0; cg < fs->fs_ncg; cg++) {
+ if (fragstoblks(fs, cgtod(fs, cg) > blkno))
+ break;
+ *blkp++ = fragstoblks(fs, cgtod(fs, cg));
+ }
+ len = howmany(fs->fs_cssize, fs->fs_bsize);
+ for (loc = 0; loc < len; loc++)
+ *blkp++ = blkno + loc;
+ for (; cg < fs->fs_ncg; cg++)
+ *blkp++ = fragstoblks(fs, cgtod(fs, cg));
+ snapblklist[0] = blkp - snapblklist;
+ VI_LOCK(devvp);
+ if (devvp->v_rdev->si_snapblklist != NULL)
+ panic("ffs_snapshot: non-empty list");
+ devvp->v_rdev->si_snapblklist = snapblklist;
+ devvp->v_rdev->si_snaplistsize = blkp - snapblklist;
+ VI_UNLOCK(devvp);
+ }
+ /*
* Record snapshot inode. Since this is the newest snapshot,
* it must be placed at the end of the list.
*/
@@ -535,10 +569,8 @@ out1:
}
}
/*
- * Allocate the space for the list of preallocated snapshot blocks.
+ * Allocate space for the full list of preallocated snapshot blocks.
*/
- snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
- FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
M_UFSMNT, M_WAITOK);
ip->i_snapblklist = &snapblklist[1];
@@ -556,6 +588,8 @@ out1:
FREE(snapblklist, M_UFSMNT);
goto done;
}
+ if (snaplistsize < ip->i_snapblklist - snapblklist)
+ panic("ffs_snapshot: list too small");
snaplistsize = ip->i_snapblklist - snapblklist;
snapblklist[0] = snaplistsize;
ip->i_snapblklist = 0;
@@ -600,9 +634,11 @@ out1:
* should replace the previous list.
*/
VI_LOCK(devvp);
- FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT);
+ space = devvp->v_rdev->si_snapblklist;
devvp->v_rdev->si_snapblklist = snapblklist;
devvp->v_rdev->si_snaplistsize = snaplistsize;
+ if (space != NULL)
+ FREE(space, M_UFSMNT);
VI_UNLOCK(devvp);
done:
free(copy_fs->fs_csp, M_UFSMNT);
OpenPOWER on IntegriCloud