summaryrefslogtreecommitdiffstats
path: root/sys/ufs
diff options
context:
space:
mode:
author: mckusick <mckusick@FreeBSD.org> 2011-06-15 23:19:09 +0000
committer: mckusick <mckusick@FreeBSD.org> 2011-06-15 23:19:09 +0000
commit: ef6ee3faed283f9910e11c5d56af10d587a6803a (patch)
tree: 1b8b16a7cb08122f5940fce8ca5c6380ec168b35 /sys/ufs
parent: 5725aadb3786d9aadc53ad14bf1fef07fa822d5c (diff)
download: FreeBSD-src-ef6ee3faed283f9910e11c5d56af10d587a6803a.zip
download: FreeBSD-src-ef6ee3faed283f9910e11c5d56af10d587a6803a.tar.gz
Ensure that filesystem metadata contained within persistent snapshots
is always kept consistent. Suggested by: Jeff Roberson
Diffstat (limited to 'sys/ufs')
-rw-r--r-- sys/ufs/ffs/ffs_alloc.c | 19
-rw-r--r-- sys/ufs/ffs/ffs_balloc.c | 4
-rw-r--r-- sys/ufs/ffs/ffs_extern.h | 4
-rw-r--r-- sys/ufs/ffs/ffs_inode.c | 12
-rw-r--r-- sys/ufs/ffs/ffs_snapshot.c | 64
-rw-r--r-- sys/ufs/ffs/ffs_softdep.c | 11
-rw-r--r-- sys/ufs/ffs/softdep.h | 2
7 files changed, 74 insertions, 42 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index f1db84d..7d7866c 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -390,7 +390,7 @@ retry:
bp->b_blkno = fsbtodb(fs, bno);
if (!DOINGSOFTDEP(vp))
ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize,
- ip->i_number, NULL);
+ ip->i_number, vp->v_type, NULL);
delta = btodb(nsize - osize);
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
if (flags & IO_EXT)
@@ -670,7 +670,7 @@ ffs_reallocblks_ufs1(ap)
if (!DOINGSOFTDEP(vp))
ffs_blkfree(ump, fs, ip->i_devvp,
dbtofsb(fs, buflist->bs_children[i]->b_blkno),
- fs->fs_bsize, ip->i_number, NULL);
+ fs->fs_bsize, ip->i_number, vp->v_type, NULL);
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
if (!ffs_checkblk(ip,
@@ -878,7 +878,7 @@ ffs_reallocblks_ufs2(ap)
if (!DOINGSOFTDEP(vp))
ffs_blkfree(ump, fs, ip->i_devvp,
dbtofsb(fs, buflist->bs_children[i]->b_blkno),
- fs->fs_bsize, ip->i_number, NULL);
+ fs->fs_bsize, ip->i_number, vp->v_type, NULL);
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
if (!ffs_checkblk(ip,
@@ -1880,7 +1880,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n",
devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize,
size, fs->fs_fsmnt);
- panic("ffs_blkfree: bad size");
+ panic("ffs_blkfree_cg: bad size");
}
#endif
if ((u_int)bno >= fs->fs_size) {
@@ -1914,7 +1914,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
}
printf("dev = %s, block = %jd, fs = %s\n",
devtoname(dev), (intmax_t)bno, fs->fs_fsmnt);
- panic("ffs_blkfree: freeing free block");
+ panic("ffs_blkfree_cg: freeing free block");
}
ffs_setblock(fs, blksfree, fragno);
ffs_clusteracct(fs, cgp, fragno, 1);
@@ -1937,7 +1937,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
printf("dev = %s, block = %jd, fs = %s\n",
devtoname(dev), (intmax_t)(bno + i),
fs->fs_fsmnt);
- panic("ffs_blkfree: freeing free frag");
+ panic("ffs_blkfree_cg: freeing free frag");
}
setbit(blksfree, cgbno + i);
}
@@ -2013,13 +2013,14 @@ ffs_blkfree_trim_completed(bip)
}
void
-ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd)
+ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd)
struct ufsmount *ump;
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
+ enum vtype vtype;
struct workhead *dephd;
{
struct mount *mp;
@@ -2034,7 +2035,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd)
*/
if (devvp->v_type != VREG &&
(devvp->v_vflag & VV_COPYONWRITE) &&
- ffs_snapblkfree(fs, devvp, bno, size, inum, dephd)) {
+ ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, dephd)) {
return;
}
if (!ump->um_candelete) {
@@ -2571,7 +2572,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
if (blksize > blkcnt)
blksize = blkcnt;
ffs_blkfree(ump, fs, ump->um_devvp, blkno,
- blksize * fs->fs_fsize, ROOTINO, NULL);
+ blksize * fs->fs_fsize, ROOTINO, VDIR, NULL);
blkno += blksize;
blkcnt -= blksize;
blksize = fs->fs_frag;
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 4e8b76b..63a4eba 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -506,7 +506,7 @@ fail:
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
- ip->i_number, NULL);
+ ip->i_number, vp->v_type, NULL);
}
return (error);
}
@@ -1052,7 +1052,7 @@ fail:
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
- ip->i_number, NULL);
+ ip->i_number, vp->v_type, NULL);
}
return (error);
}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 58c72ee..fb02605 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -57,7 +57,7 @@ int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size,
struct ucred *a_cred, int a_flags, struct buf **a_bpp);
int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **);
void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *,
- ufs2_daddr_t, long, ino_t, struct workhead *);
+ ufs2_daddr_t, long, ino_t, enum vtype, struct workhead *);
ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *);
ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *);
int ffs_checkfreefile(struct fs *, struct vnode *, ino_t);
@@ -82,7 +82,7 @@ int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
int ffs_sbupdate(struct ufsmount *, int, int);
void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t,
- struct workhead *);
+ enum vtype, struct workhead *);
void ffs_snapremove(struct vnode *vp);
int ffs_snapshot(struct mount *mp, char *snapfile);
void ffs_snapshot_mount(struct mount *mp);
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 67e91c7..a7b43e2 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -235,7 +235,8 @@ ffs_truncate(vp, length, flags, cred, td)
if (oldblks[i] == 0)
continue;
ffs_blkfree(ump, fs, ip->i_devvp, oldblks[i],
- sblksize(fs, osize, i), ip->i_number, NULL);
+ sblksize(fs, osize, i), ip->i_number,
+ vp->v_type, NULL);
}
}
}
@@ -435,7 +436,8 @@ ffs_truncate(vp, length, flags, cred, td)
if (lastiblock[level] < 0) {
DIP_SET(ip, i_ib[level], 0);
ffs_blkfree(ump, fs, ip->i_devvp, bn,
- fs->fs_bsize, ip->i_number, NULL);
+ fs->fs_bsize, ip->i_number,
+ vp->v_type, NULL);
blocksreleased += nblocks;
}
}
@@ -455,7 +457,7 @@ ffs_truncate(vp, length, flags, cred, td)
DIP_SET(ip, i_db[i], 0);
bsize = blksize(fs, ip, i);
ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number,
- NULL);
+ vp->v_type, NULL);
blocksreleased += btodb(bsize);
}
if (lastblock < 0)
@@ -487,7 +489,7 @@ ffs_truncate(vp, length, flags, cred, td)
*/
bn += numfrags(fs, newspace);
ffs_blkfree(ump, fs, ip->i_devvp, bn,
- oldspace - newspace, ip->i_number, NULL);
+ oldspace - newspace, ip->i_number, vp->v_type, NULL);
blocksreleased += btodb(oldspace - newspace);
}
}
@@ -634,7 +636,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
blocksreleased += blkcount;
}
ffs_blkfree(ip->i_ump, fs, ip->i_devvp, nb, fs->fs_bsize,
- ip->i_number, NULL);
+ ip->i_number, vp->v_type, NULL);
blocksreleased += nblocks;
}
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 89fc596..8d236bd 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -81,12 +81,13 @@ ffs_snapshot(mp, snapfile)
}
int
-ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
+ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd)
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
+ enum vtype vtype;
struct workhead *wkhd;
{
return (EINVAL);
@@ -174,8 +175,10 @@ static int ffs_bp_snapblk(struct vnode *, struct buf *);
* To ensure the consistency of snapshots across crashes, we must
* synchronously write out copied blocks before allowing the
* originals to be modified. Because of the rather severe speed
- * penalty that this imposes, the following flag allows this
- * crash persistence to be disabled.
+ * penalty that this imposes, the code normally only ensures
+ * persistence for the filesystem metadata contained within a
+ * snapshot. Setting the following flag allows this crash
+ * persistence to be enabled for file contents.
*/
int dopersistence = 0;
@@ -582,7 +585,7 @@ loop:
if (len != 0 && len < fs->fs_bsize) {
ffs_blkfree(ump, copy_fs, vp,
DIP(xp, i_db[loc]), len, xp->i_number,
- NULL);
+ xvp->v_type, NULL);
blkno = DIP(xp, i_db[loc]);
DIP_SET(xp, i_db[loc], 0);
}
@@ -1245,7 +1248,8 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
*ip->i_snapblklist++ = lblkno;
if (blkno == BLK_SNAP)
blkno = blkstofrags(fs, lblkno);
- ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL);
+ ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum,
+ vp->v_type, NULL);
}
return (0);
}
@@ -1528,7 +1532,8 @@ mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
*ip->i_snapblklist++ = lblkno;
if (blkno == BLK_SNAP)
blkno = blkstofrags(fs, lblkno);
- ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL);
+ ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum,
+ vp->v_type, NULL);
}
return (0);
}
@@ -1633,7 +1638,7 @@ ffs_snapremove(vp)
DIP_SET(ip, i_db[blkno], 0);
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
- ip->i_number, NULL))) {
+ ip->i_number, vp->v_type, NULL))) {
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) -
btodb(fs->fs_bsize));
DIP_SET(ip, i_db[blkno], 0);
@@ -1658,7 +1663,8 @@ ffs_snapremove(vp)
((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk,
- fs->fs_bsize, ip->i_number, NULL))) {
+ fs->fs_bsize, ip->i_number, vp->v_type,
+ NULL))) {
ip->i_din1->di_blocks -=
btodb(fs->fs_bsize);
((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
@@ -1672,7 +1678,7 @@ ffs_snapremove(vp)
((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk,
- fs->fs_bsize, ip->i_number, NULL))) {
+ fs->fs_bsize, ip->i_number, vp->v_type, NULL))) {
ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
}
@@ -1720,12 +1726,13 @@ ffs_snapremove(vp)
* must always have been allocated from a BLK_NOCOPY location.
*/
int
-ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
+ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd)
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
+ enum vtype vtype;
struct workhead *wkhd;
{
struct buf *ibp, *cbp, *savedcbp = 0;
@@ -1874,12 +1881,16 @@ retry:
* simply copy them to the new block. Note that we need
* to synchronously write snapshots that have not been
* unlinked, and hence will be visible after a crash,
- * to ensure their integrity.
+ * to ensure their integrity. At a minimum we ensure the
+ * integrity of the filesystem metadata, but use the
+ * dopersistence sysctl-settable flag to decide on the
+ * persistence needed for file content data.
*/
if (savedcbp != 0) {
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
bawrite(cbp);
- if (dopersistence && ip->i_effnlink > 0)
+ if ((vtype == VDIR || dopersistence) &&
+ ip->i_effnlink > 0)
(void) ffs_syncvnode(vp, MNT_WAIT);
continue;
}
@@ -1889,7 +1900,8 @@ retry:
if ((error = readblock(vp, cbp, lbn)) != 0) {
bzero(cbp->b_data, fs->fs_bsize);
bawrite(cbp);
- if (dopersistence && ip->i_effnlink > 0)
+ if ((vtype == VDIR || dopersistence) &&
+ ip->i_effnlink > 0)
(void) ffs_syncvnode(vp, MNT_WAIT);
break;
}
@@ -1898,12 +1910,15 @@ retry:
/*
* Note that we need to synchronously write snapshots that
* have not been unlinked, and hence will be visible after
- * a crash, to ensure their integrity.
+ * a crash, to ensure their integrity. At a minimum we
+ * ensure the integrity of the filesystem metadata, but
+ * use the dopersistence sysctl-settable flag to decide on
+ * the persistence needed for file content data.
*/
if (savedcbp) {
vp = savedcbp->b_vp;
bawrite(savedcbp);
- if (dopersistence && VTOI(vp)->i_effnlink > 0)
+ if ((vtype == VDIR || dopersistence) &&
+ VTOI(vp)->i_effnlink > 0)
(void) ffs_syncvnode(vp, MNT_WAIT);
}
/*
@@ -2358,12 +2373,16 @@ ffs_copyonwrite(devvp, bp)
* simply copy them to the new block. Note that we need
* to synchronously write snapshots that have not been
* unlinked, and hence will be visible after a crash,
- * to ensure their integrity.
+ * to ensure their integrity. At a minimum we ensure the
+ * integrity of the filesystem metadata, but use the
+ * dopersistence sysctl-settable flag to decide on the
+ * persistence needed for file content data.
*/
if (savedcbp != 0) {
bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
bawrite(cbp);
- if (dopersistence && ip->i_effnlink > 0)
+ if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
+ dopersistence) && ip->i_effnlink > 0)
(void) ffs_syncvnode(vp, MNT_WAIT);
else
launched_async_io = 1;
@@ -2375,7 +2394,8 @@ ffs_copyonwrite(devvp, bp)
if ((error = readblock(vp, cbp, lbn)) != 0) {
bzero(cbp->b_data, fs->fs_bsize);
bawrite(cbp);
- if (dopersistence && ip->i_effnlink > 0)
+ if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
+ dopersistence) && ip->i_effnlink > 0)
(void) ffs_syncvnode(vp, MNT_WAIT);
else
launched_async_io = 1;
@@ -2386,12 +2406,16 @@ ffs_copyonwrite(devvp, bp)
/*
* Note that we need to synchronously write snapshots that
* have not been unlinked, and hence will be visible after
- * a crash, to ensure their integrity.
+ * a crash, to ensure their integrity. At a minimum we
+ * ensure the integrity of the filesystem metadata, but
+ * use the dopersistence sysctl-settable flag to decide on
+ * the persistence needed for file content data.
*/
if (savedcbp) {
vp = savedcbp->b_vp;
bawrite(savedcbp);
- if (dopersistence && VTOI(vp)->i_effnlink > 0)
+ if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
+ dopersistence) && VTOI(vp)->i_effnlink > 0)
(void) ffs_syncvnode(vp, MNT_WAIT);
else
launched_async_io = 1;
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 6730dae..3734a5d 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -5172,6 +5172,7 @@ newfreefrag(ip, blkno, size, lbn)
freefrag->ff_state = ATTACHED;
LIST_INIT(&freefrag->ff_jwork);
freefrag->ff_inum = ip->i_number;
+ freefrag->ff_vtype = ITOV(ip)->v_type;
freefrag->ff_blkno = blkno;
freefrag->ff_fragsize = size;
@@ -5216,7 +5217,7 @@ handle_workitem_freefrag(freefrag)
}
FREE_LOCK(&lk);
ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno,
- freefrag->ff_fragsize, freefrag->ff_inum, &wkhd);
+ freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd);
ACQUIRE_LOCK(&lk);
WORKITEM_FREE(freefrag, D_FREEFRAG);
FREE_LOCK(&lk);
@@ -5724,6 +5725,7 @@ newfreeblks(mp, ip)
freeblks->fb_state = ATTACHED;
freeblks->fb_uid = ip->i_uid;
freeblks->fb_inum = ip->i_number;
+ freeblks->fb_vtype = ITOV(ip)->v_type;
freeblks->fb_modrev = DIP(ip, i_modrev);
freeblks->fb_devvp = ip->i_devvp;
freeblks->fb_chkcnt = 0;
@@ -7263,7 +7265,7 @@ freework_freeblock(freework)
freeblks->fb_freecnt += btodb(bsize);
FREE_LOCK(&lk);
ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize,
- freeblks->fb_inum, &wkhd);
+ freeblks->fb_inum, freeblks->fb_vtype, &wkhd);
ACQUIRE_LOCK(&lk);
/*
* The jnewblk will be discarded and the bits in the map never
@@ -7669,7 +7671,8 @@ indir_trunc(freework, dbn, lbn)
freedeps++;
}
ffs_blkfree(ump, fs, freeblks->fb_devvp, nb,
- fs->fs_bsize, freeblks->fb_inum, &wkhd);
+ fs->fs_bsize, freeblks->fb_inum,
+ freeblks->fb_vtype, &wkhd);
}
}
if (goingaway) {
@@ -7702,7 +7705,7 @@ indir_trunc(freework, dbn, lbn)
fs_pendingblocks += nblocks;
dbn = dbtofsb(fs, dbn);
ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize,
- freeblks->fb_inum, NULL);
+ freeblks->fb_inum, freeblks->fb_vtype, NULL);
/* Non SUJ softdep does single-threaded truncations. */
freeblks->fb_freecnt += fs_pendingblocks;
if (freework->fw_blkno == dbn) {
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 9be175c..80c7315 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -511,6 +511,7 @@ struct freefrag {
ufs2_daddr_t ff_blkno; /* fragment physical block number */
long ff_fragsize; /* size of fragment being deleted */
ino_t ff_inum; /* owning inode number */
+ enum vtype ff_vtype; /* owning inode's file type */
};
/*
@@ -538,6 +539,7 @@ struct freeblks {
ufs2_daddr_t fb_chkcnt; /* Expected blks released. */
ufs2_daddr_t fb_freecnt; /* Actual blocks released. */
ino_t fb_inum; /* inode owner of blocks */
+ enum vtype fb_vtype; /* inode owner's file type */
uid_t fb_uid; /* uid of previous owner of blocks */
int fb_ref; /* Children outstanding. */
int fb_cgwait; /* cg writes outstanding. */
OpenPOWER on IntegriCloud