summary refs log tree commit diff stats
path: root/sys/ufs
diff options
context:
space:
mode:
author mckusick <mckusick@FreeBSD.org> 2011-06-12 19:27:05 +0000
committer mckusick <mckusick@FreeBSD.org> 2011-06-12 19:27:05 +0000
commit 5f2600c6a978923a3904716ce73814baca63fd0f (patch)
tree 7d0ae8580a1a7287703c44a2b3ddbe41275f5102 /sys/ufs
parent ee628d4978ff89cbd55dfe07804276f1f1c5f76a (diff)
download FreeBSD-src-5f2600c6a978923a3904716ce73814baca63fd0f.zip
FreeBSD-src-5f2600c6a978923a3904716ce73814baca63fd0f.tar.gz
Update to soft updates journaling to properly track freed blocks
that get claimed by snapshots.

Submitted by: Jeff Roberson
Tested by: Peter Holm
Diffstat (limited to 'sys/ufs')
-rw-r--r-- sys/ufs/ffs/ffs_alloc.c 2
-rw-r--r-- sys/ufs/ffs/ffs_extern.h 17
-rw-r--r-- sys/ufs/ffs/ffs_snapshot.c 78
-rw-r--r-- sys/ufs/ffs/ffs_softdep.c 105
-rw-r--r-- sys/ufs/ufs/ufs_vnops.c 8
5 files changed, 180 insertions, 30 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 685b9e1..de34bb8 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -2035,7 +2035,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd)
*/
if (devvp->v_type != VREG &&
(devvp->v_vflag & VV_COPYONWRITE) &&
- ffs_snapblkfree(fs, devvp, bno, size, inum)) {
+ ffs_snapblkfree(fs, devvp, bno, size, inum, dephd)) {
return;
}
if (!ump->um_candelete) {
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 58d6121..fbda5a8 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -80,12 +80,14 @@ int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
int ffs_sbupdate(struct ufsmount *, int, int);
void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
-int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t);
+int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t,
+ struct workhead *);
void ffs_snapremove(struct vnode *vp);
int ffs_snapshot(struct mount *mp, char *snapfile);
void ffs_snapshot_mount(struct mount *mp);
void ffs_snapshot_unmount(struct mount *mp);
void process_deferred_inactive(struct mount *mp);
+void ffs_sync_snap(struct mount *, int);
int ffs_syncvnode(struct vnode *vp, int waitfor);
int ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
int ffs_update(struct vnode *, int);
@@ -149,6 +151,9 @@ int softdep_prealloc(struct vnode *, int);
int softdep_journal_lookup(struct mount *, struct vnode **);
void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
void softdep_journal_fsync(struct inode *);
+void softdep_buf_append(struct buf *, struct workhead *);
+void softdep_inode_append(struct inode *, struct ucred *, struct workhead *);
+void softdep_freework(struct workhead *);
/*
@@ -161,4 +166,14 @@ void softdep_journal_fsync(struct inode *);
int ffs_rdonly(struct inode *);
+TAILQ_HEAD(snaphead, inode); /* struct snaphead: tail-queue head whose elements are struct inode */
+
+struct snapdata { /* snapshot bookkeeping; ffs_sync_snap() reaches it via devvp->v_rdev->si_snapdata */
+ LIST_ENTRY(snapdata) sn_link; /* list linkage to other snapdata; presumably for the snapfree list -- TODO confirm */
+ struct snaphead sn_head; /* queue of inodes, walked through i_nextsnap by ffs_sync_snap() */
+ daddr_t sn_listsize; /* presumably the number of entries in sn_blklist -- TODO confirm */
+ daddr_t *sn_blklist; /* pointer to daddr_t values; their meaning is not visible here -- TODO confirm */
+ struct lock sn_lock; /* lockmgr lock; ffs_sync_snap() holds it exclusively while walking sn_head */
+};
+
#endif /* !_UFS_FFS_EXTERN_H */
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 968be8a..89fc596 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -81,12 +81,13 @@ ffs_snapshot(mp, snapfile)
}
int
-ffs_snapblkfree(fs, devvp, bno, size, inum)
+ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
+ struct workhead *wkhd;
{
return (EINVAL);
}
@@ -123,19 +124,16 @@ ffs_copyonwrite(devvp, bp)
return (EINVAL);
}
+/*
+ * Stub variant of ffs_sync_snap() placed ahead of the "#else" that
+ * introduces the snapshot feature code.  It accepts the same arguments
+ * as the full implementation and does nothing.
+ */
+void
+ffs_sync_snap(struct mount *mp, int waitfor)
+{
+}
+
#else
FEATURE(ffs_snapshot, "FFS snapshot support");
-TAILQ_HEAD(snaphead, inode);
-
-struct snapdata {
- LIST_ENTRY(snapdata) sn_link;
- struct snaphead sn_head;
- daddr_t sn_listsize;
- daddr_t *sn_blklist;
- struct lock sn_lock;
-};
-
LIST_HEAD(, snapdata) snapfree;
static struct mtx snapfree_lock;
MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF);
@@ -1635,7 +1633,7 @@ ffs_snapremove(vp)
DIP_SET(ip, i_db[blkno], 0);
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
- ip->i_number))) {
+ ip->i_number, NULL))) {
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) -
btodb(fs->fs_bsize));
DIP_SET(ip, i_db[blkno], 0);
@@ -1660,7 +1658,7 @@ ffs_snapremove(vp)
((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk,
- fs->fs_bsize, ip->i_number))) {
+ fs->fs_bsize, ip->i_number, NULL))) {
ip->i_din1->di_blocks -=
btodb(fs->fs_bsize);
((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
@@ -1674,7 +1672,7 @@ ffs_snapremove(vp)
((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
else if ((dblk == blkstofrags(fs, blkno) &&
ffs_snapblkfree(fs, ip->i_devvp, dblk,
- fs->fs_bsize, ip->i_number))) {
+ fs->fs_bsize, ip->i_number, NULL))) {
ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
}
@@ -1722,12 +1720,13 @@ ffs_snapremove(vp)
* must always have been allocated from a BLK_NOCOPY location.
*/
int
-ffs_snapblkfree(fs, devvp, bno, size, inum)
+ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
+ struct workhead *wkhd;
{
struct buf *ibp, *cbp, *savedcbp = 0;
struct thread *td = curthread;
@@ -1825,6 +1824,17 @@ retry:
"Grabonremove: snapino", ip->i_number,
(intmax_t)lbn, inum);
#endif
+ /*
+ * If journaling is tracking this write we must add
+ * the work to the inode or indirect being written.
+ */
+ if (wkhd != NULL) {
+ if (lbn < NDADDR)
+ softdep_inode_append(ip,
+ curthread->td_ucred, wkhd);
+ else
+ softdep_buf_append(ibp, wkhd);
+ }
if (lbn < NDADDR) {
DIP_SET(ip, i_db[lbn], bno);
} else if (ip->i_ump->um_fstype == UFS1) {
@@ -1902,6 +1912,8 @@ retry:
* not be freed. Although space will be lost, the snapshot
* will stay consistent.
*/
+ if (error != 0 && wkhd != NULL)
+ softdep_freework(wkhd);
lockmgr(vp->v_vnlock, LK_RELEASE, NULL);
return (error);
}
@@ -2400,6 +2412,42 @@ ffs_copyonwrite(devvp, bp)
}
/*
+ * Sync the snapshot vnodes so that freework records which are waiting on
+ * a snapshot to claim their blocks are forced to free.
+ */
+void
+ffs_sync_snap(struct mount *mp, int waitfor)
+{
+	struct vnode *devvp;
+	struct snapdata *sn;
+	struct inode *snapip;
+
+	devvp = VFSTOUFS(mp)->um_devvp;
+	/* Nothing to sync unless the device vnode is flagged copy-on-write. */
+	if ((devvp->v_vflag & VV_COPYONWRITE) == 0)
+		return;
+	/*
+	 * Look up the snapdata under the vnode interlock and take sn_lock
+	 * exclusively, handing the interlock to lockmgr().  A non-zero
+	 * return from lockmgr() restarts the lookup from the top; a missing
+	 * snapdata ends the call with the interlock dropped.
+	 */
+	do {
+		VI_LOCK(devvp);
+		sn = devvp->v_rdev->si_snapdata;
+		if (sn == NULL) {
+			VI_UNLOCK(devvp);
+			return;
+		}
+	} while (lockmgr(&sn->sn_lock,
+	    LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
+	    VI_MTX(devvp)) != 0);
+	/* Sync every inode queued on sn_head; the result is not examined. */
+	TAILQ_FOREACH(snapip, &sn->sn_head, i_nextsnap)
+		ffs_syncvnode(ITOV(snapip), waitfor);
+	lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
+}
+
+/*
* Read the specified block into the given buffer.
* Much of this boiler-plate comes from bwrite().
*/
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 4e75455..1197f48 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -584,6 +584,33 @@ softdep_get_depcounts(struct mount *mp,
*softdepactiveaccp = 0;
}
+/*
+ * Stub for softdep_buf_append(), defined ahead of the "#else" that
+ * introduces the soft-updates feature code.  It is not expected to be
+ * reached, so it panics, naming this function in the message.
+ *
+ * bp and wkhd are unused.
+ */
+void
+softdep_buf_append(bp, wkhd)
+	struct buf *bp;
+	struct workhead *wkhd;
+{
+
+	/* The message must carry the real function name to be traceable. */
+	panic("softdep_buf_append called");
+}
+
+/*
+ * Stub for softdep_inode_append(), defined ahead of the "#else" that
+ * introduces the soft-updates feature code.  It is not expected to be
+ * reached, so it panics, naming this function in the message.
+ *
+ * ip, cred and wkhd are unused.
+ */
+void
+softdep_inode_append(ip, cred, wkhd)
+	struct inode *ip;
+	struct ucred *cred;
+	struct workhead *wkhd;
+{
+
+	/* The message must carry the real function name to be traceable. */
+	panic("softdep_inode_append called");
+}
+
+/*
+ * Stub for softdep_freework(), defined ahead of the "#else" that
+ * introduces the soft-updates feature code.  Any call panics; wkhd is
+ * unused.
+ */
+void
+softdep_freework(struct workhead *wkhd)
+{
+
+	panic("softdep_freework called");
+}
+
#else
FEATURE(softupdates, "FFS soft-updates support");
@@ -867,7 +894,7 @@ static void freework_enqueue(struct freework *);
static int handle_workitem_freeblocks(struct freeblks *, int);
static int handle_complete_freeblocks(struct freeblks *, int);
static void handle_workitem_indirblk(struct freework *);
-static void handle_written_freework(struct freework *, int);
+static void handle_written_freework(struct freework *);
static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
static struct worklist *jnewblk_merge(struct worklist *, struct worklist *,
struct workhead *);
@@ -1632,6 +1659,7 @@ process_truncates(vp)
if (cgwait) {
FREE_LOCK(&lk);
sync_cgs(mp, MNT_WAIT);
+ ffs_sync_snap(mp, MNT_WAIT);
ACQUIRE_LOCK(&lk);
continue;
}
@@ -5922,7 +5950,7 @@ complete_trunc_indir(freework)
*/
if (bp == NULL) {
if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd))
- handle_written_freework(freework, 0);
+ handle_written_freework(freework);
else
WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd,
&freework->fw_list);
@@ -5974,7 +6002,7 @@ out:
*/
if (totblks > datablocks)
return (0);
- return (totblks - datablocks);
+ return (datablocks - totblks);
}
/*
@@ -7228,6 +7256,7 @@ freework_freeblock(freework)
cancel_jnewblk(jnewblk, &wkhd);
needj = 0;
} else if (needj) {
+ freework->fw_state |= DELAYEDFREE;
freeblks->fb_cgwait++;
WORKLIST_INSERT(&wkhd, &freework->fw_list);
}
@@ -7241,7 +7270,7 @@ freework_freeblock(freework)
* made it to disk. We can immediately free the freeblk.
*/
if (needj == 0)
- handle_written_freework(freework, 0);
+ handle_written_freework(freework);
}
/*
@@ -7256,7 +7285,8 @@ freework_enqueue(freework)
struct freeblks *freeblks;
freeblks = freework->fw_freeblks;
- WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
+ if ((freework->fw_state & INPROGRESS) == 0)
+ WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
if ((freeblks->fb_state &
(ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE &&
LIST_EMPTY(&freeblks->fb_jblkdephd))
@@ -7282,13 +7312,14 @@ handle_workitem_indirblk(freework)
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
fs = ump->um_fs;
if (freework->fw_state & DEPCOMPLETE) {
- handle_written_freework(freework, 0);
+ handle_written_freework(freework);
return;
}
if (freework->fw_off == NINDIR(fs)) {
freework_freeblock(freework);
return;
}
+ freework->fw_state |= INPROGRESS;
FREE_LOCK(&lk);
indir_trunc(freework, fsbtodb(fs, freework->fw_blkno),
freework->fw_lbn);
@@ -7301,16 +7332,16 @@ handle_workitem_indirblk(freework)
* the freeblks is added back to the worklist if there is more work to do.
*/
static void
-handle_written_freework(freework, cgwrite)
+handle_written_freework(freework)
struct freework *freework;
- int cgwrite;
{
struct freeblks *freeblks;
struct freework *parent;
freeblks = freework->fw_freeblks;
parent = freework->fw_parent;
- freeblks->fb_cgwait -= cgwrite;
+ if (freework->fw_state & DELAYEDFREE)
+ freeblks->fb_cgwait--;
freework->fw_state |= COMPLETE;
if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
WORKITEM_FREE(freework, D_FREEWORK);
@@ -7552,6 +7583,8 @@ indir_trunc(freework, dbn, lbn)
return;
}
ACQUIRE_LOCK(&lk);
+ /* Protects against a race with complete_trunc_indir(). */
+ freework->fw_state &= ~INPROGRESS;
/*
* If we have an indirdep we need to enforce the truncation order
* and discard it when it is complete.
@@ -7675,7 +7708,7 @@ indir_trunc(freework, dbn, lbn)
if (freework->fw_blkno == dbn) {
freework->fw_state |= ALLCOMPLETE;
ACQUIRE_LOCK(&lk);
- handle_written_freework(freework, 0);
+ handle_written_freework(freework);
FREE_LOCK(&lk);
}
return;
@@ -10368,8 +10401,7 @@ softdep_disk_write_complete(bp)
continue;
case D_FREEWORK:
- /* Freework on an indirect block, not bmsafemap. */
- handle_written_freework(WK_FREEWORK(wk), 0);
+ handle_written_freework(WK_FREEWORK(wk));
break;
case D_JSEGDEP:
@@ -10540,7 +10572,7 @@ handle_jwork(wkhd)
free_freedep(WK_FREEDEP(wk));
continue;
case D_FREEWORK:
- handle_written_freework(WK_FREEWORK(wk), 1);
+ handle_written_freework(WK_FREEWORK(wk));
continue;
default:
panic("handle_jwork: Unknown type %s\n",
@@ -12738,6 +12770,53 @@ clear_inodedeps(td)
}
}
+/*
+ * Move every work item found on wkhd onto the dependency list of bp
+ * (bp->b_dep).  The lk lock is held for the whole transfer, and the loop
+ * only ends once wkhd has no first element left.
+ */
+void
+softdep_buf_append(struct buf *bp, struct workhead *wkhd)
+{
+	struct worklist *item;
+
+	ACQUIRE_LOCK(&lk);
+	for (item = LIST_FIRST(wkhd); item != NULL; item = LIST_FIRST(wkhd)) {
+		WORKLIST_REMOVE(item);
+		WORKLIST_INSERT(&bp->b_dep, item);
+	}
+	FREE_LOCK(&lk);
+}
+
+/*
+ * Attach the work items on wkhd to the buffer that holds the on-disk
+ * copy of inode ip.
+ *
+ * The filesystem block containing the inode is read through ip->i_devvp
+ * using cred, and the items are moved onto that buffer with
+ * softdep_buf_append().  If the read fails the items cannot be attached,
+ * so they are handed to softdep_freework() instead.  The buffer is
+ * released on every path.
+ */
+void
+softdep_inode_append(ip, cred, wkhd)
+	struct inode *ip;
+	struct ucred *cred;
+	struct workhead *wkhd;
+{
+	struct buf *bp;
+	struct fs *fs;
+	int error;
+
+	fs = ip->i_fs;
+	error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+	    (int)fs->fs_bsize, cred, &bp);
+	if (error) {
+		/*
+		 * NOTE(review): this assumes bread() still hands back a held
+		 * buffer in bp when it fails, as described in buf(9) for this
+		 * tree; release it so the error path does not leak it.
+		 */
+		bqrelse(bp);
+		softdep_freework(wkhd);
+		return;
+	}
+	softdep_buf_append(bp, wkhd);
+	bqrelse(bp);
+}
+
+/*
+ * Pass the work items on wkhd to handle_jwork(), holding the lk lock
+ * around the call.
+ */
+void
+softdep_freework(struct workhead *wkhd)
+{
+
+	ACQUIRE_LOCK(&lk);
+	handle_jwork(wkhd);
+	FREE_LOCK(&lk);
+}
+
/*
* Function to determine if the buffer has outstanding dependencies
* that will cause a roll-back if the buffer is written. If wantcount
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 34b1758..733413d 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1838,6 +1838,8 @@ ufs_mkdir(ap)
#ifdef QUOTA
if ((error = getinoquota(ip)) ||
(error = chkiq(ip, 1, ucp, 0))) {
+ if (DOINGSOFTDEP(tvp))
+ softdep_revert_link(dp, ip);
UFS_VFREE(tvp, ip->i_number, dmode);
vput(tvp);
return (error);
@@ -1850,6 +1852,8 @@ ufs_mkdir(ap)
#ifdef QUOTA
if ((error = getinoquota(ip)) ||
(error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+ if (DOINGSOFTDEP(tvp))
+ softdep_revert_link(dp, ip);
UFS_VFREE(tvp, ip->i_number, dmode);
vput(tvp);
return (error);
@@ -2608,6 +2612,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
#ifdef QUOTA
if ((error = getinoquota(ip)) ||
(error = chkiq(ip, 1, ucp, 0))) {
+ if (DOINGSOFTDEP(tvp))
+ softdep_revert_link(pdir, ip);
UFS_VFREE(tvp, ip->i_number, mode);
vput(tvp);
return (error);
@@ -2620,6 +2626,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
#ifdef QUOTA
if ((error = getinoquota(ip)) ||
(error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+ if (DOINGSOFTDEP(tvp))
+ softdep_revert_link(pdir, ip);
UFS_VFREE(tvp, ip->i_number, mode);
vput(tvp);
return (error);
OpenPOWER on IntegriCloud