path: root/sys/ufs/ffs
author		kib <kib@FreeBSD.org>	2007-01-23 10:01:19 +0000
committer	kib <kib@FreeBSD.org>	2007-01-23 10:01:19 +0000
commit		fdd50404d125cf6f60a72b7d3483c1d35a62b6b0 (patch)
tree		b0e8fba9d4f75f20a608a5e6e9db64ce7633b0f4 /sys/ufs/ffs
parent		7ebb7b7367c8ec8782e3fd7dfa2fdcf410af02e4 (diff)
download	FreeBSD-src-fdd50404d125cf6f60a72b7d3483c1d35a62b6b0.zip
		FreeBSD-src-fdd50404d125cf6f60a72b7d3483c1d35a62b6b0.tar.gz
Cylinder group bitmaps and blocks containing the inode for a snapshot
file come after snaplock in the global lock order, while other ffs
device buffers come before snaplock. By itself, this could cause a
deadlock when bdwrite() tries to flush dirty buffers on a snapshotted
ffs. If, during the flush, COW activity for the snapshot needs to
allocate a block and ffs_alloccg() selects the cylinder group that is
being written by bdwrite(), the kernel would panic due to a recursive
buffer lock acquisition.

Avoid dealing with buffers in bdwrite() that are on the other side of
the snaplock divisor in the lock order than the buffer being written.
Add a new BOP, bop_bdflush(), to do the dirty buffer flushing for the
same vnode in bdwrite(). The default implementation, bufbdflush(),
refactors the code out of bdwrite(); for ffs device buffers, a
specialized implementation is used.

Reviewed by:	tegge, jeff, Russell Cattelan (cattelan xfs org, xfs changes)
Tested by:	Peter Holm
X-MFC after:	3 weeks (if ever: it changes ABI)
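The key interface change is that bdwrite() no longer hard-codes its
dirty-buffer flush policy: it dispatches through the buffer object's
buf_ops table, and a filesystem may override the default. Below is a
minimal user-space sketch of that dispatch pattern, not the kernel
code itself; only struct buf_ops and the bop_bdflush slot mirror the
interface this commit touches, and every demo_* name and trimmed-down
struct is invented for illustration.

/*
 * User-space model (not kernel code) of the hook this commit adds.
 */
#include <stdio.h>

struct bufobj;

struct buf {
	struct bufobj *b_bufobj;
};

struct buf_ops {
	const char *bop_name;
	void (*bop_bdflush)(struct bufobj *, struct buf *);
};

struct bufobj {
	struct buf_ops *bo_ops;
	int bo_dirtycnt;
};

/* Dispatch macro modeled on the kernel's BO_* convention. */
#define BO_BDFLUSH(bo, bp) ((bo)->bo_ops->bop_bdflush((bo), (bp)))

/* Stands in for the generic bufbdflush(): flush past a threshold. */
static void
demo_default_bdflush(struct bufobj *bo, struct buf *bp)
{
	(void)bp;
	if (bo->bo_dirtycnt > 10)
		printf("default policy: flush some dirty buffers\n");
}

/* Stands in for ffs_bdflush(): consult snapshot state first. */
static void
demo_ffs_bdflush(struct bufobj *bo, struct buf *bp)
{
	printf("ffs policy: skip buffers on the far side of snaplk\n");
	demo_default_bdflush(bo, bp);
}

static struct buf_ops demo_ffs_ops = {
	.bop_name = "demo_ffs_ops",
	.bop_bdflush = demo_ffs_bdflush,
};

int
main(void)
{
	struct bufobj bo = { .bo_ops = &demo_ffs_ops, .bo_dirtycnt = 42 };
	struct buf bp = { .b_bufobj = &bo };

	/* What bdwrite() now does instead of flushing inline. */
	BO_BDFLUSH(bp.b_bufobj, &bp);
	return (0);
}

The point of the pattern shows in main(): the caller never names the
ffs routine, so swapping flush policies is a one-line change in the
ops table.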
Diffstat (limited to 'sys/ufs/ffs')
-rw-r--r--	sys/ufs/ffs/ffs_extern.h	1
-rw-r--r--	sys/ufs/ffs/ffs_snapshot.c	114
-rw-r--r--	sys/ufs/ffs/ffs_vfsops.c	5
3 files changed, 120 insertions, 0 deletions
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 98f0f91..67986c1 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -61,6 +61,7 @@ ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *);
ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *);
int ffs_checkfreefile(struct fs *, struct vnode *, ino_t);
void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t);
+void ffs_bdflush(struct bufobj *, struct buf *);
int ffs_copyonwrite(struct vnode *, struct buf *);
int ffs_flushfiles(struct mount *, int, struct thread *);
void ffs_fragacct(struct fs *, int, int32_t [], int);
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 14b231c..d94bd6d 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -163,6 +163,7 @@ static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t);
static void process_deferred_inactive(struct mount *);
static void try_free_snapdata(struct vnode *devvp, struct thread *td);
+static int ffs_bp_snapblk(struct vnode *, struct buf *);
/*
* To ensure the consistency of snapshots across crashes, we must
@@ -2065,6 +2066,119 @@ ffs_snapshot_unmount(mp)
}
/*
+ * Check whether the block the buffer maps belongs to a device buffer
+ * that must be locked after snaplk. devvp must be locked on entry
+ * and is left locked upon exit.
+ */
+static int
+ffs_bp_snapblk(devvp, bp)
+ struct vnode *devvp;
+ struct buf *bp;
+{
+ struct snapdata *sn;
+ struct fs *fs;
+ ufs2_daddr_t lbn, *snapblklist;
+ int lower, upper, mid;
+
+ ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk");
+ KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp));
+ sn = devvp->v_rdev->si_snapdata;
+ if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL)
+ return (0);
+ fs = TAILQ_FIRST(&sn->sn_head)->i_fs;
+ lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
+ snapblklist = sn->sn_blklist;
+ upper = sn->sn_listsize - 1;
+ lower = 1;
+ while (lower <= upper) {
+ mid = (lower + upper) / 2;
+ if (snapblklist[mid] == lbn)
+ break;
+ if (snapblklist[mid] < lbn)
+ lower = mid + 1;
+ else
+ upper = mid - 1;
+ }
+ if (lower <= upper)
+ return (1);
+ return (0);
+}
+
+void
+ffs_bdflush(bo, bp)
+ struct bufobj *bo;
+ struct buf *bp;
+{
+ struct thread *td;
+ struct vnode *vp, *devvp;
+ struct buf *nbp;
+ int bp_bdskip;
+
+ if (bo->bo_dirty.bv_cnt <= dirtybufthresh)
+ return;
+
+ td = curthread;
+ vp = bp->b_vp;
+ devvp = bo->__bo_vnode;
+ KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp));
+
+ VI_LOCK(devvp);
+ bp_bdskip = ffs_bp_snapblk(devvp, bp);
+ if (bp_bdskip)
+ bdwriteskip++;
+ VI_UNLOCK(devvp);
+ if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) {
+ (void) VOP_FSYNC(vp, MNT_NOWAIT, td);
+ altbufferflushes++;
+ } else {
+ BO_LOCK(bo);
+ /*
+ * Try to find a buffer to flush.
+ */
+ TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
+ if ((nbp->b_vflags & BV_BKGRDINPROG) ||
+ BUF_LOCK(nbp,
+ LK_EXCLUSIVE | LK_NOWAIT, NULL))
+ continue;
+ if (bp == nbp)
+ panic("bdwrite: found ourselves");
+ BO_UNLOCK(bo);
+ /*
+ * Don't call buf_countdeps() with the
+ * bo lock held.
+ */
+ if (buf_countdeps(nbp, 0)) {
+ BO_LOCK(bo);
+ BUF_UNLOCK(nbp);
+ continue;
+ }
+ if (bp_bdskip) {
+ VI_LOCK(devvp);
+ if (!ffs_bp_snapblk(vp, nbp)) {
+ if (BO_MTX(bo) != VI_MTX(vp)) {
+ VI_UNLOCK(devvp);
+ BO_LOCK(bo);
+ }
+ BUF_UNLOCK(nbp);
+ continue;
+ }
+ VI_UNLOCK(devvp);
+ }
+ if (nbp->b_flags & B_CLUSTEROK) {
+ vfs_bio_awrite(nbp);
+ } else {
+ bremfree(nbp);
+ bawrite(nbp);
+ }
+ dirtybufferflushes++;
+ break;
+ }
+ if (nbp == NULL)
+ BO_UNLOCK(bo);
+ }
+}
+
+/*
* Check for need to copy block that is about to be written,
* copying the block if necessary.
*/
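The heart of ffs_bp_snapblk() above is a plain binary search over the
sorted sn_blklist array; the search starts at index 1, apparently
because slot 0 of the list holds metadata rather than a key. Below is
a standalone sketch of the same membership test, with invented names
(snapblk_member, and daddr64_t standing in for ufs2_daddr_t):

#include <stdio.h>
#include <stdint.h>

typedef int64_t daddr64_t;	/* stand-in for ufs2_daddr_t */

/* Return 1 if lbn is in the sorted key range list[1..listsize-1]. */
static int
snapblk_member(const daddr64_t *list, int listsize, daddr64_t lbn)
{
	int lower, upper, mid;

	lower = 1;
	upper = listsize - 1;
	while (lower <= upper) {
		mid = (lower + upper) / 2;
		if (list[mid] == lbn)
			return (1);	/* block is claimed by a snapshot */
		if (list[mid] < lbn)
			lower = mid + 1;
		else
			upper = mid - 1;
	}
	return (0);
}

int
main(void)
{
	/* Slot 0 is metadata; the sorted keys are 10, 17, 23. */
	daddr64_t list[] = { 3, 10, 17, 23 };

	printf("%d %d\n", snapblk_member(list, 4, 17),
	    snapblk_member(list, 4, 18));	/* prints "1 0" */
	return (0);
}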
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index a2591bc..225dd8a 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -117,6 +117,11 @@ static struct buf_ops ffs_ops = {
.bop_write = ffs_bufwrite,
.bop_strategy = ffs_geom_strategy,
.bop_sync = bufsync,
+#ifdef NO_FFS_SNAPSHOT
+ .bop_bdflush = bufbdflush,
+#else
+ .bop_bdflush = ffs_bdflush,
+#endif
};
static const char *ffs_opts[] = { "acls", "async", "atime", "clusterr",
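A note on the registration above: the bop_bdflush slot is filled in
either way, with the generic bufbdflush() when snapshots are compiled
out (NO_FFS_SNAPSHOT) and ffs_bdflush() otherwise, presumably so that
bdwrite() can dispatch through the hook unconditionally instead of
NULL-checking it.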