summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/geom/geom_vfs.c1
-rw-r--r--sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c7
-rw-r--r--sys/kern/vfs_bio.c93
-rw-r--r--sys/nfs4client/nfs4_vnops.c1
-rw-r--r--sys/nfsclient/nfs_vnops.c1
-rw-r--r--sys/sys/buf.h4
-rw-r--r--sys/sys/bufobj.h4
-rw-r--r--sys/ufs/ffs/ffs_extern.h1
-rw-r--r--sys/ufs/ffs/ffs_snapshot.c114
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c5
10 files changed, 189 insertions, 42 deletions
diff --git a/sys/geom/geom_vfs.c b/sys/geom/geom_vfs.c
index bff516b..4a33d22 100644
--- a/sys/geom/geom_vfs.c
+++ b/sys/geom/geom_vfs.c
@@ -50,6 +50,7 @@ static struct buf_ops __g_vfs_bufops = {
.bop_write = bufwrite,
.bop_strategy = g_vfs_strategy,
.bop_sync = bufsync,
+ .bop_bdflush = bufbdflush
};
struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;
diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c b/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c
index cf4ae89..da3650b 100644
--- a/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c
+++ b/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c
@@ -497,9 +497,16 @@ xfs_geom_bufsync(struct bufobj *bo, int waitfor, struct thread *td)
return bufsync(bo,waitfor,td);
}
+static void
+xfs_geom_bufbdflush(struct bufobj *bo, struct buf *bp)
+{
+ bufbdflush(bo, bp); /* no XFS-specific handling: forward both arguments unchanged to the generic bufbdflush() */
+}
+
struct buf_ops xfs_bo_ops = {
.bop_name = "XFS",
.bop_write = xfs_geom_bufwrite,
.bop_strategy = xfs_geom_strategy,
.bop_sync = xfs_geom_bufsync,
+ .bop_bdflush = xfs_geom_bufbdflush,
};
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 6b3447a..cb7d16f 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -80,6 +80,7 @@ struct buf_ops buf_ops_bio = {
.bop_write = bufwrite,
.bop_strategy = bufstrategy,
.bop_sync = bufsync,
+ .bop_bdflush = bufbdflush,
};
/*
@@ -146,10 +147,13 @@ SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0,
static int hirunningspace;
SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0,
"Maximum amount of space to use for in-progress I/O");
-static int dirtybufferflushes;
+int dirtybufferflushes;
SYSCTL_INT(_vfs, OID_AUTO, dirtybufferflushes, CTLFLAG_RW, &dirtybufferflushes,
0, "Number of bdwrite to bawrite conversions to limit dirty buffers");
-static int altbufferflushes;
+int bdwriteskip;
+SYSCTL_INT(_vfs, OID_AUTO, bdwriteskip, CTLFLAG_RW, &bdwriteskip,
+ 0, "Number of buffers supplied to bdwrite with snapshot deadlock risk");
+int altbufferflushes;
SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes,
0, "Number of fsync flushes to limit dirty buffers");
static int recursiveflushes;
@@ -164,7 +168,7 @@ SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0,
static int hidirtybuffers;
SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0,
"When the number of dirty buffers is considered severe");
-static int dirtybufthresh;
+int dirtybufthresh;
SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh,
0, "Number of bdwrite to bawrite conversions to clear dirty buffers");
static int numfreebuffers;
@@ -886,6 +890,47 @@ bufwrite(struct buf *bp)
return (0);
}
+void
+bufbdflush(struct bufobj *bo, struct buf *bp)
+{
+ struct buf *nbp;
+ /*
+ * Generic bop_bdflush implementation; bdwrite() reaches it through
+ * BO_BDFLUSH(bo, bp) when TDP_COWINPROGRESS is not set.
+ *
+ * bo is the buffer object whose dirty count is examined and bp is
+ * the buffer being handed to bdwrite(). Nothing is returned.
+ *
+ * - dirty count > dirtybufthresh + 10: issue VOP_FSYNC(MNT_NOWAIT)
+ *   on bp's vnode and bump altbufferflushes.
+ * - dirty count > dirtybufthresh: write out at most one other dirty
+ *   buffer of bo and bump dirtybufferflushes.
+ * - otherwise: do nothing.
+ *
+ * NOTE(review): bv_cnt is read below without BO_LOCK held, whereas
+ * the code this replaces in bdwrite() took BO_LOCK before the
+ * comparison; confirm that the unlocked read is intended.
+ */
+ if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10) {
+ (void) VOP_FSYNC(bp->b_vp, MNT_NOWAIT, curthread);
+ altbufferflushes++;
+ } else if (bo->bo_dirty.bv_cnt > dirtybufthresh) {
+ BO_LOCK(bo);
+ /*
+ * Try to find a buffer to flush.
+ *
+ * A candidate is skipped when it has BV_BKGRDINPROG set or
+ * when its buffer lock cannot be taken immediately
+ * (LK_NOWAIT). The bo lock is held at the top of every
+ * iteration.
+ */
+ TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
+ if ((nbp->b_vflags & BV_BKGRDINPROG) ||
+ BUF_LOCK(nbp,
+ LK_EXCLUSIVE | LK_NOWAIT, NULL))
+ continue;
+ if (bp == nbp)
+ panic("bdwrite: found ourselves");
+ BO_UNLOCK(bo);
+ /*
+ * Don't countdeps with the bo lock held.
+ * If buf_countdeps() returns non-zero the candidate is
+ * rejected: the bo lock is re-taken first so that the
+ * loop invariant holds again before continuing.
+ */
+ if (buf_countdeps(nbp, 0)) {
+ BO_LOCK(bo);
+ BUF_UNLOCK(nbp);
+ continue;
+ }
+ if (nbp->b_flags & B_CLUSTEROK) {
+ vfs_bio_awrite(nbp);
+ } else {
+ bremfree(nbp);
+ bawrite(nbp);
+ }
+ dirtybufferflushes++;
+ break; /* one buffer per call; the bo lock was already dropped above */
+ }
+ if (nbp == NULL) /* scan ended without a break, so the bo lock is still held */
+ BO_UNLOCK(bo);
+ }
+}
+
/*
* Delayed write. (Buffer is marked dirty). Do not bother writing
* anything if the buffer is marked invalid.
@@ -900,7 +945,6 @@ bdwrite(struct buf *bp)
{
struct thread *td = curthread;
struct vnode *vp;
- struct buf *nbp;
struct bufobj *bo;
CTR3(KTR_BUF, "bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
@@ -921,44 +965,9 @@ bdwrite(struct buf *bp)
*/
vp = bp->b_vp;
bo = bp->b_bufobj;
- if ((td->td_pflags & TDP_COWINPROGRESS) == 0) {
- BO_LOCK(bo);
- if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10) {
- BO_UNLOCK(bo);
- (void) VOP_FSYNC(vp, MNT_NOWAIT, td);
- altbufferflushes++;
- } else if (bo->bo_dirty.bv_cnt > dirtybufthresh) {
- /*
- * Try to find a buffer to flush.
- */
- TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
- if ((nbp->b_vflags & BV_BKGRDINPROG) ||
- BUF_LOCK(nbp,
- LK_EXCLUSIVE | LK_NOWAIT, NULL))
- continue;
- if (bp == nbp)
- panic("bdwrite: found ourselves");
- BO_UNLOCK(bo);
- /* Don't countdeps with the bo lock held. */
- if (buf_countdeps(nbp, 0)) {
- BO_LOCK(bo);
- BUF_UNLOCK(nbp);
- continue;
- }
- if (nbp->b_flags & B_CLUSTEROK) {
- vfs_bio_awrite(nbp);
- } else {
- bremfree(nbp);
- bawrite(nbp);
- }
- dirtybufferflushes++;
- break;
- }
- if (nbp == NULL)
- BO_UNLOCK(bo);
- } else
- BO_UNLOCK(bo);
- } else
+ if ((td->td_pflags & TDP_COWINPROGRESS) == 0)
+ BO_BDFLUSH(bo, bp);
+ else
recursiveflushes++;
bdirty(bp);
diff --git a/sys/nfs4client/nfs4_vnops.c b/sys/nfs4client/nfs4_vnops.c
index 5867471..419fc88 100644
--- a/sys/nfs4client/nfs4_vnops.c
+++ b/sys/nfs4client/nfs4_vnops.c
@@ -2874,4 +2874,5 @@ struct buf_ops buf_ops_nfs4 = {
.bop_write = nfs4_bwrite,
.bop_strategy = bufstrategy,
.bop_sync = bufsync,
+ .bop_bdflush = bufbdflush,
};
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index 4308244..c53c913 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -3275,4 +3275,5 @@ struct buf_ops buf_ops_nfs = {
.bop_write = nfs_bwrite,
.bop_strategy = bufstrategy,
.bop_sync = bufsync,
+ .bop_bdflush = bufbdflush,
};
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 359be84..a561aaa 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -483,6 +483,10 @@ extern int maxswzone; /* Max KVA for swap structures */
extern int maxbcache; /* Max KVA for buffer cache */
extern int runningbufspace;
extern int hibufspace;
+extern int dirtybufthresh;
+extern int bdwriteskip;
+extern int dirtybufferflushes;
+extern int altbufferflushes;
extern int buf_maxio; /* nominal maximum I/O for buffer */
extern struct buf *buf; /* The buffer headers. */
extern char *buffers; /* The buffer contents. */
diff --git a/sys/sys/bufobj.h b/sys/sys/bufobj.h
index ea568e4..6650702 100644
--- a/sys/sys/bufobj.h
+++ b/sys/sys/bufobj.h
@@ -70,17 +70,20 @@ struct bufv {
typedef void b_strategy_t(struct bufobj *, struct buf *);
typedef int b_write_t(struct buf *);
typedef int b_sync_t(struct bufobj *, int waitfor, struct thread *td);
+typedef void b_bdflush_t(struct bufobj *, struct buf *);
struct buf_ops {
char *bop_name;
b_write_t *bop_write;
b_strategy_t *bop_strategy;
b_sync_t *bop_sync;
+ b_bdflush_t *bop_bdflush; /* dirty-buffer flush hook; BO_BDFLUSH() calls it with no NULL check, so every buf_ops must set it */
};
#define BO_STRATEGY(bo, bp) ((bo)->bo_ops->bop_strategy((bo), (bp)))
#define BO_SYNC(bo, w, td) ((bo)->bo_ops->bop_sync((bo), (w), (td)))
#define BO_WRITE(bo, bp) ((bo)->bo_ops->bop_write((bp)))
+#define BO_BDFLUSH(bo, bp) ((bo)->bo_ops->bop_bdflush((bo), (bp)))
struct bufobj {
struct mtx *bo_mtx; /* Mutex which protects "i" things */
@@ -130,6 +133,7 @@ void bufobj_wrefl(struct bufobj *bo);
int bufobj_invalbuf(struct bufobj *bo, int flags, struct thread *td, int slpflag, int slptimeo);
int bufobj_wwait(struct bufobj *bo, int slpflag, int timeo);
int bufsync(struct bufobj *bo, int waitfor, struct thread *td);
+void bufbdflush(struct bufobj *bo, struct buf *bp);
#endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
#endif /* _SYS_BUFOBJ_H_ */
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 98f0f91..67986c1 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -61,6 +61,7 @@ ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *);
ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *);
int ffs_checkfreefile(struct fs *, struct vnode *, ino_t);
void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t);
+void ffs_bdflush(struct bufobj *, struct buf *);
int ffs_copyonwrite(struct vnode *, struct buf *);
int ffs_flushfiles(struct mount *, int, struct thread *);
void ffs_fragacct(struct fs *, int, int32_t [], int);
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 14b231c..d94bd6d 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -163,6 +163,7 @@ static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t);
static void process_deferred_inactive(struct mount *);
static void try_free_snapdata(struct vnode *devvp, struct thread *td);
+static int ffs_bp_snapblk(struct vnode *, struct buf *);
/*
* To ensure the consistency of snapshots across crashes, we must
@@ -2065,6 +2066,119 @@ ffs_snapshot_unmount(mp)
}
/*
+ * Check whether the buffer's block belongs to the device buffers that
+ * shall be locked after snaplk. devvp shall be locked on entry, and will
+ * be left locked upon exit.
+ */
+static int
+ffs_bp_snapblk(devvp, bp)
+ struct vnode *devvp;
+ struct buf *bp;
+{
+ struct snapdata *sn;
+ struct fs *fs;
+ ufs2_daddr_t lbn, *snapblklist;
+ int lower, upper, mid;
+ /*
+ * Returns 1 when the block number derived from bp->b_blkno is
+ * present in the snapshot block list hanging off devvp's device
+ * (si_snapdata->sn_blklist), and 0 otherwise, including when there
+ * is no snapdata or its sn_head list is empty.
+ */
+ ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk");
+ KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp));
+ sn = devvp->v_rdev->si_snapdata;
+ if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL)
+ return (0);
+ fs = TAILQ_FIRST(&sn->sn_head)->i_fs; /* fs is taken from the first entry on sn_head */
+ lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); /* b_blkno converted with dbtofsb() and then fragstoblks() */
+ snapblklist = sn->sn_blklist;
+ upper = sn->sn_listsize - 1;
+ lower = 1; /* NOTE(review): slot 0 is never compared; presumably it does not hold a block number, confirm against the sn_blklist layout */
+ while (lower <= upper) { /* binary search; relies on entries 1..sn_listsize-1 being sorted ascending (TODO confirm) */
+ mid = (lower + upper) / 2;
+ if (snapblklist[mid] == lbn)
+ break;
+ if (snapblklist[mid] < lbn)
+ lower = mid + 1;
+ else
+ upper = mid - 1;
+ }
+ if (lower <= upper) /* still true only when the loop left through the break, i.e. on a match */
+ return (1);
+ return (0);
+}
+
+void
+ffs_bdflush(bo, bp)
+ struct bufobj *bo;
+ struct buf *bp;
+{
+ struct thread *td;
+ struct vnode *vp, *devvp;
+ struct buf *nbp;
+ int bp_bdskip;
+ /*
+ * FFS bop_bdflush implementation (installed in ffs_ops unless
+ * NO_FFS_SNAPSHOT is defined). It follows the same thresholds as
+ * bufbdflush() but consults ffs_bp_snapblk() first:
+ *
+ * - bp not in the snapshot block list: VOP_FSYNC(MNT_NOWAIT) when
+ *   the dirty count exceeds dirtybufthresh + 10, otherwise write
+ *   out one other dirty buffer of bo.
+ * - bp in the snapshot block list: bdwriteskip is bumped, the
+ *   VOP_FSYNC path is never taken, and only a dirty buffer that is
+ *   itself in the snapshot block list is written out.
+ *
+ * Presumably this avoids the snapshot deadlock that the bdwriteskip
+ * sysctl description mentions; the mechanism is not visible here.
+ *
+ * The early return below reads bv_cnt without BO_LOCK held.
+ */
+ if (bo->bo_dirty.bv_cnt <= dirtybufthresh)
+ return;
+
+ td = curthread;
+ vp = bp->b_vp;
+ devvp = bo->__bo_vnode;
+ KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp)); /* vp and devvp are used interchangeably below on the strength of this */
+
+ VI_LOCK(devvp);
+ bp_bdskip = ffs_bp_snapblk(devvp, bp);
+ if (bp_bdskip)
+ bdwriteskip++;
+ VI_UNLOCK(devvp);
+ if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) {
+ (void) VOP_FSYNC(vp, MNT_NOWAIT, td);
+ altbufferflushes++;
+ } else {
+ BO_LOCK(bo);
+ /*
+ * Try to find a buffer to flush.
+ *
+ * A candidate is skipped when it has BV_BKGRDINPROG set or
+ * when its buffer lock cannot be taken immediately
+ * (LK_NOWAIT). The bo lock is held at the top of every
+ * iteration.
+ */
+ TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
+ if ((nbp->b_vflags & BV_BKGRDINPROG) ||
+ BUF_LOCK(nbp,
+ LK_EXCLUSIVE | LK_NOWAIT, NULL))
+ continue;
+ if (bp == nbp)
+ panic("bdwrite: found ourselves");
+ BO_UNLOCK(bo);
+ /*
+ * Don't countdeps with the bo lock
+ * held.
+ * A candidate for which buf_countdeps() returns
+ * non-zero is rejected after re-taking the bo lock.
+ */
+ if (buf_countdeps(nbp, 0)) {
+ BO_LOCK(bo);
+ BUF_UNLOCK(nbp);
+ continue;
+ }
+ if (bp_bdskip) {
+ VI_LOCK(devvp);
+ if (!ffs_bp_snapblk(vp, nbp)) { /* candidate is not in the snapshot block list: reject it */
+ if (BO_MTX(bo) != VI_MTX(vp)) { /* if both are the same mutex, the VI_LOCK above already serves as the bo lock the loop expects */
+ VI_UNLOCK(devvp);
+ BO_LOCK(bo);
+ }
+ BUF_UNLOCK(nbp);
+ continue;
+ }
+ VI_UNLOCK(devvp);
+ }
+ if (nbp->b_flags & B_CLUSTEROK) {
+ vfs_bio_awrite(nbp);
+ } else {
+ bremfree(nbp);
+ bawrite(nbp);
+ }
+ dirtybufferflushes++;
+ break; /* one buffer per call; neither the bo lock nor the interlock is held here */
+ }
+ if (nbp == NULL) /* scan ended without a break, so the bo lock is still held */
+ BO_UNLOCK(bo);
+ }
+}
+
+/*
* Check for need to copy block that is about to be written,
* copying the block if necessary.
*/
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index a2591bc..225dd8a 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -117,6 +117,11 @@ static struct buf_ops ffs_ops = {
.bop_write = ffs_bufwrite,
.bop_strategy = ffs_geom_strategy,
.bop_sync = bufsync,
+#ifdef NO_FFS_SNAPSHOT
+ .bop_bdflush = bufbdflush,
+#else
+ .bop_bdflush = ffs_bdflush,
+#endif
};
static const char *ffs_opts[] = { "acls", "async", "atime", "clusterr",
OpenPOWER on IntegriCloud