author    mckusick <mckusick@FreeBSD.org>    2000-01-10 00:24:24 +0000
committer mckusick <mckusick@FreeBSD.org>    2000-01-10 00:24:24 +0000
commit    d4409da21099a58aef95304ae121d7dd16935d28 (patch)
tree      7af2f6cd9369ccc04c3599bb76bfdc259edea5e5 /sys/kern/vfs_bio.c
parent    2f9951ffbdc2dff074f16cd490f451ce751a7710 (diff)
Several performance improvements for soft updates have been added:
1) Fastpath deletions. When a file is being deleted, check to see if it
   was so recently created that its inode has not yet been written to
   disk. If so, the delete can proceed to immediately free the inode.

2) Background writes: No file or block allocations can be done while the
   bitmap is being written to disk. To avoid these stalls, the bitmap is
   copied to another buffer which is written, thus leaving the original
   available for further allocations (see the sketch after this list).

3) Link count tracking. Constantly track the difference in i_effnlink
   and i_nlink so that inodes that have had no change other than
   i_effnlink need not be written.

4) Identify buffers with rollback dependencies so that the buffer
   flushing daemon can choose to skip over them.
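A rough userspace sketch of the copy-and-write idea in item 2, which is
the part of this change visible in vfs_bio.c below. The names here
(struct blockbuf, write_to_disk, bwrite_sketch) are illustrative
stand-ins, not the kernel's struct buf/bwrite API, and the synchronous
"completion" is a placeholder for what the real code does in an
asynchronous iodone handler:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for a cached disk block (not the kernel's struct buf). */
struct blockbuf {
	unsigned char data[8192];
	int bkgrd_write;	/* eligible for background writing (cf. BX_BKGRDWRITE) */
	int bkgrd_inprog;	/* a copy is in flight (cf. BX_BKGRDINPROG) */
};

/* Pretend disk I/O. */
static void
write_to_disk(const unsigned char *data, size_t len)
{
	(void)data;
	printf("wrote %zu bytes\n", len);
}

/*
 * Instead of keeping the block busy for the duration of its I/O,
 * clone it, write the clone, and hand the original straight back so
 * allocations against it (e.g. a cylinder-group bitmap) never stall.
 */
static void
bwrite_sketch(struct blockbuf *bp)
{
	if (bp->bkgrd_write && !bp->bkgrd_inprog) {
		struct blockbuf *copy = malloc(sizeof(*copy));

		if (copy != NULL) {
			memcpy(copy->data, bp->data, sizeof(bp->data));
			bp->bkgrd_inprog = 1;	/* pin original until the copy lands */
			write_to_disk(copy->data, sizeof(copy->data));
			bp->bkgrd_inprog = 0;	/* in the kernel, the iodone handler clears this */
			free(copy);
			return;			/* original stayed available throughout */
		}
		/* No memory for a copy: fall through and write in place. */
	}
	write_to_disk(bp->data, sizeof(bp->data));
}

int
main(void)
{
	static struct blockbuf bitmap = { .bkgrd_write = 1 };

	memset(bitmap.data, 0xff, sizeof(bitmap.data));
	bwrite_sketch(&bitmap);		/* bitmap.data is immediately reusable */
	return (0);
}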
Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r--    sys/kern/vfs_bio.c    149
1 file changed, 144 insertions(+), 5 deletions(-)
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 9d2b5c2..f12316b 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -68,6 +68,7 @@ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off,
static void vfs_clean_pages(struct buf * bp);
static void vfs_setdirty(struct buf *bp);
static void vfs_vmio_release(struct buf *bp);
+static void vfs_backgroundwritedone(struct buf *bp);
static int flushbufqueues(void);
static int bd_request;
@@ -349,7 +350,7 @@ bufinit(void)
* buffer cache operation.
*/
maxbufspace = (nbuf + 8) * DFLTBSIZE;
- hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 5);
+ hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10);
/*
* Limit the amount of malloc memory since it is wired permanently into
* the kernel space. Even though this is accounted for in the buffer
@@ -593,6 +594,7 @@ int
bwrite(struct buf * bp)
{
int oldflags, s;
+ struct buf *newbp;
if (bp->b_flags & B_INVAL) {
brelse(bp);
@@ -606,8 +608,66 @@ bwrite(struct buf * bp)
panic("bwrite: buffer is not busy???");
#endif
s = splbio();
+ /*
+ * If a background write is already in progress, delay
+ * writing this block if it is asynchronous. Otherwise
+ * wait for the background write to complete.
+ */
+ if (bp->b_xflags & BX_BKGRDINPROG) {
+ if (bp->b_flags & B_ASYNC) {
+ splx(s);
+ bdwrite(bp);
+ return (0);
+ }
+ bp->b_xflags |= BX_BKGRDWAIT;
+ tsleep(&bp->b_xflags, PRIBIO, "biord", 0);
+ if (bp->b_xflags & BX_BKGRDINPROG)
+ panic("bwrite: still writing");
+ }
+
+ /* Mark the buffer clean */
bundirty(bp);
+ /*
+ * If this buffer is marked for background writing and we
+ * do not have to wait for it, make a copy and write the
+ * copy so as to leave this buffer ready for further use.
+ */
+ if ((bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC)) {
+ if (bp->b_flags & B_CALL)
+ panic("bwrite: need chained iodone");
+
+ /* get a new block */
+ newbp = geteblk(bp->b_bufsize);
+
+ /* set it to be identical to the old block */
+ memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
+ bgetvp(bp->b_vp, newbp);
+ newbp->b_lblkno = bp->b_lblkno;
+ newbp->b_blkno = bp->b_blkno;
+ newbp->b_offset = bp->b_offset;
+ newbp->b_iodone = vfs_backgroundwritedone;
+ newbp->b_flags |= B_ASYNC | B_CALL;
+ newbp->b_flags &= ~B_INVAL;
+
+ /* move over the dependencies */
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_movedeps)
+ (*bioops.io_movedeps)(bp, newbp);
+
+ /*
+ * Initiate write on the copy, release the original to
+ * the B_LOCKED queue so that it cannot go away until
+ * the background write completes. If not locked it could go
+ * away and then be reconstituted while it was being written.
+ * If the reconstituted buffer were written, we could end up
+ * with two background copies being written at the same time.
+ */
+ bp->b_xflags |= BX_BKGRDINPROG;
+ bp->b_flags |= B_LOCKED;
+ bqrelse(bp);
+ bp = newbp;
+ }
+
bp->b_flags &= ~(B_READ | B_DONE | B_ERROR);
bp->b_flags |= B_WRITEINPROG | B_CACHE;
@@ -630,6 +690,56 @@ bwrite(struct buf * bp)
}
/*
+ * Complete a background write started from bwrite.
+ */
+static void
+vfs_backgroundwritedone(bp)
+ struct buf *bp;
+{
+ struct buf *origbp;
+
+ /*
+ * Find the original buffer that we are writing.
+ */
+ if ((origbp = gbincore(bp->b_vp, bp->b_lblkno)) == NULL)
+ panic("backgroundwritedone: lost buffer");
+ /*
+ * Process dependencies then return any unfinished ones.
+ */
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
+ (*bioops.io_complete)(bp);
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_movedeps)
+ (*bioops.io_movedeps)(bp, origbp);
+ /*
+ * Clear the BX_BKGRDINPROG flag in the original buffer
+ * and awaken it if it is waiting for the write to complete.
+ */
+ origbp->b_xflags &= ~BX_BKGRDINPROG;
+ if (origbp->b_xflags & BX_BKGRDWAIT) {
+ origbp->b_xflags &= ~BX_BKGRDWAIT;
+ wakeup(&origbp->b_xflags);
+ }
+ /*
+ * Clear the B_LOCKED flag and remove it from the locked
+ * queue if it currently resides there.
+ */
+ origbp->b_flags &= ~B_LOCKED;
+ if (BUF_LOCK(origbp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
+ bremfree(origbp);
+ bqrelse(origbp);
+ }
+ /*
+ * This buffer is marked B_NOCACHE, so when it is released
+ * by biodone, it will be tossed. We mark it with B_READ
+ * to avoid biodone doing a second vwakeup.
+ */
+ bp->b_flags |= B_NOCACHE | B_READ;
+ bp->b_flags &= ~(B_CACHE | B_CALL | B_DONE);
+ bp->b_iodone = 0;
+ biodone(bp);
+}
+
+/*
* Delayed write. (Buffer is marked dirty). Do not bother writing
* anything if the buffer is marked invalid.
*
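The BX_BKGRDINPROG/BX_BKGRDWAIT handshake between bwrite() and
vfs_backgroundwritedone() above can be modeled in userspace with a
mutex and condition variable standing in for the kernel's
tsleep()/wakeup() on &bp->b_xflags. A minimal sketch with illustrative
names only, not kernel code:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct buf_model {
	pthread_mutex_t	lock;
	pthread_cond_t	done;
	int		bkgrd_inprog;	/* models BX_BKGRDINPROG */
};

/* Completion side: what vfs_backgroundwritedone() does for a waiter. */
static void *
bg_write_done(void *arg)
{
	struct buf_model *bp = arg;

	sleep(1);				/* pretend the copy's I/O takes a while */
	pthread_mutex_lock(&bp->lock);
	bp->bkgrd_inprog = 0;			/* clear BX_BKGRDINPROG */
	pthread_cond_broadcast(&bp->done);	/* cf. wakeup(&origbp->b_xflags) */
	pthread_mutex_unlock(&bp->lock);
	return (NULL);
}

/*
 * Foreground side: a synchronous write of the same block must wait for
 * the in-flight background copy; cf. the BX_BKGRDWAIT/tsleep() code in
 * bwrite() above (the kernel panics rather than re-checking in a loop).
 */
static void
fg_bwrite(struct buf_model *bp)
{
	pthread_mutex_lock(&bp->lock);
	while (bp->bkgrd_inprog)
		pthread_cond_wait(&bp->done, &bp->lock);
	pthread_mutex_unlock(&bp->lock);
	printf("background copy done; foreground write proceeds\n");
}

int
main(void)
{
	struct buf_model bp = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 1
	};
	pthread_t t;

	pthread_create(&t, NULL, bg_write_done, &bp);
	fg_bwrite(&bp);		/* blocks until bg_write_done() runs */
	pthread_join(t, NULL);
	return (0);
}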
@@ -757,6 +867,10 @@ bundirty(bp)
--numdirtybuffers;
numdirtywakeup();
}
+ /*
+ * Since it is now being written, we can clear its deferred write flag.
+ */
+ bp->b_flags &= ~B_DEFERRED;
}
/*
@@ -895,12 +1009,16 @@ brelse(struct buf * bp)
*
* Normally we can do this whether a buffer is B_DELWRI or not. If
* the buffer is an NFS buffer, it is tracking piecemeal writes or
- * the commit state and we cannot afford to lose the buffer.
+ * the commit state and we cannot afford to lose the buffer. If the
+ * buffer has a background write in progress, we need to keep it
+ * around to prevent it from being reconstituted and starting a second
+ * background write.
*/
if ((bp->b_flags & B_VMIO)
&& !(bp->b_vp->v_tag == VT_NFS &&
!vn_isdisk(bp->b_vp) &&
- (bp->b_flags & B_DELWRI))
+ (bp->b_flags & B_DELWRI) &&
+ (bp->b_xflags & BX_BKGRDINPROG))
) {
int i, j, resid;
@@ -997,6 +1115,9 @@ brelse(struct buf * bp)
/* buffers with no memory */
if (bp->b_bufsize == 0) {
bp->b_flags |= B_INVAL;
+ bp->b_xflags &= ~BX_BKGRDWRITE;
+ if (bp->b_xflags & BX_BKGRDINPROG)
+ panic("losing buffer 1");
if (bp->b_kvasize) {
bp->b_qindex = QUEUE_EMPTYKVA;
kvawakeup = 1;
@@ -1011,6 +1132,9 @@ brelse(struct buf * bp)
/* buffers with junk contents */
} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) {
bp->b_flags |= B_INVAL;
+ bp->b_xflags &= ~BX_BKGRDWRITE;
+ if (bp->b_xflags & BX_BKGRDINPROG)
+ panic("losing buffer 2");
bp->b_qindex = QUEUE_CLEAN;
if (bp->b_kvasize)
kvawakeup = 1;
@@ -1501,6 +1625,8 @@ restart:
}
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
(*bioops.io_deallocate)(bp);
+ if (bp->b_xflags & BX_BKGRDINPROG)
+ panic("losing buffer 3");
LIST_REMOVE(bp, b_hash);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
@@ -1508,6 +1634,7 @@ restart:
allocbuf(bp, 0);
bp->b_flags = 0;
+ bp->b_xflags = 0;
bp->b_dev = NODEV;
bp->b_vp = NULL;
bp->b_blkno = bp->b_lblkno = 0;
@@ -1761,7 +1888,8 @@ flushbufqueues(void)
while (bp) {
KASSERT((bp->b_flags & B_DELWRI), ("unexpected clean buffer %p", bp));
- if ((bp->b_flags & B_DELWRI) != 0) {
+ if ((bp->b_flags & B_DELWRI) != 0 &&
+ (bp->b_xflags & BX_BKGRDINPROG) == 0) {
if (bp->b_flags & B_INVAL) {
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
panic("flushbufqueues: locked buf");
@@ -1770,13 +1898,24 @@ flushbufqueues(void)
++r;
break;
}
+ if (LIST_FIRST(&bp->b_dep) != NULL &&
+ bioops.io_countdeps &&
+ (bp->b_flags & B_DEFERRED) == 0 &&
+ (*bioops.io_countdeps)(bp, 0)) {
+ TAILQ_REMOVE(&bufqueues[QUEUE_DIRTY],
+ bp, b_freelist);
+ TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY],
+ bp, b_freelist);
+ bp->b_flags |= B_DEFERRED;
+ continue;
+ }
vfs_bio_awrite(bp);
++r;
break;
}
bp = TAILQ_NEXT(bp, b_freelist);
}
- return(r);
+ return (r);
}
/*
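The deferral logic added to flushbufqueues() can be pictured with the
same BSD sys/queue.h macros in userspace: a dirty buffer whose rollback
dependencies are not yet satisfied is rotated to the tail of the queue
and marked B_DEFERRED so cheaper buffers are written first, while a
buffer already marked deferred is written anyway rather than starved.
A simplified model with illustrative names (the kernel loop is
structured around writing one buffer per call):

#include <sys/queue.h>
#include <stdio.h>

struct mbuf_entry {
	TAILQ_ENTRY(mbuf_entry)	link;
	int		deferred;		/* models B_DEFERRED */
	int		unfinished_deps;	/* models (*bioops.io_countdeps)(bp, 0) != 0 */
	const char	*name;
};

TAILQ_HEAD(dirtyq, mbuf_entry);

/* Flush one buffer per call, deferring dependency-laden ones once. */
static int
flush_one(struct dirtyq *q)
{
	struct mbuf_entry *bp = TAILQ_FIRST(q);

	while (bp != NULL) {
		if (bp->unfinished_deps && !bp->deferred) {
			TAILQ_REMOVE(q, bp, link);
			TAILQ_INSERT_TAIL(q, bp, link);
			bp->deferred = 1;
			bp = TAILQ_FIRST(q);	/* look for a cheaper buffer */
			continue;
		}
		printf("flushing %s\n", bp->name);
		TAILQ_REMOVE(q, bp, link);
		return (1);
	}
	return (0);
}

int
main(void)
{
	struct dirtyq q = TAILQ_HEAD_INITIALIZER(q);
	struct mbuf_entry a = { .name = "cg-bitmap", .unfinished_deps = 1 };
	struct mbuf_entry b = { .name = "data-block" };

	TAILQ_INSERT_TAIL(&q, &a, link);
	TAILQ_INSERT_TAIL(&q, &b, link);
	while (flush_one(&q))		/* writes data-block first, cg-bitmap second */
		;
	return (0);
}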