summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2015-06-27 09:44:14 +0000
committerkib <kib@FreeBSD.org>2015-06-27 09:44:14 +0000
commit9f65a2d8d999b24bb5d9e9b9dad30a1c98e3f010 (patch)
treeb314165eb762cbcca7d9865cb7565966fa428720 /sys
parent511fb8f89945627d580acd49ab7064c624ebaa62 (diff)
downloadFreeBSD-src-9f65a2d8d999b24bb5d9e9b9dad30a1c98e3f010.zip
FreeBSD-src-9f65a2d8d999b24bb5d9e9b9dad30a1c98e3f010.tar.gz
Handle errors from background write of the cylinder group blocks.
First, on a write error, the bufdone() call from ffs_backgroundwritedone() panics because pbrelvp() cleared bp->b_bufobj, while brelse() would try to re-dirty the copy of the cg buffer. Handle this by setting B_INVAL for the case of BIO_ERROR. Second, we must re-dirty the real buffer containing the cylinder group block data when the background write failed. The real cg buffer was already marked clean in ffs_bufwrite(). After the BV_BKGRDINPROG flag is cleared on the real cg buffer in ffs_backgroundwrite(), the buffer scan may reuse the buffer at any moment. The result is a lost write, and if the write error was only transient, we get corrupted bitmaps. We cannot re-dirty the original cg buffer in ffs_backgroundwritedone(), since the context is not sleepable, preventing us from sleeping for origbp's lock. Add the BV_BKGRDERR flag (protected by the buffer object lock), which is converted into a delayed write by brelse(), bqrelse() and the buffer scan. In collaboration with: Conrad Meyer <cse.cem@gmail.com> Reviewed by: mckusick Sponsored by: The FreeBSD Foundation (kib), EMC/Isilon storage division (Conrad) MFC after: 2 weeks
Diffstat (limited to 'sys')
-rw-r--r--sys/kern/vfs_bio.c22
-rw-r--r--sys/sys/buf.h3
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c20
3 files changed, 42 insertions, 3 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c715725..b380578 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1597,6 +1597,12 @@ brelse(struct buf *bp)
return;
}
+ if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) {
+ BO_LOCK(bp->b_bufobj);
+ bp->b_vflags &= ~BV_BKGRDERR;
+ BO_UNLOCK(bp->b_bufobj);
+ bdirty(bp);
+ }
if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
bp->b_error == EIO && !(bp->b_flags & B_INVAL)) {
/*
@@ -1853,7 +1859,11 @@ bqrelse(struct buf *bp)
}
/* buffers with stale but valid contents */
- if (bp->b_flags & B_DELWRI) {
+ if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG |
+ BV_BKGRDERR)) == BV_BKGRDERR) {
+ BO_LOCK(bp->b_bufobj);
+ bp->b_vflags &= ~BV_BKGRDERR;
+ BO_UNLOCK(bp->b_bufobj);
qindex = QUEUE_DIRTY;
} else {
if ((bp->b_flags & B_DELWRI) == 0 &&
@@ -2372,6 +2382,16 @@ restart:
continue;
}
+ /*
+ * Requeue the background write buffer with error.
+ */
+ if ((bp->b_vflags & BV_BKGRDERR) != 0) {
+ bremfreel(bp);
+ mtx_unlock(&bqclean);
+ bqrelse(bp);
+ continue;
+ }
+
KASSERT(bp->b_qindex == qindex,
("getnewbuf: inconsistent queue %d bp %p", qindex, bp));
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 4e2af1a..49ec4d5 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -253,8 +253,9 @@ struct buf {
#define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */
#define BV_BKGRDINPROG 0x00000002 /* Background write in progress */
#define BV_BKGRDWAIT 0x00000004 /* Background write waiting */
+#define BV_BKGRDERR 0x00000008 /* Error from background write */
-#define PRINT_BUF_VFLAGS "\20\3bkgrdwait\2bkgrdinprog\1scanned"
+#define PRINT_BUF_VFLAGS "\20\4bkgrderr\3bkgrdwait\2bkgrdinprog\1scanned"
#ifdef _KERNEL
/*
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index ffd8802..af99dae 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1978,12 +1978,19 @@ ffs_backgroundwritedone(struct buf *bp)
BO_LOCK(bufobj);
if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
panic("backgroundwritedone: lost buffer");
+
+ /*
+ * We should mark the cylinder group buffer origbp as
+ * dirty, to not lose the failed write.
+ */
+ if ((bp->b_ioflags & BIO_ERROR) != 0)
+ origbp->b_vflags |= BV_BKGRDERR;
BO_UNLOCK(bufobj);
/*
* Process dependencies then return any unfinished ones.
*/
pbrelvp(bp);
- if (!LIST_EMPTY(&bp->b_dep))
+ if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
buf_complete(bp);
#ifdef SOFTUPDATES
if (!LIST_EMPTY(&bp->b_dep))
@@ -1995,6 +2002,15 @@ ffs_backgroundwritedone(struct buf *bp)
*/
bp->b_flags |= B_NOCACHE;
bp->b_flags &= ~B_CACHE;
+
+ /*
+ * Prevent brelse() from trying to keep and re-dirty bp on
+ * errors. Doing so would dereference b_bufobj in
+ * bdirty()/reassignbuf(), but b_bufobj was cleared in
+ * pbrelvp() above.
+ */
+ if ((bp->b_ioflags & BIO_ERROR) != 0)
+ bp->b_flags |= B_INVAL;
bufdone(bp);
BO_LOCK(bufobj);
/*
@@ -2056,6 +2072,8 @@ ffs_bufwrite(struct buf *bp)
if (bp->b_vflags & BV_BKGRDINPROG)
panic("bufwrite: still writing");
}
+ if ((bp->b_vflags & BV_BKGRDERR) != 0)
+ bp->b_vflags &= ~BV_BKGRDERR;
BO_UNLOCK(bp->b_bufobj);
/*
OpenPOWER on IntegriCloud