diff options
author | kib <kib@FreeBSD.org> | 2015-06-27 09:44:14 +0000 |
---|---|---|
committer | kib <kib@FreeBSD.org> | 2015-06-27 09:44:14 +0000 |
commit | 9f65a2d8d999b24bb5d9e9b9dad30a1c98e3f010 (patch) | |
tree | b314165eb762cbcca7d9865cb7565966fa428720 /sys | |
parent | 511fb8f89945627d580acd49ab7064c624ebaa62 (diff) | |
download | FreeBSD-src-9f65a2d8d999b24bb5d9e9b9dad30a1c98e3f010.zip FreeBSD-src-9f65a2d8d999b24bb5d9e9b9dad30a1c98e3f010.tar.gz |
Handle errors from background write of the cylinder group blocks.
First, on the write error, bufdone() call from ffs_backgroundwrite()
panics because pbrelvp() cleared bp->b_bufobj, while brelse() would
try to re-dirty the copy of the cg buffer. Handle this by setting
B_INVAL for the case of BIO_ERROR.
Second, we must re-dirty the real buffer containing the cylinder group
block data when background write failed. Real cg buffer was already
marked clean in ffs_bufwrite(). After the BV_BKGRDINPROG flag is
cleared on the real cg buffer in ffs_backgroundwrite(), buffer scan
may reuse the buffer at any moment. The result is lost write, and if
the write error was only transient, we get corrupted bitmaps.
We cannot re-dirty the original cg buffer in the
ffs_backgroundwritedone(), since the context is not sleepable,
preventing us from sleeping for origbp' lock. Add BV_BKGDERR flag
(protected by the buffer object lock), which is converted into delayed
write by brelse(), bqrelse() and buffer scan.
In collaboration with: Conrad Meyer <cse.cem@gmail.com>
Reviewed by: mckusick
Sponsored by: The FreeBSD Foundation (kib),
EMC/Isilon storage division (Conrad)
MFC after: 2 weeks
Diffstat (limited to 'sys')
-rw-r--r-- | sys/kern/vfs_bio.c | 22 | ||||
-rw-r--r-- | sys/sys/buf.h | 3 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vfsops.c | 20 |
3 files changed, 42 insertions, 3 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index c715725..b380578 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1597,6 +1597,12 @@ brelse(struct buf *bp) return; } + if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) { + BO_LOCK(bp->b_bufobj); + bp->b_vflags &= ~BV_BKGRDERR; + BO_UNLOCK(bp->b_bufobj); + bdirty(bp); + } if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) && bp->b_error == EIO && !(bp->b_flags & B_INVAL)) { /* @@ -1853,7 +1859,11 @@ bqrelse(struct buf *bp) } /* buffers with stale but valid contents */ - if (bp->b_flags & B_DELWRI) { + if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG | + BV_BKGRDERR)) == BV_BKGRDERR) { + BO_LOCK(bp->b_bufobj); + bp->b_vflags &= ~BV_BKGRDERR; + BO_UNLOCK(bp->b_bufobj); qindex = QUEUE_DIRTY; } else { if ((bp->b_flags & B_DELWRI) == 0 && @@ -2372,6 +2382,16 @@ restart: continue; } + /* + * Requeue the background write buffer with error. + */ + if ((bp->b_vflags & BV_BKGRDERR) != 0) { + bremfreel(bp); + mtx_unlock(&bqclean); + bqrelse(bp); + continue; + } + KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistent queue %d bp %p", qindex, bp)); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 4e2af1a..49ec4d5 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -253,8 +253,9 @@ struct buf { #define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */ #define BV_BKGRDINPROG 0x00000002 /* Background write in progress */ #define BV_BKGRDWAIT 0x00000004 /* Background write waiting */ +#define BV_BKGRDERR 0x00000008 /* Error from background write */ -#define PRINT_BUF_VFLAGS "\20\3bkgrdwait\2bkgrdinprog\1scanned" +#define PRINT_BUF_VFLAGS "\20\4bkgrderr\3bkgrdwait\2bkgrdinprog\1scanned" #ifdef _KERNEL /* diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index ffd8802..af99dae 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1978,12 +1978,19 @@ ffs_backgroundwritedone(struct buf *bp) BO_LOCK(bufobj); if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL) panic("backgroundwritedone: lost buffer"); + + /* + * We should mark the cylinder group buffer origbp as + * dirty, to not loose the failed write. + */ + if ((bp->b_ioflags & BIO_ERROR) != 0) + origbp->b_vflags |= BV_BKGRDERR; BO_UNLOCK(bufobj); /* * Process dependencies then return any unfinished ones. */ pbrelvp(bp); - if (!LIST_EMPTY(&bp->b_dep)) + if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0) buf_complete(bp); #ifdef SOFTUPDATES if (!LIST_EMPTY(&bp->b_dep)) @@ -1995,6 +2002,15 @@ ffs_backgroundwritedone(struct buf *bp) */ bp->b_flags |= B_NOCACHE; bp->b_flags &= ~B_CACHE; + + /* + * Prevent brelse() from trying to keep and re-dirtying bp on + * errors. It causes b_bufobj dereference in + * bdirty()/reassignbuf(), and b_bufobj was cleared in + * pbrelvp() above. + */ + if ((bp->b_ioflags & BIO_ERROR) != 0) + bp->b_flags |= B_INVAL; bufdone(bp); BO_LOCK(bufobj); /* @@ -2056,6 +2072,8 @@ ffs_bufwrite(struct buf *bp) if (bp->b_vflags & BV_BKGRDINPROG) panic("bufwrite: still writing"); } + if ((bp->b_vflags & BV_BKGRDERR) != 0) + bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); /* |