diff options
author | mckusick <mckusick@FreeBSD.org> | 2016-10-17 21:44:41 +0000 |
---|---|---|
committer | mckusick <mckusick@FreeBSD.org> | 2016-10-17 21:44:41 +0000 |
commit | 7214aeaa9ab8c64f46e4850b7188b59c8f6bffd3 (patch) | |
tree | 4544468305f2f54c654ca45df3e9f1fd509f8351 /sys/ufs | |
parent | 47aad02bfc3fd2c6131dbebded9e22f1f6916087 (diff) | |
download | FreeBSD-src-7214aeaa9ab8c64f46e4850b7188b59c8f6bffd3.zip FreeBSD-src-7214aeaa9ab8c64f46e4850b7188b59c8f6bffd3.tar.gz |
MFC r304230:
Add two new macros, SLIST_CONCAT and LIST_CONCAT.
MFC r304239:
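Bug 211013 reports that a write error to a UFS filesystem running
with softupdates panics the kernel. When a write fails,
softdep_disk_write_complete() now rolls forward the dependencies that
were rolled back before the write, without taking the actions that
would allow those dependencies to be processed.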
PR: 211013
Diffstat (limited to 'sys/ufs')
-rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 146 | ||||
-rw-r--r-- | sys/ufs/ffs/softdep.h | 1 |
2 files changed, 125 insertions, 22 deletions
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 494ec1b..812cc56 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -752,16 +752,16 @@ static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t); static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t); static int flush_deplist(struct allocdirectlst *, int, int *); static int sync_cgs(struct mount *, int); -static int handle_written_filepage(struct pagedep *, struct buf *); +static int handle_written_filepage(struct pagedep *, struct buf *, int); static int handle_written_sbdep(struct sbdep *, struct buf *); static void initiate_write_sbdep(struct sbdep *); static void diradd_inode_written(struct diradd *, struct inodedep *); static int handle_written_indirdep(struct indirdep *, struct buf *, - struct buf**); -static int handle_written_inodeblock(struct inodedep *, struct buf *); + struct buf**, int); +static int handle_written_inodeblock(struct inodedep *, struct buf *, int); static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *, uint8_t *); -static int handle_written_bmsafemap(struct bmsafemap *, struct buf *); +static int handle_written_bmsafemap(struct bmsafemap *, struct buf *, int); static void handle_written_jaddref(struct jaddref *); static void handle_written_jremref(struct jremref *); static void handle_written_jseg(struct jseg *, struct buf *); @@ -10903,6 +10903,10 @@ initiate_write_bmsafemap(bmsafemap, bp) struct fs *fs; ino_t ino; + /* + * If this is a background write, we did this at the time that + * the copy was made, so do not need to do it again. + */ if (bmsafemap->sm_state & IOSTARTED) return; bmsafemap->sm_state |= IOSTARTED; @@ -10976,10 +10980,39 @@ softdep_disk_write_complete(bp) /* * If an error occurred while doing the write, then the data - * has not hit the disk and the dependencies cannot be unrolled. + * has not hit the disk and the dependencies cannot be processed. 
+ * But we do have to go through and roll forward any dependencies + * that were rolled back before the disk write. */ - if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) + if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) { + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + switch (wk->wk_type) { + + case D_PAGEDEP: + handle_written_filepage(WK_PAGEDEP(wk), bp, 0); + continue; + + case D_INODEDEP: + handle_written_inodeblock(WK_INODEDEP(wk), + bp, 0); + continue; + + case D_BMSAFEMAP: + handle_written_bmsafemap(WK_BMSAFEMAP(wk), + bp, 0); + continue; + + case D_INDIRDEP: + handle_written_indirdep(WK_INDIRDEP(wk), + bp, &sbp, 0); + continue; + default: + /* nothing to roll forward */ + continue; + } + } return; + } if ((wk = LIST_FIRST(&bp->b_dep)) == NULL) return; ump = VFSTOUFS(wk->wk_mp); @@ -10999,17 +11032,20 @@ softdep_disk_write_complete(bp) switch (wk->wk_type) { case D_PAGEDEP: - if (handle_written_filepage(WK_PAGEDEP(wk), bp)) + if (handle_written_filepage(WK_PAGEDEP(wk), bp, + WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; case D_INODEDEP: - if (handle_written_inodeblock(WK_INODEDEP(wk), bp)) + if (handle_written_inodeblock(WK_INODEDEP(wk), bp, + WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; case D_BMSAFEMAP: - if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp)) + if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp, + WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; @@ -11028,7 +11064,8 @@ softdep_disk_write_complete(bp) continue; case D_INDIRDEP: - if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp)) + if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp, + WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; @@ -11328,12 +11365,17 @@ handle_bufwait(inodedep, refhd) * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. 
Note * that this routine is always called from interrupt level with further - * splbio interrupts blocked. + * interrupts from this device blocked. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. */ static int -handle_written_inodeblock(inodedep, bp) +handle_written_inodeblock(inodedep, bp, flags) struct inodedep *inodedep; struct buf *bp; /* buffer containing the inode block */ + int flags; { struct freefile *freefile; struct allocdirect *adp, *nextadp; @@ -11363,7 +11405,8 @@ handle_written_inodeblock(inodedep, bp) /* * Leave this inodeblock dirty until it's in the list. */ - if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED) { + if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED && + (flags & WRITESUCCEEDED)) { struct inodedep *inon; inon = TAILQ_NEXT(inodedep, id_unlinked); @@ -11402,7 +11445,8 @@ handle_written_inodeblock(inodedep, bp) goto bufwait; return (1); } - inodedep->id_state |= COMPLETE; + if (flags & WRITESUCCEEDED) + inodedep->id_state |= COMPLETE; /* * Roll forward anything that had to be rolled back before * the inode could be updated. @@ -11517,6 +11561,13 @@ handle_written_inodeblock(inodedep, bp) bdirty(bp); bufwait: /* + * If the write did not succeed, we have done all the roll-forward + * operations, but we cannot take the actions that will allow its + * dependencies to be processed. + */ + if ((flags & WRITESUCCEEDED) == 0) + return (hadchanges); + /* * Process any allocdirects that completed during the update. */ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) @@ -11573,11 +11624,20 @@ bufwait: return (hadchanges); } +/* + * Perform needed roll-forwards and kick off any dependencies that + * can now be processed. 
+ * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. + */ static int -handle_written_indirdep(indirdep, bp, bpp) +handle_written_indirdep(indirdep, bp, bpp, flags) struct indirdep *indirdep; struct buf *bp; struct buf **bpp; + int flags; { struct allocindir *aip; struct buf *sbp; @@ -11602,6 +11662,16 @@ handle_written_indirdep(indirdep, bp, bpp) indirdep->ir_state &= ~(UNDONE | IOSTARTED); indirdep->ir_state |= ATTACHED; /* + * If the write did not succeed, we have done all the roll-forward + * operations, but we cannot take the actions that will allow its + * dependencies to be processed. + */ + if ((flags & WRITESUCCEEDED) == 0) { + stat_indir_blk_ptrs++; + bdirty(bp); + return (1); + } + /* * Move allocindirs with written pointers to the completehd if * the indirdep's pointer is not yet written. Otherwise * free them here. @@ -11755,11 +11825,16 @@ jnewblk_rollforward(jnewblk, fs, cgp, blksfree) * Complete a write to a bmsafemap structure. Roll forward any bitmap * changes if it's not a background write. Set all written dependencies * to DEPCOMPLETE and free the structure if possible. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. 
*/ static int -handle_written_bmsafemap(bmsafemap, bp) +handle_written_bmsafemap(bmsafemap, bp, flags) struct bmsafemap *bmsafemap; struct buf *bp; + int flags; { struct newblk *newblk; struct inodedep *inodedep; @@ -11775,15 +11850,20 @@ handle_written_bmsafemap(bmsafemap, bp) int chgs; if ((bmsafemap->sm_state & IOSTARTED) == 0) - panic("initiate_write_bmsafemap: Not started\n"); + panic("handle_written_bmsafemap: Not started\n"); ump = VFSTOUFS(bmsafemap->sm_list.wk_mp); chgs = 0; bmsafemap->sm_state &= ~IOSTARTED; foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0; /* - * Release journal work that was waiting on the write. + * If write was successful, release journal work that was waiting + * on the write. Otherwise move the work back. */ - handle_jwork(&bmsafemap->sm_freewr); + if (flags & WRITESUCCEEDED) + handle_jwork(&bmsafemap->sm_freewr); + else + LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, + worklist, wk_list); /* * Restore unwritten inode allocation pending jaddref writes. @@ -11833,6 +11913,20 @@ handle_written_bmsafemap(bmsafemap, bp) free_jnewblk(jnewblk); } } + /* + * If the write did not succeed, we have done all the roll-forward + * operations, but we cannot take the actions that will allow its + * dependencies to be processed. + */ + if ((flags & WRITESUCCEEDED) == 0) { + LIST_CONCAT(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, + newblk, nb_deps); + LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, + worklist, wk_list); + if (foreground) + bdirty(bp); + return (1); + } while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) { newblk->nb_state |= DEPCOMPLETE; newblk->nb_state &= ~ONDEPLIST; @@ -11936,12 +12030,17 @@ free_pagedep(pagedep) * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. * Note that this routine is always called from interrupt level - * with further splbio interrupts blocked. + * with further interrupts from this device blocked. 
+ * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. */ static int -handle_written_filepage(pagedep, bp) +handle_written_filepage(pagedep, bp, flags) struct pagedep *pagedep; struct buf *bp; /* buffer containing the written page */ + int flags; { struct dirrem *dirrem; struct diradd *dap, *nextdap; @@ -11951,6 +12050,8 @@ handle_written_filepage(pagedep, bp) if ((pagedep->pd_state & IOSTARTED) == 0) panic("handle_written_filepage: not started"); pagedep->pd_state &= ~IOSTARTED; + if ((flags & WRITESUCCEEDED) == 0) + goto rollforward; /* * Process any directory removals that have been committed. */ @@ -11970,6 +12071,7 @@ handle_written_filepage(pagedep, bp) if ((pagedep->pd_state & NEWBLOCK) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) free_diradd(dap, NULL); +rollforward: /* * Uncommitted directory entries must be restored. */ @@ -12002,7 +12104,7 @@ handle_written_filepage(pagedep, bp) * marked dirty so that its will eventually get written back in * its correct form. */ - if (chgs) { + if (chgs || (flags & WRITESUCCEEDED) == 0) { if ((bp->b_flags & B_DELWRI) == 0) stat_dir_entry++; bdirty(bp); diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h index d95ae60..009e3b3 100644 --- a/sys/ufs/ffs/softdep.h +++ b/sys/ufs/ffs/softdep.h @@ -140,6 +140,7 @@ #define UNLINKPREV 0x100000 /* inodedep is pointed at in the unlink list */ #define UNLINKONLIST 0x200000 /* inodedep is in the unlinked list on disk */ #define UNLINKLINKS (UNLINKNEXT | UNLINKPREV) +#define WRITESUCCEEDED 0x400000 /* the disk write completed successfully */ #define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE) |