From 6ba8b7f04c93a101c89962061bbd3307c0c54379 Mon Sep 17 00:00:00 2001 From: jeff Date: Fri, 10 Jun 2011 22:48:35 +0000 Subject: Implement fully asynchronous partial truncation with softupdates journaling to resolve errors which can cause corruption on recovery with the old synchronous mechanism. - Append partial truncation freework structures to indirdeps while truncation is proceeding. These prevent new block pointers from becoming valid until truncation completes and serialize truncations. - On completion of a partial truncate, journal work waits for zeroed pointers to hit indirects. - softdep_journal_freeblocks() handles last frag allocation and last block zeroing. - vtruncbuf/ffs_pages_remove moved into softdep_*_freeblocks() so it is only implemented in one place. - Block allocation failure handling moved up one level so it does not proceed with buf locks held. This permits us to do more extensive reclaims when filesystem space is exhausted. - softdep_sync_metadata() is broken into two parts: the first executes once at the start of ffs_syncvnode() and flushes truncations and inode dependencies. The second is called on each locked buf. This eliminates excessive looping and rollbacks. - Improve the mechanism in process_worklist_item() that handles acquiring vnode locks for handle_workitem_remove() so that it works more generally and does not loop excessively over the same worklist items on each call. - Don't corrupt directories by zeroing the tail in fsck. The tail is only zeroed for regular files. - Push an fsync complete record for files that need it so the checker knows a truncation in the journal is no longer valid. 
Discussed with: mckusick, kib (ffs_pages_remove and ffs_truncate parts) Tested by: pho --- sbin/fsck_ffs/suj.c | 52 ++++++++++++++++------------------------------------ 1 file changed, 16 insertions(+), 36 deletions(-) (limited to 'sbin/fsck_ffs') diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c index c1c5811..b4aa679 100644 --- a/sbin/fsck_ffs/suj.c +++ b/sbin/fsck_ffs/suj.c @@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size) * uninitialized space later. */ off = blkoff(fs, size); - if (off) { + if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; @@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc) struct suj_ino *sino; int i; - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + for (i = 0; i < SUJ_HASHSIZE; i++) { + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); + sino->si_blkadj = 0; sino->si_trunc = NULL; } + if (sino->si_blkadj) + ino_adjblks(sino); + } + } } /* @@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc) static void cg_check_blk(struct suj_cg *sc) { - struct suj_ino *sino; struct suj_blk *sblk; int i; @@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc) for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); - /* - * Now that we've freed blocks which are not referenced we - * make a second pass over all inodes to adjust their block - * counts. - */ - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) - if (sino->si_blkadj) - ino_adjblks(sino); } /* @@ -1961,14 +1956,7 @@ ino_append(union jrec *rec) "parent %d, diroff %jd\n", refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, refrec->jr_diroff); - /* - * Lookup the ino and clear truncate if one is found. Partial - * truncates are always done synchronously so if we discover - * an operation that requires a lock the truncation has completed - * and can be discarded. 
- */ sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); - sino->si_trunc = NULL; sino->si_hasrecs = 1; srec = errmalloc(sizeof(*srec)); srec->sr_rec = rec; @@ -2174,9 +2162,7 @@ blk_build(struct jblkrec *blkrec) struct suj_rec *srec; struct suj_blk *sblk; struct jblkrec *blkrn; - struct suj_ino *sino; ufs2_daddr_t blk; - off_t foff; int frag; if (debug) @@ -2185,17 +2171,6 @@ blk_build(struct jblkrec *blkrec) blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags, blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn); - /* - * Look up the inode and clear the truncate if any lbns after the - * truncate lbn are freed or allocated. - */ - sino = ino_lookup(blkrec->jb_ino, 0); - if (sino && sino->si_trunc) { - foff = lblktosize(fs, blkrec->jb_lbn); - foff += lfragtosize(fs, blkrec->jb_frags); - if (foff > sino->si_trunc->jt_size) - sino->si_trunc = NULL; - } blk = blknum(fs, blkrec->jb_blkno); frag = fragnum(fs, blkrec->jb_blkno); sblk = blk_lookup(blk, 1); @@ -2242,10 +2217,15 @@ ino_build_trunc(struct jtrncrec *rec) struct suj_ino *sino; if (debug) - printf("ino_build_trunc: ino %d, size %jd\n", - rec->jt_ino, rec->jt_size); + printf("ino_build_trunc: op %d ino %d, size %jd\n", + rec->jt_op, rec->jt_ino, rec->jt_size); sino = ino_lookup(rec->jt_ino, 1); - sino->si_trunc = rec; + if (rec->jt_op == JOP_SYNC) { + sino->si_trunc = NULL; + return; + } + if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size) + sino->si_trunc = rec; } /* -- cgit v1.1