diff options
author | mckusick <mckusick@FreeBSD.org> | 2013-03-22 21:50:43 +0000 |
---|---|---|
committer | mckusick <mckusick@FreeBSD.org> | 2013-03-22 21:50:43 +0000 |
commit | 93fa1464f28c355009625fdcfd4007c798c20d44 (patch) | |
tree | d0ed2db47b0d4e99dc2acac864e90741fd40ba47 /sbin/fsck_ffs/fsutil.c | |
parent | 45f62f67d30e33d8fa4d0e6d3a3a0ff5a231f18f (diff) | |
download | FreeBSD-src-93fa1464f28c355009625fdcfd4007c798c20d44.zip FreeBSD-src-93fa1464f28c355009625fdcfd4007c798c20d44.tar.gz |
Speed up fsck by caching the cylinder group maps in pass1 so
that they do not need to be read again in pass5. As this nearly
doubles the memory requirement for fsck, the cache is thrown away
if other memory needs in fsck would otherwise fail. Thus, the
memory footprint of fsck remains unchanged in memory-constrained
environments.
This work was inspired by a paper presented at Usenix's FAST '13:
www.usenix.org/conference/fast13/ffsck-fast-file-system-checker
Details of this implementation appear in the April 2013 issue of ;login:
www.usenix.org/publications/login/april-2013-volume-38-number-2.
A copy of the April 2013 ;login: paper can also be downloaded
from: www.mckusick.com/publications/faster_fsck.pdf.
Reviewed by: kib
Tested by: Peter Holm
MFC after: 4 weeks
Diffstat (limited to 'sbin/fsck_ffs/fsutil.c')
-rw-r--r-- | sbin/fsck_ffs/fsutil.c | 89 |
1 files changed, 76 insertions, 13 deletions
diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c index 34cc29f..10654ca 100644 --- a/sbin/fsck_ffs/fsutil.c +++ b/sbin/fsck_ffs/fsutil.c @@ -70,6 +70,7 @@ static struct timespec startpass, finishpass; struct timeval slowio_starttime; int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ int slowio_pollcnt; +static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ static TAILQ_HEAD(buflist, bufarea) bufhead; /* head of buffer cache list */ static int numbufs; /* size of buffer cache */ static char *buftype[BT_NUMBUFTYPES] = BT_NAMES; @@ -163,7 +164,7 @@ bufinit(void) char *bufp; pbp = pdirbp = (struct bufarea *)0; - bufp = malloc((unsigned int)sblock.fs_bsize); + bufp = Malloc((unsigned int)sblock.fs_bsize); if (bufp == 0) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; @@ -173,8 +174,8 @@ bufinit(void) if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { - bp = (struct bufarea *)malloc(sizeof(struct bufarea)); - bufp = malloc((unsigned int)sblock.fs_bsize); + bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); + bufp = Malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; @@ -193,6 +194,57 @@ bufinit(void) } /* + * Manage cylinder group buffers. 
+ */ +static struct bufarea *cgbufs; /* header for cylinder group cache */ +static int flushtries; /* number of tries to reclaim memory */ + +struct bufarea * +cgget(int cg) +{ + struct bufarea *cgbp; + struct cg *cgp; + + if (cgbufs == NULL) { + cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea)); + if (cgbufs == NULL) + errx(EEXIT, "cannot allocate cylinder group buffers"); + } + cgbp = &cgbufs[cg]; + if (cgbp->b_un.b_cg != NULL) + return (cgbp); + cgp = NULL; + if (flushtries == 0) + cgp = malloc((unsigned int)sblock.fs_cgsize); + if (cgp == NULL) { + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + return (&cgblk); + } + cgbp->b_un.b_cg = cgp; + initbarea(cgbp, BT_CYLGRP); + getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); + return (cgbp); +} + +/* + * Attempt to flush a cylinder group cache entry. + * Return whether the flush was successful. + */ +int +flushentry(void) +{ + struct bufarea *cgbp; + + cgbp = &cgbufs[flushtries++]; + if (cgbp->b_un.b_cg == NULL) + return (0); + flush(fswritefd, cgbp); + free(cgbp->b_un.b_buf); + cgbp->b_un.b_buf = NULL; + return (1); +} + +/* * Manage a cache of directory blocks. 
*/ struct bufarea * @@ -363,6 +415,13 @@ ckfini(int markclean) } if (numbufs != cnt) errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); + for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { + if (cgbufs[cnt].b_un.b_cg == NULL) + continue; + flush(fswritefd, &cgbufs[cnt]); + free(cgbufs[cnt].b_un.b_cg); + } + free(cgbufs); pbp = pdirbp = (struct bufarea *)0; if (cursnapshot == 0 && sblock.fs_clean != markclean) { if ((sblock.fs_clean = markclean) != 0) { @@ -448,8 +507,8 @@ static void printIOstats(void) clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); timespecsub(&finishpass, &startpass); - msec = finishpass.tv_sec * 1000 + finishpass.tv_nsec / 1000000; - printf("Running time: %lld msec\n", msec); + printf("Running time: %d.%03ld msec\n", + finishpass.tv_sec, finishpass.tv_nsec / 1000000); printf("buffer reads by type:\n"); for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) totalmsec += readtime[i].tv_sec * 1000 + @@ -460,9 +519,10 @@ static void printIOstats(void) if (readcnt[i] == 0) continue; msec = readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; - printf("%21s:%8ld %2ld.%ld%% %8lld msec %2lld.%lld%%\n", + printf("%21s:%8ld %2ld.%ld%% %4d.%03ld sec %2jd.%jd%%\n", buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, - (readcnt[i] * 1000 / diskreads) % 10, msec, + (readcnt[i] * 1000 / diskreads) % 10, + readtime[i].tv_sec, readtime[i].tv_nsec / 1000000, msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); } printf("\n"); @@ -562,8 +622,9 @@ blerase(int fd, ufs2_daddr_t blk, long size) * test fails, offer an option to rebuild the whole cylinder group. */ int -check_cgmagic(int cg, struct cg *cgp) +check_cgmagic(int cg, struct bufarea *cgbp) { + struct cg *cgp = cgbp->b_un.b_cg; /* * Extended cylinder group checks. 
@@ -623,7 +684,7 @@ check_cgmagic(int cg, struct cg *cgp) cgp->cg_nextfreeoff = cgp->cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); } - cgdirty(); + dirty(cgbp); return (0); } @@ -634,7 +695,8 @@ ufs2_daddr_t allocblk(long frags) { int i, j, k, cg, baseblk; - struct cg *cgp = &cgrp; + struct bufarea *cgbp; + struct cg *cgp; if (frags <= 0 || frags > sblock.fs_frag) return (0); @@ -650,8 +712,9 @@ allocblk(long frags) continue; } cg = dtog(&sblock, i + j); - getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); - if (!check_cgmagic(cg, cgp)) + cgbp = cgget(cg); + cgp = cgbp->b_un.b_cg; + if (!check_cgmagic(cg, cgbp)) return (0); baseblk = dtogd(&sblock, i + j); for (k = 0; k < frags; k++) { @@ -663,7 +726,7 @@ allocblk(long frags) cgp->cg_cs.cs_nbfree--; else cgp->cg_cs.cs_nffree -= frags; - cgdirty(); + dirty(cgbp); return (i + j); } } |