diff options
author | jeff <jeff@FreeBSD.org> | 2010-04-24 07:05:35 +0000 |
---|---|---|
committer | jeff <jeff@FreeBSD.org> | 2010-04-24 07:05:35 +0000 |
commit | a57449541074720475dfc21dfb8b025695b573eb (patch) | |
tree | e551aa2ab43f7f11c3646b241ebf3f582988d375 /sbin/tunefs | |
parent | 671efe7b2286fbfddcd385e966f431f529ca6376 (diff) | |
download | FreeBSD-src-a57449541074720475dfc21dfb8b025695b573eb.zip FreeBSD-src-a57449541074720475dfc21dfb8b025695b573eb.tar.gz |
- Merge soft-updates journaling from projects/suj/head into head. This
brings in support for an optional intent log which eliminates the need
for background fsck on unclean shutdown.
Sponsored by: iXsystems, Yahoo!, and Juniper.
With help from: McKusick and Peter Holm
Diffstat (limited to 'sbin/tunefs')
-rw-r--r-- | sbin/tunefs/tunefs.8 | 9 | ||||
-rw-r--r-- | sbin/tunefs/tunefs.c | 567 |
2 files changed, 567 insertions, 9 deletions
diff --git a/sbin/tunefs/tunefs.8 b/sbin/tunefs/tunefs.8 index 53e463c..a883cd4 100644 --- a/sbin/tunefs/tunefs.8 +++ b/sbin/tunefs/tunefs.8 @@ -28,7 +28,7 @@ .\" @(#)tunefs.8 8.2 (Berkeley) 12/11/93 .\" $FreeBSD$ .\" -.Dd October 21, 2009 +.Dd March 6, 2010 .Dt TUNEFS 8 .Os .Sh NAME @@ -40,6 +40,7 @@ .Op Fl a Cm enable | disable .Op Fl e Ar maxbpg .Op Fl f Ar avgfilesize +.Op Fl j Cm enable | disable .Op Fl J Cm enable | disable .Op Fl L Ar volname .Op Fl l Cm enable | disable @@ -49,6 +50,7 @@ .Op Fl o Cm space | time .Op Fl p .Op Fl s Ar avgfpdir +.Op Fl S Ar size .Ar special | filesystem .Sh DESCRIPTION The @@ -89,6 +91,8 @@ For file systems with exclusively large files, this parameter should be set higher. .It Fl f Ar avgfilesize Specify the expected average file size. +.It Fl j Cm enable | disable +Turn on/off soft updates journaling. .It Fl J Cm enable | disable Turn on/off gjournal flag. .It Fl L Ar volname @@ -136,6 +140,9 @@ obtained from the utility. .It Fl s Ar avgfpdir Specify the expected number of files per directory. +.It Fl S Ar size +Specify the softdep journal size in bytes. +The minimum is 4M. .El .Pp At least one of the above flags is required. diff --git a/sbin/tunefs/tunefs.c b/sbin/tunefs/tunefs.c index e4adb52..a10b35d 100644 --- a/sbin/tunefs/tunefs.c +++ b/sbin/tunefs/tunefs.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/dinode.h> #include <ufs/ffs/fs.h> +#include <ufs/ufs/dir.h> #include <ctype.h> #include <err.h> @@ -61,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include <paths.h> #include <stdio.h> #include <stdlib.h> +#include <stdint.h> #include <string.h> #include <unistd.h> @@ -72,16 +74,20 @@ struct uufsd disk; void usage(void); void printfs(void); +int journal_alloc(int64_t size); +void journal_clear(void); +void sbdirty(void); int main(int argc, char *argv[]) { - char *avalue, *Jvalue, *Lvalue, *lvalue, *Nvalue, *nvalue; + char *avalue, *jvalue, *Jvalue, *Lvalue, *lvalue, *Nvalue, *nvalue; const char *special, *on; const char *name; int active; - int Aflag, aflag, eflag, evalue, fflag, fvalue, Jflag, Lflag, lflag; - int mflag, mvalue, Nflag, nflag, oflag, ovalue, pflag, sflag, svalue; + int Aflag, aflag, eflag, evalue, fflag, fvalue, jflag, Jflag, Lflag; + int lflag, mflag, mvalue, Nflag, nflag, oflag, ovalue, pflag, sflag; + int svalue, Sflag, Svalue; int ch, found_arg, i; const char *chg[2]; struct ufs_args args; @@ -89,13 +95,13 @@ main(int argc, char *argv[]) if (argc < 3) usage(); - Aflag = aflag = eflag = fflag = Jflag = Lflag = lflag = mflag = 0; - Nflag = nflag = oflag = pflag = sflag = 0; - avalue = Jvalue = Lvalue = lvalue = Nvalue = nvalue = NULL; - evalue = fvalue = mvalue = ovalue = svalue = 0; + Aflag = aflag = eflag = fflag = jflag = Jflag = Lflag = lflag = 0; + mflag = Nflag = nflag = oflag = pflag = sflag = 0; + avalue = jvalue = Jvalue = Lvalue = lvalue = Nvalue = nvalue = NULL; + evalue = fvalue = mvalue = ovalue = svalue = Svalue = 0; active = 0; found_arg = 0; /* At least one arg is required. */ - while ((ch = getopt(argc, argv, "Aa:e:f:J:L:l:m:N:n:o:ps:")) != -1) + while ((ch = getopt(argc, argv, "Aa:e:f:j:J:L:l:m:N:n:o:ps:S:")) != -1) switch (ch) { case 'A': @@ -135,6 +141,18 @@ main(int argc, char *argv[]) fflag = 1; break; + case 'j': + found_arg = 1; + name = "softdep journaled file system"; + jvalue = optarg; + if (strcmp(jvalue, "enable") && + strcmp(jvalue, "disable")) { + errx(10, "bad %s (options are %s)", + name, "`enable' or `disable'"); + } + jflag = 1; + break; + case 'J': found_arg = 1; name = "gjournaled file system"; @@ -240,6 +258,16 @@ main(int argc, char *argv[]) sflag = 1; break; + case 'S': + found_arg = 1; + name = "Softdep Journal Size"; + Svalue = atoi(optarg); + if (Svalue < SUJ_MIN) + errx(10, "%s must be >= %d (was %s)", + name, SUJ_MIN, optarg); + Sflag = 1; + break; + default: usage(); } @@ -310,6 +338,33 @@ main(int argc, char *argv[]) sblock.fs_avgfilesize = fvalue; } } + if (jflag) { + name = "soft updates journaling"; + if (strcmp(jvalue, "enable") == 0) { + if ((sblock.fs_flags & (FS_DOSOFTDEP | FS_SUJ)) == + (FS_DOSOFTDEP | FS_SUJ)) { + warnx("%s remains unchanged as enabled", name); + } else if (sblock.fs_clean == 0) { + warnx("%s cannot be enabled until fsck is run", + name); + } else if (journal_alloc(Svalue) != 0) { + warnx("%s can not be enabled", name); + } else { + sblock.fs_flags |= FS_DOSOFTDEP | FS_SUJ; + warnx("%s set", name); + } + } else if (strcmp(jvalue, "disable") == 0) { + if ((~sblock.fs_flags & FS_SUJ) == FS_SUJ) { + warnx("%s remains unchanged as disabled", name); + } else { + journal_clear(); + sblock.fs_flags &= ~(FS_DOSOFTDEP | FS_SUJ); + sblock.fs_sujfree = 0; + warnx("%s cleared, " + "remove .sujournal to reclaim space", name); + } + } + } if (Jflag) { name = "gjournal"; if (strcmp(Jvalue, "enable") == 0) { @@ -456,6 +511,500 @@ err: } void +sbdirty(void) +{ + disk.d_fs.fs_flags |= FS_UNCLEAN | FS_NEEDSFSCK; + disk.d_fs.fs_clean = 0; +} + +int blocks; +static char clrbuf[MAXBSIZE]; + +static ufs2_daddr_t +journal_balloc(void) +{ + ufs2_daddr_t blk; + struct cg *cgp; + int valid; + static int contig = 1; + + cgp = &disk.d_cg; + for (;;) { + blk = cgballoc(&disk); + if (blk > 0) + break; + /* + * If we failed to allocate a block from this cg, move to + * the next. + */ + if (cgwrite(&disk) < 0) { + warn("Failed to write updated cg"); + return (-1); + } + while ((valid = cgread(&disk)) == 1) { + /* + * Try to minimize fragmentation by requiring a minimum + * number of blocks present. + */ + if (cgp->cg_cs.cs_nbfree > blocks / 8) + break; + if (contig == 0 && cgp->cg_cs.cs_nbfree) + break; + } + if (valid) + continue; + /* + * Try once through looking only for large contiguous regions + * and again taking any space we can find. + */ + if (contig) { + contig = 0; + disk.d_ccg = 0; + warnx("Journal file fragmented."); + continue; + } + warnx("Failed to find sufficient free blocks for the journal"); + return -1; + } + if (bwrite(&disk, fsbtodb(&sblock, blk), clrbuf, + sblock.fs_bsize) <= 0) { + warn("Failed to initialize new block"); + return -1; + } + return (blk); +} + +/* + * Search a directory block for the SUJ_FILE. + */ +static ino_t +dir_search(ufs2_daddr_t blk, int bytes) +{ + char block[MAXBSIZE]; + struct direct *dp; + int off; + + if (bread(&disk, fsbtodb(&sblock, blk), block, bytes) <= 0) { + warn("Failed to read dir block"); + return (-1); + } + for (off = 0; off < bytes; off += dp->d_reclen) { + dp = (struct direct *)&block[off]; + if (dp->d_reclen == 0) + break; + if (dp->d_ino == 0) + continue; + if (dp->d_namlen != strlen(SUJ_FILE)) + continue; + if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0) + continue; + return (dp->d_ino); + } + + return (0); +} + +/* + * Search in the ROOTINO for the SUJ_FILE. If it exists we can not enable + * journaling. + */ +static ino_t +journal_findfile(void) +{ + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; + ino_t ino; + int mode; + void *ip; + int i; + + if (getino(&disk, &ip, ROOTINO, &mode) != 0) { + warn("Failed to get root inode"); + return (-1); + } + dp2 = ip; + dp1 = ip; + if (sblock.fs_magic == FS_UFS1_MAGIC) { + if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) { + warnx("ROOTINO extends beyond direct blocks."); + return (-1); + } + for (i = 0; i < NDADDR; i++) { + if (dp1->di_db[i] == 0) + break; + if ((ino = dir_search(dp1->di_db[i], + sblksize(&sblock, (off_t)dp1->di_size, i))) != 0) + return (ino); + } + } else { + if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) { + warnx("ROOTINO extends beyond direct blocks."); + return (-1); + } + for (i = 0; i < NDADDR; i++) { + if (dp2->di_db[i] == 0) + break; + if ((ino = dir_search(dp2->di_db[i], + sblksize(&sblock, (off_t)dp2->di_size, i))) != 0) + return (ino); + } + } + + return (0); +} + +/* + * Insert the journal at inode 'ino' into directory blk 'blk' at the first + * free offset of 'off'. DIRBLKSIZ blocks after off are initialized as + * empty. + */ +static int +dir_insert(ufs2_daddr_t blk, off_t off, ino_t ino) +{ + struct direct *dp; + char block[MAXBSIZE]; + + if (bread(&disk, fsbtodb(&sblock, blk), block, sblock.fs_bsize) <= 0) { + warn("Failed to read dir block"); + return (-1); + } + bzero(&block[off], sblock.fs_bsize - off); + dp = (struct direct *)&block[off]; + dp->d_ino = ino; + dp->d_reclen = DIRBLKSIZ; + dp->d_type = DT_REG; + dp->d_namlen = strlen(SUJ_FILE); + bcopy(SUJ_FILE, &dp->d_name, strlen(SUJ_FILE)); + off += DIRBLKSIZ; + for (; off < sblock.fs_bsize; off += DIRBLKSIZ) { + dp = (struct direct *)&block[off]; + dp->d_ino = 0; + dp->d_reclen = DIRBLKSIZ; + dp->d_type = DT_UNKNOWN; + } + if (bwrite(&disk, fsbtodb(&sblock, blk), block, sblock.fs_bsize) <= 0) { + warn("Failed to write dir block"); + return (-1); + } + return (0); +} + +/* + * Extend a directory block in 'blk' by copying it to a full size block + * and inserting the new journal inode into .sujournal. + */ +static int +dir_extend(ufs2_daddr_t blk, ufs2_daddr_t nblk, off_t size, ino_t ino) +{ + char block[MAXBSIZE]; + + if (bread(&disk, fsbtodb(&sblock, blk), block, size) <= 0) { + warn("Failed to read dir block"); + return (-1); + } + if (bwrite(&disk, fsbtodb(&sblock, nblk), block, size) <= 0) { + warn("Failed to write dir block"); + return (-1); + } + + return dir_insert(nblk, size, ino); +} + +/* + * Insert the journal file into the ROOTINO directory. We always extend the + * last frag + */ +static int +journal_insertfile(ino_t ino) +{ + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; + void *ip; + ufs2_daddr_t nblk; + ufs2_daddr_t blk; + ufs_lbn_t lbn; + int size; + int mode; + int off; + + if (getino(&disk, &ip, ROOTINO, &mode) != 0) { + warn("Failed to get root inode"); + sbdirty(); + return (-1); + } + dp2 = ip; + dp1 = ip; + blk = 0; + size = 0; + nblk = journal_balloc(); + if (nblk <= 0) + return (-1); + /* + * For simplicity sake we aways extend the ROOTINO into a new + * directory block rather than searching for space and inserting + * into an existing block. However, if the rootino has frags + * have to free them and extend the block. + */ + if (sblock.fs_magic == FS_UFS1_MAGIC) { + lbn = lblkno(&sblock, dp1->di_size); + off = blkoff(&sblock, dp1->di_size); + blk = dp1->di_db[lbn]; + size = sblksize(&sblock, (off_t)dp1->di_size, lbn); + } else { + lbn = lblkno(&sblock, dp2->di_size); + off = blkoff(&sblock, dp2->di_size); + blk = dp2->di_db[lbn]; + size = sblksize(&sblock, (off_t)dp2->di_size, lbn); + } + if (off != 0) { + if (dir_extend(blk, nblk, off, ino) == -1) + return (-1); + } else { + blk = 0; + if (dir_insert(nblk, 0, ino) == -1) + return (-1); + } + if (sblock.fs_magic == FS_UFS1_MAGIC) { + dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE; + dp1->di_db[lbn] = nblk; + dp1->di_size = lblktosize(&sblock, lbn+1); + } else { + dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE; + dp2->di_db[lbn] = nblk; + dp2->di_size = lblktosize(&sblock, lbn+1); + } + if (putino(&disk) < 0) { + warn("Failed to write root inode"); + return (-1); + } + if (cgwrite(&disk) < 0) { + warn("Failed to write updated cg"); + sbdirty(); + return (-1); + } + if (blk) { + if (cgbfree(&disk, blk, size) < 0) { + warn("Failed to write cg"); + return (-1); + } + } + + return (0); +} + +static int +indir_fill(ufs2_daddr_t blk, int level, int *resid) +{ + char indirbuf[MAXBSIZE]; + ufs1_daddr_t *bap1; + ufs2_daddr_t *bap2; + ufs2_daddr_t nblk; + int ncnt; + int cnt; + int i; + + bzero(indirbuf, sizeof(indirbuf)); + bap1 = (ufs1_daddr_t *)indirbuf; + bap2 = (void *)bap1; + cnt = 0; + for (i = 0; i < NINDIR(&sblock) && *resid != 0; i++) { + nblk = journal_balloc(); + if (nblk <= 0) + return (-1); + cnt++; + if (sblock.fs_magic == FS_UFS1_MAGIC) + *bap1++ = nblk; + else + *bap2++ = nblk; + if (level != 0) { + ncnt = indir_fill(nblk, level - 1, resid); + if (ncnt <= 0) + return (-1); + cnt += ncnt; + } else + (*resid)--; + } + if (bwrite(&disk, fsbtodb(&sblock, blk), indirbuf, + sblock.fs_bsize) <= 0) { + warn("Failed to write indirect"); + return (-1); + } + return (cnt); +} + +/* + * Clear the flag bits so the journal can be removed. + */ +void +journal_clear(void) +{ + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; + ino_t ino; + int mode; + void *ip; + + ino = journal_findfile(); + if (ino == (ino_t)-1 || ino == 0) { + warnx("Journal file does not exist"); + return; + } + printf("Clearing journal flags from inode %d\n", ino); + if (getino(&disk, &ip, ino, &mode) != 0) { + warn("Failed to get journal inode"); + return; + } + dp2 = ip; + dp1 = ip; + if (sblock.fs_magic == FS_UFS1_MAGIC) + dp1->di_flags = 0; + else + dp2->di_flags = 0; + if (putino(&disk) < 0) { + warn("Failed to write journal inode"); + return; + } +} + +int +journal_alloc(int64_t size) +{ + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; + ufs2_daddr_t blk; + void *ip; + struct cg *cgp; + int resid; + ino_t ino; + int blks; + int mode; + int i; + + cgp = &disk.d_cg; + ino = 0; + + /* + * If the journal file exists we can't allocate it. + */ + ino = journal_findfile(); + if (ino == (ino_t)-1) + return (-1); + if (ino > 0) { + warnx("Journal file %s already exists, please remove.", + SUJ_FILE); + return (-1); + } + /* + * If the user didn't supply a size pick one based on the filesystem + * size constrained with hardcoded MIN and MAX values. We opt for + * 1/1024th of the filesystem up to MAX but not exceeding one CG and + * not less than the MIN. + */ + if (size == 0) { + size = (sblock.fs_size * sblock.fs_bsize) / 1024; + size = MIN(SUJ_MAX, size); + if (size / sblock.fs_fsize > sblock.fs_fpg) + size = sblock.fs_fpg * sblock.fs_fsize; + size = MAX(SUJ_MIN, size); + } + resid = blocks = size / sblock.fs_bsize; + if (sblock.fs_cstotal.cs_nbfree < blocks) { + warn("Insufficient free space for %jd byte journal", size); + return (-1); + } + /* + * Find a cg with enough blocks to satisfy the journal + * size. Presently the journal does not span cgs. + */ + while (cgread(&disk) == 1) { + if (cgp->cg_cs.cs_nifree == 0) + continue; + ino = cgialloc(&disk); + if (ino <= 0) + break; + printf("Using inode %d in cg %d for %jd byte journal\n", + ino, cgp->cg_cgx, size); + if (getino(&disk, &ip, ino, &mode) != 0) { + warn("Failed to get allocated inode"); + sbdirty(); + goto out; + } + /* + * We leave fields unrelated to the number of allocated + * blocks and size uninitialized. This causes legacy + * fsck implementations to clear the inode. + */ + dp2 = ip; + dp1 = ip; + if (sblock.fs_magic == FS_UFS1_MAGIC) { + bzero(dp1, sizeof(*dp1)); + dp1->di_size = size; + dp1->di_mode = IFREG | IREAD; + dp1->di_nlink = 1; + dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; + } else { + bzero(dp2, sizeof(*dp2)); + dp2->di_size = size; + dp2->di_mode = IFREG | IREAD; + dp2->di_nlink = 1; + dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; + } + for (i = 0; i < NDADDR && resid; i++, resid--) { + blk = journal_balloc(); + if (blk <= 0) + goto out; + if (sblock.fs_magic == FS_UFS1_MAGIC) { + dp1->di_db[i] = blk; + dp1->di_blocks++; + } else { + dp2->di_db[i] = blk; + dp2->di_blocks++; + } + } + for (i = 0; i < NIADDR && resid; i++) { + blk = journal_balloc(); + if (blk <= 0) + goto out; + blks = indir_fill(blk, i, &resid) + 1; + if (blks <= 0) { + sbdirty(); + goto out; + } + if (sblock.fs_magic == FS_UFS1_MAGIC) { + dp1->di_ib[i] = blk; + dp1->di_blocks += blks; + } else { + dp2->di_ib[i] = blk; + dp2->di_blocks += blks; + } + } + if (sblock.fs_magic == FS_UFS1_MAGIC) + dp1->di_blocks *= sblock.fs_bsize / disk.d_bsize; + else + dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize; + if (putino(&disk) < 0) { + warn("Failed to write inode"); + sbdirty(); + return (-1); + } + if (cgwrite(&disk) < 0) { + warn("Failed to write updated cg"); + sbdirty(); + return (-1); + } + if (journal_insertfile(ino) < 0) { + sbdirty(); + return (-1); + } + sblock.fs_sujfree = 0; + return (0); + } + warnx("Insufficient free space for the journal."); +out: + return (-1); +} + +void usage(void) { fprintf(stderr, "%s\n%s\n%s\n%s\n", @@ -477,6 +1026,8 @@ printfs(void) (sblock.fs_flags & FS_MULTILABEL)? "enabled" : "disabled"); warnx("soft updates: (-n) %s", (sblock.fs_flags & FS_DOSOFTDEP)? "enabled" : "disabled"); + warnx("soft update journaling: (-j) %s", + (sblock.fs_flags & FS_SUJ)? "enabled" : "disabled"); warnx("gjournal: (-J) %s", (sblock.fs_flags & FS_GJOURNAL)? "enabled" : "disabled"); warnx("maximum blocks per file in a cylinder group: (-e) %d", |