diff options
-rw-r--r-- | sbin/dumpfs/dumpfs.c | 4 | ||||
-rw-r--r-- | sbin/newfs/mkfs.c | 6 | ||||
-rw-r--r-- | sbin/newfs/newfs.8 | 12 | ||||
-rw-r--r-- | sbin/newfs/newfs.c | 11 | ||||
-rw-r--r-- | sbin/newfs/newfs.h | 1 | ||||
-rw-r--r-- | sbin/tunefs/tunefs.8 | 14 | ||||
-rw-r--r-- | sbin/tunefs/tunefs.c | 41 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_alloc.c | 228 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_balloc.c | 12 | ||||
-rw-r--r-- | sys/ufs/ffs/fs.h | 5 |
10 files changed, 253 insertions, 81 deletions
diff --git a/sbin/dumpfs/dumpfs.c b/sbin/dumpfs/dumpfs.c index a41ca4d..6669d53 100644 --- a/sbin/dumpfs/dumpfs.c +++ b/sbin/dumpfs/dumpfs.c @@ -241,8 +241,8 @@ dumpfs(const char *name) afs.fs_sblkno, afs.fs_cblkno, afs.fs_iblkno, afs.fs_dblkno); printf("cgrotor\t%d\tfmod\t%d\tronly\t%d\tclean\t%d\n", afs.fs_cgrotor, afs.fs_fmod, afs.fs_ronly, afs.fs_clean); - printf("avgfpdir %d\tavgfilesize %d\n", - afs.fs_avgfpdir, afs.fs_avgfilesize); + printf("metaspace %jd\tavgfpdir %d\tavgfilesize %d\n", + afs.fs_metaspace, afs.fs_avgfpdir, afs.fs_avgfilesize); printf("flags\t"); if (afs.fs_old_flags & FS_FLAGS_UPDATED) fsflags = afs.fs_flags; diff --git a/sbin/newfs/mkfs.c b/sbin/newfs/mkfs.c index 0a547f9..a4cfa8d 100644 --- a/sbin/newfs/mkfs.c +++ b/sbin/newfs/mkfs.c @@ -444,6 +444,12 @@ restart: if (sblock.fs_sbsize > SBLOCKSIZE) sblock.fs_sbsize = SBLOCKSIZE; sblock.fs_minfree = minfree; + if (metaspace > 0 && metaspace < sblock.fs_fpg / 2) + sblock.fs_metaspace = blknum(&sblock, metaspace); + else if (metaspace != -1) + /* reserve half of minfree for metadata blocks */ + sblock.fs_metaspace = blknum(&sblock, + (sblock.fs_fpg * minfree) / 200); if (maxbpg == 0) sblock.fs_maxbpg = MAXBLKPG(sblock.fs_bsize); else diff --git a/sbin/newfs/newfs.8 b/sbin/newfs/newfs.8 index 7cfdfd1..f40b607 100644 --- a/sbin/newfs/newfs.8 +++ b/sbin/newfs/newfs.8 @@ -50,6 +50,7 @@ .Op Fl g Ar avgfilesize .Op Fl h Ar avgfpdir .Op Fl i Ar bytes +.Op Fl k Ar held-for-metadata-blocks .Op Fl m Ar free-space .Op Fl o Ar optimization .Op Fl p Ar partition @@ -163,6 +164,17 @@ This flag is implemented by running the .Xr tunefs 8 utility found in the user's .Dv $PATH . +.It Fl k Ar held-for-metadata-blocks +Set the amount of space to be held for metadata blocks in each cylinder group. +When set, the file system preference routines will try to save +the specified amount of space immediately following the inode blocks +in each cylinder group for use by metadata blocks. 
+Clustering the metadata blocks speeds up random file access +and decreases the running time of +.Xr fsck 8 . +By default +.Xr newfs 8 +sets it to half of the space reserved to minfree. .It Fl l Enable multilabel MAC on the new file system. .It Fl m Ar free-space diff --git a/sbin/newfs/newfs.c b/sbin/newfs/newfs.c index 3ce0be7..59b7e61 100644 --- a/sbin/newfs/newfs.c +++ b/sbin/newfs/newfs.c @@ -102,6 +102,7 @@ int bsize = 0; /* block size */ int maxbsize = 0; /* maximum clustering */ int maxblkspercg = MAXBLKSPERCG; /* maximum blocks per cylinder group */ int minfree = MINFREE; /* free space threshold */ +int metaspace; /* space held for metadata blocks */ int opt = DEFAULTOPT; /* optimization preference (space or time) */ int density; /* number of bytes per inode */ int maxcontig = 0; /* max contiguous blocks to allocate */ @@ -141,7 +142,7 @@ main(int argc, char *argv[]) part_name = 'c'; reserved = 0; while ((ch = getopt(argc, argv, - "EJL:NO:RS:T:UXa:b:c:d:e:f:g:h:i:jlm:no:p:r:s:t")) != -1) + "EJL:NO:RS:T:UXa:b:c:d:e:f:g:h:i:jk:lm:no:p:r:s:t")) != -1) switch (ch) { case 'E': Eflag = 1; @@ -248,6 +249,13 @@ main(int argc, char *argv[]) case 'l': lflag = 1; break; + case 'k': + if ((metaspace = atoi(optarg)) < 0) + errx(1, "%s: bad metadata space %%", optarg); + if (metaspace == 0) + /* force to stay zero in mkfs */ + metaspace = -1; + break; case 'm': if ((minfree = atoi(optarg)) < 0 || minfree > 99) errx(1, "%s: bad free space %%", optarg); @@ -501,6 +509,7 @@ usage() fprintf(stderr, "\t-h average files per directory\n"); fprintf(stderr, "\t-i number of bytes per inode\n"); fprintf(stderr, "\t-j enable soft updates journaling\n"); + fprintf(stderr, "\t-k space to hold for metadata blocks\n"); fprintf(stderr, "\t-l enable multilabel MAC\n"); fprintf(stderr, "\t-n do not create .snap directory\n"); fprintf(stderr, "\t-m minimum free space %%\n"); diff --git a/sbin/newfs/newfs.h b/sbin/newfs/newfs.h index 9e1da0d..72f4314 100644 --- a/sbin/newfs/newfs.h +++ 
b/sbin/newfs/newfs.h @@ -96,6 +96,7 @@ extern int bsize; /* block size */ extern int maxbsize; /* maximum clustering */ extern int maxblkspercg; /* maximum blocks per cylinder group */ extern int minfree; /* free space threshold */ +extern int metaspace; /* space held for metadata blocks */ extern int opt; /* optimization preference (space or time) */ extern int density; /* number of bytes per inode */ extern int maxcontig; /* max contiguous blocks to allocate */ diff --git a/sbin/tunefs/tunefs.8 b/sbin/tunefs/tunefs.8 index 5b522e5..a58c174 100644 --- a/sbin/tunefs/tunefs.8 +++ b/sbin/tunefs/tunefs.8 @@ -42,6 +42,7 @@ .Op Fl f Ar avgfilesize .Op Fl j Cm enable | disable .Op Fl J Cm enable | disable +.Op Fl k Ar held-for-metadata-blocks .Op Fl L Ar volname .Op Fl l Cm enable | disable .Op Fl m Ar minfree @@ -96,6 +97,19 @@ Specify the expected average file size. Turn on/off soft updates journaling. .It Fl J Cm enable | disable Turn on/off gjournal flag. +.It Fl k Ar held-for-metadata-blocks +Set the amount of space to be held for metadata blocks. +When set, the file system preference routines will try to save +the specified amount of space immediately following the inode blocks +in each cylinder group for use by metadata blocks. +Clustering the metadata blocks speeds up random file access +and decreases the running time of +.Xr fsck 8 . +While this option can be set at any time, +it is most effective if set before any data is loaded into the file system. +By default +.Xr newfs 8 +sets it to half of the space reserved to minfree. .It Fl L Ar volname Add/modify an optional file system volume label. 
.It Fl l Cm enable | disable diff --git a/sbin/tunefs/tunefs.c b/sbin/tunefs/tunefs.c index 39e08f7..0671d1d 100644 --- a/sbin/tunefs/tunefs.c +++ b/sbin/tunefs/tunefs.c @@ -89,10 +89,9 @@ main(int argc, char *argv[]) const char *special, *on; const char *name; int active; - int Aflag, aflag, eflag, evalue, fflag, fvalue, jflag, Jflag, Lflag; - int lflag, mflag, mvalue, Nflag, nflag, oflag, ovalue, pflag, sflag; - int tflag; - int svalue, Svalue; + int Aflag, aflag, eflag, evalue, fflag, fvalue, jflag, Jflag, kflag; + int kvalue, Lflag, lflag, mflag, mvalue, Nflag, nflag, oflag, ovalue; + int pflag, sflag, svalue, Svalue, tflag; int ch, found_arg, i; const char *chg[2]; struct ufs_args args; @@ -100,13 +99,13 @@ main(int argc, char *argv[]) if (argc < 3) usage(); - Aflag = aflag = eflag = fflag = jflag = Jflag = Lflag = lflag = 0; - mflag = Nflag = nflag = oflag = pflag = sflag = tflag = 0; + Aflag = aflag = eflag = fflag = jflag = Jflag = kflag = Lflag = 0; + lflag = mflag = Nflag = nflag = oflag = pflag = sflag = tflag = 0; avalue = jvalue = Jvalue = Lvalue = lvalue = Nvalue = nvalue = NULL; evalue = fvalue = mvalue = ovalue = svalue = Svalue = 0; active = 0; found_arg = 0; /* At least one arg is required. 
*/ - while ((ch = getopt(argc, argv, "Aa:e:f:j:J:L:l:m:N:n:o:ps:S:t:")) + while ((ch = getopt(argc, argv, "Aa:e:f:j:J:k:L:l:m:N:n:o:ps:S:t:")) != -1) switch (ch) { @@ -171,6 +170,14 @@ main(int argc, char *argv[]) Jflag = 1; break; + case 'k': + found_arg = 1; + name = "space to hold for metadata blocks"; + kvalue = atoi(optarg); + if (kvalue < 0) + errx(10, "bad %s (%s)", name, optarg); + kflag = 1; + break; case 'L': found_arg = 1; @@ -404,6 +411,22 @@ main(int argc, char *argv[]) } } } + if (kflag) { + name = "space to hold for metadata blocks"; + if (sblock.fs_metaspace == kvalue) + warnx("%s remains unchanged as %d", name, kvalue); + else { + kvalue = blknum(&sblock, kvalue); + if (kvalue > sblock.fs_fpg / 2) { + kvalue = blknum(&sblock, sblock.fs_fpg / 2); + warnx("%s cannot exceed half the file system " + "space", name); + } + warnx("%s changes from %jd to %d", + name, sblock.fs_metaspace, kvalue); + sblock.fs_metaspace = kvalue; + } + } if (lflag) { name = "multilabel"; if (strcmp(lvalue, "enable") == 0) { @@ -1064,7 +1087,7 @@ usage(void) { fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n", "usage: tunefs [-A] [-a enable | disable] [-e maxbpg] [-f avgfilesize]", -" [-J enable | disable] [-j enable | disable]", +" [-J enable | disable] [-j enable | disable] [-k metaspace]", " [-L volname] [-l enable | disable] [-m minfree]", " [-N enable | disable] [-n enable | disable]", " [-o space | time] [-p] [-s avgfpdir] [-t enable | disable]", @@ -1097,6 +1120,8 @@ printfs(void) sblock.fs_avgfpdir); warnx("minimum percentage of free space: (-m) %d%%", sblock.fs_minfree); + warnx("space to hold for metadata blocks: (-k) %jd", + sblock.fs_metaspace); warnx("optimization preference: (-o) %s", sblock.fs_optim == FS_OPTSPACE ? 
"space" : "time"); if (sblock.fs_minfree >= MINFREE && diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index ab21b89..d7db636 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -817,15 +817,6 @@ ffs_reallocblks_ufs2(ap) UFS_LOCK(ump); pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap); /* - * Skip a block for the first indirect block. Indirect blocks are - * usually initially laid out in a good position between the data - * blocks, but block reallocation would usually destroy locality by - * moving them out of the way to make room for data blocks if we - * didn't compensate here. - */ - if (start_lbn == NDADDR) - pref += fs->fs_frag; - /* * Search the block map looking for an allocation of the desired size. */ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, @@ -1090,7 +1081,7 @@ ffs_dirpref(pip) struct inode *pip; { struct fs *fs; - u_int cg, prefcg, dirsize, cgsize; + int cg, prefcg, dirsize, cgsize; u_int avgifree, avgbfree, avgndir, curdirsize; u_int minifree, minbfree, maxndir; u_int mincg, minndir; @@ -1158,6 +1149,22 @@ ffs_dirpref(pip) * Limit number of dirs in one cg and reserve space for * regular files, but only if we have no deficit in * inodes or space. + * + * We are trying to find a suitable cylinder group nearby + * our preferred cylinder group to place a new directory. + * We scan from our preferred cylinder group forward looking + * for a cylinder group that meets our criterion. If we get + * to the final cylinder group and do not find anything, + * we start scanning backwards from our preferred cylinder + * group. The ideal would be to alternate looking forward + * and backward, but that is just too complex to code for + * the gain it would get. The most likely place where the + * backward scan would take effect is when we start near + * the end of the filesystem and do not find anything from + * where we are to the end. 
In that case, scanning backward + * will likely find us a suitable cylinder group much closer + * to our desired location than if we were to start scanning + * forward from the beginning of the filesystem. */ prefcg = ino_to_cg(fs, pip->i_number); for (cg = prefcg; cg < fs->fs_ncg; cg++) @@ -1167,7 +1174,7 @@ ffs_dirpref(pip) if (fs->fs_contigdirs[cg] < maxcontigdirs) return ((ino_t)(fs->fs_ipg * cg)); } - for (cg = 0; cg < prefcg; cg++) + for (cg = prefcg - 1; cg >= 0; cg--) if (fs->fs_cs(fs, cg).cs_ndir < maxndir && fs->fs_cs(fs, cg).cs_nifree >= minifree && fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { @@ -1180,7 +1187,7 @@ ffs_dirpref(pip) for (cg = prefcg; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) return ((ino_t)(fs->fs_ipg * cg)); - for (cg = 0; cg < prefcg; cg++) + for (cg = prefcg - 1; cg >= 0; cg--) if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) break; return ((ino_t)(fs->fs_ipg * cg)); @@ -1193,9 +1200,15 @@ ffs_dirpref(pip) * * If no blocks have been allocated in the first section, the policy is to * request a block in the same cylinder group as the inode that describes - * the file. If no blocks have been allocated in any other section, the - * policy is to place the section in a cylinder group with a greater than - * average number of free blocks. An appropriate cylinder group is found + * the file. The first indirect is allocated immediately following the last + * direct block and the data blocks for the first indirect immediately + * follow it. + * + * If no blocks have been allocated in any other section, the indirect + * block(s) are allocated in the same cylinder group as its inode in an + * area reserved immediately following the inode blocks. The policy for + * the data blocks is to place them in a cylinder group with a greater than + * average number of free blocks. An appropriate cylinder group is found * by using a rotor that sweeps the cylinder groups. 
When a new group of * blocks is needed, the sweep begins in the cylinder group following the * cylinder group from which the previous allocation was made. The sweep @@ -1218,39 +1231,78 @@ ffs_blkpref_ufs1(ip, lbn, indx, bap) ufs1_daddr_t *bap; { struct fs *fs; - u_int cg; + u_int cg, inocg; u_int avgbfree, startcg; ufs2_daddr_t pref; + KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap")); mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED); fs = ip->i_fs; /* - * If we are allocating the first indirect block, try to place it - * immediately following the last direct block. - * + * Allocation of indirect blocks is indicated by passing negative + * values in indx: -1 for single indirect, -2 for double indirect, + * -3 for triple indirect. As noted below, we attempt to allocate + * the first indirect inline with the file data. For all later + * indirect blocks, the data is often allocated in other cylinder + * groups. However to speed random file access and to speed up + * fsck, the filesystem reserves the first fs_metaspace blocks + * (typically half of fs_minfree) of the data area of each cylinder + * group to hold these later indirect blocks. + */ + inocg = ino_to_cg(fs, ip->i_number); + if (indx < 0) { + /* + * Our preference for indirect blocks is the zone at the + * beginning of the inode's cylinder group data area that + * we try to reserve for indirect blocks. + */ + pref = cgmeta(fs, inocg); + /* + * If we are allocating the first indirect block, try to + * place it immediately following the last direct block. + */ + if (indx == -1 && lbn < NDADDR + NINDIR(fs) && + ip->i_din1->di_db[NDADDR - 1] != 0) + pref = ip->i_din1->di_db[NDADDR - 1] + fs->fs_frag; + return (pref); + } + /* * If we are allocating the first data block in the first indirect - * block, try to place it immediately following the indirect block. + * block and the indirect has been allocated in the data block area, + * try to place it immediately following the indirect block. 
*/ if (lbn == NDADDR) { - pref = ip->i_din1->di_db[NDADDR - 1]; - if (bap == NULL && pref != 0) - return (pref + fs->fs_frag); pref = ip->i_din1->di_ib[0]; - if (pref != 0) + if (pref != 0 && pref >= cgdata(fs, inocg) && + pref < cgbase(fs, inocg + 1)) return (pref + fs->fs_frag); } + /* + * If we are at the beginning of a file, or we have already allocated + * the maximum number of blocks per cylinder group, or we do not + * have a block allocated immediately preceeding us, then we need + * to decide where to start allocating new blocks. + */ if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { - if (lbn < NDADDR + NINDIR(fs)) { - cg = ino_to_cg(fs, ip->i_number); - return (cgbase(fs, cg) + fs->fs_frag); - } + /* + * If we are allocating a directory data block, we want + * to place it in the metadata area. + */ + if ((ip->i_mode & IFMT) == IFDIR) + return (cgmeta(fs, inocg)); + /* + * Until we fill all the direct and all the first indirect's + * blocks, we try to allocate in the data area of the inode's + * cylinder group. + */ + if (lbn < NDADDR + NINDIR(fs)) + return (cgdata(fs, inocg)); /* * Find a cylinder with greater than average number of * unused data blocks. */ if (indx == 0 || bap[indx - 1] == 0) - startcg = - ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; + startcg = inocg + lbn / fs->fs_maxbpg; else startcg = dtog(fs, bap[indx - 1]) + 1; startcg %= fs->fs_ncg; @@ -1258,17 +1310,17 @@ ffs_blkpref_ufs1(ip, lbn, indx, bap) for (cg = startcg; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; - return (cgbase(fs, cg) + fs->fs_frag); + return (cgdata(fs, cg)); } for (cg = 0; cg <= startcg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; - return (cgbase(fs, cg) + fs->fs_frag); + return (cgdata(fs, cg)); } return (0); } /* - * We just always try to lay things out contiguously. + * Otherwise, we just always try to lay things out contiguously. 
*/ return (bap[indx - 1] + fs->fs_frag); } @@ -1284,39 +1336,78 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap) ufs2_daddr_t *bap; { struct fs *fs; - u_int cg; + u_int cg, inocg; u_int avgbfree, startcg; ufs2_daddr_t pref; + KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap")); mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED); fs = ip->i_fs; /* - * If we are allocating the first indirect block, try to place it - * immediately following the last direct block. - * + * Allocation of indirect blocks is indicated by passing negative + * values in indx: -1 for single indirect, -2 for double indirect, + * -3 for triple indirect. As noted below, we attempt to allocate + * the first indirect inline with the file data. For all later + * indirect blocks, the data is often allocated in other cylinder + * groups. However to speed random file access and to speed up + * fsck, the filesystem reserves the first fs_metaspace blocks + * (typically half of fs_minfree) of the data area of each cylinder + * group to hold these later indirect blocks. + */ + inocg = ino_to_cg(fs, ip->i_number); + if (indx < 0) { + /* + * Our preference for indirect blocks is the zone at the + * beginning of the inode's cylinder group data area that + * we try to reserve for indirect blocks. + */ + pref = cgmeta(fs, inocg); + /* + * If we are allocating the first indirect block, try to + * place it immediately following the last direct block. + */ + if (indx == -1 && lbn < NDADDR + NINDIR(fs) && + ip->i_din2->di_db[NDADDR - 1] != 0) + pref = ip->i_din2->di_db[NDADDR - 1] + fs->fs_frag; + return (pref); + } + /* * If we are allocating the first data block in the first indirect - * block, try to place it immediately following the indirect block. + * block and the indirect has been allocated in the data block area, + * try to place it immediately following the indirect block. 
*/ if (lbn == NDADDR) { - pref = ip->i_din1->di_db[NDADDR - 1]; - if (bap == NULL && pref != 0) - return (pref + fs->fs_frag); - pref = ip->i_din1->di_ib[0]; - if (pref != 0) + pref = ip->i_din2->di_ib[0]; + if (pref != 0 && pref >= cgdata(fs, inocg) && + pref < cgbase(fs, inocg + 1)) return (pref + fs->fs_frag); } + /* + * If we are at the beginning of a file, or we have already allocated + * the maximum number of blocks per cylinder group, or we do not + * have a block allocated immediately preceeding us, then we need + * to decide where to start allocating new blocks. + */ if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { - if (lbn < NDADDR + NINDIR(fs)) { - cg = ino_to_cg(fs, ip->i_number); - return (cgbase(fs, cg) + fs->fs_frag); - } + /* + * If we are allocating a directory data block, we want + * to place it in the metadata area. + */ + if ((ip->i_mode & IFMT) == IFDIR) + return (cgmeta(fs, inocg)); + /* + * Until we fill all the direct and all the first indirect's + * blocks, we try to allocate in the data area of the inode's + * cylinder group. + */ + if (lbn < NDADDR + NINDIR(fs)) + return (cgdata(fs, inocg)); /* * Find a cylinder with greater than average number of * unused data blocks. */ if (indx == 0 || bap[indx - 1] == 0) - startcg = - ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; + startcg = inocg + lbn / fs->fs_maxbpg; else startcg = dtog(fs, bap[indx - 1]) + 1; startcg %= fs->fs_ncg; @@ -1324,17 +1415,17 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap) for (cg = startcg; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; - return (cgbase(fs, cg) + fs->fs_frag); + return (cgdata(fs, cg)); } for (cg = 0; cg <= startcg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; - return (cgbase(fs, cg) + fs->fs_frag); + return (cgdata(fs, cg)); } return (0); } /* - * We just always try to lay things out contiguously. + * Otherwise, we just always try to lay things out contiguously. 
*/ return (bap[indx - 1] + fs->fs_frag); } @@ -1611,31 +1702,37 @@ ffs_alloccgblk(ip, bp, bpref, size) ufs1_daddr_t bno; ufs2_daddr_t blkno; u_int8_t *blksfree; - int i; + int i, cgbpref; fs = ip->i_fs; ump = ip->i_ump; mtx_assert(UFS_MTX(ump), MA_OWNED); cgp = (struct cg *)bp->b_data; blksfree = cg_blksfree(cgp); - if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { + if (bpref == 0) { bpref = cgp->cg_rotor; - } else { - bpref = blknum(fs, bpref); - bno = dtogd(fs, bpref); - /* - * if the requested block is available, use it - */ - if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) - goto gotit; + } else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) { + /* map bpref to correct zone in this cg */ + if (bpref < cgdata(fs, cgbpref)) + bpref = cgmeta(fs, cgp->cg_cgx); + else + bpref = cgdata(fs, cgp->cg_cgx); } /* + * if the requested block is available, use it + */ + bno = dtogd(fs, blknum(fs, bpref)); + if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) + goto gotit; + /* * Take the next available block in this cylinder group. */ bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); if (bno < 0) return (0); - cgp->cg_rotor = bno; + /* Update cg_rotor only if allocated from the data zone */ + if (bno >= dtogd(fs, cgdata(fs, cgp->cg_cgx))) + cgp->cg_rotor = bno; gotit: blkno = fragstoblks(fs, bno); ffs_clrblock(fs, blksfree, (long)blkno); @@ -1742,9 +1839,10 @@ ffs_clusteralloc(ip, cg, bpref, len, unused) * be recalled to try an allocation in the next cylinder group. 
*/ if (dtog(fs, bpref) != cg) - bpref = 0; + bpref = cgdata(fs, cg); else - bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); + bpref = blknum(fs, bpref); + bpref = fragstoblks(fs, dtogd(fs, bpref)); mapp = &cg_clustersfree(cgp)[bpref / NBBY]; map = *mapp++; bit = 1 << (bpref % NBBY); diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index d20df77..771f8eb 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -246,7 +246,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, lbns_remfree = lbns; if (nb == 0) { UFS_LOCK(ump); - pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); + pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1, + (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb)) != 0) { curthread_pflags_restore(saved_inbdflush); @@ -299,7 +300,8 @@ retry: } UFS_LOCK(ump); if (pref == 0) - pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); + pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1, + (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); @@ -794,7 +796,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, lbns_remfree = lbns; if (nb == 0) { UFS_LOCK(ump); - pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); + pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1, + (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb)) != 0) { curthread_pflags_restore(saved_inbdflush); @@ -848,7 +851,8 @@ retry: } UFS_LOCK(ump); if (pref == 0) - pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); + pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1, + (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h index b62a808..b154a80 100644 --- a/sys/ufs/ffs/fs.h +++ b/sys/ufs/ffs/fs.h @@ -333,7 +333,8 @@ 
struct fs { int32_t fs_maxbsize; /* maximum blocking factor permitted */ int64_t fs_unrefs; /* number of unreferenced inodes */ int64_t fs_providersize; /* size of underlying GEOM provider */ - int64_t fs_sparecon64[15]; /* old rotation block list head */ + int64_t fs_metaspace; /* size of area reserved for metadata */ + int64_t fs_sparecon64[14]; /* old rotation block list head */ int64_t fs_sblockloc; /* byte offset of standard superblock */ struct csum_total fs_cstotal; /* (u) cylinder summary information */ ufs_time_t fs_time; /* last time written */ @@ -525,6 +526,8 @@ struct cg { * They calc filesystem addresses of cylinder group data structures. */ #define cgbase(fs, c) (((ufs2_daddr_t)(fs)->fs_fpg) * (c)) +#define cgdata(fs, c) (cgdmin(fs, c) + (fs)->fs_metaspace) /* data zone */ +#define cgmeta(fs, c) (cgdmin(fs, c)) /* meta data */ #define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ #define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ #define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ |