summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- sbin/dumpfs/dumpfs.c 4
-rw-r--r-- sbin/newfs/mkfs.c 6
-rw-r--r-- sbin/newfs/newfs.8 12
-rw-r--r-- sbin/newfs/newfs.c 11
-rw-r--r-- sbin/newfs/newfs.h 1
-rw-r--r-- sbin/tunefs/tunefs.8 14
-rw-r--r-- sbin/tunefs/tunefs.c 41
-rw-r--r-- sys/ufs/ffs/ffs_alloc.c 228
-rw-r--r-- sys/ufs/ffs/ffs_balloc.c 12
-rw-r--r-- sys/ufs/ffs/fs.h 5
10 files changed, 253 insertions, 81 deletions
diff --git a/sbin/dumpfs/dumpfs.c b/sbin/dumpfs/dumpfs.c
index a41ca4d..6669d53 100644
--- a/sbin/dumpfs/dumpfs.c
+++ b/sbin/dumpfs/dumpfs.c
@@ -241,8 +241,8 @@ dumpfs(const char *name)
afs.fs_sblkno, afs.fs_cblkno, afs.fs_iblkno, afs.fs_dblkno);
printf("cgrotor\t%d\tfmod\t%d\tronly\t%d\tclean\t%d\n",
afs.fs_cgrotor, afs.fs_fmod, afs.fs_ronly, afs.fs_clean);
- printf("avgfpdir %d\tavgfilesize %d\n",
- afs.fs_avgfpdir, afs.fs_avgfilesize);
+ printf("metaspace %jd\tavgfpdir %d\tavgfilesize %d\n",
+ afs.fs_metaspace, afs.fs_avgfpdir, afs.fs_avgfilesize);
printf("flags\t");
if (afs.fs_old_flags & FS_FLAGS_UPDATED)
fsflags = afs.fs_flags;
diff --git a/sbin/newfs/mkfs.c b/sbin/newfs/mkfs.c
index 0a547f9..a4cfa8d 100644
--- a/sbin/newfs/mkfs.c
+++ b/sbin/newfs/mkfs.c
@@ -444,6 +444,12 @@ restart:
if (sblock.fs_sbsize > SBLOCKSIZE)
sblock.fs_sbsize = SBLOCKSIZE;
sblock.fs_minfree = minfree;
+ if (metaspace > 0 && metaspace < sblock.fs_fpg / 2)
+ sblock.fs_metaspace = blknum(&sblock, metaspace);
+ else if (metaspace != -1)
+ /* reserve half of minfree for metadata blocks */
+ sblock.fs_metaspace = blknum(&sblock,
+ (sblock.fs_fpg * minfree) / 200);
if (maxbpg == 0)
sblock.fs_maxbpg = MAXBLKPG(sblock.fs_bsize);
else
diff --git a/sbin/newfs/newfs.8 b/sbin/newfs/newfs.8
index 7cfdfd1..f40b607 100644
--- a/sbin/newfs/newfs.8
+++ b/sbin/newfs/newfs.8
@@ -50,6 +50,7 @@
.Op Fl g Ar avgfilesize
.Op Fl h Ar avgfpdir
.Op Fl i Ar bytes
+.Op Fl k Ar held-for-metadata-blocks
.Op Fl m Ar free-space
.Op Fl o Ar optimization
.Op Fl p Ar partition
@@ -163,6 +164,17 @@ This flag is implemented by running the
.Xr tunefs 8
utility found in the user's
.Dv $PATH .
+.It Fl k Ar held-for-metadata-blocks
+Set the amount of space to be held for metadata blocks in each cylinder group.
+When set, the file system preference routines will try to save
+the specified amount of space immediately following the inode blocks
+in each cylinder group for use by metadata blocks.
+Clustering the metadata blocks speeds up random file access
+and decreases the running time of
+.Xr fsck 8 .
+By default
+.Xr newfs 8
+sets it to half of the space reserved for minfree.
.It Fl l
Enable multilabel MAC on the new file system.
.It Fl m Ar free-space
diff --git a/sbin/newfs/newfs.c b/sbin/newfs/newfs.c
index 3ce0be7..59b7e61 100644
--- a/sbin/newfs/newfs.c
+++ b/sbin/newfs/newfs.c
@@ -102,6 +102,7 @@ int bsize = 0; /* block size */
int maxbsize = 0; /* maximum clustering */
int maxblkspercg = MAXBLKSPERCG; /* maximum blocks per cylinder group */
int minfree = MINFREE; /* free space threshold */
+int metaspace; /* space held for metadata blocks */
int opt = DEFAULTOPT; /* optimization preference (space or time) */
int density; /* number of bytes per inode */
int maxcontig = 0; /* max contiguous blocks to allocate */
@@ -141,7 +142,7 @@ main(int argc, char *argv[])
part_name = 'c';
reserved = 0;
while ((ch = getopt(argc, argv,
- "EJL:NO:RS:T:UXa:b:c:d:e:f:g:h:i:jlm:no:p:r:s:t")) != -1)
+ "EJL:NO:RS:T:UXa:b:c:d:e:f:g:h:i:jk:lm:no:p:r:s:t")) != -1)
switch (ch) {
case 'E':
Eflag = 1;
@@ -248,6 +249,13 @@ main(int argc, char *argv[])
case 'l':
lflag = 1;
break;
+ case 'k':
+ if ((metaspace = atoi(optarg)) < 0)
+ errx(1, "%s: bad metadata space %%", optarg);
+ if (metaspace == 0)
+ /* force to stay zero in mkfs */
+ metaspace = -1;
+ break;
case 'm':
if ((minfree = atoi(optarg)) < 0 || minfree > 99)
errx(1, "%s: bad free space %%", optarg);
@@ -501,6 +509,7 @@ usage()
fprintf(stderr, "\t-h average files per directory\n");
fprintf(stderr, "\t-i number of bytes per inode\n");
fprintf(stderr, "\t-j enable soft updates journaling\n");
+ fprintf(stderr, "\t-k space to hold for metadata blocks\n");
fprintf(stderr, "\t-l enable multilabel MAC\n");
fprintf(stderr, "\t-n do not create .snap directory\n");
fprintf(stderr, "\t-m minimum free space %%\n");
diff --git a/sbin/newfs/newfs.h b/sbin/newfs/newfs.h
index 9e1da0d..72f4314 100644
--- a/sbin/newfs/newfs.h
+++ b/sbin/newfs/newfs.h
@@ -96,6 +96,7 @@ extern int bsize; /* block size */
extern int maxbsize; /* maximum clustering */
extern int maxblkspercg; /* maximum blocks per cylinder group */
extern int minfree; /* free space threshold */
+extern int metaspace; /* space held for metadata blocks */
extern int opt; /* optimization preference (space or time) */
extern int density; /* number of bytes per inode */
extern int maxcontig; /* max contiguous blocks to allocate */
diff --git a/sbin/tunefs/tunefs.8 b/sbin/tunefs/tunefs.8
index 5b522e5..a58c174 100644
--- a/sbin/tunefs/tunefs.8
+++ b/sbin/tunefs/tunefs.8
@@ -42,6 +42,7 @@
.Op Fl f Ar avgfilesize
.Op Fl j Cm enable | disable
.Op Fl J Cm enable | disable
+.Op Fl k Ar held-for-metadata-blocks
.Op Fl L Ar volname
.Op Fl l Cm enable | disable
.Op Fl m Ar minfree
@@ -96,6 +97,19 @@ Specify the expected average file size.
Turn on/off soft updates journaling.
.It Fl J Cm enable | disable
Turn on/off gjournal flag.
+.It Fl k Ar held-for-metadata-blocks
+Set the amount of space to be held for metadata blocks.
+When set, the file system preference routines will try to save
+the specified amount of space immediately following the inode blocks
+in each cylinder group for use by metadata blocks.
+Clustering the metadata blocks speeds up random file access
+and decreases the running time of
+.Xr fsck 8 .
+While this option can be set at any time,
+it is most effective if set before any data is loaded into the file system.
+By default
+.Xr newfs 8
+sets it to half of the space reserved for minfree.
.It Fl L Ar volname
Add/modify an optional file system volume label.
.It Fl l Cm enable | disable
diff --git a/sbin/tunefs/tunefs.c b/sbin/tunefs/tunefs.c
index 39e08f7..0671d1d 100644
--- a/sbin/tunefs/tunefs.c
+++ b/sbin/tunefs/tunefs.c
@@ -89,10 +89,9 @@ main(int argc, char *argv[])
const char *special, *on;
const char *name;
int active;
- int Aflag, aflag, eflag, evalue, fflag, fvalue, jflag, Jflag, Lflag;
- int lflag, mflag, mvalue, Nflag, nflag, oflag, ovalue, pflag, sflag;
- int tflag;
- int svalue, Svalue;
+ int Aflag, aflag, eflag, evalue, fflag, fvalue, jflag, Jflag, kflag;
+ int kvalue, Lflag, lflag, mflag, mvalue, Nflag, nflag, oflag, ovalue;
+ int pflag, sflag, svalue, Svalue, tflag;
int ch, found_arg, i;
const char *chg[2];
struct ufs_args args;
@@ -100,13 +99,13 @@ main(int argc, char *argv[])
if (argc < 3)
usage();
- Aflag = aflag = eflag = fflag = jflag = Jflag = Lflag = lflag = 0;
- mflag = Nflag = nflag = oflag = pflag = sflag = tflag = 0;
+ Aflag = aflag = eflag = fflag = jflag = Jflag = kflag = Lflag = 0;
+ lflag = mflag = Nflag = nflag = oflag = pflag = sflag = tflag = 0;
avalue = jvalue = Jvalue = Lvalue = lvalue = Nvalue = nvalue = NULL;
evalue = fvalue = mvalue = ovalue = svalue = Svalue = 0;
active = 0;
found_arg = 0; /* At least one arg is required. */
- while ((ch = getopt(argc, argv, "Aa:e:f:j:J:L:l:m:N:n:o:ps:S:t:"))
+ while ((ch = getopt(argc, argv, "Aa:e:f:j:J:k:L:l:m:N:n:o:ps:S:t:"))
!= -1)
switch (ch) {
@@ -171,6 +170,14 @@ main(int argc, char *argv[])
Jflag = 1;
break;
+ case 'k':
+ found_arg = 1;
+ name = "space to hold for metadata blocks";
+ kvalue = atoi(optarg);
+ /*
+  * Range-check kvalue, the value just parsed for -k.
+  * Testing mvalue here would never reject a negative -k
+  * argument, since mvalue is unrelated to this option.
+  */
+ if (kvalue < 0)
+ errx(10, "bad %s (%s)", name, optarg);
+ kflag = 1;
+ break;
case 'L':
found_arg = 1;
@@ -404,6 +411,22 @@ main(int argc, char *argv[])
}
}
}
+ if (kflag) {
+ name = "space to hold for metadata blocks";
+ if (sblock.fs_metaspace == kvalue)
+ warnx("%s remains unchanged as %d", name, kvalue);
+ else {
+ kvalue = blknum(&sblock, kvalue);
+ if (kvalue > sblock.fs_fpg / 2) {
+ kvalue = blknum(&sblock, sblock.fs_fpg / 2);
+ warnx("%s cannot exceed half the file system "
+ "space", name);
+ }
+ warnx("%s changes from %jd to %d",
+ name, sblock.fs_metaspace, kvalue);
+ sblock.fs_metaspace = kvalue;
+ }
+ }
if (lflag) {
name = "multilabel";
if (strcmp(lvalue, "enable") == 0) {
@@ -1064,7 +1087,7 @@ usage(void)
{
fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n",
"usage: tunefs [-A] [-a enable | disable] [-e maxbpg] [-f avgfilesize]",
-" [-J enable | disable] [-j enable | disable]",
+" [-J enable | disable] [-j enable | disable] [-k metaspace]",
" [-L volname] [-l enable | disable] [-m minfree]",
" [-N enable | disable] [-n enable | disable]",
" [-o space | time] [-p] [-s avgfpdir] [-t enable | disable]",
@@ -1097,6 +1120,8 @@ printfs(void)
sblock.fs_avgfpdir);
warnx("minimum percentage of free space: (-m) %d%%",
sblock.fs_minfree);
+ warnx("space to hold for metadata blocks: (-k) %jd",
+ sblock.fs_metaspace);
warnx("optimization preference: (-o) %s",
sblock.fs_optim == FS_OPTSPACE ? "space" : "time");
if (sblock.fs_minfree >= MINFREE &&
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index ab21b89..d7db636 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -817,15 +817,6 @@ ffs_reallocblks_ufs2(ap)
UFS_LOCK(ump);
pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
/*
- * Skip a block for the first indirect block. Indirect blocks are
- * usually initially laid out in a good position between the data
- * blocks, but block reallocation would usually destroy locality by
- * moving them out of the way to make room for data blocks if we
- * didn't compensate here.
- */
- if (start_lbn == NDADDR)
- pref += fs->fs_frag;
- /*
* Search the block map looking for an allocation of the desired size.
*/
if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
@@ -1090,7 +1081,7 @@ ffs_dirpref(pip)
struct inode *pip;
{
struct fs *fs;
- u_int cg, prefcg, dirsize, cgsize;
+ int cg, prefcg, dirsize, cgsize;
u_int avgifree, avgbfree, avgndir, curdirsize;
u_int minifree, minbfree, maxndir;
u_int mincg, minndir;
@@ -1158,6 +1149,22 @@ ffs_dirpref(pip)
* Limit number of dirs in one cg and reserve space for
* regular files, but only if we have no deficit in
* inodes or space.
+ *
+ * We are trying to find a suitable cylinder group nearby
+ * our preferred cylinder group to place a new directory.
+ * We scan from our preferred cylinder group forward looking
+ * for a cylinder group that meets our criterion. If we get
+ * to the final cylinder group and do not find anything,
+ * we start scanning backwards from our preferred cylinder
+ * group. The ideal would be to alternate looking forward
+ * and backward, but that is just too complex to code for
+ * the gain it would get. The most likely place where the
+ * backward scan would take effect is when we start near
+ * the end of the filesystem and do not find anything from
+ * where we are to the end. In that case, scanning backward
+ * will likely find us a suitable cylinder group much closer
+ * to our desired location than if we were to start scanning
+ * forward from the beginning of the filesystem.
*/
prefcg = ino_to_cg(fs, pip->i_number);
for (cg = prefcg; cg < fs->fs_ncg; cg++)
@@ -1167,7 +1174,7 @@ ffs_dirpref(pip)
if (fs->fs_contigdirs[cg] < maxcontigdirs)
return ((ino_t)(fs->fs_ipg * cg));
}
- for (cg = 0; cg < prefcg; cg++)
+ for (cg = prefcg - 1; cg >= 0; cg--)
if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
fs->fs_cs(fs, cg).cs_nifree >= minifree &&
fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
@@ -1180,7 +1187,7 @@ ffs_dirpref(pip)
for (cg = prefcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
return ((ino_t)(fs->fs_ipg * cg));
- for (cg = 0; cg < prefcg; cg++)
+ for (cg = prefcg - 1; cg >= 0; cg--)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
break;
return ((ino_t)(fs->fs_ipg * cg));
@@ -1193,9 +1200,15 @@ ffs_dirpref(pip)
*
* If no blocks have been allocated in the first section, the policy is to
* request a block in the same cylinder group as the inode that describes
- * the file. If no blocks have been allocated in any other section, the
- * policy is to place the section in a cylinder group with a greater than
- * average number of free blocks. An appropriate cylinder group is found
+ * the file. The first indirect is allocated immediately following the last
+ * direct block and the data blocks for the first indirect immediately
+ * follow it.
+ *
+ * If no blocks have been allocated in any other section, the indirect
+ * block(s) are allocated in the same cylinder group as its inode in an
+ * area reserved immediately following the inode blocks. The policy for
+ * the data blocks is to place them in a cylinder group with a greater than
+ * average number of free blocks. An appropriate cylinder group is found
* by using a rotor that sweeps the cylinder groups. When a new group of
* blocks is needed, the sweep begins in the cylinder group following the
* cylinder group from which the previous allocation was made. The sweep
@@ -1218,39 +1231,78 @@ ffs_blkpref_ufs1(ip, lbn, indx, bap)
ufs1_daddr_t *bap;
{
struct fs *fs;
- u_int cg;
+ u_int cg, inocg;
u_int avgbfree, startcg;
ufs2_daddr_t pref;
+ KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap"));
mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
fs = ip->i_fs;
/*
- * If we are allocating the first indirect block, try to place it
- * immediately following the last direct block.
- *
+ * Allocation of indirect blocks is indicated by passing negative
+ * values in indx: -1 for single indirect, -2 for double indirect,
+ * -3 for triple indirect. As noted below, we attempt to allocate
+ * the first indirect inline with the file data. For all later
+ * indirect blocks, the data is often allocated in other cylinder
+ * groups. However to speed random file access and to speed up
+ * fsck, the filesystem reserves the first fs_metaspace blocks
+ * (typically half of fs_minfree) of the data area of each cylinder
+ * group to hold these later indirect blocks.
+ */
+ inocg = ino_to_cg(fs, ip->i_number);
+ if (indx < 0) {
+ /*
+ * Our preference for indirect blocks is the zone at the
+ * beginning of the inode's cylinder group data area that
+ * we try to reserve for indirect blocks.
+ */
+ pref = cgmeta(fs, inocg);
+ /*
+ * If we are allocating the first indirect block, try to
+ * place it immediately following the last direct block.
+ */
+ if (indx == -1 && lbn < NDADDR + NINDIR(fs) &&
+ ip->i_din1->di_db[NDADDR - 1] != 0)
+ pref = ip->i_din1->di_db[NDADDR - 1] + fs->fs_frag;
+ return (pref);
+ }
+ /*
* If we are allocating the first data block in the first indirect
- * block, try to place it immediately following the indirect block.
+ * block and the indirect has been allocated in the data block area,
+ * try to place it immediately following the indirect block.
*/
if (lbn == NDADDR) {
- pref = ip->i_din1->di_db[NDADDR - 1];
- if (bap == NULL && pref != 0)
- return (pref + fs->fs_frag);
pref = ip->i_din1->di_ib[0];
- if (pref != 0)
+ if (pref != 0 && pref >= cgdata(fs, inocg) &&
+ pref < cgbase(fs, inocg + 1))
return (pref + fs->fs_frag);
}
+ /*
+ * If we are at the beginning of a file, or we have already allocated
+ * the maximum number of blocks per cylinder group, or we do not
+ * have a block allocated immediately preceding us, then we need
+ * to decide where to start allocating new blocks.
+ */
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
- if (lbn < NDADDR + NINDIR(fs)) {
- cg = ino_to_cg(fs, ip->i_number);
- return (cgbase(fs, cg) + fs->fs_frag);
- }
+ /*
+ * If we are allocating a directory data block, we want
+ * to place it in the metadata area.
+ */
+ if ((ip->i_mode & IFMT) == IFDIR)
+ return (cgmeta(fs, inocg));
+ /*
+ * Until we fill all the direct and all the first indirect's
+ * blocks, we try to allocate in the data area of the inode's
+ * cylinder group.
+ */
+ if (lbn < NDADDR + NINDIR(fs))
+ return (cgdata(fs, inocg));
/*
* Find a cylinder with greater than average number of
* unused data blocks.
*/
if (indx == 0 || bap[indx - 1] == 0)
- startcg =
- ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
+ startcg = inocg + lbn / fs->fs_maxbpg;
else
startcg = dtog(fs, bap[indx - 1]) + 1;
startcg %= fs->fs_ncg;
@@ -1258,17 +1310,17 @@ ffs_blkpref_ufs1(ip, lbn, indx, bap)
for (cg = startcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
for (cg = 0; cg <= startcg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
return (0);
}
/*
- * We just always try to lay things out contiguously.
+ * Otherwise, we just always try to lay things out contiguously.
*/
return (bap[indx - 1] + fs->fs_frag);
}
@@ -1284,39 +1336,78 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap)
ufs2_daddr_t *bap;
{
struct fs *fs;
- u_int cg;
+ u_int cg, inocg;
u_int avgbfree, startcg;
ufs2_daddr_t pref;
+ KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap"));
mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
fs = ip->i_fs;
/*
- * If we are allocating the first indirect block, try to place it
- * immediately following the last direct block.
- *
+ * Allocation of indirect blocks is indicated by passing negative
+ * values in indx: -1 for single indirect, -2 for double indirect,
+ * -3 for triple indirect. As noted below, we attempt to allocate
+ * the first indirect inline with the file data. For all later
+ * indirect blocks, the data is often allocated in other cylinder
+ * groups. However to speed random file access and to speed up
+ * fsck, the filesystem reserves the first fs_metaspace blocks
+ * (typically half of fs_minfree) of the data area of each cylinder
+ * group to hold these later indirect blocks.
+ */
+ inocg = ino_to_cg(fs, ip->i_number);
+ if (indx < 0) {
+ /*
+ * Our preference for indirect blocks is the zone at the
+ * beginning of the inode's cylinder group data area that
+ * we try to reserve for indirect blocks.
+ */
+ pref = cgmeta(fs, inocg);
+ /*
+ * If we are allocating the first indirect block, try to
+ * place it immediately following the last direct block.
+ */
+ if (indx == -1 && lbn < NDADDR + NINDIR(fs) &&
+ ip->i_din2->di_db[NDADDR - 1] != 0)
+ pref = ip->i_din2->di_db[NDADDR - 1] + fs->fs_frag;
+ return (pref);
+ }
+ /*
* If we are allocating the first data block in the first indirect
- * block, try to place it immediately following the indirect block.
+ * block and the indirect has been allocated in the data block area,
+ * try to place it immediately following the indirect block.
*/
if (lbn == NDADDR) {
- pref = ip->i_din1->di_db[NDADDR - 1];
- if (bap == NULL && pref != 0)
- return (pref + fs->fs_frag);
- pref = ip->i_din1->di_ib[0];
- if (pref != 0)
+ pref = ip->i_din2->di_ib[0];
+ if (pref != 0 && pref >= cgdata(fs, inocg) &&
+ pref < cgbase(fs, inocg + 1))
return (pref + fs->fs_frag);
}
+ /*
+ * If we are at the beginning of a file, or we have already allocated
+ * the maximum number of blocks per cylinder group, or we do not
+ * have a block allocated immediately preceding us, then we need
+ * to decide where to start allocating new blocks.
+ */
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
- if (lbn < NDADDR + NINDIR(fs)) {
- cg = ino_to_cg(fs, ip->i_number);
- return (cgbase(fs, cg) + fs->fs_frag);
- }
+ /*
+ * If we are allocating a directory data block, we want
+ * to place it in the metadata area.
+ */
+ if ((ip->i_mode & IFMT) == IFDIR)
+ return (cgmeta(fs, inocg));
+ /*
+ * Until we fill all the direct and all the first indirect's
+ * blocks, we try to allocate in the data area of the inode's
+ * cylinder group.
+ */
+ if (lbn < NDADDR + NINDIR(fs))
+ return (cgdata(fs, inocg));
/*
* Find a cylinder with greater than average number of
* unused data blocks.
*/
if (indx == 0 || bap[indx - 1] == 0)
- startcg =
- ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
+ startcg = inocg + lbn / fs->fs_maxbpg;
else
startcg = dtog(fs, bap[indx - 1]) + 1;
startcg %= fs->fs_ncg;
@@ -1324,17 +1415,17 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap)
for (cg = startcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
for (cg = 0; cg <= startcg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
return (0);
}
/*
- * We just always try to lay things out contiguously.
+ * Otherwise, we just always try to lay things out contiguously.
*/
return (bap[indx - 1] + fs->fs_frag);
}
@@ -1611,31 +1702,37 @@ ffs_alloccgblk(ip, bp, bpref, size)
ufs1_daddr_t bno;
ufs2_daddr_t blkno;
u_int8_t *blksfree;
- int i;
+ int i, cgbpref;
fs = ip->i_fs;
ump = ip->i_ump;
mtx_assert(UFS_MTX(ump), MA_OWNED);
cgp = (struct cg *)bp->b_data;
blksfree = cg_blksfree(cgp);
- if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
+ if (bpref == 0) {
bpref = cgp->cg_rotor;
- } else {
- bpref = blknum(fs, bpref);
- bno = dtogd(fs, bpref);
- /*
- * if the requested block is available, use it
- */
- if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
- goto gotit;
+ } else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) {
+ /* map bpref to correct zone in this cg */
+ if (bpref < cgdata(fs, cgbpref))
+ bpref = cgmeta(fs, cgp->cg_cgx);
+ else
+ bpref = cgdata(fs, cgp->cg_cgx);
}
/*
+ * if the requested block is available, use it
+ */
+ bno = dtogd(fs, blknum(fs, bpref));
+ if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
+ goto gotit;
+ /*
* Take the next available block in this cylinder group.
*/
bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
if (bno < 0)
return (0);
- cgp->cg_rotor = bno;
+ /* Update cg_rotor only if allocated from the data zone */
+ if (bno >= dtogd(fs, cgdata(fs, cgp->cg_cgx)))
+ cgp->cg_rotor = bno;
gotit:
blkno = fragstoblks(fs, bno);
ffs_clrblock(fs, blksfree, (long)blkno);
@@ -1742,9 +1839,10 @@ ffs_clusteralloc(ip, cg, bpref, len, unused)
* be recalled to try an allocation in the next cylinder group.
*/
if (dtog(fs, bpref) != cg)
- bpref = 0;
+ bpref = cgdata(fs, cg);
else
- bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
+ bpref = blknum(fs, bpref);
+ bpref = fragstoblks(fs, dtogd(fs, bpref));
mapp = &cg_clustersfree(cgp)[bpref / NBBY];
map = *mapp++;
bit = 1 << (bpref % NBBY);
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index d20df77..771f8eb 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -246,7 +246,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
lbns_remfree = lbns;
if (nb == 0) {
UFS_LOCK(ump);
- pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
+ pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
+ (ufs1_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags, cred, &newb)) != 0) {
curthread_pflags_restore(saved_inbdflush);
@@ -299,7 +300,8 @@ retry:
}
UFS_LOCK(ump);
if (pref == 0)
- pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
+ pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
+ (ufs1_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
@@ -794,7 +796,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
lbns_remfree = lbns;
if (nb == 0) {
UFS_LOCK(ump);
- pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
+ pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
+ (ufs2_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags, cred, &newb)) != 0) {
curthread_pflags_restore(saved_inbdflush);
@@ -848,7 +851,8 @@ retry:
}
UFS_LOCK(ump);
if (pref == 0)
- pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
+ pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
+ (ufs2_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
index b62a808..b154a80 100644
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -333,7 +333,8 @@ struct fs {
int32_t fs_maxbsize; /* maximum blocking factor permitted */
int64_t fs_unrefs; /* number of unreferenced inodes */
int64_t fs_providersize; /* size of underlying GEOM provider */
- int64_t fs_sparecon64[15]; /* old rotation block list head */
+ int64_t fs_metaspace; /* size of area reserved for metadata */
+ int64_t fs_sparecon64[14]; /* old rotation block list head */
int64_t fs_sblockloc; /* byte offset of standard superblock */
struct csum_total fs_cstotal; /* (u) cylinder summary information */
ufs_time_t fs_time; /* last time written */
@@ -525,6 +526,8 @@ struct cg {
* They calc filesystem addresses of cylinder group data structures.
*/
#define cgbase(fs, c) (((ufs2_daddr_t)(fs)->fs_fpg) * (c))
+#define cgdata(fs, c) (cgdmin(fs, c) + (fs)->fs_metaspace) /* data zone */
+#define cgmeta(fs, c) (cgdmin(fs, c)) /* meta data */
#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */
#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */
#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */
OpenPOWER on IntegriCloud