summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sbin/newfs/mkfs.c13
-rw-r--r--sbin/newfs/newfs.c18
-rw-r--r--sbin/tunefs/tunefs.c57
-rw-r--r--sys/ufs/ffs/ffs_alloc.c129
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c10
-rw-r--r--sys/ufs/ffs/fs.h25
6 files changed, 220 insertions, 32 deletions
diff --git a/sbin/newfs/mkfs.c b/sbin/newfs/mkfs.c
index 8386c1a..9ac5502 100644
--- a/sbin/newfs/mkfs.c
+++ b/sbin/newfs/mkfs.c
@@ -119,6 +119,8 @@ extern int maxbpg; /* maximum blocks per file in a cyl group */
extern int nrpos; /* # of distinguished rotational positions */
extern int bbsize; /* boot block size */
extern int sbsize; /* superblock size */
+extern int avgfilesize; /* expected average file size */
+extern int avgfilesperdir; /* expected number of files per directory */
extern u_long memleft; /* virtual memory available */
extern caddr_t membase; /* start address of memory based filesystem */
extern char * filename;
@@ -274,6 +276,17 @@ mkfs(pp, fsys, fi, fo)
if (sblock.fs_nsect <= 0)
printf("preposterous nsect %d\n", sblock.fs_nsect), exit(15);
/*
+ * collect and verify the filesystem density info
+ */
+ sblock.fs_avgfilesize = avgfilesize;
+ sblock.fs_avgfpdir = avgfilesperdir;
+ if (sblock.fs_avgfilesize <= 0)
+ printf("illegal expected average file size %d\n",
+ sblock.fs_avgfilesize), exit(14);
+ if (sblock.fs_avgfpdir <= 0)
+ printf("illegal expected number of files per directory %d\n",
+ sblock.fs_avgfpdir), exit(15);
+ /*
* collect and verify the block and fragment sizes
*/
sblock.fs_bsize = bsize;
diff --git a/sbin/newfs/newfs.c b/sbin/newfs/newfs.c
index e755993..8eed256 100644
--- a/sbin/newfs/newfs.c
+++ b/sbin/newfs/newfs.c
@@ -163,9 +163,7 @@ void fatal();
* The number of sectors are used to determine the size of a cyl-group.
* Kirk suggested one or two meg per "cylinder" so we say two.
*/
-
#define NTRACKS 1 /* number of heads */
-
#define NSECTORS 4096 /* number of sectors */
int mfs; /* run as the memory based filesystem */
@@ -199,6 +197,8 @@ int maxcontig = 0; /* max contiguous blocks to allocate */
int rotdelay = ROTDELAY; /* rotational delay between blocks */
int maxbpg; /* maximum blocks per file in a cyl group */
int nrpos = NRPOS; /* # of distinguished rotational positions */
+int avgfilesize = AVFILESIZ;/* expected average file size */
+int avgfilesperdir = AFPDIR;/* expected number of files per directory */
int bbsize = BBSIZE; /* boot block size */
int sbsize = SBSIZE; /* superblock size */
int mntflags = MNT_ASYNC; /* flags to be passed to mount */
@@ -256,8 +256,8 @@ main(argc, argv)
}
opstring = mfs ?
- "NF:T:Ua:b:c:d:e:f:i:m:o:s:" :
- "NOS:T:Ua:b:c:d:e:f:i:k:l:m:n:o:p:r:s:t:u:vx:";
+ "NF:T:Ua:b:c:d:e:f:g:h:i:m:o:s:" :
+ "NOS:T:Ua:b:c:d:e:f:g:h:i:k:l:m:n:o:p:r:s:t:u:vx:";
while ((ch = getopt(argc, argv, opstring)) != -1)
switch (ch) {
case 'N':
@@ -308,6 +308,14 @@ main(argc, argv)
if ((fsize = atoi(optarg)) <= 0)
fatal("%s: bad fragment size", optarg);
break;
+ case 'g':
+ if ((avgfilesize = atoi(optarg)) <= 0)
+ fatal("%s: bad average file size", optarg);
+ break;
+ case 'h':
+ if ((avgfilesperdir = atoi(optarg)) <= 0)
+ fatal("%s: bad average files per dir", optarg);
+ break;
case 'i':
if ((density = atoi(optarg)) <= 0)
fatal("%s: bad bytes per inode", optarg);
@@ -768,6 +776,8 @@ usage()
fprintf(stderr, "\t-d rotational delay between contiguous blocks\n");
fprintf(stderr, "\t-e maximum blocks per file in a cylinder group\n");
fprintf(stderr, "\t-f frag size\n");
+ fprintf(stderr, "\t-g average file size\n");
+ fprintf(stderr, "\t-h average files per directory\n");
fprintf(stderr, "\t-i number of bytes per inode\n");
fprintf(stderr, "\t-k sector 0 skew, per track\n");
fprintf(stderr, "\t-l hardware sector interleave\n");
diff --git a/sbin/tunefs/tunefs.c b/sbin/tunefs/tunefs.c
index 734e01d..1fda3f7 100644
--- a/sbin/tunefs/tunefs.c
+++ b/sbin/tunefs/tunefs.c
@@ -91,9 +91,10 @@ main(argc, argv)
char *special, *name;
struct stat st;
int Aflag = 0, active = 0;
- int aflag = 0, dflag = 0, eflag = 0, mflag = 0;
- int nflag = 0, oflag = 0, pflag = 0;
- int avalue = 0, dvalue = 0, evalue = 0, mvalue = 0, ovalue = 0;
+ int aflag = 0, dflag = 0, eflag = 0, fflag = 0, mflag = 0;
+ int nflag = 0, oflag = 0, pflag = 0, sflag = 0;
+ int avalue = 0, dvalue = 0, evalue = 0, fvalue = 0;
+ int mvalue = 0, ovalue = 0, svalue = 0;
char *nvalue = NULL;
struct fstab *fs;
char *chg[2], device[MAXPATHLEN];
@@ -104,7 +105,7 @@ main(argc, argv)
if (argc < 3)
usage();
found_arg = 0; /* at least one arg is required */
- while ((ch = getopt(argc, argv, "Aa:d:e:m:n:o:p")) != -1)
+ while ((ch = getopt(argc, argv, "Aa:d:e:f:m:n:o:ps:")) != -1)
switch (ch) {
case 'A':
found_arg = 1;
@@ -132,6 +133,14 @@ main(argc, argv)
errx(10, "%s must be >= 1 (was %s)", name, optarg);
eflag = 1;
break;
+ case 'f':
+ found_arg = 1;
+ name = "average file size";
+ fvalue = atoi(optarg);
+ if (fvalue < 1)
+ errx(10, "%s must be >= 1 (was %s)", name, optarg);
+ fflag = 1;
+ break;
case 'm':
found_arg = 1;
name = "minimum percentage of free space";
@@ -168,6 +177,14 @@ main(argc, argv)
found_arg = 1;
pflag = 1;
break;
+ case 's':
+ found_arg = 1;
+ name = "expected number of files per directory";
+ svalue = atoi(optarg);
+ if (svalue < 1)
+ errx(10, "%s must be >= 1 (was %s)", name, optarg);
+ sflag = 1;
+ break;
default:
usage();
}
@@ -239,6 +256,17 @@ again:
sblock.fs_maxbpg = evalue;
}
}
+ if (fflag) {
+ name = "average file size";
+ if (sblock.fs_avgfilesize == fvalue) {
+ warnx("%s remains unchanged as %d", name, fvalue);
+ }
+ else {
+ warnx("%s changes from %d to %d",
+ name, sblock.fs_avgfilesize, fvalue);
+ sblock.fs_avgfilesize = fvalue;
+ }
+ }
if (mflag) {
name = "minimum percentage of free space";
if (sblock.fs_minfree == mvalue) {
@@ -291,6 +319,17 @@ again:
warnx(OPTWARN, "space", "<", MINFREE);
}
}
+ if (sflag) {
+ name = "expected number of files per directory";
+ if (sblock.fs_avgfpdir == svalue) {
+ warnx("%s remains unchanged as %d", name, svalue);
+ }
+ else {
+ warnx("%s changes from %d to %d",
+ name, sblock.fs_avgfpdir, svalue);
+ sblock.fs_avgfpdir = svalue;
+ }
+ }
putsb(&sblock, special, Aflag);
if (active) {
@@ -307,9 +346,9 @@ void
usage()
{
fprintf(stderr, "%s\n%s\n%s\n",
-"usage: tunefs [-A] [-a maxcontig] [-d rotdelay] [-e maxbpg] [-m minfree]",
-" [-p] [-n enable | disable] [-o space | time]",
-" special | filesystem");
+"usage: tunefs [-A] [-a maxcontig] [-d rotdelay] [-e maxbpg] [-f avgfilesize]",
+" [-m minfree] [-p] [-n enable | disable] [-o space | time]",
+" [-s filesperdir] special | filesystem");
exit(2);
}
@@ -366,6 +405,10 @@ printfs()
sblock.fs_rotdelay);
warnx("maximum blocks per file in a cylinder group: (-e) %d",
sblock.fs_maxbpg);
+ warnx("average file size: (-f) %d",
+ sblock.fs_avgfilesize);
+ warnx("average number of files in a directory: (-s) %d",
+ sblock.fs_avgfpdir);
warnx("minimum percentage of free space: (-m) %d%%",
sblock.fs_minfree);
warnx("optimization preference: (-o) %s",
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 9476933..81fb75e 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -71,7 +71,7 @@ static void ffs_clusteracct __P((struct fs *, struct cg *, ufs_daddr_t,
int));
static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
int));
-static ino_t ffs_dirpref __P((struct fs *));
+static ino_t ffs_dirpref __P((struct inode *));
static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
static void ffs_fserr __P((struct fs *, u_int, char *));
static u_long ffs_hashalloc
@@ -593,12 +593,23 @@ ffs_valloc(pvp, mode, cred, vpp)
goto noinodes;
if ((mode & IFMT) == IFDIR)
- ipref = ffs_dirpref(fs);
+ ipref = ffs_dirpref(pip);
else
ipref = pip->i_number;
if (ipref >= fs->fs_ncg * fs->fs_ipg)
ipref = 0;
cg = ino_to_cg(fs, ipref);
+ /*
+ * Track the number of dirs created one after another in the
+ * same cg without intervening allocation of files.
+ */
+ if ((mode & IFMT) == IFDIR) {
+ if (fs->fs_contigdirs[cg] < 255)
+ fs->fs_contigdirs[cg]++;
+ } else {
+ if (fs->fs_contigdirs[cg] > 0)
+ fs->fs_contigdirs[cg]--;
+ }
ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode,
(allocfcn_t *)ffs_nodealloccg);
if (ino == 0)
@@ -633,28 +644,112 @@ noinodes:
}
/*
- * Find a cylinder to place a directory.
+ * Find a cylinder group to place a directory.
+ *
+ * The policy implemented by this algorithm is to allocate a
+ * directory inode in the same cylinder group as its parent
+ * directory, but also to reserve space for its files' inodes
+ * and data. Restrict the number of directories which may be
+ * allocated one after another in the same cylinder group
+ * without intervening allocation of files.
*
- * The policy implemented by this algorithm is to select from
- * among those cylinder groups with above the average number of
- * free inodes, the one with the smallest number of directories.
+ * If we allocate a first level directory then force allocation
+ * in another cylinder group.
*/
static ino_t
-ffs_dirpref(fs)
- register struct fs *fs;
+ffs_dirpref(pip)
+ struct inode *pip;
{
- int cg, minndir, mincg, avgifree;
+ register struct fs *fs;
+ int cg, prefcg, dirsize, cgsize;
+ int avgifree, avgbfree, avgndir, curdirsize;
+ int minifree, minbfree, maxndir;
+ int mincg, minndir;
+ int maxcontigdirs;
+
+ fs = pip->i_fs;
avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
- minndir = fs->fs_ipg;
- mincg = 0;
- for (cg = 0; cg < fs->fs_ncg; cg++)
- if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
- fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
- mincg = cg;
- minndir = fs->fs_cs(fs, cg).cs_ndir;
+ avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
+ avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
+
+ /*
+ * Force allocation in another cg if creating a first level dir.
+ */
+ if (ITOV(pip)->v_flag & VROOT) {
+ prefcg = arc4random() % fs->fs_ncg;
+ mincg = prefcg;
+ minndir = fs->fs_ipg;
+ for (cg = prefcg; cg < fs->fs_ncg; cg++)
+ if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
+ fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
+ fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
+ mincg = cg;
+ minndir = fs->fs_cs(fs, cg).cs_ndir;
+ }
+ for (cg = 0; cg < prefcg; cg++)
+ if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
+ fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
+ fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
+ mincg = cg;
+ minndir = fs->fs_cs(fs, cg).cs_ndir;
+ }
+ return ((ino_t)(fs->fs_ipg * mincg));
+ }
+
+ /*
+ * Compute the various limits which are used for
+ * optimal allocation of a directory inode.
+ */
+ maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
+ minifree = avgifree - fs->fs_ipg / 4;
+ if (minifree < 0)
+ minifree = 0;
+ minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
+ if (minbfree < 0)
+ minbfree = 0;
+ cgsize = fs->fs_fsize * fs->fs_fpg;
+ dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
+ curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
+ if (dirsize < curdirsize)
+ dirsize = curdirsize;
+ maxcontigdirs = min(cgsize / dirsize, 255);
+ if (fs->fs_avgfpdir > 0)
+ maxcontigdirs = min(maxcontigdirs,
+ fs->fs_ipg / fs->fs_avgfpdir);
+ if (maxcontigdirs == 0)
+ maxcontigdirs = 1;
+
+ /*
+ * Limit number of dirs in one cg and reserve space for
+ * regular files, but only if we have no deficit in
+ * inodes or space.
+ */
+ prefcg = ino_to_cg(fs, pip->i_number);
+ for (cg = prefcg; cg < fs->fs_ncg; cg++)
+ if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
+ fs->fs_cs(fs, cg).cs_nifree >= minifree &&
+ fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
+ if (fs->fs_contigdirs[cg] < maxcontigdirs)
+ return ((ino_t)(fs->fs_ipg * cg));
+ }
+ for (cg = 0; cg < prefcg; cg++)
+ if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
+ fs->fs_cs(fs, cg).cs_nifree >= minifree &&
+ fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
+ if (fs->fs_contigdirs[cg] < maxcontigdirs)
+ return ((ino_t)(fs->fs_ipg * cg));
}
- return ((ino_t)(fs->fs_ipg * mincg));
+ /*
+ * This is a backstop when we are deficient in space.
+ */
+ for (cg = prefcg; cg < fs->fs_ncg; cg++)
+ if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
+ return ((ino_t)(fs->fs_ipg * cg));
+ for (cg = 0; cg < prefcg; cg++)
+ if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
+ break;
+ return ((ino_t)(fs->fs_ipg * cg));
}
/*
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 9803a22..17fa431 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -624,6 +624,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
blks = howmany(size, fs->fs_fsize);
if (fs->fs_contigsumsize > 0)
size += fs->fs_ncg * sizeof(int32_t);
+ size += fs->fs_ncg * sizeof(u_int8_t);
space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
fs->fs_csp = space;
for (i = 0; i < blks; i += fs->fs_frag) {
@@ -645,6 +646,15 @@ ffs_mountfs(devvp, mp, p, malloctype)
for (i = 0; i < fs->fs_ncg; i++)
*lp++ = fs->fs_contigsumsize;
}
+ size = fs->fs_ncg * sizeof(u_int8_t);
+ fs->fs_contigdirs = (u_int8_t *)space;
+ space = (u_int8_t *)space + size;
+ bzero(fs->fs_contigdirs, size);
+ /* Compatibility for old filesystems XXX */
+ if (fs->fs_avgfilesize <= 0) /* XXX */
+ fs->fs_avgfilesize = AVFILESIZ; /* XXX */
+ if (fs->fs_avgfpdir <= 0) /* XXX */
+ fs->fs_avgfpdir = AFPDIR; /* XXX */
mp->mnt_data = (qaddr_t)ump;
mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
index 4083c14..c11a9f8 100644
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -108,15 +108,17 @@
/*
* There is a 128-byte region in the superblock reserved for in-core
* pointers to summary information. Originally this included an array
- * of pointers to blocks of struct csum; now there are just two
+ * of pointers to blocks of struct csum; now there are just three
* pointers and the remaining space is padded with fs_ocsp[].
*
* NOCSPTRS determines the size of this padding. One pointer (fs_csp)
* is taken away to point to a contiguous array of struct csum for
* all cylinder groups; a second (fs_maxcluster) points to an array
- * of cluster sizes that is computed as cylinder groups are inspected.
+ * of cluster sizes that is computed as cylinder groups are inspected,
+ * and the third points to an array that tracks the creation of new
+ * directories.
*/
-#define NOCSPTRS ((128 / sizeof(void *)) - 2)
+#define NOCSPTRS ((128 / sizeof(void *)) - 3)
/*
* A summary of contiguous blocks of various sizes is maintained
@@ -142,6 +144,18 @@
#define DEFAULTOPT FS_OPTTIME
/*
+ * Grigoriy Orlov <gluk@ptci.ru> has done some extensive work to fine
+ * tune the layout preferences for directories within a filesystem.
+ * His algorithm can be tuned by adjusting the following parameters
+ * which tell the system the average file size and the average number
+ * of files per directory. These defaults are well selected for typical
+ * filesystems, but may need to be tuned for odd cases like filesystems
+ * being used for squid caches or news spools.
+ */
+#define AVFILESIZ 16384 /* expected average file size */
+#define AFPDIR 64 /* expected number of files per directory */
+
+/*
* The maximum number of snapshot nodes that can be associated
* with each filesystem. This limit affects only the number of
* snapshot files that can be recorded within the superblock so
@@ -273,12 +287,15 @@ struct fs {
/* these fields retain the current block allocation info */
int32_t fs_cgrotor; /* last cg searched */
void *fs_ocsp[NOCSPTRS]; /* padding; was list of fs_cs buffers */
+ u_int8_t *fs_contigdirs; /* # of contiguously allocated dirs */
struct csum *fs_csp; /* cg summary info buffer for fs_cs */
int32_t *fs_maxcluster; /* max cluster in each cyl group */
int32_t fs_cpc; /* cyl per cycle in postbl */
int16_t fs_opostbl[16][8]; /* old rotation block list head */
int32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */
- int32_t fs_sparecon[30]; /* reserved for future constants */
+ int32_t fs_avgfilesize; /* expected average file size */
+ int32_t fs_avgfpdir; /* expected # of files per directory */
+ int32_t fs_sparecon[28]; /* reserved for future constants */
int32_t fs_contigsumsize; /* size of cluster summary array */
int32_t fs_maxsymlinklen; /* max length of an internal symlink */
int32_t fs_inodefmt; /* format of on-disk inodes */
OpenPOWER on IntegriCloud