summary refs log tree commit diff stats
path: root/sys/ufs
diff options
context:
space:
mode:
Diffstat (limited to 'sys/ufs')
-rw-r--r-- sys/ufs/ffs/ffs_alloc.c 129
-rw-r--r-- sys/ufs/ffs/ffs_vfsops.c 10
-rw-r--r-- sys/ufs/ffs/fs.h 25
3 files changed, 143 insertions, 21 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 9476933..81fb75e 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -71,7 +71,7 @@ static void ffs_clusteracct __P((struct fs *, struct cg *, ufs_daddr_t,
int));
static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
int));
-static ino_t ffs_dirpref __P((struct fs *));
+static ino_t ffs_dirpref __P((struct inode *));
static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
static void ffs_fserr __P((struct fs *, u_int, char *));
static u_long ffs_hashalloc
@@ -593,12 +593,23 @@ ffs_valloc(pvp, mode, cred, vpp)
goto noinodes;
if ((mode & IFMT) == IFDIR)
- ipref = ffs_dirpref(fs);
+ ipref = ffs_dirpref(pip);
else
ipref = pip->i_number;
if (ipref >= fs->fs_ncg * fs->fs_ipg)
ipref = 0;
cg = ino_to_cg(fs, ipref);
+ /*
+ * Track the number of dirs created one after another
+ * in the same cg without intervening file creations.
+ */
+ if ((mode & IFMT) == IFDIR) {
+ if (fs->fs_contigdirs[cg] < 255)
+ fs->fs_contigdirs[cg]++;
+ } else {
+ if (fs->fs_contigdirs[cg] > 0)
+ fs->fs_contigdirs[cg]--;
+ }
ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode,
(allocfcn_t *)ffs_nodealloccg);
if (ino == 0)
@@ -633,28 +644,112 @@ noinodes:
}
/*
- * Find a cylinder to place a directory.
+ * Find a cylinder group to place a directory.
+ *
+ * The policy implemented by this algorithm is to allocate a
+ * directory inode in the same cylinder group as its parent
+ * directory, but also to reserve space for its files' inodes
+ * and data. Restrict the number of directories which may be
+ * allocated one after another in the same cylinder group
+ * without intervening allocation of files.
*
- * The policy implemented by this algorithm is to select from
- * among those cylinder groups with above the average number of
- * free inodes, the one with the smallest number of directories.
+ * If we allocate a first level directory then force allocation
+ * in another cylinder group.
*/
static ino_t
-ffs_dirpref(fs)
- register struct fs *fs;
+ffs_dirpref(pip)
+ struct inode *pip;
{
- int cg, minndir, mincg, avgifree;
+ register struct fs *fs;
+ int cg, prefcg, dirsize, cgsize;
+ int avgifree, avgbfree, avgndir, curdirsize;
+ int minifree, minbfree, maxndir;
+ int mincg, minndir;
+ int maxcontigdirs;
+
+ fs = pip->i_fs;
avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
- minndir = fs->fs_ipg;
- mincg = 0;
- for (cg = 0; cg < fs->fs_ncg; cg++)
- if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
- fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
- mincg = cg;
- minndir = fs->fs_cs(fs, cg).cs_ndir;
+ avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
+ avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
+
+ /*
+ * Force allocation in another cg if creating a first level dir.
+ */
+ if (ITOV(pip)->v_flag & VROOT) {
+ prefcg = arc4random() % fs->fs_ncg;
+ mincg = prefcg;
+ minndir = fs->fs_ipg;
+ for (cg = prefcg; cg < fs->fs_ncg; cg++)
+ if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
+ fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
+ fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
+ mincg = cg;
+ minndir = fs->fs_cs(fs, cg).cs_ndir;
+ }
+ for (cg = 0; cg < prefcg; cg++)
+ if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
+ fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
+ fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
+ mincg = cg;
+ minndir = fs->fs_cs(fs, cg).cs_ndir;
+ }
+ return ((ino_t)(fs->fs_ipg * mincg));
+ }
+
+ /*
+ * Compute various limits which are used for
+ * optimal allocation of a directory inode.
+ */
+ maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
+ minifree = avgifree - fs->fs_ipg / 4;
+ if (minifree < 0)
+ minifree = 0;
+ minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
+ if (minbfree < 0)
+ minbfree = 0;
+ cgsize = fs->fs_fsize * fs->fs_fpg;
+ dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
+ curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
+ if (dirsize < curdirsize)
+ dirsize = curdirsize;
+ maxcontigdirs = min(cgsize / dirsize, 255);
+ if (fs->fs_avgfpdir > 0)
+ maxcontigdirs = min(maxcontigdirs,
+ fs->fs_ipg / fs->fs_avgfpdir);
+ if (maxcontigdirs == 0)
+ maxcontigdirs = 1;
+
+ /*
+ * Limit number of dirs in one cg and reserve space for
+ * regular files, but only if we have no deficit in
+ * inodes or space.
+ */
+ prefcg = ino_to_cg(fs, pip->i_number);
+ for (cg = prefcg; cg < fs->fs_ncg; cg++)
+ if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
+ fs->fs_cs(fs, cg).cs_nifree >= minifree &&
+ fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
+ if (fs->fs_contigdirs[cg] < maxcontigdirs)
+ return ((ino_t)(fs->fs_ipg * cg));
+ }
+ for (cg = 0; cg < prefcg; cg++)
+ if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
+ fs->fs_cs(fs, cg).cs_nifree >= minifree &&
+ fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
+ if (fs->fs_contigdirs[cg] < maxcontigdirs)
+ return ((ino_t)(fs->fs_ipg * cg));
}
- return ((ino_t)(fs->fs_ipg * mincg));
+ /*
+ * This is a backstop when we are deficient in space.
+ */
+ for (cg = prefcg; cg < fs->fs_ncg; cg++)
+ if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
+ return ((ino_t)(fs->fs_ipg * cg));
+ for (cg = 0; cg < prefcg; cg++)
+ if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
+ break;
+ return ((ino_t)(fs->fs_ipg * cg));
}
/*
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 9803a22..17fa431 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -624,6 +624,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
blks = howmany(size, fs->fs_fsize);
if (fs->fs_contigsumsize > 0)
size += fs->fs_ncg * sizeof(int32_t);
+ size += fs->fs_ncg * sizeof(u_int8_t);
space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
fs->fs_csp = space;
for (i = 0; i < blks; i += fs->fs_frag) {
@@ -645,6 +646,15 @@ ffs_mountfs(devvp, mp, p, malloctype)
for (i = 0; i < fs->fs_ncg; i++)
*lp++ = fs->fs_contigsumsize;
}
+ size = fs->fs_ncg * sizeof(u_int8_t);
+ fs->fs_contigdirs = (u_int8_t *)space;
+ space = (u_int8_t *)space + size;
+ bzero(fs->fs_contigdirs, size);
+ /* Compatibility for old filesystems XXX */
+ if (fs->fs_avgfilesize <= 0) /* XXX */
+ fs->fs_avgfilesize = AVFILESIZ; /* XXX */
+ if (fs->fs_avgfpdir <= 0) /* XXX */
+ fs->fs_avgfpdir = AFPDIR; /* XXX */
mp->mnt_data = (qaddr_t)ump;
mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
index 4083c14..c11a9f8 100644
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -108,15 +108,17 @@
/*
* There is a 128-byte region in the superblock reserved for in-core
* pointers to summary information. Originally this included an array
- * of pointers to blocks of struct csum; now there are just two
+ * of pointers to blocks of struct csum; now there are just three
* pointers and the remaining space is padded with fs_ocsp[].
*
* NOCSPTRS determines the size of this padding. One pointer (fs_csp)
* is taken away to point to a contiguous array of struct csum for
* all cylinder groups; a second (fs_maxcluster) points to an array
- * of cluster sizes that is computed as cylinder groups are inspected.
+ * of cluster sizes that is computed as cylinder groups are inspected,
+ * and the third points to an array that tracks the creation of new
+ * directories.
*/
-#define NOCSPTRS ((128 / sizeof(void *)) - 2)
+#define NOCSPTRS ((128 / sizeof(void *)) - 3)
/*
* A summary of contiguous blocks of various sizes is maintained
@@ -142,6 +144,18 @@
#define DEFAULTOPT FS_OPTTIME
/*
+ * Grigoriy Orlov <gluk@ptci.ru> has done some extensive work to fine
+ * tune the layout preferences for directories within a filesystem.
+ * His algorithm can be tuned by adjusting the following parameters
+ * which tell the system the average file size and the average number
+ * of files per directory. These defaults are well selected for typical
+ * filesystems, but may need to be tuned for odd cases like filesystems
+ * being used for squid caches or news spools.
+ */
+#define AVFILESIZ 16384 /* expected average file size */
+#define AFPDIR 64 /* expected number of files per directory */
+
+/*
* The maximum number of snapshot nodes that can be associated
* with each filesystem. This limit affects only the number of
* snapshot files that can be recorded within the superblock so
@@ -273,12 +287,15 @@ struct fs {
/* these fields retain the current block allocation info */
int32_t fs_cgrotor; /* last cg searched */
void *fs_ocsp[NOCSPTRS]; /* padding; was list of fs_cs buffers */
+ u_int8_t *fs_contigdirs; /* # of contiguously allocated dirs */
struct csum *fs_csp; /* cg summary info buffer for fs_cs */
int32_t *fs_maxcluster; /* max cluster in each cyl group */
int32_t fs_cpc; /* cyl per cycle in postbl */
int16_t fs_opostbl[16][8]; /* old rotation block list head */
int32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */
- int32_t fs_sparecon[30]; /* reserved for future constants */
+ int32_t fs_avgfilesize; /* expected average file size */
+ int32_t fs_avgfpdir; /* expected # of files per directory */
+ int32_t fs_sparecon[28]; /* reserved for future constants */
int32_t fs_contigsumsize; /* size of cluster summary array */
int32_t fs_maxsymlinklen; /* max length of an internal symlink */
int32_t fs_inodefmt; /* format of on-disk inodes */
OpenPOWER on IntegriCloud