summaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/localalloc.c
diff options
context:
space:
mode:
authorMark Fasheh <mfasheh@suse.com>2010-04-05 18:17:14 -0700
committerJoel Becker <joel.becker@oracle.com>2010-05-05 18:18:07 -0700
commit6b82021b9e91cd689fdffadbcdb9a42597bbe764 (patch)
treeac4235e792e74a2e60a41e95d62965b7ed4b3232 /fs/ocfs2/localalloc.c
parent73c8a80003d13be54e2309865030404441075182 (diff)
downloadop-kernel-dev-6b82021b9e91cd689fdffadbcdb9a42597bbe764.zip
op-kernel-dev-6b82021b9e91cd689fdffadbcdb9a42597bbe764.tar.gz
ocfs2: increase the default size of local alloc windows
I have observed that the current size of 8M gives us pretty poor fragmentation on multi-threaded workloads which do lots of writes. Generally, I can increase the size of local alloc windows and observe a marked decrease in fragmentation, even up to and beyond window sizes of 512 megabytes. This makes sense for a couple of reasons - a larger local alloc means more room for reservation windows. On multi-node workloads the larger local alloc helps as well because we don't have to do window slides as often.

Also, I removed the OCFS2_DEFAULT_LOCAL_ALLOC_SIZE constant as it is no longer used and the comment above it was out of date.

To test fragmentation, I used a workload which launched 4 threads that did 4k writes into a series of about 140 alternating files. With resv_level=2, and a 4k/4k file system I observed the following average fragmentation for various localalloc= parameters:

localalloc=    avg. fragmentation
8              48
32             16
64             10
120            7

On larger cluster sizes, the difference is more dramatic.

The new default sizes top out at 256M, which we'll only get for cluster sizes of 32K and above.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2/localalloc.c')
-rw-r--r--fs/ocfs2/localalloc.c114
1 files changed, 112 insertions, 2 deletions
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index e39a3e7..00022aac 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -75,10 +75,120 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
struct inode *local_alloc_inode);
+/*
+ * ocfs2_la_default_mb() - determine a default size, in megabytes of
+ * the local alloc.
+ *
+ * Generally, we'd like to pick as large a local alloc as
+ * possible. Performance on large workloads tends to scale
+ * proportionally to la size. In addition to that, the reservations
+ * code functions more efficiently as it can reserve more windows for
+ * write.
+ *
+ * Some things work against us when trying to choose a large local alloc:
+ *
+ * - We need to ensure our sizing is picked to leave enough space in
+ * group descriptors for other allocations (such as block groups,
+ * etc). Picking default sizes which are a multiple of 4 could help
+ * - block groups are allocated in 2mb and 4mb chunks.
+ *
+ * - Likewise, we don't want to starve other nodes of bits on small
+ * file systems. This can easily be taken care of by limiting our
+ * default to a reasonable size (256M) on larger cluster sizes.
+ *
+ * - Some file systems can't support very large sizes - 4k and 8k in
+ * particular are limited to less than 128 and 256 megabytes respectively.
+ *
+ * The following reference table shows group descriptor and local
+ * alloc maximums at various cluster sizes (4k blocksize)
+ *
+ * csize: 4K group: 126M la: 121M
+ * csize: 8K group: 252M la: 243M
+ * csize: 16K group: 504M la: 486M
+ * csize: 32K group: 1008M la: 972M
+ * csize: 64K group: 2016M la: 1944M
+ * csize: 128K group: 4032M la: 3888M
+ * csize: 256K group: 8064M la: 7776M
+ * csize: 512K group: 16128M la: 15552M
+ * csize: 1024K group: 32256M la: 31104M
+ */
+#define OCFS2_LA_MAX_DEFAULT_MB 256
+#define OCFS2_LA_OLD_DEFAULT 8
+unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
+{
+	unsigned int la_mb;
+	unsigned int gd_mb;
+	unsigned int megs_per_slot;
+	struct super_block *sb = osb->sb;
+
+	gd_mb = ocfs2_clusters_to_megabytes(sb,
+					    8 * ocfs2_group_bitmap_size(sb));
+
+	/*
+	 * This takes care of file systems with very small group
+	 * descriptors - 512 byte blocksize at cluster sizes lower
+	 * than 16K and also 1k blocksize with 4k cluster size.
+	 */
+	if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
+	    || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
+		return OCFS2_LA_OLD_DEFAULT;
+
+	/*
+	 * Leave enough room for some block groups, then clear the low
+	 * two bits so the value we work from is a multiple of 4.
+	 */
+	gd_mb -= 16;
+	gd_mb &= ~3U;
+
+	la_mb = gd_mb;
+
+	/*
+	 * Keep window sizes down to a reasonable default
+	 */
+	if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
+		/*
+		 * Some clustersize / blocksize combinations will have
+		 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
+		 * default size, but get poor distribution when
+		 * limited to exactly 256 megabytes.
+		 *
+		 * As an example, 16K clustersize at 4K blocksize
+		 * gives us a cluster group size of 504M. Paring the
+		 * local alloc size down to 256 however, would give us
+		 * only one window and around 200MB left in the
+		 * cluster group. Instead, find the first size below
+		 * 256 which would give us an even distribution.
+		 *
+		 * Larger cluster group sizes actually work out pretty
+		 * well when pared to 256, so we don't have to do this
+		 * for any group that fits more than two
+		 * OCFS2_LA_MAX_DEFAULT_MB windows.
+		 */
+		if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) {
+			la_mb = OCFS2_LA_MAX_DEFAULT_MB;
+		} else {
+			unsigned int gd_mult = gd_mb;
+
+			while (gd_mult > OCFS2_LA_MAX_DEFAULT_MB)
+				gd_mult >>= 1;
+
+			la_mb = gd_mult;
+		}
+	}
+
+	megs_per_slot = ocfs2_clusters_to_megabytes(sb,
+			osb->osb_clusters_at_boot / osb->max_slots);
+	/* Too many nodes, too few disk clusters: cap at one slot's share. */
+	if (megs_per_slot < la_mb)
+		la_mb = megs_per_slot;
+
+	return la_mb;
+}
+
void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
{
struct super_block *sb = osb->sb;
- unsigned int la_default_mb = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
+ unsigned int la_default_mb = ocfs2_la_default_mb(osb);
unsigned int la_max_mb;
la_max_mb = ocfs2_clusters_to_megabytes(sb,
@@ -185,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
osb->local_alloc_bits, (osb->bitmap_cpg - 1));
osb->local_alloc_bits =
ocfs2_megabytes_to_clusters(osb->sb,
- OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
+ ocfs2_la_default_mb(osb));
}
/* read the alloc off disk */
OpenPOWER on IntegriCloud