summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nick Piggin <npiggin@kernel.dk> 2011-01-07 17:50:09 +1100
committer Nick Piggin <npiggin@kernel.dk> 2011-01-07 17:50:32 +1100
commit 9d55c369bb5e695e629bc35cba2ef607755b3bee (patch)
tree d97cd498a67f1f7987d4403ef90e399516a3b8c9
parent e1bb57826381199cc79fbf44e9dfeee58fc7b339 (diff)
downloadop-kernel-dev-9d55c369bb5e695e629bc35cba2ef607755b3bee.zip
op-kernel-dev-9d55c369bb5e695e629bc35cba2ef607755b3bee.tar.gz
fs: implement faster dentry memcmp
The standard memcmp function on a Westmere system shows up hot in profiles in the `git diff` workload (both parallel and single threaded), and it is likely due to the costs associated with trapping into microcode, and little opportunity to improve memory access (dentry name is not likely to take up more than a cacheline).

So replace it with an open-coded byte comparison. This increases code size by 8 bytes in the critical __d_lookup_rcu function, but the speedup is huge, averaging 10 runs of each:

git diff st   user   sys   elapsed  CPU
before        1.15   2.57  3.82     97.1
after         1.14   2.35  3.61     96.8

git diff mt   user   sys   elapsed  CPU
before        1.27   3.85  1.46     349
after         1.26   3.54  1.43     333

Elapsed time for single threaded git diff at 95.0% confidence:
        -0.21  +/- 0.01
        -5.45% +/- 0.24%

It's -0.66% +/- 0.06% elapsed time on my Opteron, so rep cmp costs on the fam10h seem to be relatively smaller, but there is still a win.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
-rw-r--r-- fs/dcache.c 12
-rw-r--r-- include/linux/dcache.h 21
2 files changed, 24 insertions, 9 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a4ce7d..5699d4c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1454,9 +1454,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
continue;
if (alias->d_parent != entry->d_parent)
continue;
- if (qstr->len != len)
- continue;
- if (memcmp(qstr->name, name, len))
+ if (dentry_cmp(qstr->name, qstr->len, name, len))
continue;
__dget(alias);
return alias;
@@ -1810,9 +1808,7 @@ seqretry:
tlen, tname, name))
continue;
} else {
- if (tlen != len)
- continue;
- if (memcmp(tname, str, tlen))
+ if (dentry_cmp(tname, tlen, str, len))
continue;
}
/*
@@ -1925,9 +1921,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
tlen, tname, name))
goto next;
} else {
- if (tlen != len)
- goto next;
- if (memcmp(tname, str, tlen))
+ if (dentry_cmp(tname, tlen, str, len))
goto next;
}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c0a2ca9..bd07758 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -47,6 +47,27 @@ struct dentry_stat_t {
};
extern struct dentry_stat_t dentry_stat;
+/*
+ * Compare 2 name strings, return 0 if they match, otherwise non-zero.
+ *
+ * cs/scount and ct/tcount are the two names and their lengths in bytes.
+ * Names of different lengths never match: 1 is returned without reading
+ * either buffer. Otherwise the names are compared byte by byte, stopping
+ * at the first difference, and 0 is returned only if all bytes are equal.
+ *
+ * The lengths must be non-zero. The do/while loop reads the first byte of
+ * each name before it tests the remaining count, so a zero length would
+ * still read one byte from each name, and if those bytes were equal the
+ * count would be decremented below zero and the loop would keep going.
+ */
+static inline int dentry_cmp(const unsigned char *cs, size_t scount,
+ const unsigned char *ct, size_t tcount)
+{
+ int ret;
+ if (scount != tcount)
+ return 1;
+ do {
+ ret = (*cs != *ct);
+ if (ret)
+ break;
+ cs++;
+ ct++;
+ tcount--;
+ } while (tcount);
+ return ret;
+}
+
/* Name hashing routines. Initial hash value */
/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
#define init_name_hash() 0
OpenPOWER on IntegriCloud