summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-09-14 17:28:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-09-14 17:28:32 -0700
commit9226b5b440f2b4fbb3b797f3cb74a9a627220660 (patch)
tree2b9ff475c498e19606031d5e7fde74cdba30dee8
parent5910cfdce307d6e5c55d747809e3c670c9e8a9a7 (diff)
downloadop-kernel-dev-9226b5b440f2b4fbb3b797f3cb74a9a627220660.zip
op-kernel-dev-9226b5b440f2b4fbb3b797f3cb74a9a627220660.tar.gz
vfs: avoid non-forwarding large load after small store in path lookup
The performance regression that Josef Bacik reported in the pathname lookup (see commit 99d263d4c5b2 "vfs: fix bad hashing of dentries") made me look at performance stability of the dcache code, just to verify that the problem was actually fixed. That turned up a few other problems in this area. There are a few cases where we exit RCU lookup mode and go to the slow serializing case when we shouldn't, Al has fixed those and they'll come in with the next VFS pull. But my performance verification also shows that link_path_walk() turns out to have a very unfortunate 32-bit store of the length and hash of the name we look up, followed by a 64-bit read of the combined hash_len field. That screws up the processor store to load forwarding, causing an unnecessary hiccup in this critical routine. It's caused by the ugly calling convention for the "hash_name()" function, and easily fixed by just making hash_name() fill in the whole 'struct qstr' rather than passing it a pointer to just the hash value. With that, the profile for this function looks much smoother. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/namei.c19
-rw-r--r--include/linux/dcache.h1
2 files changed, 11 insertions, 9 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 2292358..2be5120 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1669,13 +1669,14 @@ EXPORT_SYMBOL(full_name_hash);
/*
* Calculate the length and hash of the path component, and
- * return the length of the component;
+ * fill in the qstr. return the "len" as the result.
*/
-static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+static inline unsigned long hash_name(const char *name, struct qstr *res)
{
unsigned long a, b, adata, bdata, mask, hash, len;
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+ res->name = name;
hash = a = 0;
len = -sizeof(unsigned long);
do {
@@ -1691,9 +1692,10 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
mask = create_zero_mask(adata | bdata);
hash += a & zero_bytemask(mask);
- *hashp = fold_hash(hash);
+ len += find_zero(mask);
+ res->hash_len = hashlen_create(fold_hash(hash), len);
- return len + find_zero(mask);
+ return len;
}
#else
@@ -1711,18 +1713,19 @@ EXPORT_SYMBOL(full_name_hash);
* We know there's a real path component here of at least
* one character.
*/
-static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+static inline long hash_name(const char *name, struct qstr *res)
{
unsigned long hash = init_name_hash();
unsigned long len = 0, c;
+ res->name = name;
c = (unsigned char)*name;
do {
len++;
hash = partial_name_hash(c, hash);
c = (unsigned char)name[len];
} while (c && c != '/');
- *hashp = end_name_hash(hash);
+ res->hash_len = hashlen_create(end_name_hash(hash), len);
return len;
}
@@ -1756,9 +1759,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (err)
break;
- len = hash_name(name, &this.hash);
- this.name = name;
- this.len = len;
+ len = hash_name(name, &this);
type = LAST_NORM;
if (name[0] == '.') switch (len) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e4ae2ad..75a227c 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -55,6 +55,7 @@ struct qstr {
#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
#define hashlen_hash(hashlen) ((u32) (hashlen))
#define hashlen_len(hashlen) ((u32)((hashlen) >> 32))
+#define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash))
struct dentry_stat_t {
long nr_dentry;
OpenPOWER on IntegriCloud