summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarcin Slusarz <marcin.slusarz@gmail.com>2011-06-26 21:39:18 +0200
committerPekka Enberg <penberg@kernel.org>2011-07-07 22:44:45 +0300
commitc4089f98e943ff445665dea49c190657b34ccffe (patch)
tree8238e65e7eaf575afc52c44179f428c3069584cd
parentd18a90dd85f8243ed20cdadb6d8a37d595df456d (diff)
downloadop-kernel-dev-c4089f98e943ff445665dea49c190657b34ccffe.zip
op-kernel-dev-c4089f98e943ff445665dea49c190657b34ccffe.tar.gz
slub: reduce overhead of slub_debug
slub checks for poison one byte at a time, which is highly inefficient and frequently shows up as the top CPU consumer in perf top.

Joining the reads into 64-bit word compares gives a nice speedup (compiling some project with different options):

                                   make -j12    make clean
slub_debug disabled:               1m 27s       1.2 s
slub_debug enabled:                1m 46s       7.6 s
slub_debug enabled + this patch:   1m 33s       3.2 s

check_bytes still shows up high, but not always at the top.

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: linux-mm@kvack.org
Signed-off-by: Pekka Enberg <penberg@kernel.org>
-rw-r--r--mm/slub.c36
1 files changed, 34 insertions, 2 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 0e4f4f8..e3403b3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -589,10 +589,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
memset(p + s->objsize, val, s->inuse - s->objsize);
}
-static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
+static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
{
while (bytes) {
- if (*start != (u8)value)
+ if (*start != value)
return start;
start++;
bytes--;
@@ -600,6 +600,38 @@ static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
return NULL;
}
+/*
+ * Find the first byte in [start, start + bytes) that differs from @value.
+ *
+ * Short regions (<= 16 bytes) are scanned bytewise with check_bytes8().
+ * Longer regions are scanned bytewise up to the next 8-byte boundary, then
+ * compared one u64 at a time against @value replicated into every byte
+ * lane, and finally the trailing (bytes % 8) bytes are scanned bytewise.
+ *
+ * Returns a pointer to the first mismatching byte, or NULL if every byte
+ * equals @value.
+ */
+static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
+{
+	u64 value64;
+	unsigned int words, prefix;
+
+	if (bytes <= 16)
+		return check_bytes8(start, value, bytes);
+
+	/*
+	 * Build the 64-bit pattern in a u64 from the start.  Shifting the
+	 * u8 directly would be done in (signed) int after integer promotion,
+	 * so any value >= 0x80 would sign-extend into the upper 32 bits and
+	 * the word compare below could never match.
+	 */
+	value64 = value;
+	value64 |= value64 << 8;
+	value64 |= value64 << 16;
+	value64 |= value64 << 32;
+
+	/* Only scan a prefix when start is not already 8-byte aligned. */
+	prefix = (unsigned long)start % 8;
+	if (prefix) {
+		u8 *r;
+
+		prefix = 8 - prefix;
+		r = check_bytes8(start, value, prefix);
+		if (r)
+			return r;
+		start += prefix;
+		bytes -= prefix;
+	}
+
+	words = bytes / 8;
+
+	while (words) {
+		/* On a word mismatch, rescan it bytewise to locate the byte. */
+		if (*(u64 *)start != value64)
+			return check_bytes8(start, value, 8);
+		start += 8;
+		words--;
+	}
+
+	/* Remaining tail shorter than one word. */
+	return check_bytes8(start, value, bytes % 8);
+}
+
static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
void *from, void *to)
{
OpenPOWER on IntegriCloud