summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-03-04 06:33:49 +0100
committerIngo Molnar <mingo@kernel.org>2015-03-04 06:36:15 +0100
commitf8e92fb4b0ffc4d62279ab39f34e798e37e90b0b (patch)
tree9caa8df664792e64ddcb4ea03fd418a8a529c82e /tools
parentd2c032e3dc58137a7261a7824d3acce435db1d66 (diff)
parentdfecb95cdfeaf7872d83a96bec3a606e9cd95c8d (diff)
downloadop-kernel-dev-f8e92fb4b0ffc4d62279ab39f34e798e37e90b0b.zip
op-kernel-dev-f8e92fb4b0ffc4d62279ab39f34e798e37e90b0b.tar.gz
Merge tag 'alternatives_padding' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/asm
Pull alternative instructions framework improvements from Borislav Petkov: "A more involved rework of the alternatives framework to be able to pad instructions and thus make using the alternatives macros more straightforward and without having to figure out old and new instruction sizes but have the toolchain figure that out for us. Furthermore, it optimizes JMPs used so that fetch and decode can be relieved with smaller versions of the JMPs, where possible. Some stats: x86_64 defconfig: Alternatives sites total: 2478 Total padding added (in Bytes): 6051 The padding is currently done for: X86_FEATURE_ALWAYS X86_FEATURE_ERMS X86_FEATURE_LFENCE_RDTSC X86_FEATURE_MFENCE_RDTSC X86_FEATURE_SMAP This is with the latest version of the patchset. Of course, on each machine the alternatives sites actually being patched are a proper subset of the total number." Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h6
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S2
-rw-r--r--tools/perf/bench/mem-memcpy.c128
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h6
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm.S2
-rw-r--r--tools/perf/util/include/asm/alternative-asm.h1
6 files changed, 73 insertions, 72 deletions
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index d66ab79..8c0c1a2 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -1,12 +1,12 @@
-MEMCPY_FN(__memcpy,
+MEMCPY_FN(memcpy_orig,
"x86-64-unrolled",
"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
-MEMCPY_FN(memcpy_c,
+MEMCPY_FN(__memcpy,
"x86-64-movsq",
"movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
-MEMCPY_FN(memcpy_c_e,
+MEMCPY_FN(memcpy_erms,
"x86-64-movsb",
"movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index fcd9cf0..e4c2c30 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,8 +1,6 @@
#define memcpy MEMCPY /* don't hide glibc's memcpy() */
#define altinstr_replacement text
#define globl p2align 4; .globl
-#define Lmemcpy_c globl memcpy_c; memcpy_c
-#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
#include "../../../arch/x86/lib/memcpy_64.S"
/*
* We need to provide note.GNU-stack section, saying that we want
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db1d3a2..d3dfb79 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -36,7 +36,7 @@ static const struct option options[] = {
"Specify length of memory to copy. "
"Available units: B, KB, MB, GB and TB (upper and lower)"),
OPT_STRING('r', "routine", &routine, "default",
- "Specify routine to copy"),
+ "Specify routine to copy, \"all\" runs all available routines"),
OPT_INTEGER('i', "iterations", &iterations,
"repeat memcpy() invocation this number of times"),
OPT_BOOLEAN('c', "cycle", &use_cycle,
@@ -135,55 +135,16 @@ struct bench_mem_info {
const char *const *usage;
};
-static int bench_mem_common(int argc, const char **argv,
- const char *prefix __maybe_unused,
- struct bench_mem_info *info)
+static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
{
- int i;
- size_t len;
- double totallen;
+ const struct routine *r = &info->routines[r_idx];
double result_bps[2];
u64 result_cycle[2];
- argc = parse_options(argc, argv, options,
- info->usage, 0);
-
- if (no_prefault && only_prefault) {
- fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
- return 1;
- }
-
- if (use_cycle)
- init_cycle();
-
- len = (size_t)perf_atoll((char *)length_str);
- totallen = (double)len * iterations;
-
result_cycle[0] = result_cycle[1] = 0ULL;
result_bps[0] = result_bps[1] = 0.0;
- if ((s64)len <= 0) {
- fprintf(stderr, "Invalid length:%s\n", length_str);
- return 1;
- }
-
- /* same to without specifying either of prefault and no-prefault */
- if (only_prefault && no_prefault)
- only_prefault = no_prefault = false;
-
- for (i = 0; info->routines[i].name; i++) {
- if (!strcmp(info->routines[i].name, routine))
- break;
- }
- if (!info->routines[i].name) {
- printf("Unknown routine:%s\n", routine);
- printf("Available routines...\n");
- for (i = 0; info->routines[i].name; i++) {
- printf("\t%s ... %s\n",
- info->routines[i].name, info->routines[i].desc);
- }
- return 1;
- }
+ printf("Routine %s (%s)\n", r->name, r->desc);
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s Bytes ...\n\n", length_str);
@@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv,
if (!only_prefault && !no_prefault) {
/* show both of results */
if (use_cycle) {
- result_cycle[0] =
- info->do_cycle(&info->routines[i], len, false);
- result_cycle[1] =
- info->do_cycle(&info->routines[i], len, true);
+ result_cycle[0] = info->do_cycle(r, len, false);
+ result_cycle[1] = info->do_cycle(r, len, true);
} else {
- result_bps[0] =
- info->do_gettimeofday(&info->routines[i],
- len, false);
- result_bps[1] =
- info->do_gettimeofday(&info->routines[i],
- len, true);
+ result_bps[0] = info->do_gettimeofday(r, len, false);
+ result_bps[1] = info->do_gettimeofday(r, len, true);
}
} else {
- if (use_cycle) {
- result_cycle[pf] =
- info->do_cycle(&info->routines[i],
- len, only_prefault);
- } else {
- result_bps[pf] =
- info->do_gettimeofday(&info->routines[i],
- len, only_prefault);
- }
+ if (use_cycle)
+ result_cycle[pf] = info->do_cycle(r, len, only_prefault);
+ else
+ result_bps[pf] = info->do_gettimeofday(r, len, only_prefault);
}
switch (bench_format) {
@@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv,
die("unknown format: %d\n", bench_format);
break;
}
+}
+
+static int bench_mem_common(int argc, const char **argv,
+ const char *prefix __maybe_unused,
+ struct bench_mem_info *info)
+{
+ int i;
+ size_t len;
+ double totallen;
+
+ argc = parse_options(argc, argv, options,
+ info->usage, 0);
+
+ if (no_prefault && only_prefault) {
+ fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+ return 1;
+ }
+
+ if (use_cycle)
+ init_cycle();
+
+ len = (size_t)perf_atoll((char *)length_str);
+ totallen = (double)len * iterations;
+
+ if ((s64)len <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ /* same to without specifying either of prefault and no-prefault */
+ if (only_prefault && no_prefault)
+ only_prefault = no_prefault = false;
+
+ if (!strncmp(routine, "all", 3)) {
+ for (i = 0; info->routines[i].name; i++)
+ __bench_mem_routine(info, i, len, totallen);
+ return 0;
+ }
+
+ for (i = 0; info->routines[i].name; i++) {
+ if (!strcmp(info->routines[i].name, routine))
+ break;
+ }
+ if (!info->routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; info->routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ info->routines[i].name, info->routines[i].desc);
+ }
+ return 1;
+ }
+
+ __bench_mem_routine(info, i, len, totallen);
return 0;
}
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
index a71dff9..f02d028 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -1,12 +1,12 @@
-MEMSET_FN(__memset,
+MEMSET_FN(memset_orig,
"x86-64-unrolled",
"unrolled memset() in arch/x86/lib/memset_64.S")
-MEMSET_FN(memset_c,
+MEMSET_FN(__memset,
"x86-64-stosq",
"movsq-based memset() in arch/x86/lib/memset_64.S")
-MEMSET_FN(memset_c_e,
+MEMSET_FN(memset_erms,
"x86-64-stosb",
"movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index 9e5af89..de27878 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,8 +1,6 @@
#define memset MEMSET /* don't hide glibc's memset() */
#define altinstr_replacement text
#define globl p2align 4; .globl
-#define Lmemset_c globl memset_c; memset_c
-#define Lmemset_c_e globl memset_c_e; memset_c_e
#include "../../../arch/x86/lib/memset_64.S"
/*
diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h
index 6789d788..3a3a0f1 100644
--- a/tools/perf/util/include/asm/alternative-asm.h
+++ b/tools/perf/util/include/asm/alternative-asm.h
@@ -4,5 +4,6 @@
/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
#define altinstruction_entry #
+#define ALTERNATIVE_2 #
#endif
OpenPOWER on IntegriCloud