From 1d321881afb955bba1e0a8f50d3a04e111fcb581 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 28 Mar 2011 00:46:11 +0400 Subject: perf, x86: P4 PMU - clean up the code a bit No change on the functional level, just align the table properly. Signed-off-by: Cyrill Gorcunov Cc: Lin Ming LKML-Reference: <4D8FA213.5050108@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_p4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index c2520e1..8ff882f 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = { .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_X87_ASSIST] = { -- cgit v1.1 From d430d3d7e646eb1eac2bb4aa244a644312e67c76 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 16 Mar 2011 17:29:47 -0400 Subject: jump label: Introduce static_branch() interface Introduce: static __always_inline bool static_branch(struct jump_label_key *key); instead of the old JUMP_LABEL(key, label) macro. In this way, jump labels become really easy to use: Define: struct jump_label_key jump_key; Can be used as: if (static_branch(&jump_key)) do unlikely code enable/disale via: jump_label_inc(&jump_key); jump_label_dec(&jump_key); that's it! For the jump labels disabled case, the static_branch() becomes an atomic_read(), and jump_label_inc()/dec() are simply atomic_inc(), atomic_dec() operations. We show testing results for this change below. Thanks to H. Peter Anvin for suggesting the 'static_branch()' construct. Since we now require a 'struct jump_label_key *key', we can store a pointer into the jump table addresses. In this way, we can enable/disable jump labels, in basically constant time. This change allows us to completely remove the previous hashtable scheme. Thanks to Peter Zijlstra for this re-write. Testing: I ran a series of 'tbench 20' runs 5 times (with reboots) for 3 configurations, where tracepoints were disabled. jump label configured in avg: 815.6 jump label *not* configured in (using atomic reads) avg: 800.1 jump label *not* configured in (regular reads) avg: 803.4 Signed-off-by: Peter Zijlstra LKML-Reference: <20110316212947.GA8792@redhat.com> Signed-off-by: Jason Baron Suggested-by: H. Peter Anvin Tested-by: David Daney Acked-by: Ralf Baechle Acked-by: David S. 
Miller Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- arch/mips/include/asm/jump_label.h | 22 ++++++++++++---------- arch/sparc/include/asm/jump_label.h | 25 ++++++++++++++----------- arch/x86/include/asm/alternative.h | 3 +-- arch/x86/include/asm/jump_label.h | 26 +++++++++++++++----------- arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/module.c | 1 + 6 files changed, 44 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 7622ccf..1881b31 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,16 +20,18 @@ #define WORD_INSN ".word" #endif -#define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:\tnop\n\t" \ - "nop\n\t" \ - ".pushsection __jump_table, \"a\"\n\t" \ - WORD_INSN " 1b, %l[" #label "], %0\n\t" \ - ".popsection\n\t" \ - : : "i" (key) : : label); \ - } while (0) - +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:\tnop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 427d468..fc73a82 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,17 +7,20 @@ #define JUMP_LABEL_NOP_SIZE 4 -#define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:\n\t" \ - "nop\n\t" \ - "nop\n\t" \ - ".pushsection __jump_table, \"a\"\n\t"\ - ".align 4\n\t" \ - ".word 1b, %l[" #label "], %c0\n\t" \ - ".popsection \n\t" \ - : : "i" (key) : : label);\ - } while (0) +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:\n\t" + "nop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 13009d1..8cdd1e2 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,7 +4,6 @@ #include #include #include -#include #include /* @@ -191,7 +190,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); extern void text_poke_smp_batch(struct text_poke_param *params, int n); -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL) #define IDEAL_NOP_SIZE_5 5 extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; extern void arch_init_ideal_nop5(void); diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 574dbc2..f217cee 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -5,20 +5,24 @@ #include #include +#include #define JUMP_LABEL_NOP_SIZE 5 -# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" - -# define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:" \ - JUMP_LABEL_INITIAL_NOP \ - ".pushsection __jump_table, \"aw\" \n\t"\ - _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ - ".popsection \n\t" \ - : : "i" (key) : : label); \ - } while (0) +#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" + +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm 
goto("1:" + JUMP_LABEL_INITIAL_NOP + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4a23467..651454b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -679,7 +679,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); } -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL) #ifdef CONFIG_X86_64 unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index ab23f1a..52f256f 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include -- cgit v1.1 From ef64789413c73f32faa5e5f1bc393e5843b0aa51 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 16 Mar 2011 15:58:27 -0400 Subject: jump label: Add _ASM_ALIGN for x86 and x86_64 The linker should not be adding holes to word size aligned pointers, but out of paranoia we are explicitly specifying that alignment. I have not seen any holes in the jump label section in practice. Signed-off-by: Jason Baron LKML-Reference: Acked-by: Peter Zijlstra Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- arch/x86/include/asm/jump_label.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index f217cee..a32b18c 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -16,6 +16,7 @@ static __always_inline bool arch_static_branch(struct jump_label_key *key) asm goto("1:" JUMP_LABEL_INITIAL_NOP ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" ".popsection \n\t" : : "i" (key) : : l_yes); -- cgit v1.1 From 5373db886b791b2bc7811e2c115377916c409a5d Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Wed, 16 Mar 2011 15:58:30 -0400 Subject: jump label: Add s390 support Implement the architecture backend for jump label support on s390. For a shared kernel booted from a NSS silently disable jump labels because the NSS is read-only. Therefore jump labels will be disabled in a shared kernel and can't be activated. 
Signed-off-by: Jan Glauber LKML-Reference: <6935d2c41ce111e1719176ed4bbd3dbe4de80855.1300299760.git.jbaron@redhat.com> Acked-by: Peter Zijlstra Signed-off-by: Martin Schwidefsky Signed-off-by: Steven Rostedt --- arch/s390/Kconfig | 1 + arch/s390/include/asm/jump_label.h | 37 ++++++++++++++++++++++++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/jump_label.c | 59 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 arch/s390/include/asm/jump_label.h create mode 100644 arch/s390/kernel/jump_label.c (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2508a6f..4a7f140 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -88,6 +88,7 @@ config S390 select HAVE_KERNEL_XZ select HAVE_GET_USER_PAGES_FAST select HAVE_ARCH_MUTEX_CPU_RELAX + select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h new file mode 100644 index 0000000..95a6cf2 --- /dev/null +++ b/arch/s390/include/asm/jump_label.h @@ -0,0 +1,37 @@ +#ifndef _ASM_S390_JUMP_LABEL_H +#define _ASM_S390_JUMP_LABEL_H + +#include + +#define JUMP_LABEL_NOP_SIZE 6 + +#ifdef CONFIG_64BIT +#define ASM_PTR ".quad" +#define ASM_ALIGN ".balign 8" +#else +#define ASM_PTR ".long" +#define ASM_ALIGN ".balign 4" +#endif + +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("0: brcl 0,0\n" + ".pushsection __jump_table, \"aw\"\n" + ASM_ALIGN "\n" + ASM_PTR " 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (key) : : label); + return false; +label: + return true; +} + +typedef unsigned long jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 64230bc..5ff15da 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \ - vdso.o vtime.o sysinfo.o nmi.o sclp.o + vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c new file mode 100644 index 0000000..44cc06b --- /dev/null +++ b/arch/s390/kernel/jump_label.c @@ -0,0 +1,59 @@ +/* + * Jump label s390 support + * + * Copyright IBM Corp. 
2011 + * Author(s): Jan Glauber + */ +#include +#include +#include +#include +#include + +#ifdef HAVE_JUMP_LABEL + +struct insn { + u16 opcode; + s32 offset; +} __packed; + +struct insn_args { + unsigned long *target; + struct insn *insn; + ssize_t size; +}; + +static int __arch_jump_label_transform(void *data) +{ + struct insn_args *args = data; + int rc; + + rc = probe_kernel_write(args->target, args->insn, args->size); + WARN_ON_ONCE(rc < 0); + return 0; +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn_args args; + struct insn insn; + + if (type == JUMP_LABEL_ENABLE) { + /* brcl 15,offset */ + insn.opcode = 0xc0f4; + insn.offset = (entry->target - entry->code) >> 1; + } else { + /* brcl 0,0 */ + insn.opcode = 0xc004; + insn.offset = 0; + } + + args.target = (void *) entry->code; + args.insn = &insn; + args.size = JUMP_LABEL_NOP_SIZE; + + stop_machine(__arch_jump_label_transform, &args, NULL); +} + +#endif -- cgit v1.1 From c8e5910edf8bbe2e5c6c35a4ef2a578cc7893b25 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Sat, 16 Apr 2011 02:27:55 +0200 Subject: perf, x86: Use ALTERNATIVE() to check for X86_FEATURE_PERFCTR_CORE Using ALTERNATIVE() when checking for X86_FEATURE_PERFCTR_CORE avoids an extra pointer chase and data cache hit. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1302913676-14352-4-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index eed3673a..224a84f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -31,6 +31,7 @@ #include #include #include +#include #if 0 #undef wrmsrl @@ -363,12 +364,18 @@ again: return new_raw_count; } -/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */ static inline int x86_pmu_addr_offset(int index) { - if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) - return index << 1; - return index; + int offset; + + /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ + alternative_io(ASM_NOP2, + "shll $1, %%eax", + X86_FEATURE_PERFCTR_CORE, + "=a" (offset), + "a" (index)); + + return offset; } static inline unsigned int x86_pmu_config_addr(int index) -- cgit v1.1 From 103b3934817a7c42fba6e1ef76ecb390a2837d40 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 21 Apr 2011 11:03:20 -0400 Subject: perf, x86: P4 PMU -- Use perf_sample_data_init helper Instead of opencoded assignments better to use perf_sample_data_init helper. Tested-by: Lin Ming Signed-off-by: Cyrill Gorcunov Signed-off-by: Don Zickus Cc: Cyrill Gorcunov Link: http://lkml.kernel.org/r/1303398203-2918-2-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_p4.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 8ff882f..ae31e96 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - data.addr = 0; - data.raw = NULL; + perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); -- cgit v1.1 From fa7b69475a6c192853949ba496dd9c37b497b548 Mon Sep 17 00:00:00 2001 From: "Justin P. 
Mattock" Date: Fri, 22 Apr 2011 10:08:52 -0700 Subject: perf events, x86, P4: Fix typo in comment Signed-off-by: Justin P. Mattock Acked-by: Cyrill Gorcunov Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1303492132-3004-1-git-send-email-justinmattock@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_p4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ae31e96..f4c1da2 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1187,7 +1187,7 @@ static __init int p4_pmu_init(void) { unsigned int low, high; - /* If we get stripped -- indexig fails */ + /* If we get stripped -- indexing fails */ BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); rdmsr(MSR_IA32_MISC_ENABLE, low, high); -- cgit v1.1 From 94403f8863d0d1d2005291b2ef0719c2534aa303 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Apr 2011 08:18:31 +0200 Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate cycles the CPU does nothing useful, because it is stalled on a cache-miss or some other condition. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-fue11vymwqsoo5to72jxxjyl@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9ae4a2a..efa2704 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1413,6 +1413,9 @@ static __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; + /* Install the stalled-cycles event: 0xff: All reasons, 0xa2: Resource stalls */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0xffa2; + if (ebx & 0x40) { /* * Erratum AAJ80 detected, we work it around by using -- cgit v1.1 From 5c543e3c442d6382db127152c7096ca6a55283de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Apr 2011 12:02:04 +0200 Subject: perf events, x86: Mark constrant tables read mostly Various constraint tables were not marked read-mostly. 
Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-wpqwwvmhxucy5e718wnamjiv@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index efa2704..067a48b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -36,7 +36,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; -static struct event_constraint intel_core_event_constraints[] = +static struct event_constraint intel_core_event_constraints[] __read_mostly = { INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ @@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct event_constraint intel_core2_event_constraints[] = +static struct event_constraint intel_core2_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct event_constraint intel_nehalem_event_constraints[] = +static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct extra_reg intel_nehalem_extra_regs[] = +static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), EVENT_EXTRA_END }; -static struct event_constraint intel_nehalem_percore_constraints[] = +static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = { INTEL_EVENT_CONSTRAINT(0xb7, 0), EVENT_CONSTRAINT_END }; -static struct event_constraint intel_westmere_event_constraints[] = +static struct event_constraint intel_westmere_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct event_constraint intel_snb_event_constraints[] = +static struct event_constraint intel_snb_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct extra_reg intel_westmere_extra_regs[] = +static struct extra_reg intel_westmere_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), EVENT_EXTRA_END }; -static struct event_constraint intel_westmere_percore_constraints[] = +static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = { INTEL_EVENT_CONSTRAINT(0xb7, 0), INTEL_EVENT_CONSTRAINT(0xbb, 0), EVENT_CONSTRAINT_END }; -static struct event_constraint intel_gen_event_constraints[] = +static struct event_constraint 
intel_gen_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ -- cgit v1.1 From 8a850cadca0e387c87a0911a61e99fd66aeb57ec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Apr 2011 11:16:44 +0200 Subject: perf event, x86: Use better stalled cycles metric Use the UOPS_EXECUTED.*,c=1,i=1 event on Intel CPUs - it is a rather good indicator of CPU execution stalls, more sensitive and more inclusive than the 0xa2 resource stalls event (which does not count nearly as many stall types). Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio7hjpn2dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 067a48b..1ea9422 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1413,8 +1413,8 @@ static __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; - /* Install the stalled-cycles event: 0xff: All reasons, 0xa2: Resource stalls */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0xffa2; + /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1; if (ebx & 0x40) { /* -- cgit v1.1 From 8f62242246351b5a4bc0c1f00c0c7003edea128a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 13:19:47 +0200 Subject: perf events: Add generic front-end and back-end stalled cycle event definitions Add two generic hardware events: front-end and back-end stalled cycles. These events measure conditions when the CPU is executing code but its capabilities are not fully utilized. Understanding such situations and analyzing them is an important sub-task of code optimization workflows. Both events limit performance: most front end stalls tend to be caused by branch misprediction or instruction fetch cachemisses, backend stalls can be caused by various resource shortages or inefficient instruction scheduling. Front-end stalls are the more important ones: code cannot run fast if the instruction stream is not being kept up. An over-utilized back-end can cause front-end stalls and thus has to be kept an eye on as well. The exact composition is very program logic and instruction mix dependent. We use the terms 'stall', 'front-end' and 'back-end' loosely and try to use the best available events from specific CPUs that approximate these concepts. 
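From user space these two generic events are requested like any other PERF_TYPE_HARDWARE counter. A minimal self-monitoring sketch using the perf_event_open() syscall (illustrative only, not part of this patch; error handling trimmed):

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdio.h>

    static int open_counter(__u64 config)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.type = PERF_TYPE_HARDWARE;
            attr.size = sizeof(attr);
            attr.config = config;           /* e.g. PERF_COUNT_HW_STALLED_CYCLES_FRONTEND */
            attr.disabled = 1;
            attr.exclude_kernel = 1;

            /* pid = 0 (self), cpu = -1 (any), group_fd = -1, flags = 0 */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

    int main(void)
    {
            int fd = open_counter(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND);
            long long count;

            if (fd < 0)
                    return 1;
            ioctl(fd, PERF_EVENT_IOC_RESET, 0);
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            /* ... workload under measurement ... */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
            read(fd, &count, sizeof(count));
            printf("frontend stalled cycles: %lld\n", count);
            return 0;
    }

Passing PERF_COUNT_HW_STALLED_CYCLES_BACKEND to the same call measures the back-end variant; on the CPUs wired up later in this series both map onto the approximate raw events installed there.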
Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1ea9422..393085b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1414,7 +1414,7 @@ static __init int intel_pmu_init(void) x86_pmu.extra_regs = intel_nehalem_extra_regs; /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; if (ebx & 0x40) { /* -- cgit v1.1 From 91fc4cc00099986bc1ba50e1f421c3548cffae42 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 14:17:19 +0200 Subject: perf, x86: Add new stalled cycles events for Intel and AMD CPUs Extend the Intel and AMD event definitions with generic front-end and back-end stall events. ( These are only approximations - suggestions are welcome for better events. ) Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n001io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd.c | 14 ++++++++------ arch/x86/kernel/cpu/perf_event_intel.c | 4 +++- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index cf4e369..fe29c1d 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids */ static const u64 amd_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ }; static u64 amd_pmu_event_map(int hw_event) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 393085b..7983b9a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1413,7 +1413,9 @@ static __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; - /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ + /* UOPS_ISSUED.STALLED_CYCLES */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; if (ebx & 0x40) { -- cgit v1.1 From 301120396b766ae4480e52ece220516a1707822b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 30 Apr 2011 09:14:54 +0200 Subject: perf events, x86: Add Westmere 
stalled-cycles-frontend/backend events Extend the Intel Westmere PMU driver with definitions for generic front-end and back-end stall events. ( These are only approximations. ) Reported-by: David Ahern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n008io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 7983b9a..be8363a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1458,6 +1458,12 @@ static __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; x86_pmu.extra_regs = intel_westmere_extra_regs; + + /* UOPS_ISSUED.STALLED_CYCLES */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; + pr_cont("Westmere events, "); break; -- cgit v1.1 From e04d1b23f9706186187dcb0be1a752e48dcc540b Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Fri, 6 May 2011 07:14:02 +0000 Subject: perf events, x86: Add SandyBridge stalled-cycles-frontend/backend events Extend the Intel SandyBridge PMU driver with definitions for generic front-end and back-end stall events. ( As commit 3011203 "perf events, x86: Add Westmere stalled-cycles-frontend/backend events" says, these are only approximations. ) Signed-off-by: Lin Ming Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1304666042-17577-1-git-send-email-ming.m.lin@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index e61539b..7cf2ec5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1474,6 +1474,12 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_events; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; + pr_cont("SandyBridge events, "); break; -- cgit v1.1 From 449a66fd1fa75d36dca917704827c40c8f416bca Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 12 May 2011 15:11:12 +0200 Subject: x86: Remove warning and warning_symbol from struct stacktrace_ops Both warning and warning_symbol are nowhere used. Let's get rid of them. Signed-off-by: Richard Weinberger Cc: Oleg Nesterov Cc: Andrew Morton Cc: Huang Ying Cc: Soeren Sandmann Pedersen Cc: Namhyung Kim Cc: x86 Cc: H. 
Peter Anvin Cc: Thomas Gleixner Cc: Robert Richter Cc: Paul Mundt Link: http://lkml.kernel.org/r/1305205872-10321-2-git-send-email-richard@nod.at Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/stacktrace.h | 3 --- arch/x86/kernel/cpu/perf_event.c | 13 ------------- arch/x86/kernel/dumpstack.c | 16 ---------------- arch/x86/kernel/stacktrace.c | 13 ------------- arch/x86/oprofile/backtrace.c | 13 ------------- 5 files changed, 58 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index d7e89c8..70bbe39 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo, /* Generic stack tracer with callbacks */ struct stacktrace_ops { - void (*warning)(void *data, char *msg); - /* msg must contain %s for the symbol */ - void (*warning_symbol)(void *data, char *msg, unsigned long symbol); void (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0de6b2b..3a0338b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1773,17 +1773,6 @@ static struct pmu pmu = { * callchain support */ -static void -backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - static int backtrace_stack(void *data, char *name) { return 0; @@ -1797,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, .walk_stack = print_context_stack_bp, diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e2a3f06..f478ff6 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo, } EXPORT_SYMBOL_GPL(print_context_stack_bp); - -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - printk(data); - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s%s\n", (char *)data, msg); -} - static int print_trace_stack(void *data, char *name) { printk("%s <%s> ", (char *)data, name); @@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, .address = print_trace_address, .walk_stack = print_context_stack, diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6515733..55d9bc0 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -9,15 +9,6 @@ #include #include -static void save_stack_warning(void *data, char *msg) -{ -} - -static void -save_stack_warning_symbol(void *data, char *msg, unsigned long symbol) -{ -} - static int save_stack_stack(void *data, char *name) { return 0; @@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops = { - .warning = save_stack_warning, - .warning_symbol 
= save_stack_warning_symbol, .stack = save_stack_stack, .address = save_stack_address, .walk_stack = print_context_stack, }; static const struct stacktrace_ops save_stack_ops_nosched = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, .stack = save_stack_stack, .address = save_stack_address_nosched, .walk_stack = print_context_stack, diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e..a5b64ab 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -16,17 +16,6 @@ #include #include -static void backtrace_warning_symbol(void *data, char *msg, - unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - static int backtrace_stack(void *data, char *name) { /* Yes, we want all stacks */ @@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, .walk_stack = print_context_stack, -- cgit v1.1 From 2895cd2ab81dfb7bc22637bc110857db44a30b4a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 13 Apr 2011 16:43:29 -0400 Subject: ftrace/x86: Do not trace .discard.text section The section called .discard.text has tracing attached to it and is currently ignored by ftrace. But it does include a call to the mcount stub. Adding a notrace to the code keeps gcc from adding the useless mcount caller to it. Link: http://lkml.kernel.org/r/20110421023739.243651696@goodmis.org Signed-off-by: Steven Rostedt --- arch/x86/include/asm/setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index db8aa19..647d8a0 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(.discard.text) __used \ + static void __section(.discard.text) __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ -- cgit v1.1 From 521ccb5c4aece609311bfa7157910a8f0c942af5 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 10 May 2011 10:10:41 +0200 Subject: ftrace/x86: mcount offset calculation Do the mcount offset adjustment in the recordmcount.pl/recordmcount.[ch] at compile time and not in ftrace_call_adjust at run time. Signed-off-by: Martin Schwidefsky Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index db24c22..268c783 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -38,11 +38,10 @@ extern void mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* - * call mcount is "e8 <4 byte offset>" - * The addr points to the 4 byte offset and the caller of this - * function wants the pointer to e8. Simply subtract one. + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. 
*/ - return addr - 1; + return addr; } #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.1 From f29638868280534ed7e2fdd93b31557232597940 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 10 May 2011 10:10:43 +0200 Subject: ftrace/s390: mcount offset calculation Do the mcount offset adjustment in the recordmcount.pl/recordmcount.[ch] at compile time and not in ftrace_call_adjust at run time. Signed-off-by: Martin Schwidefsky Signed-off-by: Steven Rostedt --- arch/s390/include/asm/ftrace.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 3c29be4..b7931fa 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -11,15 +11,13 @@ struct dyn_arch_ftrace { }; #ifdef CONFIG_64BIT #define MCOUNT_INSN_SIZE 12 -#define MCOUNT_OFFSET 8 #else #define MCOUNT_INSN_SIZE 20 -#define MCOUNT_OFFSET 4 #endif static inline unsigned long ftrace_call_adjust(unsigned long addr) { - return addr - MCOUNT_OFFSET; + return addr; } #endif /* __ASSEMBLY__ */ -- cgit v1.1 From 2494b030ba9334c7dd7df9b9f7abe4eacc950ec5 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 12:33:26 -0700 Subject: x86, cpufeature: Fix cpuid leaf 7 feature detection CPUID leaf 7, subleaf 0 returns the maximum subleaf in EAX, not the number of subleaves. Since so far only subleaf 0 is defined (and only the EBX bitfield) we do not need to qualify the test. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305660806-17519-1-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin Cc: 2.6.36..39 --- arch/x86/kernel/cpu/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2ced00..173f3a3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -565,8 +565,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - if (eax > 0) - c->x86_capability[9] = ebx; + c->x86_capability[9] = ebx; } /* AMD-defined flags: level 0x80000001 */ -- cgit v1.1 From 724a92ee45c04cb9d82884a856b03b1e594d9de1 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:10 -0700 Subject: x86, cpufeature: Add CPU feature bit for enhanced REP MOVSB/STOSB Intel processors are adding enhancements to REP MOVSB/STOSB and the use of REP MOVSB/STOSB for optimal memcpy/memset or similar functions is recommended. Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP MOVSB/ STOSB). Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-2-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 91f3e087..7f2f7b1 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -195,6 +195,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) -- cgit v1.1 From 161ec53c702ce9df2f439804dfb9331807066daa Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:11 -0700 Subject: x86, mem, intel: Initialize Enhanced REP MOVSB/STOSB If kernel intends to use enhanced REP MOVSB/STOSB, it must ensure IA32_MISC_ENABLE.Fast_String_Enable (bit 0) is set and CPUID.(EAX=07H, ECX=0H): EBX[bit 9] also reports 1. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-3-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index df86bc8..fc73a34 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -29,10 +29,10 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { + u64 misc_enable; + /* Unmask CPUID levels if masked: */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - u64 misc_enable; - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { @@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) * (model 2) with the same problem. */ if (c->x86 == 15) { - u64 misc_enable; - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { @@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) } } #endif + + /* + * If fast string is not enabled in IA32_MISC_ENABLE for any reason, + * clear the fast string and enhanced fast string CPU capabilities. + */ + if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { + printk(KERN_INFO "Disabled fast string operations\n"); + setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); + setup_clear_cpu_cap(X86_FEATURE_ERMS); + } + } } #ifdef CONFIG_X86_32 -- cgit v1.1 From 509731336313b3799cf03071d72c64fa6383895e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:12 -0700 Subject: x86, alternative, doc: Add comment for applying alternatives order Some string operation functions may be patched twice, e.g. on enhanced REP MOVSB /STOSB processors, memcpy is patched first by fast string alternative function, then it is patched by enhanced REP MOVSB/STOSB alternative function. Add comment for applying alternatives order to warn people who may change the applying alternatives order for any reason. [ Documentation-only patch ] Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-4-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/alternative.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4a23467..f4fe15d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -210,6 +210,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start, u8 insnbuf[MAX_PATCH_LEN]; DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite a previous scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ for (a = start; a < end; a++) { u8 *instr = a->instr; BUG_ON(a->replacementlen > a->instrlen); -- cgit v1.1 From 9072d11da15a71e086eab3b5085184f2c1d06913 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:13 -0700 Subject: x86, alternative: Add altinstruction_entry macro Add altinstruction_entry macro to generate .altinstructions section entries from assembly code. This should be less failure-prone than open-coding. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-5-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative-asm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index a63a68b..94d420b 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -15,4 +15,13 @@ .endm #endif +.macro altinstruction_entry orig alt feature orig_len alt_len + .align 8 + .quad \orig + .quad \alt + .word \feature + .byte \orig_len + .byte \alt_len +.endm + #endif /* __ASSEMBLY__ */ -- cgit v1.1 From e365c9df2f2f001450decf9512412d2d5bd1cdef Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:14 -0700 Subject: x86, mem: clear_page_64.S: Support clear_page() with enhanced REP MOVSB/STOSB Intel processors are adding enhancements to REP MOVSB/STOSB and the use of REP MOVSB/STOSB for optimal memcpy/memset or similar functions is recommended. Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP MOVSB/ STOSB). Support clear_page() with rep stosb for processor supporting enhanced REP MOVSB /STOSB. On processors supporting enhanced REP MOVSB/STOSB, the alternative clear_page_c_e function using enhanced REP STOSB overrides the original function and the fast string function. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-6-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/lib/clear_page_64.S | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index aa4326b..f2145cf 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,6 @@ #include #include +#include /* * Zero a page. @@ -14,6 +15,15 @@ ENTRY(clear_page_c) CFI_ENDPROC ENDPROC(clear_page_c) +ENTRY(clear_page_c_e) + CFI_STARTPROC + movl $4096,%ecx + xorl %eax,%eax + rep stosb + ret + CFI_ENDPROC +ENDPROC(clear_page_c_e) + ENTRY(clear_page) CFI_STARTPROC xorl %eax,%eax @@ -38,21 +48,26 @@ ENTRY(clear_page) .Lclear_page_end: ENDPROC(clear_page) - /* Some CPUs run faster using the string instructions. 
- It is also a lot simpler. Use this when possible */ + /* + * Some CPUs support enhanced REP MOVSB/STOSB instructions. + * It is recommended to use this when possible. + * If enhanced REP MOVSB/STOSB is not available, try to use fast string. + * Otherwise, use original function. + * + */ #include .section .altinstr_replacement,"ax" 1: .byte 0xeb /* jmp */ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ -2: +2: .byte 0xeb /* jmp */ + .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ +3: .previous .section .altinstructions,"a" - .align 8 - .quad clear_page - .quad 1b - .word X86_FEATURE_REP_GOOD - .byte .Lclear_page_end - clear_page - .byte 2b - 1b + altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ + .Lclear_page_end-clear_page, 2b-1b + altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ + .Lclear_page_end-clear_page,3b-2b .previous -- cgit v1.1 From 4307bec9344aed83f8107c3eb4285bd9d218fc10 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:15 -0700 Subject: x86, mem: copy_user_64.S: Support copy_to/from_user by enhanced REP MOVSB/STOSB Support copy_to_user/copy_from_user() by enhanced REP MOVSB/STOSB. On processors supporting enhanced REP MOVSB/STOSB, the alternative copy_user_enhanced_fast_string function using enhanced rep movsb overrides the original function and the fast string function. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-7-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/lib/copy_user_64.S | 65 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 99e4826..d17a117 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -15,23 +15,30 @@ #include #include #include +#include - .macro ALTERNATIVE_JUMP feature,orig,alt +/* + * By placing feature2 after feature1 in altinstructions section, we logically + * implement: + * If CPU has feature2, jmp to alt2 is used + * else if CPU has feature1, jmp to alt1 is used + * else jmp to orig is used. 
+ */ + .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 0: .byte 0xe9 /* 32bit jump */ .long \orig-1f /* by default jump to orig */ 1: .section .altinstr_replacement,"ax" 2: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt-1b /* offset */ /* or alternatively to alt */ + .long \alt1-1b /* offset */ /* or alternatively to alt1 */ +3: .byte 0xe9 /* near jump with 32bit immediate */ + .long \alt2-1b /* offset */ /* or alternatively to alt2 */ .previous + .section .altinstructions,"a" - .align 8 - .quad 0b - .quad 2b - .word \feature /* when feature is set */ - .byte 5 - .byte 5 + altinstruction_entry 0b,2b,\feature1,5,5 + altinstruction_entry 0b,3b,\feature2,5,5 .previous .endm @@ -73,7 +80,9 @@ ENTRY(_copy_to_user) jc bad_to_user cmpq TI_addr_limit(%rax),%rcx jae bad_to_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string + ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ + copy_user_generic_unrolled,copy_user_generic_string, \ + copy_user_enhanced_fast_string CFI_ENDPROC ENDPROC(_copy_to_user) @@ -86,7 +95,9 @@ ENTRY(_copy_from_user) jc bad_from_user cmpq TI_addr_limit(%rax),%rcx jae bad_from_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string + ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ + copy_user_generic_unrolled,copy_user_generic_string, \ + copy_user_enhanced_fast_string CFI_ENDPROC ENDPROC(_copy_from_user) @@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string) .previous CFI_ENDPROC ENDPROC(copy_user_generic_string) + +/* + * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. + * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * eax uncopied bytes or 0 if successful. + */ +ENTRY(copy_user_enhanced_fast_string) + CFI_STARTPROC + andl %edx,%edx + jz 2f + movl %edx,%ecx +1: rep + movsb +2: xorl %eax,%eax + ret + + .section .fixup,"ax" +12: movl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + .section __ex_table,"a" + .align 8 + .quad 1b,12b + .previous + CFI_ENDPROC +ENDPROC(copy_user_enhanced_fast_string) -- cgit v1.1 From 101068c1f4a947ffa08f2782c78e40097300754d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:16 -0700 Subject: x86, mem: memcpy_64.S: Optimize memcpy by enhanced REP MOVSB/STOSB Support memcpy() with enhanced rep movsb. On processors supporting enhanced rep movsb, the alternative memcpy() function using enhanced rep movsb overrides the original function and the fast string function. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-8-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/lib/memcpy_64.S | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 75ef61e..daab21d 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -4,6 +4,7 @@ #include #include +#include /* * memcpy - Copy a memory block. @@ -37,6 +38,23 @@ .Lmemcpy_e: .previous +/* + * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than + * memcpy_c. Use memcpy_c_e when possible. 
+ * + * This gets patched over the unrolled variant (below) via the + * alternative instructions framework: + */ + .section .altinstr_replacement, "ax", @progbits +.Lmemcpy_c_e: + movq %rdi, %rax + + movl %edx, %ecx + rep movsb + ret +.Lmemcpy_e_e: + .previous + ENTRY(__memcpy) ENTRY(memcpy) CFI_STARTPROC @@ -171,21 +189,22 @@ ENDPROC(memcpy) ENDPROC(__memcpy) /* - * Some CPUs run faster using the string copy instructions. - * It is also a lot simpler. Use this when possible: - */ - - .section .altinstructions, "a" - .align 8 - .quad memcpy - .quad .Lmemcpy_c - .word X86_FEATURE_REP_GOOD - - /* + * Some CPUs are adding enhanced REP MOVSB/STOSB feature + * If the feature is supported, memcpy_c_e() is the first choice. + * If enhanced rep movsb copy is not available, use fast string copy + * memcpy_c() when possible. This is faster and code is simpler than + * original memcpy(). + * Otherwise, original memcpy() is used. + * In .altinstructions section, ERMS feature is placed after REG_GOOD + * feature to implement the right patch order. + * * Replace only beginning, memcpy is used to apply alternatives, * so it is silly to overwrite itself with nops - reboot is the * only outcome... */ - .byte .Lmemcpy_e - .Lmemcpy_c - .byte .Lmemcpy_e - .Lmemcpy_c + .section .altinstructions, "a" + altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ + .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c + altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ + .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e .previous -- cgit v1.1 From 057e05c1d6440117875f455e59da8691e08f65d5 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:17 -0700 Subject: x86, mem: memmove_64.S: Optimize memmove by enhanced REP MOVSB/STOSB Support memmove() by enhanced rep movsb. On processors supporting enhanced REP MOVSB/STOSB, the alternative memmove() function using enhanced rep movsb overrides the original function. The patch doesn't change the backward memmove case to use enhanced rep movsb. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-9-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/lib/memmove_64.S | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0ecb843..d0ec9c2 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -8,6 +8,7 @@ #define _STRING_C #include #include +#include #undef memmove @@ -24,6 +25,7 @@ */ ENTRY(memmove) CFI_STARTPROC + /* Handle more 32bytes in loop */ mov %rdi, %rax cmp $0x20, %rdx @@ -31,8 +33,13 @@ ENTRY(memmove) /* Decide forward/backward copy mode */ cmp %rdi, %rsi - jb 2f + jge .Lmemmove_begin_forward + mov %rsi, %r8 + add %rdx, %r8 + cmp %rdi, %r8 + jg 2f +.Lmemmove_begin_forward: /* * movsq instruction have many startup latency * so we handle small size by general register. @@ -78,6 +85,8 @@ ENTRY(memmove) rep movsq movq %r11, (%r10) jmp 13f +.Lmemmove_end_forward: + /* * Handle data backward by movsq. */ @@ -194,4 +203,22 @@ ENTRY(memmove) 13: retq CFI_ENDPROC + + .section .altinstr_replacement,"ax" +.Lmemmove_begin_forward_efs: + /* Forward moving data. 
*/ + movq %rdx, %rcx + rep movsb + retq +.Lmemmove_end_forward_efs: + .previous + + .section .altinstructions,"a" + .align 8 + .quad .Lmemmove_begin_forward + .quad .Lmemmove_begin_forward_efs + .word X86_FEATURE_ERMS + .byte .Lmemmove_end_forward-.Lmemmove_begin_forward + .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs + .previous ENDPROC(memmove) -- cgit v1.1 From 2f19e06ac30771c7cb96fd61d8aeacfa74dac21c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:18 -0700 Subject: x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB Support memset() with enhanced rep stosb. On processors supporting enhanced REP MOVSB/STOSB, the alternative memset_c_e function using enhanced rep stosb overrides the fast string alternative memset_c and the original function. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1305671358-14478-10-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/lib/memset_64.S | 54 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 09d3442..79bd454 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -2,9 +2,13 @@ #include #include +#include +#include /* - * ISO C memset - set a memory block to a byte value. + * ISO C memset - set a memory block to a byte value. This function uses fast + * string to get better performance than the original function. The code is + * simpler and shorter than the orignal function as well. * * rdi destination * rsi value (char) @@ -31,6 +35,28 @@ .Lmemset_e: .previous +/* + * ISO C memset - set a memory block to a byte value. This function uses + * enhanced rep stosb to override the fast string function. + * The code is simpler and shorter than the fast string function as well. + * + * rdi destination + * rsi value (char) + * rdx count (bytes) + * + * rax original destination + */ + .section .altinstr_replacement, "ax", @progbits +.Lmemset_c_e: + movq %rdi,%r9 + movb %sil,%al + movl %edx,%ecx + rep stosb + movq %r9,%rax + ret +.Lmemset_e_e: + .previous + ENTRY(memset) ENTRY(__memset) CFI_STARTPROC @@ -112,16 +138,20 @@ ENTRY(__memset) ENDPROC(memset) ENDPROC(__memset) - /* Some CPUs run faster using the string instructions. - It is also a lot simpler. Use this when possible */ - -#include - + /* Some CPUs support enhanced REP MOVSB/STOSB feature. + * It is recommended to use this when possible. + * + * If enhanced REP MOVSB/STOSB feature is not available, use fast string + * instructions. + * + * Otherwise, use original memset function. + * + * In .altinstructions section, ERMS feature is placed after REG_GOOD + * feature to implement the right patch order. + */ .section .altinstructions,"a" - .align 8 - .quad memset - .quad .Lmemset_c - .word X86_FEATURE_REP_GOOD - .byte .Lfinal - memset - .byte .Lmemset_e - .Lmemset_c + altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ + .Lfinal-memset,.Lmemset_e-.Lmemset_c + altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ + .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e .previous -- cgit v1.1 From 26afb7c661080ae3f1f13ddf7f0c58c4f931c22b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 May 2011 16:30:30 +0200 Subject: x86, 64-bit: Fix copy_[to/from]_user() checks for the userspace address limit As reported in BZ #30352: https://bugzilla.kernel.org/show_bug.cgi?id=30352 there's a kernel bug related to reading the last allowed page on x86_64. 
The _copy_to_user() and _copy_from_user() functions use the following check for address limit: if (buf + size >= limit) fail(); while it should be more permissive: if (buf + size > limit) fail(); That's because the size represents the number of bytes being read/write from/to buf address AND including the buf address. So the copy function will actually never touch the limit address even if "buf + size == limit". Following program fails to use the last page as buffer due to the wrong limit check: #include #include #include #define PAGE_SIZE (4096) #define LAST_PAGE ((void*)(0x7fffffffe000)) int main() { int fds[2], err; void * ptr = mmap(LAST_PAGE, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); assert(ptr == LAST_PAGE); err = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds); assert(err == 0); err = send(fds[0], ptr, PAGE_SIZE, 0); perror("send"); assert(err == PAGE_SIZE); err = recv(fds[1], ptr, PAGE_SIZE, MSG_WAITALL); perror("recv"); assert(err == PAGE_SIZE); return 0; } The other place checking the addr limit is the access_ok() function, which is working properly. There's just a misleading comment for the __range_not_ok() macro - which this patch fixes as well. The last page of the user-space address range is a guard page and Brian Gerst observed that the guard page itself due to an erratum on K8 cpus (#121 Sequential Execution Across Non-Canonical Boundary Causes Processor Hang). However, the test code is using the last valid page before the guard page. The bug is that the last byte before the guard page can't be read because of the off-by-one error. The guard page is left in place. This bug would normally not show up because the last page is part of the process stack and never accessed via syscalls. Signed-off-by: Jiri Olsa Acked-by: Brian Gerst Acked-by: Linus Torvalds Cc: Link: http://lkml.kernel.org/r/1305210630-7136-1-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 2 +- arch/x86/lib/copy_user_64.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index abd3e0e..99f0ad7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -42,7 +42,7 @@ * Returns 0 if the range is valid, nonzero otherwise. * * This is equivalent to the following test: - * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) + * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) * * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... */ diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index d17a117..0248402 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -79,7 +79,7 @@ ENTRY(_copy_to_user) addq %rdx,%rcx jc bad_to_user cmpq TI_addr_limit(%rax),%rcx - jae bad_to_user + ja bad_to_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ copy_user_generic_unrolled,copy_user_generic_string, \ copy_user_enhanced_fast_string @@ -94,7 +94,7 @@ ENTRY(_copy_from_user) addq %rdx,%rcx jc bad_from_user cmpq TI_addr_limit(%rax),%rcx - jae bad_from_user + ja bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ copy_user_generic_unrolled,copy_user_generic_string, \ copy_user_enhanced_fast_string -- cgit v1.1
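To make the off-by-one in this last patch concrete, the corrected check written out as plain C looks roughly like this (an illustrative sketch, not the kernel's actual macro, which performs the same comparison with the 33/65-bit carry arithmetic noted in the uaccess.h comment):

    /* A user range [buf, buf + size) is acceptable when it ends at or before
     * the address limit, so the failure test must be '>' rather than '>='. */
    static int range_not_ok(unsigned long buf, unsigned long size,
                            unsigned long limit)
    {
            unsigned long end = buf + size;

            if (end < buf)          /* addition wrapped past the top of the address space */
                    return 1;       /* bad range */
            return end > limit;     /* end == limit is fine: the last byte sits just below 'limit' */
    }

With the '>' form, the test program in the commit message, whose buffer ends exactly at the address limit, passes the check and can use the last mapped page before the guard page; the old '>=' form rejected it.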