From 849620fab413355eff48232eac5a8c53c57615c5 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 14 May 2009 17:10:52 +0200 Subject: Revert "oprofile: discover counters for op ppro too" This reverts commit 59512900baab03c5629f2ff5efad1d5d4e682ece. arch_perfmon_setup_counters() is actually never called for ppro, so there is no code that changes the numbers in op_ppro_spec. The patch as it is has no effect. Cc: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 8 +++----- arch/x86/oprofile/op_x86_model.h | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 10131fb..2a12399 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -213,9 +213,9 @@ static void ppro_shutdown(struct op_msrs const * const msrs) } -struct op_x86_model_spec op_ppro_spec = { - .num_counters = 2, /* can be overriden */ - .num_controls = 2, /* dito */ +struct op_x86_model_spec const op_ppro_spec = { + .num_counters = 2, + .num_controls = 2, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, @@ -251,8 +251,6 @@ void arch_perfmon_setup_counters(void) op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; - op_ppro_spec.num_counters = num_counters; - op_ppro_spec.num_controls = num_counters; } struct op_x86_model_spec op_arch_perfmon_spec = { diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 825e790..2317149 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -45,7 +45,7 @@ struct op_x86_model_spec { void (*shutdown)(struct op_msrs const * const msrs); }; -extern struct op_x86_model_spec op_ppro_spec; +extern struct op_x86_model_spec const op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_amd_spec; -- cgit v1.1 From e419294ed3c98cccc145202e4fe165bfd8099d63 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Sun, 12 Oct 2008 15:12:34 -0400 Subject: x86/oprofile: moving arch_perfmon counter setup to op_x86_model_spec.init The function arch_perfmon_init() in nmi_int.c is model specific. This patch moves it to op_model_ppro.c by using the init function pointer in struct op_x86_model_spec. 
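A minimal sketch of the dispatch this change relies on, not the exact kernel code: once op_nmi_init() has selected a model, a non-NULL init hook runs before the counters are touched, which is what lets arch_perfmon_init() move behind the function pointer. The helper name init_selected_model() is hypothetical.

	static struct op_x86_model_spec const *model;

	/* hypothetical helper: run the selected model's optional init hook */
	static int init_selected_model(struct oprofile_operations *ops)
	{
		if (!model->init)
			return 0;
		return model->init(ops);	/* e.g. arch_perfmon_init() */
	}
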
Cc: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 21 +++++++++------------ arch/x86/oprofile/op_model_ppro.c | 9 ++++++++- arch/x86/oprofile/op_x86_model.h | 2 -- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3b285e6..dd85153 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -427,7 +427,7 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/core_2"; break; case 26: - arch_perfmon_setup_counters(); + model = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; break; case 28: @@ -442,16 +442,6 @@ static int __init ppro_init(char **cpu_type) return 1; } -static int __init arch_perfmon_init(char **cpu_type) -{ - if (!cpu_has_arch_perfmon) - return 0; - *cpu_type = "i386/arch_perfmon"; - model = &op_arch_perfmon_spec; - arch_perfmon_setup_counters(); - return 1; -} - /* in order to get sysfs right */ static int using_nmi; @@ -509,8 +499,15 @@ int __init op_nmi_init(struct oprofile_operations *ops) break; } - if (!cpu_type && !arch_perfmon_init(&cpu_type)) + if (cpu_type) + break; + + if (!cpu_has_arch_perfmon) return -ENODEV; + + /* use arch perfmon as fallback */ + cpu_type = "i386/arch_perfmon"; + model = &op_arch_perfmon_spec; break; default: diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 2a12399..ae58119 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -233,7 +233,7 @@ struct op_x86_model_spec const op_ppro_spec = { * the specific CPU. */ -void arch_perfmon_setup_counters(void) +static void arch_perfmon_setup_counters(void) { union cpuid10_eax eax; @@ -253,7 +253,14 @@ void arch_perfmon_setup_counters(void) op_arch_perfmon_spec.num_controls = num_counters; } +static int arch_perfmon_init(struct oprofile_operations *ignore) +{ + arch_perfmon_setup_counters(); + return 0; +} + struct op_x86_model_spec op_arch_perfmon_spec = { + .init = &arch_perfmon_init, /* num_counters/num_controls filled in at runtime */ .fill_in_addresses = &ppro_fill_in_addresses, /* user space does the cpuid check for available events */ diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 2317149..ed27783 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -51,6 +51,4 @@ extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_amd_spec; extern struct op_x86_model_spec op_arch_perfmon_spec; -extern void arch_perfmon_setup_counters(void); - #endif /* OP_X86_MODEL_H */ -- cgit v1.1 From 06552ccc36abeb12e37efc16c384dc7f30794f85 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 28 May 2009 02:12:36 +0200 Subject: x86/oprofile: minor style changes in struct op_x86_model_spec Some vertical alignments. Variables are now located in the beginning of the struct. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_x86_model.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index ed27783..bd8157d 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -32,17 +32,17 @@ struct pt_regs; * various x86 CPU models' perfctr support. 
*/ struct op_x86_model_spec { - int (*init)(struct oprofile_operations *ops); - void (*exit)(void); - unsigned int num_counters; - unsigned int num_controls; - void (*fill_in_addresses)(struct op_msrs * const msrs); - void (*setup_ctrs)(struct op_msrs const * const msrs); - int (*check_ctrs)(struct pt_regs * const regs, - struct op_msrs const * const msrs); - void (*start)(struct op_msrs const * const msrs); - void (*stop)(struct op_msrs const * const msrs); - void (*shutdown)(struct op_msrs const * const msrs); + unsigned int num_counters; + unsigned int num_controls; + int (*init)(struct oprofile_operations *ops); + void (*exit)(void); + void (*fill_in_addresses)(struct op_msrs * const msrs); + void (*setup_ctrs)(struct op_msrs const * const msrs); + int (*check_ctrs)(struct pt_regs * const regs, + struct op_msrs const * const msrs); + void (*start)(struct op_msrs const * const msrs); + void (*stop)(struct op_msrs const * const msrs); + void (*shutdown)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec const op_ppro_spec; -- cgit v1.1 From 9063759540daac40cc1f402f83a3be6b489f8583 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 10 Mar 2009 19:15:57 +0100 Subject: x86/oprofile: remove #ifdefs in ibs functions IBS code is moved to separate functions. This allows the removal of #ifdefs in functions. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 80 +++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 8fdf06e..b54c088 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -220,6 +220,50 @@ op_amd_handle_ibs(struct pt_regs * const regs, return 1; } +static inline void op_amd_start_ibs(void) +{ + unsigned int low, high; + if (has_ibs && ibs_config.fetch_enabled) { + low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; + high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ + + IBS_FETCH_HIGH_ENABLE; + wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + } + + if (has_ibs && ibs_config.op_enabled) { + low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ + + IBS_OP_LOW_ENABLE; + high = 0; + wrmsr(MSR_AMD64_IBSOPCTL, low, high); + } +} + +static void op_amd_stop_ibs(void) +{ + unsigned int low, high; + if (has_ibs && ibs_config.fetch_enabled) { + /* clear max count and enable */ + low = 0; + high = 0; + wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + } + + if (has_ibs && ibs_config.op_enabled) { + /* clear max count and enable */ + low = 0; + high = 0; + wrmsr(MSR_AMD64_IBSOPCTL, low, high); + } +} + +#else + +static inline int op_amd_handle_ibs(struct pt_regs * const regs, + struct op_msrs const * const msrs) { } +static inline void op_amd_start_ibs(void) { } +static inline void op_amd_stop_ibs(void) { } + #endif static int op_amd_check_ctrs(struct pt_regs * const regs, @@ -238,9 +282,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, } } -#ifdef CONFIG_OPROFILE_IBS op_amd_handle_ibs(regs, msrs); -#endif /* See op_model_ppro.c */ return 1; @@ -258,25 +300,9 @@ static void op_amd_start(struct op_msrs const * const msrs) } } -#ifdef CONFIG_OPROFILE_IBS - if (has_ibs && ibs_config.fetch_enabled) { - low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; - high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ - + IBS_FETCH_HIGH_ENABLE; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); - } - - if (has_ibs && ibs_config.op_enabled) { - low = 
((ibs_config.max_cnt_op >> 4) & 0xFFFF) - + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ - + IBS_OP_LOW_ENABLE; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } -#endif + op_amd_start_ibs(); } - static void op_amd_stop(struct op_msrs const * const msrs) { unsigned int low, high; @@ -294,21 +320,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) CTRL_WRITE(low, high, msrs, i); } -#ifdef CONFIG_OPROFILE_IBS - if (has_ibs && ibs_config.fetch_enabled) { - /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); - } - - if (has_ibs && ibs_config.op_enabled) { - /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } -#endif + op_amd_stop_ibs(); } static void op_amd_shutdown(struct op_msrs const * const msrs) -- cgit v1.1 From d20f24c66011f8a397bca6c5d1a6a7c7e612d2d7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Sun, 11 Jan 2009 13:01:16 +0100 Subject: x86/oprofile: simplify AMD cpu init code Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index dd85153..ae0ab03 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -460,27 +460,26 @@ int __init op_nmi_init(struct oprofile_operations *ops) /* Needs to be at least an Athlon (or hammer in 32bit mode) */ switch (family) { - default: - return -ENODEV; case 6: - model = &op_amd_spec; cpu_type = "i386/athlon"; break; case 0xf: - model = &op_amd_spec; - /* Actually it could be i386/hammer too, but give - user space an consistent name. */ + /* + * Actually it could be i386/hammer too, but + * give user space an consistent name. + */ cpu_type = "x86-64/hammer"; break; case 0x10: - model = &op_amd_spec; cpu_type = "x86-64/family10"; break; case 0x11: - model = &op_amd_spec; cpu_type = "x86-64/family11h"; break; + default: + return -ENODEV; } + model = &op_amd_spec; break; case X86_VENDOR_INTEL: -- cgit v1.1 From ff9faa8b676e195476b86f03fe58db0f01bda8f3 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 22 May 2009 15:36:29 +0200 Subject: x86/oprofile: move common macros to op_x86_model.h There are duplicate macro implementations in model specific code. This patch moves all common macros to op_x86_model.h. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 8 -------- arch/x86/oprofile/op_model_p4.c | 2 -- arch/x86/oprofile/op_model_ppro.c | 8 -------- arch/x86/oprofile/op_x86_model.h | 9 +++++++++ 4 files changed, 9 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index b54c088..4b9254a 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -26,22 +26,14 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) #define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 
1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) #define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) #define CTRL_CLEAR_LO(x) (x &= (1<<21)) #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 819b131..420c15e 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -366,8 +366,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) #define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index ae58119..a922a1a 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -26,19 +26,11 @@ static int num_counters = 2; static int counter_width = 32; -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) #define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) #define CTRL_CLEAR(x) (x &= (1<<21)) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) #define CTRL_SET_EVENT(val, e) (val |= e) static u64 *reset_value; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index bd8157d..c80ec7d 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -11,6 +11,15 @@ #ifndef OP_X86_MODEL_H #define OP_X86_MODEL_H +#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) +#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 
1 : 0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) + struct op_saved_msr { unsigned int high; unsigned int low; -- cgit v1.1 From d2731a4387ad6c6bca07abfe9ed41d450fb6d665 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 22 May 2009 19:47:38 +0200 Subject: x86/oprofile: remove MSR macros for AMD cpus The macros CTRL_READ() and CTRL_WRITE() make the code hard to read and maintain. This patch replaces them by rdmsr()/wrmsr() functions and simplifies the code. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 4b9254a..c6181c2 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -26,12 +26,7 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 -#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) - -#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) -#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) #define CTRL_CLEAR_LO(x) (x &= (1<<21)) #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) @@ -101,17 +96,17 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs) for (i = 0 ; i < NUM_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; - CTRL_READ(low, high, msrs, i); + rdmsr(msrs->controls[i].addr, low, high); CTRL_CLEAR_LO(low); CTRL_CLEAR_HI(high); - CTRL_WRITE(low, high, msrs, i); + wrmsr(msrs->controls[i].addr, low, high); } /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; - CTR_WRITE(1, msrs, i); + wrmsr(msrs->counters[i].addr, -1, -1); } /* enable active counters */ @@ -119,9 +114,9 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs) if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; - CTR_WRITE(counter_config[i].count, msrs, i); + wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1); - CTRL_READ(low, high, msrs, i); + rdmsr(msrs->controls[i].addr, low, high); CTRL_CLEAR_LO(low); CTRL_CLEAR_HI(high); CTRL_SET_ENABLE(low); @@ -133,7 +128,7 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_HOST_ONLY(high, 0); CTRL_SET_GUEST_ONLY(high, 0); - CTRL_WRITE(low, high, msrs, i); + wrmsr(msrs->controls[i].addr, low, high); } else { reset_value[i] = 0; } @@ -267,10 +262,10 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, for (i = 0 ; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) continue; - CTR_READ(low, high, msrs, i); + rdmsr(msrs->counters[i].addr, low, high); if (CTR_OVERFLOWED(low)) { oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); + wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1); } } @@ -286,9 +281,9 @@ static void op_amd_start(struct op_msrs const * const msrs) int i; for (i = 0 ; i < NUM_COUNTERS 
; ++i) { if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); + rdmsr(msrs->controls[i].addr, low, high); CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + wrmsr(msrs->controls[i].addr, low, high); } } @@ -307,9 +302,9 @@ static void op_amd_stop(struct op_msrs const * const msrs) for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (!reset_value[i]) continue; - CTRL_READ(low, high, msrs, i); + rdmsr(msrs->controls[i].addr, low, high); CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + wrmsr(msrs->controls[i].addr, low, high); } op_amd_stop_ibs(); -- cgit v1.1 From 74c9a5c341bb1f6cbb5095b07c77230f19682ce8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 22 May 2009 19:47:38 +0200 Subject: x86/oprofile: remove MSR macros for ppro cpus The macros CTRL_READ() and CTRL_WRITE() make the code hard to read and maintain. This patch replaces them by rdmsr()/wrmsr() functions and simplifies the code. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index a922a1a..6c5d288 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -27,9 +27,6 @@ static int num_counters = 2; static int counter_width = 32; #define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) - -#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) -#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) #define CTRL_CLEAR(x) (x &= (1<<21)) #define CTRL_SET_EVENT(val, e) (val |= e) @@ -88,9 +85,9 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) for (i = 0 ; i < num_counters; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; - CTRL_READ(low, high, msrs, i); + rdmsr(msrs->controls[i].addr, low, high); CTRL_CLEAR(low); - CTRL_WRITE(low, high, msrs, i); + wrmsr(msrs->controls[i].addr, low, high); } /* avoid a false detection of ctr overflows in NMI handler */ @@ -107,14 +104,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) wrmsrl(msrs->counters[i].addr, -reset_value[i]); - CTRL_READ(low, high, msrs, i); + rdmsr(msrs->controls[i].addr, low, high); CTRL_CLEAR(low); CTRL_SET_ENABLE(low); CTRL_SET_USR(low, counter_config[i].user); CTRL_SET_KERN(low, counter_config[i].kernel); CTRL_SET_UM(low, counter_config[i].unit_mask); CTRL_SET_EVENT(low, counter_config[i].event); - CTRL_WRITE(low, high, msrs, i); + wrmsr(msrs->controls[i].addr, low, high); } else { reset_value[i] = 0; } @@ -162,9 +159,9 @@ static void ppro_start(struct op_msrs const * const msrs) return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); + rdmsr(msrs->controls[i].addr, low, high); CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + wrmsr(msrs->controls[i].addr, low, high); } } } @@ -180,9 +177,9 @@ static void ppro_stop(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CTRL_READ(low, high, msrs, i); + rdmsr(msrs->controls[i].addr, low, high); CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + wrmsr(msrs->controls[i].addr, low, high); } } -- cgit v1.1 From 1131a478245b00664ae2dbc0f68db987b51fa806 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 May 2009 20:23:23 +0200 Subject: x86/oprofile: remove MSR macros for p4 cpus The macros CTRL_READ() and CTRL_WRITE() make the code hard to 
read and maintain. This patch replaces them by rdmsr()/wrmsr() functions and simplifies the code. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_p4.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 420c15e..365d8a9 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -350,8 +350,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) -#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) -#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define CCCR_RESERVED_BITS 0x38030FFF #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) @@ -361,13 +359,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) -#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) -#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) -#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) -#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) @@ -513,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) if (ev->bindings[i].virt_counter & counter_bit) { /* modify ESCR */ - ESCR_READ(escr, high, ev, i); + rdmsr(ev->bindings[i].escr_address, escr, high); ESCR_CLEAR(escr); if (stag == 0) { ESCR_SET_USR_0(escr, counter_config[ctr].user); @@ -524,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } ESCR_SET_EVENT_SELECT(escr, ev->event_select); ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); - ESCR_WRITE(escr, high, ev, i); + wrmsr(ev->bindings[i].escr_address, escr, high); /* modify CCCR */ - CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); + rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, + cccr, high); CCCR_CLEAR(cccr); CCCR_SET_REQUIRED_BITS(cccr); CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); @@ -535,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) CCCR_SET_PMI_OVF_0(cccr); else CCCR_SET_PMI_OVF_1(cccr); - CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); + wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, + cccr, high); return; } } @@ -582,7 +578,8 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); - CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); + wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address, + -(u32)counter_config[i].count, -1); } else { reset_value[i] = 0; } @@ -622,14 +619,16 @@ static int p4_check_ctrs(struct pt_regs * const regs, real = VIRT_CTR(stag, i); - CCCR_READ(low, high, real); - CTR_READ(ctr, high, real); + rdmsr(p4_counters[real].cccr_address, low, high); + 
rdmsr(p4_counters[real].counter_address, ctr, high); if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], real); + wrmsr(p4_counters[real].counter_address, + -(u32)reset_value[i], -1); CCCR_CLEAR_OVF(low); - CCCR_WRITE(low, high, real); - CTR_WRITE(reset_value[i], real); + wrmsr(p4_counters[real].cccr_address, low, high); + wrmsr(p4_counters[real].counter_address, + -(u32)reset_value[i], -1); } } @@ -651,9 +650,9 @@ static void p4_start(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CCCR_READ(low, high, VIRT_CTR(stag, i)); + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_ENABLE(low); - CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } @@ -668,9 +667,9 @@ static void p4_stop(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CCCR_READ(low, high, VIRT_CTR(stag, i)); + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_DISABLE(low); - CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } -- cgit v1.1 From ec064c093e254f4433afb17dcef7f964c76436af Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 May 2009 15:05:50 +0200 Subject: x86/oprofile: fix and cleanup CTRL_SET_* macros This patch fixes missing braces around macro parameters. Macro definitions from intel_arch_perfmon.h are used where possible. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 1 - arch/x86/oprofile/op_x86_model.h | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 6c5d288..61ee8f6 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "op_x86_model.h" #include "op_counter.h" diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index c80ec7d..a207b1c 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -11,14 +11,16 @@ #ifndef OP_X86_MODEL_H #define OP_X86_MODEL_H -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) +#include + +#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0) +#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0) +#define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE) +#define CTRL_SET_ENABLE(val) ((val) |= ARCH_PERFMON_EVENTSEL_INT) +#define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE) +#define CTRL_SET_KERN(val, k) ((val) |= ((k) ? ARCH_PERFMON_EVENTSEL_OS : 0)) +#define CTRL_SET_USR(val, u) ((val) |= ((u) ? 
ARCH_PERFMON_EVENTSEL_USR : 0)) +#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8)) struct op_saved_msr { unsigned int high; -- cgit v1.1 From 9c59354b48ce9cf28048b02fea73dd0236f876ea Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 May 2009 18:16:43 +0200 Subject: x86/oprofile: remove unused macros for AMD virtualization profiling The use of the macros has no effect. The oprofilefs has to be extended first to support these features. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index c6181c2..aaa7ffa 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -31,8 +31,6 @@ #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) -#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) -#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) static unsigned long reset_value[NUM_COUNTERS]; @@ -125,9 +123,6 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_UM(low, counter_config[i].unit_mask); CTRL_SET_EVENT_LOW(low, counter_config[i].event); CTRL_SET_EVENT_HIGH(high, counter_config[i].event); - CTRL_SET_HOST_ONLY(high, 0); - CTRL_SET_GUEST_ONLY(high, 0); - wrmsr(msrs->controls[i].addr, low, high); } else { reset_value[i] = 0; } -- cgit v1.1 From ef8828ddf828174785421af67c281144d4b8e796 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 May 2009 19:31:44 +0200 Subject: x86/oprofile: pass the model to setup_ctrs() functions In follow-on patches the setup_ctrs() functions will need data that describes the model. This patch extends the function argument list to pass a pointer to the model to these functions. 
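To see why the extra argument helps, here is a hedged sketch of the kind of shared code the follow-on patches build, using the per-model reserved mask that a later patch in this series adds to struct op_x86_model_spec; clear_ctrs_sketch() is a hypothetical name, not a function from the series.

	/* sketch: with the model passed in, common code can use per-model data */
	static void clear_ctrs_sketch(struct op_x86_model_spec const *model,
				      struct op_msrs const * const msrs)
	{
		u64 val;
		int i;

		for (i = 0; i < model->num_controls; ++i) {
			rdmsrl(msrs->controls[i].addr, val);
			val &= model->reserved;	/* clear all non-reserved bits */
			wrmsrl(msrs->controls[i].addr, val);
		}
	}
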
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 2 +- arch/x86/oprofile/op_model_amd.c | 3 ++- arch/x86/oprofile/op_model_p4.c | 3 ++- arch/x86/oprofile/op_model_ppro.c | 3 ++- arch/x86/oprofile/op_x86_model.h | 3 ++- 5 files changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index ae0ab03..c31f87b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -125,7 +125,7 @@ static void nmi_cpu_setup(void *dummy) int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); spin_lock(&oprofilefs_lock); - model->setup_ctrs(msrs); + model->setup_ctrs(model, msrs); spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index aaa7ffa..86e0a01 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -85,7 +85,8 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) } -static void op_amd_setup_ctrs(struct op_msrs const * const msrs) +static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { unsigned int low, high; int i; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 365d8a9..05ba028 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -542,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } -static void p4_setup_ctrs(struct op_msrs const * const msrs) +static void p4_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { unsigned int i; unsigned int low, high; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 61ee8f6..40b44ee 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -51,7 +51,8 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) } -static void ppro_setup_ctrs(struct op_msrs const * const msrs) +static void ppro_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { unsigned int low, high; int i; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index a207b1c..6161c7f 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -48,7 +48,8 @@ struct op_x86_model_spec { int (*init)(struct oprofile_operations *ops); void (*exit)(void); void (*fill_in_addresses)(struct op_msrs * const msrs); - void (*setup_ctrs)(struct op_msrs const * const msrs); + void (*setup_ctrs)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); int (*check_ctrs)(struct pt_regs * const regs, struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); -- cgit v1.1 From 3370d358569755625aba4d9a846a040ce691d9ed Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 May 2009 15:10:32 +0200 Subject: x86/oprofile: replace macros to calculate control register This patch introduces op_x86_get_ctrl() to calculate the value of the performance control register. This is generic code usable for all models. The event and reserved masks are model specific and stored in struct op_x86_model_spec. 64 bit MSR functions are used now. The patch removes many hard to read macros used for ctrl calculation. 
The function op_x86_get_ctrl() is common code and the first step to further merge performance counter implementations for x86 models. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 20 +++++++++++++++++++ arch/x86/oprofile/op_model_amd.c | 41 +++++++++++++++------------------------ arch/x86/oprofile/op_model_ppro.c | 29 +++++++++++++-------------- arch/x86/oprofile/op_x86_model.h | 15 ++++++++++---- 4 files changed, 60 insertions(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index c31f87b..388ee15 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -31,6 +31,26 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; +/* common functions */ + +u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, + struct op_counter_config *counter_config) +{ + u64 val = 0; + u16 event = (u16)counter_config->event; + + val |= ARCH_PERFMON_EVENTSEL_INT; + val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; + val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; + val |= (counter_config->unit_mask & 0xFF) << 8; + event &= model->event_mask ? model->event_mask : 0xFF; + val |= event & 0xFF; + val |= (event & 0x0F00) << 24; + + return val; +} + + static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 86e0a01..2406ab8 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -25,12 +25,11 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 +#define OP_EVENT_MASK 0x0FFF + +#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) -#define CTRL_CLEAR_LO(x) (x &= (1<<21)) -#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) -#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) -#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) static unsigned long reset_value[NUM_COUNTERS]; @@ -84,21 +83,19 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) } } - static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; /* clear all counters */ for (i = 0 ; i < NUM_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; - rdmsr(msrs->controls[i].addr, low, high); - CTRL_CLEAR_LO(low); - CTRL_CLEAR_HI(high); - wrmsr(msrs->controls[i].addr, low, high); + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + wrmsrl(msrs->controls[i].addr, val); } /* avoid a false detection of ctr overflows in NMI handler */ @@ -112,19 +109,11 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, for (i = 0; i < NUM_COUNTERS; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; - wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1); - - rdmsr(msrs->controls[i].addr, low, high); - CTRL_CLEAR_LO(low); - CTRL_CLEAR_HI(high); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT_LOW(low, counter_config[i].event); - CTRL_SET_EVENT_HIGH(high, counter_config[i].event); - wrmsr(msrs->controls[i].addr, low, high); + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + 
val |= op_x86_get_ctrl(model, &counter_config[i]); + wrmsrl(msrs->controls[i].addr, val); } else { reset_value[i] = 0; } @@ -486,14 +475,16 @@ static void op_amd_exit(void) {} #endif /* CONFIG_OPROFILE_IBS */ struct op_x86_model_spec const op_amd_spec = { - .init = op_amd_init, - .exit = op_amd_exit, .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .reserved = MSR_AMD_EVENTSEL_RESERVED, + .event_mask = OP_EVENT_MASK, + .init = op_amd_init, + .exit = op_amd_exit, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, - .shutdown = &op_amd_shutdown + .shutdown = &op_amd_shutdown, }; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 40b44ee..3092f99 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -10,6 +10,7 @@ * @author Philippe Elie * @author Graydon Hoare * @author Andi Kleen + * @author Robert Richter */ #include @@ -26,8 +27,8 @@ static int num_counters = 2; static int counter_width = 32; #define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) -#define CTRL_CLEAR(x) (x &= (1<<21)) -#define CTRL_SET_EVENT(val, e) (val |= e) + +#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) static u64 *reset_value; @@ -54,7 +55,7 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) static void ppro_setup_ctrs(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; if (!reset_value) { @@ -85,9 +86,9 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, for (i = 0 ; i < num_counters; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; - rdmsr(msrs->controls[i].addr, low, high); - CTRL_CLEAR(low); - wrmsr(msrs->controls[i].addr, low, high); + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + wrmsrl(msrs->controls[i].addr, val); } /* avoid a false detection of ctr overflows in NMI handler */ @@ -101,17 +102,11 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, for (i = 0; i < num_counters; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; - wrmsrl(msrs->counters[i].addr, -reset_value[i]); - - rdmsr(msrs->controls[i].addr, low, high); - CTRL_CLEAR(low); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT(low, counter_config[i].event); - wrmsr(msrs->controls[i].addr, low, high); + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[i]); + wrmsrl(msrs->controls[i].addr, val); } else { reset_value[i] = 0; } @@ -205,6 +200,7 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { .num_counters = 2, .num_controls = 2, + .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, @@ -249,6 +245,7 @@ static int arch_perfmon_init(struct oprofile_operations *ignore) } struct op_x86_model_spec op_arch_perfmon_spec = { + .reserved = MSR_PPRO_EVENTSEL_RESERVED, .init = &arch_perfmon_init, /* num_counters/num_controls filled in at runtime */ .fill_in_addresses = &ppro_fill_in_addresses, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 
6161c7f..3220d4c 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -6,21 +6,19 @@ * @remark Read the file COPYING * * @author Graydon Hoare + * @author Robert Richter */ #ifndef OP_X86_MODEL_H #define OP_X86_MODEL_H +#include <asm/types.h> #include <asm/intel_arch_perfmon.h> #define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0) #define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0) #define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE) -#define CTRL_SET_ENABLE(val) ((val) |= ARCH_PERFMON_EVENTSEL_INT) #define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE) -#define CTRL_SET_KERN(val, k) ((val) |= ((k) ? ARCH_PERFMON_EVENTSEL_OS : 0)) -#define CTRL_SET_USR(val, u) ((val) |= ((u) ? ARCH_PERFMON_EVENTSEL_USR : 0)) -#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8)) struct op_saved_msr { unsigned int high; @@ -39,12 +37,16 @@ struct op_msrs { struct pt_regs; +struct oprofile_operations; + /* The model vtable abstracts the differences between * various x86 CPU models' perfctr support. */ struct op_x86_model_spec { unsigned int num_counters; unsigned int num_controls; + u64 reserved; + u16 event_mask; int (*init)(struct oprofile_operations *ops); void (*exit)(void); void (*fill_in_addresses)(struct op_msrs * const msrs); @@ -57,6 +59,11 @@ struct op_x86_model_spec { void (*shutdown)(struct op_msrs const * const msrs); }; +struct op_counter_config; + +extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, + struct op_counter_config *counter_config); + extern struct op_x86_model_spec const op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; -- cgit v1.1 From 42399adb239d4f1413899cc618ecf640779e79df Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 May 2009 17:59:06 +0200 Subject: x86/oprofile: replace CTR_OVERFLOWED macros The patch replaces all CTR_OVERFLOWED macros. 64 bit MSR functions and 64 bit counter values are used now. Thus, it will be easier to later extend the models to use counters wider than 32 bits. 
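A worked example of the new overflow test, assuming a count of 100000: the counter is armed with -100000, which is 0xfffe7960 in 32 bits, so bit 31 starts out set; the counter counts up, wraps past zero after 100000 events, and bit 31 reads back clear. A clear bit therefore means the counter overflowed, as in this sketch (the 64-bit re-arm write shown here lands in a later patch below):

	rdmsrl(msrs->counters[i].addr, val);
	if (!(val & OP_CTR_OVERFLOW)) {		/* bit 31 clear: overflowed */
		oprofile_add_sample(regs, i);
		wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]);
	}
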
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 16 ++++++++-------- arch/x86/oprofile/op_model_p4.c | 6 +++--- arch/x86/oprofile/op_model_ppro.c | 10 ++++------ 3 files changed, 15 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 2406ab8..b5d678f 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -26,11 +26,10 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 #define OP_EVENT_MASK 0x0FFF +#define OP_CTR_OVERFLOW (1ULL<<31) #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) - static unsigned long reset_value[NUM_COUNTERS]; #ifdef CONFIG_OPROFILE_IBS @@ -241,17 +240,18 @@ static inline void op_amd_stop_ibs(void) { } static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; for (i = 0 ; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) continue; - rdmsr(msrs->counters[i].addr, low, high); - if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1); - } + rdmsrl(msrs->counters[i].addr, val); + /* bit is clear if overflowed: */ + if (val & OP_CTR_OVERFLOW) + continue; + oprofile_add_sample(regs, i); + wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1); } op_amd_handle_ibs(regs, msrs); diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 05ba028..ac4ca28 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -32,6 +32,8 @@ #define NUM_CCCRS_HT2 9 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) +#define OP_CTR_OVERFLOW (1ULL<<31) + static unsigned int num_counters = NUM_COUNTERS_NON_HT; static unsigned int num_controls = NUM_CONTROLS_NON_HT; @@ -362,8 +364,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) -#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) - /* this assigns a "stagger" to the current CPU, which is used throughout the code in this module as an extra array offset, to select the "even" @@ -622,7 +622,7 @@ static int p4_check_ctrs(struct pt_regs * const regs, rdmsr(p4_counters[real].cccr_address, low, high); rdmsr(p4_counters[real].counter_address, ctr, high); - if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { + if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); wrmsr(p4_counters[real].counter_address, -(u32)reset_value[i], -1); diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 3092f99..82db396 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -26,8 +26,6 @@ static int num_counters = 2; static int counter_width = 32; -#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) - #define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) static u64 *reset_value; @@ -124,10 +122,10 @@ static int ppro_check_ctrs(struct pt_regs * const regs, if (!reset_value[i]) continue; rdmsrl(msrs->counters[i].addr, val); - if (CTR_OVERFLOWED(val)) { - oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -reset_value[i]); - } + if (val & (1ULL << (counter_width - 1))) + continue; + oprofile_add_sample(regs, i); + wrmsrl(msrs->counters[i].addr, -reset_value[i]); } /* Only P6 based Pentium M need to re-unmask the apic vector but it 
-- cgit v1.1 From dea3766ca052a4f572b16a23a322553c064d75af Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 May 2009 18:11:52 +0200 Subject: x86/oprofile: replace CTRL_SET_*ACTIVE macros The patch replaces all CTRL_SET_*ACTIVE macros. 64 bit MSR functions and 64 bit counter values are used now. The code uses bit masks from <asm/intel_arch_perfmon.h>. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 16 ++++++++-------- arch/x86/oprofile/op_model_ppro.c | 16 ++++++++-------- arch/x86/oprofile/op_x86_model.h | 2 -- 3 files changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index b5d678f..4ac9d28 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -262,13 +262,13 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, static void op_amd_start(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (reset_value[i]) { - rdmsr(msrs->controls[i].addr, low, high); - CTRL_SET_ACTIVE(low); - wrmsr(msrs->controls[i].addr, low, high); + rdmsrl(msrs->controls[i].addr, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } } @@ -277,7 +277,7 @@ static void op_amd_start(struct op_msrs const * const msrs) static void op_amd_stop(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; /* @@ -287,9 +287,9 @@ static void op_amd_stop(struct op_msrs const * const msrs) for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (!reset_value[i]) continue; - rdmsr(msrs->controls[i].addr, low, high); - CTRL_SET_INACTIVE(low); - wrmsr(msrs->controls[i].addr, low, high); + rdmsrl(msrs->controls[i].addr, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } op_amd_stop_ibs(); } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 82db396..566b43f 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -145,16 +145,16 @@ static int ppro_check_ctrs(struct pt_regs * const regs, static void ppro_start(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; if (!reset_value) return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { - rdmsr(msrs->controls[i].addr, low, high); - CTRL_SET_ACTIVE(low); - wrmsr(msrs->controls[i].addr, low, high); + rdmsrl(msrs->controls[i].addr, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } } } @@ -162,7 +162,7 @@ static void ppro_start(struct op_msrs const * const msrs) static void ppro_stop(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; if (!reset_value) @@ -170,9 +170,9 @@ static void ppro_stop(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - rdmsr(msrs->controls[i].addr, low, high); - CTRL_SET_INACTIVE(low); - wrmsr(msrs->controls[i].addr, low, high); + rdmsrl(msrs->controls[i].addr, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } } diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 3220d4c..1c45777 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -17,8 +17,6 @@ #define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0) #define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 
1 : 0) -#define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE) -#define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE) struct op_saved_msr { unsigned int high; -- cgit v1.1 From 217d3cfb959756cb493fc03106c0253baa420ce8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 4 Jun 2009 02:36:44 +0200 Subject: x86/oprofile: replace CTR*_IS_RESERVED macros The patch replaces all CTR*_IS_RESERVED macros. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 10 +++++----- arch/x86/oprofile/op_model_p4.c | 10 +++++----- arch/x86/oprofile/op_model_ppro.c | 10 +++++----- arch/x86/oprofile/op_x86_model.h | 3 --- 4 files changed, 15 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 4ac9d28..c5c5eec 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -90,7 +90,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* clear all counters */ for (i = 0 ; i < NUM_CONTROLS; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->controls[i].addr)) continue; rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; @@ -99,14 +99,14 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->counters[i].addr)) continue; wrmsr(msrs->counters[i].addr, -1, -1); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { + if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1); rdmsrl(msrs->controls[i].addr, val); @@ -300,11 +300,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) + if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } for (i = 0 ; i < NUM_CONTROLS ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) + if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } } diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index ac4ca28..9db0ca9 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -559,7 +559,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, /* clear the cccrs we will use */ for (i = 0 ; i < num_counters ; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->controls[i].addr)) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); @@ -569,14 +569,14 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, /* clear all escrs (including those outside our concern) */ for (i = num_counters; i < num_controls; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->controls[i].addr)) continue; wrmsr(msrs->controls[i].addr, 0, 0); } /* setup all counters */ for (i = 0 ; i < num_counters ; ++i) { - if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { + if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address, @@ -679,7 +679,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < 
num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) + if (msrs->counters[i].addr) release_perfctr_nmi(msrs->counters[i].addr); } /* @@ -688,7 +688,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) * This saves a few bits. */ for (i = num_counters ; i < num_controls ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) + if (msrs->controls[i].addr) release_evntsel_nmi(msrs->controls[i].addr); } } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 566b43f..0a261a5 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -82,7 +82,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, /* clear all counters */ for (i = 0 ; i < num_counters; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->controls[i].addr)) continue; rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; @@ -91,14 +91,14 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < num_counters; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->counters[i].addr)) continue; wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ for (i = 0; i < num_counters; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { + if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; wrmsrl(msrs->counters[i].addr, -reset_value[i]); rdmsrl(msrs->controls[i].addr, val); @@ -181,11 +181,11 @@ static void ppro_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) + if (msrs->counters[i].addr) release_perfctr_nmi(MSR_P6_PERFCTR0 + i); } for (i = 0 ; i < num_counters ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) + if (msrs->controls[i].addr) release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } if (reset_value) { diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 1c45777..69f1eb4 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -15,9 +15,6 @@ #include #include -#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0) -#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0) - struct op_saved_msr { unsigned int high; unsigned int low; -- cgit v1.1 From bbc5986d2db427fdd61b6116ff8b9ed988e663a8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 May 2009 17:38:19 +0200 Subject: x86/oprofile: use 64 bit wrmsr functions This patch replaces some wrmsr() functions with wrmsrl(). 
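For the nonzero counts used here the old and new forms write the same 64-bit MSR value; a sketch, assuming count is a positive value below 2^32:

	/* old: value split into halves; high word -1 is all ones */
	wrmsr(msrs->counters[i].addr, -(unsigned int)count, -1);
	/* new: one 64-bit write; sign extension of -(s64)count
	 * produces the same all-ones high word */
	wrmsrl(msrs->counters[i].addr, -(s64)count);
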
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 7 ++++--- arch/x86/oprofile/op_model_p4.c | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index c5c5eec..9bf9017 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -101,14 +101,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, for (i = 0; i < NUM_COUNTERS; ++i) { if (unlikely(!msrs->counters[i].addr)) continue; - wrmsr(msrs->counters[i].addr, -1, -1); + wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; - wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1); + wrmsrl(msrs->counters[i].addr, + -(s64)counter_config[i].count); rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; val |= op_x86_get_ctrl(model, &counter_config[i]); @@ -251,7 +252,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, if (val & OP_CTR_OVERFLOW) continue; oprofile_add_sample(regs, i); - wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1); + wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]); } op_amd_handle_ibs(regs, msrs); diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 9db0ca9..f01e53b 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -579,8 +579,8 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); - wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address, - -(u32)counter_config[i].count, -1); + wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, + -(s64)counter_config[i].count); } else { reset_value[i] = 0; } @@ -624,12 +624,12 @@ static int p4_check_ctrs(struct pt_regs * const regs, rdmsr(p4_counters[real].counter_address, ctr, high); if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); - wrmsr(p4_counters[real].counter_address, - -(u32)reset_value[i], -1); + wrmsrl(p4_counters[real].counter_address, + -(s64)reset_value[i]); CCCR_CLEAR_OVF(low); wrmsr(p4_counters[real].cccr_address, low, high); - wrmsr(p4_counters[real].counter_address, - -(u32)reset_value[i], -1); + wrmsrl(p4_counters[real].counter_address, + -(s64)reset_value[i]); } } -- cgit v1.1 From 95e74e62c1540b1115fe8cec5b592f22960f2bb2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 3 Jun 2009 19:09:27 +0200 Subject: x86/oprofile: use 64 bit values to save MSR states This patch removes struct op_saved_msr and replaces it with a u64 variable. This makes the code simpler, and the 64 bit MSR functions can be used.
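To see why the two representations are interchangeable, here is a condensed sketch (illustration only; save_msr() is a hypothetical helper and "addr" stands for any counter or control MSR):

	static u64 save_msr(unsigned int addr)
	{
		u32 low, high;
		u64 saved;

		rdmsr(addr, low, high);			/* old: two 32 bit halves ... */
		saved = ((u64)high << 32) | low;	/* ... reassembled by hand */

		rdmsrl(addr, saved);			/* new: one 64 bit read */
		return saved;
	}

Both paths leave the same value in "saved", which is why struct op_saved_msr can be folded into a plain u64 field.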
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 28 ++++++++-------------------- arch/x86/oprofile/op_x86_model.h | 9 ++------- 2 files changed, 10 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 388ee15..3b84b78 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -78,19 +78,13 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) unsigned int i; for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr) { - rdmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } + if (counters[i].addr) + rdmsrl(counters[i].addr, counters[i].saved); } for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr) { - rdmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); - } + if (controls[i].addr) + rdmsrl(controls[i].addr, controls[i].saved); } } @@ -204,19 +198,13 @@ static void nmi_restore_registers(struct op_msrs *msrs) unsigned int i; for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr) { - wrmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); - } + if (controls[i].addr) + wrmsrl(controls[i].addr, controls[i].saved); } for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr) { - wrmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } + if (counters[i].addr) + wrmsrl(counters[i].addr, counters[i].saved); } } diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 69f1eb4..fda52b4 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -15,14 +15,9 @@ #include #include -struct op_saved_msr { - unsigned int high; - unsigned int low; -}; - struct op_msr { - unsigned long addr; - struct op_saved_msr saved; + unsigned long addr; + u64 saved; }; struct op_msrs { -- cgit v1.1 From 1a245c45343651a87ff63afc5ddeb8e24d731835 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 5 Jun 2009 15:54:24 +0200 Subject: x86/oprofile: remove some local variables in MSR save/restore functions The patch removes the nr_ctrs and nr_ctrls local variables from nmi_cpu_save_registers() and nmi_restore_registers(); the loops now use model->num_counters and model->num_controls directly.
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3b84b78..80b63d5 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -71,18 +71,16 @@ static int profile_exceptions_notify(struct notifier_block *self, static void nmi_cpu_save_registers(struct op_msrs *msrs) { - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; - for (i = 0; i < nr_ctrs; ++i) { + for (i = 0; i < model->num_counters; ++i) { if (counters[i].addr) rdmsrl(counters[i].addr, counters[i].saved); } - for (i = 0; i < nr_ctrls; ++i) { + for (i = 0; i < model->num_controls; ++i) { if (controls[i].addr) rdmsrl(controls[i].addr, controls[i].saved); } @@ -191,18 +189,16 @@ static int nmi_setup(void) static void nmi_restore_registers(struct op_msrs *msrs) { - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; - for (i = 0; i < nr_ctrls; ++i) { + for (i = 0; i < model->num_controls; ++i) { if (controls[i].addr) wrmsrl(controls[i].addr, controls[i].saved); } - for (i = 0; i < nr_ctrs; ++i) { + for (i = 0; i < model->num_counters; ++i) { if (counters[i].addr) wrmsrl(counters[i].addr, counters[i].saved); } -- cgit v1.1 From c572ae4efd1b0a5cc76c5e6aae05c1b182b6a69c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 3 Jun 2009 20:10:39 +0200 Subject: x86/oprofile: use 64 bit values in IBS functions The IBS code internally uses 32 bit values (a low and a high value) to represent a 64 bit value. This patch switches the code to plain 64 bit values, so the 64 bit MSR functions can be used. No functional changes.
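The new constants in the diff below express the same bits on the full 64 bit register that the old ones expressed within the high 32 bit half: bit n of the high half is bit 32 + n of the MSR. A condensed mapping (old definitions from the removed lines, new ones from the added lines):

	#define IBS_FETCH_HIGH_VALID_BIT	(1UL << 17)	/* old: bit 17 of the high half */
	#define IBS_FETCH_VAL			(1ULL << 49)	/* new: bit 32 + 17 = 49 */
	#define IBS_FETCH_HIGH_ENABLE		(1UL << 16)	/* old: bit 16 of the high half */
	#define IBS_FETCH_ENABLE		(1ULL << 48)	/* new: bit 32 + 16 = 48 */

The IbsOpCtl flags already lived in the low half, so IBS_OP_VAL (bit 18) and IBS_OP_ENABLE (bit 17) keep their bit positions and only change type to 1ULL.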
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 131 ++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 70 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 9bf9017..6493ef7 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -35,16 +35,18 @@ static unsigned long reset_value[NUM_COUNTERS]; #ifdef CONFIG_OPROFILE_IBS /* IbsFetchCtl bits/masks */ -#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ -#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ -#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ +#define IBS_FETCH_RAND_EN (1ULL<<57) +#define IBS_FETCH_VAL (1ULL<<49) +#define IBS_FETCH_ENABLE (1ULL<<48) +#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL /*IbsOpCtl bits */ -#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ -#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ +#define IBS_OP_CNT_CTL (1ULL<<19) +#define IBS_OP_VAL (1ULL<<18) +#define IBS_OP_ENABLE (1ULL<<17) -#define IBS_FETCH_SIZE 6 -#define IBS_OP_SIZE 12 +#define IBS_FETCH_SIZE 6 +#define IBS_OP_SIZE 12 static int has_ibs; /* AMD Family10h and later */ @@ -126,66 +128,63 @@ static inline int op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - u32 low, high; - u64 msr; + u64 val, ctl; struct op_entry entry; if (!has_ibs) return 1; if (ibs_config.fetch_enabled) { - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); - if (high & IBS_FETCH_HIGH_VALID_BIT) { - rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); - oprofile_write_reserve(&entry, regs, msr, + rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); + if (ctl & IBS_FETCH_VAL) { + rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); + oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - oprofile_add_data(&entry, low); - oprofile_add_data(&entry, high); - rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data(&entry, (u32)ctl); + oprofile_add_data(&entry, (u32)(ctl >> 32)); + rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); oprofile_write_commit(&entry); /* reenable the IRQ */ - high &= ~IBS_FETCH_HIGH_VALID_BIT; - high |= IBS_FETCH_HIGH_ENABLE; - low &= IBS_FETCH_LOW_MAX_CNT_MASK; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); + ctl |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); } } if (ibs_config.op_enabled) { - rdmsr(MSR_AMD64_IBSOPCTL, low, high); - if (low & IBS_OP_LOW_VALID_BIT) { - rdmsrl(MSR_AMD64_IBSOPRIP, msr); - oprofile_write_reserve(&entry, regs, msr, + rdmsrl(MSR_AMD64_IBSOPCTL, ctl); + if (ctl & IBS_OP_VAL) { + rdmsrl(MSR_AMD64_IBSOPRIP, val); + oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA2, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA3, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSDCLINAD, msr); - 
oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA2, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA3, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSDCLINAD, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); oprofile_write_commit(&entry); /* reenable the IRQ */ - high = 0; - low &= ~IBS_OP_LOW_VALID_BIT; - low |= IBS_OP_LOW_ENABLE; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); + ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; + ctl |= IBS_OP_ENABLE; + wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } @@ -194,39 +193,31 @@ op_amd_handle_ibs(struct pt_regs * const regs, static inline void op_amd_start_ibs(void) { - unsigned int low, high; + u64 val; if (has_ibs && ibs_config.fetch_enabled) { - low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; - high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ - + IBS_FETCH_HIGH_ENABLE; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; + val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; + val |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, val); } if (has_ibs && ibs_config.op_enabled) { - low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) - + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ - + IBS_OP_LOW_ENABLE; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); + val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; + val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; + val |= IBS_OP_ENABLE; + wrmsrl(MSR_AMD64_IBSOPCTL, val); } } static void op_amd_stop_ibs(void) { - unsigned int low, high; - if (has_ibs && ibs_config.fetch_enabled) { + if (has_ibs && ibs_config.fetch_enabled) /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); - } + wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); - if (has_ibs && ibs_config.op_enabled) { + if (has_ibs && ibs_config.op_enabled) /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } + wrmsrl(MSR_AMD64_IBSOPCTL, 0); } #else -- cgit v1.1 From 51563a0e5650d0d76539625388d72d62b34c726e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 3 Jun 2009 20:54:56 +0200 Subject: x86/oprofile: introduce oprofile_add_data64() The IBS implementation writes 64 bit register values to the cpu buffer by writing two 32 bit values using oprofile_add_data(). This patch introduces oprofile_add_data64() to write a single 64 bit value to the buffer.
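A sketch of the intended semantics of the new helper (illustration only; the real definition lives in drivers/oprofile and is not part of this diff):

	int oprofile_add_data64(struct op_entry *entry, u64 val)
	{
		/* low half first, then high half, matching the old call order */
		if (!oprofile_add_data(entry, (u32)val))
			return 0;
		return oprofile_add_data(entry, (u32)(val >> 32));
	}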
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 6493ef7..cc93046 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -140,13 +140,10 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); - oprofile_add_data(&entry, (u32)ctl); - oprofile_add_data(&entry, (u32)(ctl >> 32)); + oprofile_add_data64(&entry, val); + oprofile_add_data64(&entry, ctl); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ @@ -162,23 +159,17 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSOPRIP, val); oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA2, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA3, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCLINAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ -- cgit v1.1 From 802070f5474af1a49435a9528aede47bb18abd47 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 12 Jun 2009 18:32:07 +0200 Subject: x86/oprofile: fix initialization of arch_perfmon for core_i7 Commit: e419294 x86/oprofile: moving arch_perfmon counter setup to op_x86_model_spec.init introduced a bug in the initialization of core_i7, causing model to be incorrectly set to &op_ppro_spec. This patch fixes this.
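A condensed sketch of the bug and the fix (not the literal diff context; fragments from ppro_init(), not compilable as-is):

	/* before: the unconditional assignment at the end of ppro_init()
	 * clobbered the model chosen for cpu model 26 */
	switch (cpu_model) {
	case 26:
		model = &op_arch_perfmon_spec;	/* set here ... */
		break;
	}
	model = &op_ppro_spec;			/* ... and overwritten here */

	/* after: select the spec via a local variable, assign model once */
	struct op_x86_model_spec const *spec = &op_ppro_spec;	/* default */
	switch (cpu_model) {
	case 26:
		spec = &op_arch_perfmon_spec;
		break;
	}
	model = spec;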
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 7826dfc..28ee490 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -406,6 +406,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; + struct op_x86_model_spec const *spec = &op_ppro_spec; /* default */ if (force_arch_perfmon && cpu_has_arch_perfmon) return 0; @@ -432,7 +433,7 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/core_2"; break; case 26: - model = &op_arch_perfmon_spec; + spec = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; break; case 28: @@ -443,7 +444,7 @@ static int __init ppro_init(char **cpu_type) return 0; } - model = &op_ppro_spec; + model = spec; return 1; } -- cgit v1.1 From c64b04fe6e0cb7c78e01751a44ef56cf20344e87 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 14 Jun 2009 00:59:50 +0530 Subject: x86, cpu: cpu/proc.c display cache alignment and address sizes for 32 bit 32 bits can also access x86_cache_alignment, x86_phys_bits and x86_virt_bits, make them available to user space just as on 64 bits. Signed-off-by: Jaswinder Singh Rajput LKML-Reference: <1244921390.11733.30.camel@ht.satnam> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/proc.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index d5e3039..f82706a 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); #endif seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); -#ifdef CONFIG_X86_64 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", c->x86_phys_bits, c->x86_virt_bits); -#endif seq_printf(m, "power management:"); for (i = 0; i < 32; i++) { -- cgit v1.1 From ac5672f82c39ff2f8dce81bf3e68b1dfc41f366f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 14 Apr 2009 14:29:44 -0700 Subject: x86/paravirt: split paravirt definitions into paravirt_types.h Split the monolithic asm/paravirt.h into separate paravirt.h (inlines and other "active" definitions), and paravirt_types.h (types, constants and other "passive" definitions). This makes it easier to use the type/constant definitions without pulling in everything else and causing circular dependency problems. [ Impact: cleanup ] Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/paravirt.h | 711 +-------------------------------- arch/x86/include/asm/paravirt_types.h | 720 ++++++++++++++++++++++++++++++++++ 2 files changed, 721 insertions(+), 710 deletions(-) create mode 100644 arch/x86/include/asm/paravirt_types.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4fb37c8..6a07af4 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -7,689 +7,11 @@ #include #include -/* Bitmask of what can be clobbered: usually at least eax. */ -#define CLBR_NONE 0 -#define CLBR_EAX (1 << 0) -#define CLBR_ECX (1 << 1) -#define CLBR_EDX (1 << 2) -#define CLBR_EDI (1 << 3) - -#ifdef CONFIG_X86_32 -/* CLBR_ANY should match all regs platform has. 
For i386, that's just it */ -#define CLBR_ANY ((1 << 4) - 1) - -#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) -#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) -#define CLBR_SCRATCH (0) -#else -#define CLBR_RAX CLBR_EAX -#define CLBR_RCX CLBR_ECX -#define CLBR_RDX CLBR_EDX -#define CLBR_RDI CLBR_EDI -#define CLBR_RSI (1 << 4) -#define CLBR_R8 (1 << 5) -#define CLBR_R9 (1 << 6) -#define CLBR_R10 (1 << 7) -#define CLBR_R11 (1 << 8) - -#define CLBR_ANY ((1 << 9) - 1) - -#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ - CLBR_RCX | CLBR_R8 | CLBR_R9) -#define CLBR_RET_REG (CLBR_RAX) -#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) - -#include -#endif /* X86_64 */ - -#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) +#include #ifndef __ASSEMBLY__ #include #include -#include -#include - -struct page; -struct thread_struct; -struct desc_ptr; -struct tss_struct; -struct mm_struct; -struct desc_struct; -struct task_struct; - -/* - * Wrapper type for pointers to code which uses the non-standard - * calling convention. See PV_CALL_SAVE_REGS_THUNK below. - */ -struct paravirt_callee_save { - void *func; -}; - -/* general info */ -struct pv_info { - unsigned int kernel_rpl; - int shared_kernel_pmd; - int paravirt_enabled; - const char *name; -}; - -struct pv_init_ops { - /* - * Patch may replace one of the defined code sequences with - * arbitrary code, subject to the same register constraints. - * This generally means the code is not free to clobber any - * registers other than EAX. The patch function should return - * the number of bytes of code generated, as we nop pad the - * rest in generic code. - */ - unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, - unsigned long addr, unsigned len); - - /* Basic arch-specific setup */ - void (*arch_setup)(void); - char *(*memory_setup)(void); - void (*post_allocator_init)(void); - - /* Print a banner to identify the environment */ - void (*banner)(void); -}; - - -struct pv_lazy_ops { - /* Set deferred update mode, used for batching operations. 
*/ - void (*enter)(void); - void (*leave)(void); -}; - -struct pv_time_ops { - void (*time_init)(void); - - /* Set and set time of day */ - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long); - - unsigned long long (*sched_clock)(void); - unsigned long (*get_tsc_khz)(void); -}; - -struct pv_cpu_ops { - /* hooks for various privileged instructions */ - unsigned long (*get_debugreg)(int regno); - void (*set_debugreg)(int regno, unsigned long value); - - void (*clts)(void); - - unsigned long (*read_cr0)(void); - void (*write_cr0)(unsigned long); - - unsigned long (*read_cr4_safe)(void); - unsigned long (*read_cr4)(void); - void (*write_cr4)(unsigned long); - -#ifdef CONFIG_X86_64 - unsigned long (*read_cr8)(void); - void (*write_cr8)(unsigned long); -#endif - - /* Segment descriptor handling */ - void (*load_tr_desc)(void); - void (*load_gdt)(const struct desc_ptr *); - void (*load_idt)(const struct desc_ptr *); - void (*store_gdt)(struct desc_ptr *); - void (*store_idt)(struct desc_ptr *); - void (*set_ldt)(const void *desc, unsigned entries); - unsigned long (*store_tr)(void); - void (*load_tls)(struct thread_struct *t, unsigned int cpu); -#ifdef CONFIG_X86_64 - void (*load_gs_index)(unsigned int idx); -#endif - void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, - const void *desc); - void (*write_gdt_entry)(struct desc_struct *, - int entrynum, const void *desc, int size); - void (*write_idt_entry)(gate_desc *, - int entrynum, const gate_desc *gate); - void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); - void (*free_ldt)(struct desc_struct *ldt, unsigned entries); - - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); - - void (*set_iopl_mask)(unsigned mask); - - void (*wbinvd)(void); - void (*io_delay)(void); - - /* cpuid emulation, mostly so that caps bits can be disabled */ - void (*cpuid)(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); - - /* MSR, PMC and TSR operations. - err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ - u64 (*read_msr_amd)(unsigned int msr, int *err); - u64 (*read_msr)(unsigned int msr, int *err); - int (*write_msr)(unsigned int msr, unsigned low, unsigned high); - - u64 (*read_tsc)(void); - u64 (*read_pmc)(int counter); - unsigned long long (*read_tscp)(unsigned int *aux); - - /* - * Atomically enable interrupts and return to userspace. This - * is only ever used to return to 32-bit processes; in a - * 64-bit kernel, it's used for 32-on-64 compat processes, but - * never native 64-bit processes. (Jump, not call.) - */ - void (*irq_enable_sysexit)(void); - - /* - * Switch to usermode gs and return to 64-bit usermode using - * sysret. Only used in 64-bit kernels to return to 64-bit - * processes. Usermode register state, including %rsp, must - * already be restored. - */ - void (*usergs_sysret64)(void); - - /* - * Switch to usermode gs and return to 32-bit usermode using - * sysret. Used to return to 32-on-64 compat processes. - * Other usermode register state, including %esp, must already - * be restored. - */ - void (*usergs_sysret32)(void); - - /* Normal iret. Jump to this with the standard iret stack - frame set up. */ - void (*iret)(void); - - void (*swapgs)(void); - - void (*start_context_switch)(struct task_struct *prev); - void (*end_context_switch)(struct task_struct *next); -}; - -struct pv_irq_ops { - void (*init_IRQ)(void); - - /* - * Get/set interrupt state. 
save_fl and restore_fl are only - * expected to use X86_EFLAGS_IF; all other bits - * returned from save_fl are undefined, and may be ignored by - * restore_fl. - * - * NOTE: These functions callers expect the callee to preserve - * more registers than the standard C calling convention. - */ - struct paravirt_callee_save save_fl; - struct paravirt_callee_save restore_fl; - struct paravirt_callee_save irq_disable; - struct paravirt_callee_save irq_enable; - - void (*safe_halt)(void); - void (*halt)(void); - -#ifdef CONFIG_X86_64 - void (*adjust_exception_frame)(void); -#endif -}; - -struct pv_apic_ops { -#ifdef CONFIG_X86_LOCAL_APIC - void (*setup_boot_clock)(void); - void (*setup_secondary_clock)(void); - - void (*startup_ipi_hook)(int phys_apicid, - unsigned long start_eip, - unsigned long start_esp); -#endif -}; - -struct pv_mmu_ops { - /* - * Called before/after init_mm pagetable setup. setup_start - * may reset %cr3, and may pre-install parts of the pagetable; - * pagetable setup is expected to preserve any existing - * mapping. - */ - void (*pagetable_setup_start)(pgd_t *pgd_base); - void (*pagetable_setup_done)(pgd_t *pgd_base); - - unsigned long (*read_cr2)(void); - void (*write_cr2)(unsigned long); - - unsigned long (*read_cr3)(void); - void (*write_cr3)(unsigned long); - - /* - * Hooks for intercepting the creation/use/destruction of an - * mm_struct. - */ - void (*activate_mm)(struct mm_struct *prev, - struct mm_struct *next); - void (*dup_mmap)(struct mm_struct *oldmm, - struct mm_struct *mm); - void (*exit_mmap)(struct mm_struct *mm); - - - /* TLB operations */ - void (*flush_tlb_user)(void); - void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const struct cpumask *cpus, - struct mm_struct *mm, - unsigned long va); - - /* Hooks for allocating and freeing a pagetable top-level */ - int (*pgd_alloc)(struct mm_struct *mm); - void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); - - /* - * Hooks for allocating/releasing pagetable pages when they're - * attached to a pagetable - */ - void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); - void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); - void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); - void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); - void (*release_pte)(unsigned long pfn); - void (*release_pmd)(unsigned long pfn); - void (*release_pud)(unsigned long pfn); - - /* Pagetable manipulation functions */ - void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); - void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - void (*pte_update)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); - - pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); - void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); - - struct paravirt_callee_save pte_val; - struct paravirt_callee_save make_pte; - - struct paravirt_callee_save pgd_val; - struct paravirt_callee_save make_pgd; - -#if PAGETABLE_LEVELS >= 3 -#ifdef CONFIG_X86_PAE - void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*pte_clear)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); - void (*pmd_clear)(pmd_t *pmdp); - -#endif /* CONFIG_X86_PAE */ - - void (*set_pud)(pud_t *pudp, pud_t pudval); - - 
struct paravirt_callee_save pmd_val; - struct paravirt_callee_save make_pmd; - -#if PAGETABLE_LEVELS == 4 - struct paravirt_callee_save pud_val; - struct paravirt_callee_save make_pud; - - void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); -#endif /* PAGETABLE_LEVELS == 4 */ -#endif /* PAGETABLE_LEVELS >= 3 */ - -#ifdef CONFIG_HIGHPTE - void *(*kmap_atomic_pte)(struct page *page, enum km_type type); -#endif - - struct pv_lazy_ops lazy_mode; - - /* dom0 ops */ - - /* Sometimes the physical address is a pfn, and sometimes its - an mfn. We can tell which is which from the index. */ - void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, - phys_addr_t phys, pgprot_t flags); -}; - -struct raw_spinlock; -struct pv_lock_ops { - int (*spin_is_locked)(struct raw_spinlock *lock); - int (*spin_is_contended)(struct raw_spinlock *lock); - void (*spin_lock)(struct raw_spinlock *lock); - void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); - int (*spin_trylock)(struct raw_spinlock *lock); - void (*spin_unlock)(struct raw_spinlock *lock); -}; - -/* This contains all the paravirt structures: we get a convenient - * number for each function using the offset which we use to indicate - * what to patch. */ -struct paravirt_patch_template { - struct pv_init_ops pv_init_ops; - struct pv_time_ops pv_time_ops; - struct pv_cpu_ops pv_cpu_ops; - struct pv_irq_ops pv_irq_ops; - struct pv_apic_ops pv_apic_ops; - struct pv_mmu_ops pv_mmu_ops; - struct pv_lock_ops pv_lock_ops; -}; - -extern struct pv_info pv_info; -extern struct pv_init_ops pv_init_ops; -extern struct pv_time_ops pv_time_ops; -extern struct pv_cpu_ops pv_cpu_ops; -extern struct pv_irq_ops pv_irq_ops; -extern struct pv_apic_ops pv_apic_ops; -extern struct pv_mmu_ops pv_mmu_ops; -extern struct pv_lock_ops pv_lock_ops; - -#define PARAVIRT_PATCH(x) \ - (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) - -#define paravirt_type(op) \ - [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(op)) -#define paravirt_clobber(clobber) \ - [paravirt_clobber] "i" (clobber) - -/* - * Generate some code, and mark it as patchable by the - * apply_paravirt() alternate instruction patcher. - */ -#define _paravirt_alt(insn_string, type, clobber) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR " 771b\n" \ - " .byte " type "\n" \ - " .byte 772b-771b\n" \ - " .short " clobber "\n" \ - ".popsection\n" - -/* Generate patchable code, with the default asm parameters. */ -#define paravirt_alt(insn_string) \ - _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") - -/* Simple instruction patching code. 
*/ -#define DEF_NATIVE(ops, name, code) \ - extern const char start_##ops##_##name[], end_##ops##_##name[]; \ - asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") - -unsigned paravirt_patch_nop(void); -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_ignore(unsigned len); -unsigned paravirt_patch_call(void *insnbuf, - const void *target, u16 tgt_clobbers, - unsigned long addr, u16 site_clobbers, - unsigned len); -unsigned paravirt_patch_jmp(void *insnbuf, const void *target, - unsigned long addr, unsigned len); -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, - unsigned long addr, unsigned len); - -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, - const char *start, const char *end); - -unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len); - -int paravirt_disable_iospace(void); - -/* - * This generates an indirect call based on the operation type number. - * The type number, computed in PARAVIRT_PATCH, is derived from the - * offset into the paravirt_patch_template structure, and can therefore be - * freely converted back into a structure offset. - */ -#define PARAVIRT_CALL "call *%c[paravirt_opptr];" - -/* - * These macros are intended to wrap calls through one of the paravirt - * ops structs, so that they can be later identified and patched at - * runtime. - * - * Normally, a call to a pv_op function is a simple indirect call: - * (pv_op_struct.operations)(args...). - * - * Unfortunately, this is a relatively slow operation for modern CPUs, - * because it cannot necessarily determine what the destination - * address is. In this case, the address is a runtime constant, so at - * the very least we can patch the call to e a simple direct call, or - * ideally, patch an inline implementation into the callsite. (Direct - * calls are essentially free, because the call and return addresses - * are completely predictable.) - * - * For i386, these macros rely on the standard gcc "regparm(3)" calling - * convention, in which the first three arguments are placed in %eax, - * %edx, %ecx (in that order), and the remaining arguments are placed - * on the stack. All caller-save registers (eax,edx,ecx) are expected - * to be modified (either clobbered or used for return values). - * X86_64, on the other hand, already specifies a register-based calling - * conventions, returning at %rax, with parameteres going on %rdi, %rsi, - * %rdx, and %rcx. Note that for this reason, x86_64 does not need any - * special handling for dealing with 4 arguments, unlike i386. - * However, x86_64 also have to clobber all caller saved registers, which - * unfortunately, are quite a bit (r8 - r11) - * - * The call instruction itself is marked by placing its start address - * and size into the .parainstructions section, so that - * apply_paravirt() in arch/i386/kernel/alternative.c can do the - * appropriate patching under the control of the backend pv_init_ops - * implementation. - * - * Unfortunately there's no way to get gcc to generate the args setup - * for the call, and then allow the call itself to be generated by an - * inline asm. Because of this, we must do the complete arg setup and - * return value handling from within these macros. This is fairly - * cumbersome. - * - * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. 
- * It could be extended to more arguments, but there would be little - * to be gained from that. For each number of arguments, there are - * the two VCALL and CALL variants for void and non-void functions. - * - * When there is a return value, the invoker of the macro must specify - * the return type. The macro then uses sizeof() on that type to - * determine whether its a 32 or 64 bit value, and places the return - * in the right register(s) (just %eax for 32-bit, and %edx:%eax for - * 64-bit). For x86_64 machines, it just returns at %rax regardless of - * the return value size. - * - * 64-bit arguments are passed as a pair of adjacent 32-bit arguments - * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments - * in low,high order - * - * Small structures are passed and returned in registers. The macro - * calling convention can't directly deal with this, so the wrapper - * functions must do this. - * - * These PVOP_* macros are only defined within this header. This - * means that all uses must be wrapped in inline functions. This also - * makes sure the incoming and outgoing types are always correct. - */ -#ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS \ - unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS - -#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) -#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) -#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) - -#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ - "=c" (__ecx) -#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS - -#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) -#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS - -#define EXTRA_CLOBBERS -#define VEXTRA_CLOBBERS -#else /* CONFIG_X86_64 */ -#define PVOP_VCALL_ARGS \ - unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax - -#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) -#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) -#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) -#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) - -#define PVOP_VCALL_CLOBBERS "=D" (__edi), \ - "=S" (__esi), "=d" (__edx), \ - "=c" (__ecx) -#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) - -#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) -#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS - -#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" -#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" -#endif /* CONFIG_X86_32 */ - -#ifdef CONFIG_PARAVIRT_DEBUG -#define PVOP_TEST_NULL(op) BUG_ON(op == NULL) -#else -#define PVOP_TEST_NULL(op) ((void)op) -#endif - -#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ - pre, post, ...) \ - ({ \ - rettype __ret; \ - PVOP_CALL_ARGS; \ - PVOP_TEST_NULL(op); \ - /* This is 32-bit specific, but is okay in 64-bit */ \ - /* since this condition will never hold */ \ - if (sizeof(rettype) > sizeof(unsigned long)) { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)((((u64)__edx) << 32) | __eax); \ - } else { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)__eax; \ - } \ - __ret; \ - }) - -#define __PVOP_CALL(rettype, op, pre, post, ...) 
\ - ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ - EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) - -#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ - PVOP_CALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) - - -#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ - ({ \ - PVOP_VCALL_ARGS; \ - PVOP_TEST_NULL(op); \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - }) - -#define __PVOP_VCALL(op, pre, post, ...) \ - ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ - VEXTRA_CLOBBERS, \ - pre, post, ##__VA_ARGS__) - -#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ - PVOP_VCALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) - - - -#define PVOP_CALL0(rettype, op) \ - __PVOP_CALL(rettype, op, "", "") -#define PVOP_VCALL0(op) \ - __PVOP_VCALL(op, "", "") - -#define PVOP_CALLEE0(rettype, op) \ - __PVOP_CALLEESAVE(rettype, op, "", "") -#define PVOP_VCALLEE0(op) \ - __PVOP_VCALLEESAVE(op, "", "") - - -#define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) -#define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) - -#define PVOP_CALLEE1(rettype, op, arg1) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) -#define PVOP_VCALLEE1(op, arg1) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) - - -#define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) -#define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - -#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) -#define PVOP_VCALLEE2(op, arg1, arg2) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - - -#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) -#define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) - -/* This is the only difference in x86_64. 
We can make it much simpler */ -#ifdef CONFIG_X86_32 -#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) -#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) -#else -#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -#endif static inline int paravirt_enabled(void) { @@ -1393,20 +715,6 @@ static inline void pmd_clear(pmd_t *pmdp) } #endif /* CONFIG_X86_PAE */ -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - -enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_start_context_switch(struct task_struct *prev); -void paravirt_end_context_switch(struct task_struct *next); - -void paravirt_enter_lazy_mmu(void); -void paravirt_leave_lazy_mmu(void); - #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) { @@ -1437,12 +745,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, pv_mmu_ops.set_fixmap(idx, phys, flags); } -void _paravirt_nop(void); -u32 _paravirt_ident_32(u32); -u64 _paravirt_ident_64(u64); - -#define paravirt_nop ((void *)_paravirt_nop) - #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static inline int __raw_spin_is_locked(struct raw_spinlock *lock) @@ -1479,17 +781,6 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) #endif -/* These all sit in the .parainstructions section to tell us what to patch. */ -struct paravirt_patch_site { - u8 *instr; /* original instructions */ - u8 instrtype; /* type of this instruction */ - u8 len; /* length of original instruction */ - u16 clobbers; /* what registers you may clobber */ -}; - -extern struct paravirt_patch_site __parainstructions[], - __parainstructions_end[]; - #ifdef CONFIG_X86_32 #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" #define PV_RESTORE_REGS "popl %edx; popl %ecx;" diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h new file mode 100644 index 0000000..2b3371b --- /dev/null +++ b/arch/x86/include/asm/paravirt_types.h @@ -0,0 +1,720 @@ +#ifndef _ASM_X86_PARAVIRT_TYPES_H +#define _ASM_X86_PARAVIRT_TYPES_H + +/* Bitmask of what can be clobbered: usually at least eax. */ +#define CLBR_NONE 0 +#define CLBR_EAX (1 << 0) +#define CLBR_ECX (1 << 1) +#define CLBR_EDX (1 << 2) +#define CLBR_EDI (1 << 3) + +#ifdef CONFIG_X86_32 +/* CLBR_ANY should match all regs platform has. 
For i386, that's just it */ +#define CLBR_ANY ((1 << 4) - 1) + +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) +#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) +#define CLBR_SCRATCH (0) +#else +#define CLBR_RAX CLBR_EAX +#define CLBR_RCX CLBR_ECX +#define CLBR_RDX CLBR_EDX +#define CLBR_RDI CLBR_EDI +#define CLBR_RSI (1 << 4) +#define CLBR_R8 (1 << 5) +#define CLBR_R9 (1 << 6) +#define CLBR_R10 (1 << 7) +#define CLBR_R11 (1 << 8) + +#define CLBR_ANY ((1 << 9) - 1) + +#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ + CLBR_RCX | CLBR_R8 | CLBR_R9) +#define CLBR_RET_REG (CLBR_RAX) +#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) + +#endif /* X86_64 */ + +#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) + +#ifndef __ASSEMBLY__ + +#include +#include + +struct page; +struct thread_struct; +struct desc_ptr; +struct tss_struct; +struct mm_struct; +struct desc_struct; +struct task_struct; +struct cpumask; + +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + +/* general info */ +struct pv_info { + unsigned int kernel_rpl; + int shared_kernel_pmd; + int paravirt_enabled; + const char *name; +}; + +struct pv_init_ops { + /* + * Patch may replace one of the defined code sequences with + * arbitrary code, subject to the same register constraints. + * This generally means the code is not free to clobber any + * registers other than EAX. The patch function should return + * the number of bytes of code generated, as we nop pad the + * rest in generic code. + */ + unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, + unsigned long addr, unsigned len); + + /* Basic arch-specific setup */ + void (*arch_setup)(void); + char *(*memory_setup)(void); + void (*post_allocator_init)(void); + + /* Print a banner to identify the environment */ + void (*banner)(void); +}; + + +struct pv_lazy_ops { + /* Set deferred update mode, used for batching operations. 
*/ + void (*enter)(void); + void (*leave)(void); +}; + +struct pv_time_ops { + void (*time_init)(void); + + /* Set and set time of day */ + unsigned long (*get_wallclock)(void); + int (*set_wallclock)(unsigned long); + + unsigned long long (*sched_clock)(void); + unsigned long (*get_tsc_khz)(void); +}; + +struct pv_cpu_ops { + /* hooks for various privileged instructions */ + unsigned long (*get_debugreg)(int regno); + void (*set_debugreg)(int regno, unsigned long value); + + void (*clts)(void); + + unsigned long (*read_cr0)(void); + void (*write_cr0)(unsigned long); + + unsigned long (*read_cr4_safe)(void); + unsigned long (*read_cr4)(void); + void (*write_cr4)(unsigned long); + +#ifdef CONFIG_X86_64 + unsigned long (*read_cr8)(void); + void (*write_cr8)(unsigned long); +#endif + + /* Segment descriptor handling */ + void (*load_tr_desc)(void); + void (*load_gdt)(const struct desc_ptr *); + void (*load_idt)(const struct desc_ptr *); + void (*store_gdt)(struct desc_ptr *); + void (*store_idt)(struct desc_ptr *); + void (*set_ldt)(const void *desc, unsigned entries); + unsigned long (*store_tr)(void); + void (*load_tls)(struct thread_struct *t, unsigned int cpu); +#ifdef CONFIG_X86_64 + void (*load_gs_index)(unsigned int idx); +#endif + void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, + const void *desc); + void (*write_gdt_entry)(struct desc_struct *, + int entrynum, const void *desc, int size); + void (*write_idt_entry)(gate_desc *, + int entrynum, const gate_desc *gate); + void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); + void (*free_ldt)(struct desc_struct *ldt, unsigned entries); + + void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); + + void (*set_iopl_mask)(unsigned mask); + + void (*wbinvd)(void); + void (*io_delay)(void); + + /* cpuid emulation, mostly so that caps bits can be disabled */ + void (*cpuid)(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + + /* MSR, PMC and TSR operations. + err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + u64 (*read_msr_amd)(unsigned int msr, int *err); + u64 (*read_msr)(unsigned int msr, int *err); + int (*write_msr)(unsigned int msr, unsigned low, unsigned high); + + u64 (*read_tsc)(void); + u64 (*read_pmc)(int counter); + unsigned long long (*read_tscp)(unsigned int *aux); + + /* + * Atomically enable interrupts and return to userspace. This + * is only ever used to return to 32-bit processes; in a + * 64-bit kernel, it's used for 32-on-64 compat processes, but + * never native 64-bit processes. (Jump, not call.) + */ + void (*irq_enable_sysexit)(void); + + /* + * Switch to usermode gs and return to 64-bit usermode using + * sysret. Only used in 64-bit kernels to return to 64-bit + * processes. Usermode register state, including %rsp, must + * already be restored. + */ + void (*usergs_sysret64)(void); + + /* + * Switch to usermode gs and return to 32-bit usermode using + * sysret. Used to return to 32-on-64 compat processes. + * Other usermode register state, including %esp, must already + * be restored. + */ + void (*usergs_sysret32)(void); + + /* Normal iret. Jump to this with the standard iret stack + frame set up. */ + void (*iret)(void); + + void (*swapgs)(void); + + void (*start_context_switch)(struct task_struct *prev); + void (*end_context_switch)(struct task_struct *next); +}; + +struct pv_irq_ops { + void (*init_IRQ)(void); + + /* + * Get/set interrupt state. 
save_fl and restore_fl are only + * expected to use X86_EFLAGS_IF; all other bits + * returned from save_fl are undefined, and may be ignored by + * restore_fl. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. + */ + struct paravirt_callee_save save_fl; + struct paravirt_callee_save restore_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + + void (*safe_halt)(void); + void (*halt)(void); + +#ifdef CONFIG_X86_64 + void (*adjust_exception_frame)(void); +#endif +}; + +struct pv_apic_ops { +#ifdef CONFIG_X86_LOCAL_APIC + void (*setup_boot_clock)(void); + void (*setup_secondary_clock)(void); + + void (*startup_ipi_hook)(int phys_apicid, + unsigned long start_eip, + unsigned long start_esp); +#endif +}; + +struct pv_mmu_ops { + /* + * Called before/after init_mm pagetable setup. setup_start + * may reset %cr3, and may pre-install parts of the pagetable; + * pagetable setup is expected to preserve any existing + * mapping. + */ + void (*pagetable_setup_start)(pgd_t *pgd_base); + void (*pagetable_setup_done)(pgd_t *pgd_base); + + unsigned long (*read_cr2)(void); + void (*write_cr2)(unsigned long); + + unsigned long (*read_cr3)(void); + void (*write_cr3)(unsigned long); + + /* + * Hooks for intercepting the creation/use/destruction of an + * mm_struct. + */ + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + void (*exit_mmap)(struct mm_struct *mm); + + + /* TLB operations */ + void (*flush_tlb_user)(void); + void (*flush_tlb_kernel)(void); + void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_others)(const struct cpumask *cpus, + struct mm_struct *mm, + unsigned long va); + + /* Hooks for allocating and freeing a pagetable top-level */ + int (*pgd_alloc)(struct mm_struct *mm); + void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); + + /* + * Hooks for allocating/releasing pagetable pages when they're + * attached to a pagetable + */ + void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); + void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); + void (*release_pte)(unsigned long pfn); + void (*release_pmd)(unsigned long pfn); + void (*release_pud)(unsigned long pfn); + + /* Pagetable manipulation functions */ + void (*set_pte)(pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); + void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (*pte_update)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); + + pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + + struct paravirt_callee_save pte_val; + struct paravirt_callee_save make_pte; + + struct paravirt_callee_save pgd_val; + struct paravirt_callee_save make_pgd; + +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE + void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); + void (*pte_clear)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*pmd_clear)(pmd_t *pmdp); + +#endif /* CONFIG_X86_PAE */ + + void (*set_pud)(pud_t *pudp, pud_t pudval); + + 
struct paravirt_callee_save pmd_val; + struct paravirt_callee_save make_pmd; + +#if PAGETABLE_LEVELS == 4 + struct paravirt_callee_save pud_val; + struct paravirt_callee_save make_pud; + + void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); +#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* PAGETABLE_LEVELS >= 3 */ + +#ifdef CONFIG_HIGHPTE + void *(*kmap_atomic_pte)(struct page *page, enum km_type type); +#endif + + struct pv_lazy_ops lazy_mode; + + /* dom0 ops */ + + /* Sometimes the physical address is a pfn, and sometimes its + an mfn. We can tell which is which from the index. */ + void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags); +}; + +struct raw_spinlock; +struct pv_lock_ops { + int (*spin_is_locked)(struct raw_spinlock *lock); + int (*spin_is_contended)(struct raw_spinlock *lock); + void (*spin_lock)(struct raw_spinlock *lock); + void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); + int (*spin_trylock)(struct raw_spinlock *lock); + void (*spin_unlock)(struct raw_spinlock *lock); +}; + +/* This contains all the paravirt structures: we get a convenient + * number for each function using the offset which we use to indicate + * what to patch. */ +struct paravirt_patch_template { + struct pv_init_ops pv_init_ops; + struct pv_time_ops pv_time_ops; + struct pv_cpu_ops pv_cpu_ops; + struct pv_irq_ops pv_irq_ops; + struct pv_apic_ops pv_apic_ops; + struct pv_mmu_ops pv_mmu_ops; + struct pv_lock_ops pv_lock_ops; +}; + +extern struct pv_info pv_info; +extern struct pv_init_ops pv_init_ops; +extern struct pv_time_ops pv_time_ops; +extern struct pv_cpu_ops pv_cpu_ops; +extern struct pv_irq_ops pv_irq_ops; +extern struct pv_apic_ops pv_apic_ops; +extern struct pv_mmu_ops pv_mmu_ops; +extern struct pv_lock_ops pv_lock_ops; + +#define PARAVIRT_PATCH(x) \ + (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) + +#define paravirt_type(op) \ + [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ + [paravirt_opptr] "i" (&(op)) +#define paravirt_clobber(clobber) \ + [paravirt_clobber] "i" (clobber) + +/* + * Generate some code, and mark it as patchable by the + * apply_paravirt() alternate instruction patcher. + */ +#define _paravirt_alt(insn_string, type, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 771b\n" \ + " .byte " type "\n" \ + " .byte 772b-771b\n" \ + " .short " clobber "\n" \ + ".popsection\n" + +/* Generate patchable code, with the default asm parameters. */ +#define paravirt_alt(insn_string) \ + _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") + +/* Simple instruction patching code. 
*/ +#define DEF_NATIVE(ops, name, code) \ + extern const char start_##ops##_##name[], end_##ops##_##name[]; \ + asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") + +unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); +unsigned paravirt_patch_ignore(unsigned len); +unsigned paravirt_patch_call(void *insnbuf, + const void *target, u16 tgt_clobbers, + unsigned long addr, u16 site_clobbers, + unsigned len); +unsigned paravirt_patch_jmp(void *insnbuf, const void *target, + unsigned long addr, unsigned len); +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, + unsigned long addr, unsigned len); + +unsigned paravirt_patch_insns(void *insnbuf, unsigned len, + const char *start, const char *end); + +unsigned native_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len); + +int paravirt_disable_iospace(void); + +/* + * This generates an indirect call based on the operation type number. + * The type number, computed in PARAVIRT_PATCH, is derived from the + * offset into the paravirt_patch_template structure, and can therefore be + * freely converted back into a structure offset. + */ +#define PARAVIRT_CALL "call *%c[paravirt_opptr];" + +/* + * These macros are intended to wrap calls through one of the paravirt + * ops structs, so that they can be later identified and patched at + * runtime. + * + * Normally, a call to a pv_op function is a simple indirect call: + * (pv_op_struct.operations)(args...). + * + * Unfortunately, this is a relatively slow operation for modern CPUs, + * because it cannot necessarily determine what the destination + * address is. In this case, the address is a runtime constant, so at + * the very least we can patch the call to e a simple direct call, or + * ideally, patch an inline implementation into the callsite. (Direct + * calls are essentially free, because the call and return addresses + * are completely predictable.) + * + * For i386, these macros rely on the standard gcc "regparm(3)" calling + * convention, in which the first three arguments are placed in %eax, + * %edx, %ecx (in that order), and the remaining arguments are placed + * on the stack. All caller-save registers (eax,edx,ecx) are expected + * to be modified (either clobbered or used for return values). + * X86_64, on the other hand, already specifies a register-based calling + * conventions, returning at %rax, with parameteres going on %rdi, %rsi, + * %rdx, and %rcx. Note that for this reason, x86_64 does not need any + * special handling for dealing with 4 arguments, unlike i386. + * However, x86_64 also have to clobber all caller saved registers, which + * unfortunately, are quite a bit (r8 - r11) + * + * The call instruction itself is marked by placing its start address + * and size into the .parainstructions section, so that + * apply_paravirt() in arch/i386/kernel/alternative.c can do the + * appropriate patching under the control of the backend pv_init_ops + * implementation. + * + * Unfortunately there's no way to get gcc to generate the args setup + * for the call, and then allow the call itself to be generated by an + * inline asm. Because of this, we must do the complete arg setup and + * return value handling from within these macros. This is fairly + * cumbersome. + * + * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. 
+ * It could be extended to more arguments, but there would be little
+ * to be gained from that.  For each number of arguments, there are
+ * the two VCALL and CALL variants for void and non-void functions.
+ *
+ * When there is a return value, the invoker of the macro must specify
+ * the return type.  The macro then uses sizeof() on that type to
+ * determine whether it's a 32 or 64 bit value, and places the return
+ * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
+ * 64-bit).  For x86_64 machines, it just returns in %rax regardless of
+ * the return value size.
+ *
+ * i386 passes 64-bit arguments as a pair of adjacent 32-bit arguments,
+ * in low,high order.
+ *
+ * Small structures are passed and returned in registers.  The macro
+ * calling convention can't directly deal with this, so the wrapper
+ * functions must do this.
+ *
+ * These PVOP_* macros are only defined within this header.  This
+ * means that all uses must be wrapped in inline functions.  This also
+ * makes sure the incoming and outgoing types are always correct.
+ */
+#ifdef CONFIG_X86_32
+#define PVOP_VCALL_ARGS				\
+	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
+#define PVOP_CALL_ARGS			PVOP_VCALL_ARGS
+
+#define PVOP_CALL_ARG1(x)		"a" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x)		"d" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x)		"c" ((unsigned long)(x))
+
+#define PVOP_VCALL_CLOBBERS		"=a" (__eax), "=d" (__edx),	\
+					"=c" (__ecx)
+#define PVOP_CALL_CLOBBERS		PVOP_VCALL_CLOBBERS
+
+#define PVOP_VCALLEE_CLOBBERS		"=a" (__eax), "=d" (__edx)
+#define PVOP_CALLEE_CLOBBERS		PVOP_VCALLEE_CLOBBERS
+
+#define EXTRA_CLOBBERS
+#define VEXTRA_CLOBBERS
+#else  /* CONFIG_X86_64 */
+#define PVOP_VCALL_ARGS					\
+	unsigned long __edi = __edi, __esi = __esi,	\
+		__edx = __edx, __ecx = __ecx
+#define PVOP_CALL_ARGS		PVOP_VCALL_ARGS, __eax
+
+#define PVOP_CALL_ARG1(x)		"D" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x)		"S" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x)		"d" ((unsigned long)(x))
+#define PVOP_CALL_ARG4(x)		"c" ((unsigned long)(x))
+
+#define PVOP_VCALL_CLOBBERS	"=D" (__edi),				\
+				"=S" (__esi), "=d" (__edx),		\
+				"=c" (__ecx)
+#define PVOP_CALL_CLOBBERS	PVOP_VCALL_CLOBBERS, "=a" (__eax)
+
+#define PVOP_VCALLEE_CLOBBERS	"=a" (__eax)
+#define PVOP_CALLEE_CLOBBERS	PVOP_VCALLEE_CLOBBERS
+
+#define EXTRA_CLOBBERS	, "r8", "r9", "r10", "r11"
+#define VEXTRA_CLOBBERS	, "rax", "r8", "r9", "r10", "r11"
+#endif	/* CONFIG_X86_32 */
+
+#ifdef CONFIG_PARAVIRT_DEBUG
+#define PVOP_TEST_NULL(op)	BUG_ON(op == NULL)
+#else
+#define PVOP_TEST_NULL(op)	((void)op)
+#endif
+
+#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr,	\
+		      pre, post, ...)					\
+	({								\
+		rettype __ret;						\
+		PVOP_CALL_ARGS;						\
+		PVOP_TEST_NULL(op);					\
+		/* This is 32-bit specific, but is okay in 64-bit */	\
+		/* since this condition will never hold */		\
+		if (sizeof(rettype) > sizeof(unsigned long)) {		\
+			asm volatile(pre				\
+				     paravirt_alt(PARAVIRT_CALL)	\
+				     post				\
+				     : call_clbr			\
+				     : paravirt_type(op),		\
+				       paravirt_clobber(clbr),		\
+				       ##__VA_ARGS__			\
+				     : "memory", "cc" extra_clbr);	\
+			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
+		} else {						\
+			asm volatile(pre				\
+				     paravirt_alt(PARAVIRT_CALL)	\
+				     post				\
+				     : call_clbr			\
+				     : paravirt_type(op),		\
+				       paravirt_clobber(clbr),		\
+				       ##__VA_ARGS__			\
+				     : "memory", "cc" extra_clbr);	\
+			__ret = (rettype)__eax;				\
+		}							\
+		__ret;							\
+	})
+
+#define __PVOP_CALL(rettype, op, pre, post, ...)			\
+	____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS,	\
+		      EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
+
+#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...)			\
+	____PVOP_CALL(rettype, op.func, CLBR_RET_REG,			\
+		      PVOP_CALLEE_CLOBBERS, ,				\
+		      pre, post, ##__VA_ARGS__)
+
+
+#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...)	\
+	({								\
+		PVOP_VCALL_ARGS;					\
+		PVOP_TEST_NULL(op);					\
+		asm volatile(pre					\
+			     paravirt_alt(PARAVIRT_CALL)		\
+			     post					\
+			     : call_clbr				\
+			     : paravirt_type(op),			\
+			       paravirt_clobber(clbr),			\
+			       ##__VA_ARGS__				\
+			     : "memory", "cc" extra_clbr);		\
+	})
+
+#define __PVOP_VCALL(op, pre, post, ...)				\
+	____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS,		\
+		       VEXTRA_CLOBBERS,					\
+		       pre, post, ##__VA_ARGS__)
+
+#define __PVOP_VCALLEESAVE(op, pre, post, ...)				\
+	____PVOP_VCALL(op.func, CLBR_RET_REG,				\
+		       PVOP_VCALLEE_CLOBBERS, ,				\
+		       pre, post, ##__VA_ARGS__)
+
+
+
+#define PVOP_CALL0(rettype, op)						\
+	__PVOP_CALL(rettype, op, "", "")
+#define PVOP_VCALL0(op)							\
+	__PVOP_VCALL(op, "", "")
+
+#define PVOP_CALLEE0(rettype, op)					\
+	__PVOP_CALLEESAVE(rettype, op, "", "")
+#define PVOP_VCALLEE0(op)						\
+	__PVOP_VCALLEESAVE(op, "", "")
+
+
+#define PVOP_CALL1(rettype, op, arg1)					\
+	__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_VCALL1(op, arg1)						\
+	__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+
+#define PVOP_CALLEE1(rettype, op, arg1)					\
+	__PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_VCALLEE1(op, arg1)						\
+	__PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+
+
+#define PVOP_CALL2(rettype, op, arg1, arg2)				\
+	__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1),		\
+		    PVOP_CALL_ARG2(arg2))
+#define PVOP_VCALL2(op, arg1, arg2)					\
+	__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1),			\
+		     PVOP_CALL_ARG2(arg2))
+
+#define PVOP_CALLEE2(rettype, op, arg1, arg2)				\
+	__PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1),	\
+			  PVOP_CALL_ARG2(arg2))
+#define PVOP_VCALLEE2(op, arg1, arg2)					\
+	__PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1),		\
+			   PVOP_CALL_ARG2(arg2))
+
+
+#define PVOP_CALL3(rettype, op, arg1, arg2, arg3)			\
+	__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1),		\
+		    PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
+#define PVOP_VCALL3(op, arg1, arg2, arg3)				\
+	__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1),			\
+		     PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
+
+/* This is the only difference in x86_64.
   We can make it much simpler */
+#ifdef CONFIG_X86_32
+#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
+	__PVOP_CALL(rettype, op,					\
+		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
+		    PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),		\
+		    PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
+#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
+	__PVOP_VCALL(op,						\
+		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
+		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
+		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
+#else
+#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
+	__PVOP_CALL(rettype, op, "", "",				\
+		    PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),		\
+		    PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
+#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
+	__PVOP_VCALL(op, "", "",					\
+		     PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),	\
+		     PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
+#endif
+
+/* Lazy mode for batching updates / context switch */
+enum paravirt_lazy_mode {
+	PARAVIRT_LAZY_NONE,
+	PARAVIRT_LAZY_MMU,
+	PARAVIRT_LAZY_CPU,
+};
+
+enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
+void paravirt_enter_lazy_mmu(void);
+void paravirt_leave_lazy_mmu(void);
+
+void _paravirt_nop(void);
+u32 _paravirt_ident_32(u32);
+u64 _paravirt_ident_64(u64);
+
+#define paravirt_nop	((void *)_paravirt_nop)
+
+/* These all sit in the .parainstructions section to tell us what to patch. */
+struct paravirt_patch_site {
+	u8 *instr;		/* original instructions */
+	u8 instrtype;		/* type of this instruction */
+	u8 len;			/* length of original instruction */
+	u16 clobbers;		/* what registers you may clobber */
+};
+
+extern struct paravirt_patch_site __parainstructions[],
+	__parainstructions_end[];
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* _ASM_X86_PARAVIRT_TYPES_H */
--
cgit v1.1

From e6e9cac8c3417b43498b243c1f8f11780e157168 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Fri, 24 Apr 2009 00:40:59 -0700
Subject: x86: split out core __math_state_restore

Split the core fpu state restoration out into __math_state_restore, which
assumes that cr0.TS is clear and that the fpu context has been
initialized.

This will be used during context switch.  There are two reasons this is
desirable:

- There's a small clarification: when __switch_to() calls
  math_state_restore, it relies on the fact that tsk_used_math() returns
  true, and so will never do a blocking init_fpu().  __math_state_restore()
  does not have (or need) that logic, so the question never arises.

- It allows the clts() to be moved earlier in __switch_to() so it can be
  performed while cpu context updates are batched (will be done in a
  later patch).
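As a sketch, the resulting split looks like this (illustrative only,
assuming the usual kernel context; the actual diff follows below):

	/* Trap path: may block in init_fpu(), then clears TS itself. */
	asmlinkage void math_state_restore(void)
	{
		/* ...init_fpu() for a task that has never used math... */
		clts();			/* allow math ops, or we recurse */
		__math_state_restore();	/* core restore; TS already clear */
	}

The context-switch path added later clears TS while cpu updates are
being batched, then calls __math_state_restore() directly.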
[ Impact: refactor code to make reuse cleaner; no functional change ] Signed-off-by: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/include/asm/i387.h | 1 + arch/x86/kernel/traps.c | 33 +++++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 175adf5..2e75292 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -26,6 +26,7 @@ extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); extern asmlinkage void math_state_restore(void); +extern void __math_state_restore(void); extern void init_thread_xstate(void); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5f935f0..71b9166 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -814,6 +814,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) } /* + * __math_state_restore assumes that cr0.TS is already clear and the + * fpu state is all ready for use. Used during context switch. + */ +void __math_state_restore(void) +{ + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = thread->task; + + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(tsk))) { + stts(); + force_sig(SIGSEGV, tsk); + return; + } + + thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ + tsk->fpu_counter++; +} + +/* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task * @@ -844,17 +866,8 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ - /* - * Paranoid restore. send a SIGSEGV if we fail to restore the state. - */ - if (unlikely(restore_fpu_checking(tsk))) { - stts(); - force_sig(SIGSEGV, tsk); - return; - } - thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ - tsk->fpu_counter++; + __math_state_restore(); } EXPORT_SYMBOL_GPL(math_state_restore); -- cgit v1.1 From 2fcddce10f6771cfa0c56fd1e826d50d67d100b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 00:45:26 -0700 Subject: x86-32: make sure clts is batched during context switch If we're preloading the fpu state during context switch, make sure the clts happens while we're batching the cpu context update, then do the actual __math_state_restore once the updates are flushed. This allows more efficient context switches when running paravirtualized, as all the hypercalls can be folded together into one. [ Impact: optimise paravirtual FPU context switch ] Signed-off-by: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/kernel/process_32.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524..a80eddd 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) *next = &next_p->thread; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + bool preload_fpu; /* never put a printk in __switch_to... 
printk() calls wake_up*() indirectly */ - __unlazy_fpu(prev_p); + /* + * If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; + __unlazy_fpu(prev_p); /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter > 5) + if (preload_fpu) prefetch(next->xstate); /* @@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); + /* If we're going to preload the fpu context, make sure clts + is run while we're batching the cpu state updates. */ + if (preload_fpu) + clts(); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_end_context_switch(next_p); - /* If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - * - * tsk_used_math() checks prevent calling math_state_restore(), - * which can sleep in the case of !tsk_used_math() - */ - if (tsk_used_math(next_p) && next_p->fpu_counter > 5) - math_state_restore(); + if (preload_fpu) + __math_state_restore(); /* * Restore %gs if needed (which is common) -- cgit v1.1 From 16d9dbf0c2bd167fdd942b83592d59696c7b73bd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 00:50:27 -0700 Subject: x86-64: move unlazy_fpu() into lazy cpu state part of context switch Make sure that unlazy_fpu()'s stts gets batched along with the other cpu state changes during context switch. (32-bit already does this.) This makes sure it gets batched when running paravirtualized. [ Impact: optimise paravirtual FPU context switch ] Signed-off-by: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/kernel/process_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb54..c9b8904 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -419,6 +419,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) load_TLS(next, cpu); + /* Must be after DS reload */ + unlazy_fpu(prev_p); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -459,9 +462,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); prev->gsindex = gsindex; - /* Must be after DS reload */ - unlazy_fpu(prev_p); - /* * Switch the PDA and FPU contexts. */ -- cgit v1.1 From 17950c5b243f99cbabef173415ee988c52104d7e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 01:01:01 -0700 Subject: x86-64: move clts into batch cpu state updates when preloading fpu When a task is likely to be using the fpu, we preload its state during the context switch, rather than waiting for it to run an fpu instruction. Make sure the clts() happens while we're doing batched fpu state updates to optimise paravirtualized context switches. 
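The intended ordering in __switch_to() is roughly the following sketch
(illustrative only; it assumes the lazy-mode hooks and helpers used by
this series):

	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

	/* ...other batched cpu state updates (TLS, stack, etc.)... */

	if (preload_fpu)
		clts();			/* queued inside the lazy-CPU batch */

	arch_end_context_switch(next_p);	/* flush the batch */

	if (preload_fpu)
		__math_state_restore();		/* TS is already clear here */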
[ Impact: optimise paravirtual FPU context switch ] Signed-off-by: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/kernel/process_64.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c9b8904..a28279d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); unsigned fsindex, gsindex; + bool preload_fpu; + + /* + * If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter > 5) + if (preload_fpu) prefetch(next->xstate); /* @@ -422,6 +430,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Must be after DS reload */ unlazy_fpu(prev_p); + /* Make sure cpu is ready for new context */ + if (preload_fpu) + clts(); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); - /* If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - * - * tsk_used_math() checks prevent calling math_state_restore(), - * which can sleep in the case of !tsk_used_math() + /* + * Preload the FPU context, now that we've determined that the + * task is likely to be using it. */ - if (tsk_used_math(next_p) && next_p->fpu_counter > 5) - math_state_restore(); + if (preload_fpu) + __math_state_restore(); return prev_p; } -- cgit v1.1 From 21e70878215f620fe99ea7d7c74bc641aeec932f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 18 Jun 2009 17:09:27 +0530 Subject: x86: oprofile/op_model_amd.c set return values for op_amd_handle_ibs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit op_amd_handle_ibs() should return 0 when IBS is not present or not defined. 
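The underlying pattern, sketched with a hypothetical CONFIG_FOO feature
(FOO is not from this patch): a stub for a compiled-out feature must
still return a value matching its prototype, or gcc warns and callers
read an undefined value:

	#ifdef CONFIG_FOO
	int foo_handle_event(void);		/* real implementation */
	#else
	static inline int foo_handle_event(void)
	{
		return 0;			/* feature absent: nothing handled */
	}
	#endif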
Fix compilation warning: CC [M] arch/x86/oprofile/op_model_amd.o arch/x86/oprofile/op_model_amd.c: In function ‘op_amd_handle_ibs’: arch/x86/oprofile/op_model_amd.c:217: warning: no return statement in function returning non-void Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index cc93046..e95268e 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -132,7 +132,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, struct op_entry entry; if (!has_ibs) - return 1; + return 0; if (ibs_config.fetch_enabled) { rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); @@ -214,7 +214,10 @@ static void op_amd_stop_ibs(void) #else static inline int op_amd_handle_ibs(struct pt_regs * const regs, - struct op_msrs const * const msrs) { } + struct op_msrs const * const msrs) +{ + return 0; +} static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } -- cgit v1.1 From 4adc667593f83a18a8e54ce94f250fd166a272ac Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:16 +0200 Subject: x86: add copies of some headers to convert to asm-generic Just an intermediate step to make reviewing easier. These files are identical copies of the existing headers. Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/generic-mman.h | 20 ++++++++ arch/x86/include/asm/generic-module.h | 80 ++++++++++++++++++++++++++++++ arch/x86/include/asm/generic-scatterlist.h | 33 ++++++++++++ arch/x86/include/asm/generic-types.h | 30 +++++++++++ arch/x86/include/asm/generic-ucontext.h | 18 +++++++ 5 files changed, 181 insertions(+) create mode 100644 arch/x86/include/asm/generic-mman.h create mode 100644 arch/x86/include/asm/generic-module.h create mode 100644 arch/x86/include/asm/generic-scatterlist.h create mode 100644 arch/x86/include/asm/generic-types.h create mode 100644 arch/x86/include/asm/generic-ucontext.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/generic-mman.h b/arch/x86/include/asm/generic-mman.h new file mode 100644 index 0000000..751af25 --- /dev/null +++ b/arch/x86/include/asm/generic-mman.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_MMAN_H +#define _ASM_X86_MMAN_H + +#include + +#define MAP_32BIT 0x40 /* only give out 32bit addresses */ + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/asm/generic-module.h b/arch/x86/include/asm/generic-module.h new file mode 100644 index 0000000..47d6274 --- /dev/null +++ b/arch/x86/include/asm/generic-module.h @@ -0,0 +1,80 @@ +#ifndef _ASM_X86_MODULE_H +#define _ASM_X86_MODULE_H + +/* x86_32/64 are simple */ +struct mod_arch_specific {}; + +#ifdef CONFIG_X86_32 +# define Elf_Shdr Elf32_Shdr +# define Elf_Sym Elf32_Sym +# define Elf_Ehdr 
Elf32_Ehdr +#else +# define Elf_Shdr Elf64_Shdr +# define Elf_Sym Elf64_Sym +# define Elf_Ehdr Elf64_Ehdr +#endif + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M386 +#define MODULE_PROC_FAMILY "386 " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " +#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MK8 +#define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_X86_ELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_4KSTACKS +# define MODULE_STACKSIZE "4KSTACKS " +# else +# define MODULE_STACKSIZE "" +# endif +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE +#endif + +#endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/generic-scatterlist.h b/arch/x86/include/asm/generic-scatterlist.h new file mode 100644 index 0000000..263d397 --- /dev/null +++ b/arch/x86/include/asm/generic-scatterlist.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_SCATTERLIST_H +#define _ASM_X86_SCATTERLIST_H + +#include + +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; + unsigned int dma_length; +}; + +#define ARCH_HAS_SG_CHAIN +#define ISA_DMA_THRESHOLD (0x00ffffff) + +/* + * These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. 
+ */ +#define sg_dma_address(sg) ((sg)->dma_address) +#ifdef CONFIG_X86_32 +# define sg_dma_len(sg) ((sg)->length) +#else +# define sg_dma_len(sg) ((sg)->dma_length) +#endif + +#endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/generic-types.h b/arch/x86/include/asm/generic-types.h new file mode 100644 index 0000000..09b9774 --- /dev/null +++ b/arch/x86/include/asm/generic-types.h @@ -0,0 +1,30 @@ +#ifndef _ASM_X86_TYPES_H +#define _ASM_X86_TYPES_H + +#include + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +typedef u64 dma64_addr_t; +#if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G) +/* DMA addresses come in 32-bit and 64-bit flavours. */ +typedef u64 dma_addr_t; +#else +typedef u32 dma_addr_t; +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_TYPES_H */ diff --git a/arch/x86/include/asm/generic-ucontext.h b/arch/x86/include/asm/generic-ucontext.h new file mode 100644 index 0000000..87324cf --- /dev/null +++ b/arch/x86/include/asm/generic-ucontext.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_UCONTEXT_H +#define _ASM_X86_UCONTEXT_H + +#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state + * information in the memory layout pointed + * by the fpstate pointer in the ucontext's + * sigcontext struct (uc_mcontext). + */ + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* _ASM_X86_UCONTEXT_H */ -- cgit v1.1 From 7bfd124d6dae7d394e73753300594a81a022fe7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:17 +0200 Subject: x86: convert trivial headers to asm-generic version For these nine header files, the asm-generic version should be semantically identical to what is in x86. Change the contents to be binary identical, for better review. Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/ioctls.h | 40 ++++++++++++++++++++++++++++------------ arch/x86/include/asm/ipcbuf.h | 15 ++++++++++----- arch/x86/include/asm/msgbuf.h | 30 +++++++++++++++++++----------- arch/x86/include/asm/param.h | 8 +++++--- arch/x86/include/asm/shmbuf.h | 28 ++++++++++++++++++---------- arch/x86/include/asm/socket.h | 10 +++++----- arch/x86/include/asm/sockios.h | 6 +++--- arch/x86/include/asm/termbits.h | 8 ++++---- 8 files changed, 92 insertions(+), 53 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h index 0d5b23b7..a799e20 100644 --- a/arch/x86/include/asm/ioctls.h +++ b/arch/x86/include/asm/ioctls.h @@ -1,12 +1,23 @@ -#ifndef _ASM_X86_IOCTLS_H -#define _ASM_X86_IOCTLS_H +#ifndef __ASM_GENERIC_IOCTLS_H +#define __ASM_GENERIC_IOCTLS_H -#include +#include + +/* + * These are the most common definitions for tty ioctl numbers. + * Most of them do not use the recommended _IOC(), but there is + * probably some source code out there hardcoding the number, + * so we might as well use them for all new platforms. + * + * The architectures that use different values here typically + * try to be compatible with some Unix variants for the same + * architecture. 
+ */ /* 0x54 is just a magic number to make these relatively unique ('T') */ #define TCGETS 0x5401 -#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */ +#define TCSETS 0x5402 #define TCSETSW 0x5403 #define TCSETSF 0x5404 #define TCGETA 0x5405 @@ -43,7 +54,6 @@ #define TIOCSETD 0x5423 #define TIOCGETD 0x5424 #define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ -/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */ #define TIOCSBRK 0x5427 /* BSD compatibility */ #define TIOCCBRK 0x5428 /* BSD compatibility */ #define TIOCGSID 0x5429 /* Return the session ID of FD */ @@ -53,8 +63,7 @@ #define TCSETSF2 _IOW('T', 0x2D, struct termios2) #define TIOCGRS485 0x542E #define TIOCSRS485 0x542F -#define TIOCGPTN _IOR('T', 0x30, unsigned int) - /* Get Pty Number (of pty-mux device) */ +#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ #define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ #define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ #define TCSETX 0x5433 @@ -76,9 +85,16 @@ #define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ -#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ -#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ -#define FIOQSIZE 0x5460 + +/* + * some architectures define FIOQSIZE as 0x545E, which is used for + * TIOCGHAYESESP on others + */ +#ifndef FIOQSIZE +# define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +# define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +# define FIOQSIZE 0x5460 +#endif /* Used for packet mode */ #define TIOCPKT_DATA 0 @@ -89,6 +105,6 @@ #define TIOCPKT_NOSTOP 16 #define TIOCPKT_DOSTOP 32 -#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#endif /* _ASM_X86_IOCTLS_H */ +#endif /* __ASM_GENERIC_IOCTLS_H */ diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h index ee678fd..888ca1c 100644 --- a/arch/x86/include/asm/ipcbuf.h +++ b/arch/x86/include/asm/ipcbuf.h @@ -1,13 +1,18 @@ -#ifndef _ASM_X86_IPCBUF_H -#define _ASM_X86_IPCBUF_H +#ifndef __ASM_GENERIC_IPCBUF_H +#define __ASM_GENERIC_IPCBUF_H /* - * The ipc64_perm structure for x86 architecture. + * The generic ipc64_perm structure: * Note extra padding because this structure is passed back and forth * between kernel and user space. * + * ipc64_perm was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * * Pad space is left for: - * - 32-bit mode_t and seq + * - 32-bit mode_t on architectures that only had 16 bit + * - 32-bit seq * - 2 miscellaneous 32-bit values */ @@ -25,4 +30,4 @@ struct ipc64_perm { unsigned long __unused2; }; -#endif /* _ASM_X86_IPCBUF_H */ +#endif /* __ASM_GENERIC_IPCBUF_H */ diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/asm/msgbuf.h index 7e4e948..aec850d 100644 --- a/arch/x86/include/asm/msgbuf.h +++ b/arch/x86/include/asm/msgbuf.h @@ -1,30 +1,38 @@ -#ifndef _ASM_X86_MSGBUF_H -#define _ASM_X86_MSGBUF_H +#ifndef __ASM_GENERIC_MSGBUF_H +#define __ASM_GENERIC_MSGBUF_H +#include /* - * The msqid64_ds structure for i386 architecture. + * generic msqid64_ds structure. + * * Note extra padding because this structure is passed back and forth * between kernel and user space. 
* - * Pad space on i386 is left for: + * msqid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first three padding words. + * On big-endian systems, the padding is in the wrong place. + * + * Pad space is left for: * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values - * - * Pad space on x8664 is left for: - * - 2 miscellaneous 64-bit values */ + struct msqid64_ds { struct ipc64_perm msg_perm; __kernel_time_t msg_stime; /* last msgsnd time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused1; #endif __kernel_time_t msg_rtime; /* last msgrcv time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused2; #endif __kernel_time_t msg_ctime; /* last change time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused3; #endif unsigned long msg_cbytes; /* current number of bytes on queue */ @@ -36,4 +44,4 @@ struct msqid64_ds { unsigned long __unused5; }; -#endif /* _ASM_X86_MSGBUF_H */ +#endif /* __ASM_GENERIC_MSGBUF_H */ diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/asm/param.h index 6f0d042..cdf8251 100644 --- a/arch/x86/include/asm/param.h +++ b/arch/x86/include/asm/param.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PARAM_H -#define _ASM_X86_PARAM_H +#ifndef __ASM_GENERIC_PARAM_H +#define __ASM_GENERIC_PARAM_H #ifdef __KERNEL__ # define HZ CONFIG_HZ /* Internal kernel timer frequency */ @@ -11,7 +11,9 @@ #define HZ 100 #endif +#ifndef EXEC_PAGESIZE #define EXEC_PAGESIZE 4096 +#endif #ifndef NOGROUP #define NOGROUP (-1) @@ -19,4 +21,4 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ -#endif /* _ASM_X86_PARAM_H */ +#endif /* __ASM_GENERIC_PARAM_H */ diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/asm/shmbuf.h index b51413b..5768fa6 100644 --- a/arch/x86/include/asm/shmbuf.h +++ b/arch/x86/include/asm/shmbuf.h @@ -1,32 +1,40 @@ -#ifndef _ASM_X86_SHMBUF_H -#define _ASM_X86_SHMBUF_H +#ifndef __ASM_GENERIC_SHMBUF_H +#define __ASM_GENERIC_SHMBUF_H + +#include /* * The shmid64_ds structure for x86 architecture. * Note extra padding because this structure is passed back and forth * between kernel and user space. * - * Pad space on 32 bit is left for: + * shmid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first two padding words. + * On big-endian systems, the padding is in the wrong place. 
+ * + * + * Pad space is left for: * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values - * - * Pad space on 64 bit is left for: - * - 2 miscellaneous 64-bit values */ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ size_t shm_segsz; /* size of segment (bytes) */ __kernel_time_t shm_atime; /* last attach time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused1; #endif __kernel_time_t shm_dtime; /* last detach time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused2; #endif __kernel_time_t shm_ctime; /* last change time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused3; #endif __kernel_pid_t shm_cpid; /* pid of creator */ @@ -48,4 +56,4 @@ struct shminfo64 { unsigned long __unused4; }; -#endif /* _ASM_X86_SHMBUF_H */ +#endif /* __ASM_GENERIC_SHMBUF_H */ diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index ca8bf2c..d4ae42a 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SOCKET_H -#define _ASM_X86_SOCKET_H +#ifndef __ASM_GENERIC_SOCKET_H +#define __ASM_GENERIC_SOCKET_H #include @@ -38,8 +38,8 @@ #define SO_BINDTODEVICE 25 /* Socket filtering */ -#define SO_ATTACH_FILTER 26 -#define SO_DETACH_FILTER 27 +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 @@ -57,4 +57,4 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING -#endif /* _ASM_X86_SOCKET_H */ +#endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/asm/sockios.h index 49cc72b..9a61a36 100644 --- a/arch/x86/include/asm/sockios.h +++ b/arch/x86/include/asm/sockios.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SOCKIOS_H -#define _ASM_X86_SOCKIOS_H +#ifndef __ASM_GENERIC_SOCKIOS_H +#define __ASM_GENERIC_SOCKIOS_H /* Socket-level I/O control calls. */ #define FIOSETOWN 0x8901 @@ -10,4 +10,4 @@ #define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ #define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ -#endif /* _ASM_X86_SOCKIOS_H */ +#endif /* __ASM_GENERIC_SOCKIOS_H */ diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/asm/termbits.h index af1b70e..1c9773d 100644 --- a/arch/x86/include/asm/termbits.h +++ b/arch/x86/include/asm/termbits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TERMBITS_H -#define _ASM_X86_TERMBITS_H +#ifndef __ASM_GENERIC_TERMBITS_H +#define __ASM_GENERIC_TERMBITS_H #include @@ -140,7 +140,7 @@ struct ktermios { #define HUPCL 0002000 #define CLOCAL 0004000 #define CBAUDEX 0010000 -#define BOTHER 0010000 /* non standard rate */ +#define BOTHER 0010000 #define B57600 0010001 #define B115200 0010002 #define B230400 0010003 @@ -195,4 +195,4 @@ struct ktermios { #define TCSADRAIN 1 #define TCSAFLUSH 2 -#endif /* _ASM_X86_TERMBITS_H */ +#endif /* __ASM_GENERIC_TERMBITS_H */ -- cgit v1.1 From 06f5013aa8eb5895ced2c71d13f5114103605555 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:18 +0200 Subject: x86: convert almost generic headers to asm-generic version In x86, mman.h, module.h, scatterlist.h, types.h and ucontext.h can use the asm-generic version by just defining the x86 specific parts locally and falling back on the generic code for the common bits. This patch illustrates the differences between the x86 and asm-generic versions by changing a file that is initially identical to the x86 version to one that is identical to the asm-generic version. 
Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/generic-mman.h | 8 +-- arch/x86/include/asm/generic-module.h | 92 ++++++------------------------ arch/x86/include/asm/generic-scatterlist.h | 34 +++++++---- arch/x86/include/asm/generic-types.h | 32 +++++++---- arch/x86/include/asm/generic-ucontext.h | 12 +--- arch/x86/include/asm/mman.h | 14 +---- arch/x86/include/asm/module.h | 13 +---- arch/x86/include/asm/scatterlist.h | 27 +-------- arch/x86/include/asm/types.h | 12 +--- arch/x86/include/asm/ucontext.h | 8 +-- 10 files changed, 73 insertions(+), 179 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/generic-mman.h b/arch/x86/include/asm/generic-mman.h index 751af25..7cab4de 100644 --- a/arch/x86/include/asm/generic-mman.h +++ b/arch/x86/include/asm/generic-mman.h @@ -1,10 +1,8 @@ -#ifndef _ASM_X86_MMAN_H -#define _ASM_X86_MMAN_H +#ifndef __ASM_GENERIC_MMAN_H +#define __ASM_GENERIC_MMAN_H #include -#define MAP_32BIT 0x40 /* only give out 32bit addresses */ - #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ @@ -17,4 +15,4 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#endif /* _ASM_X86_MMAN_H */ +#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/arch/x86/include/asm/generic-module.h b/arch/x86/include/asm/generic-module.h index 47d6274..ed5b44d 100644 --- a/arch/x86/include/asm/generic-module.h +++ b/arch/x86/include/asm/generic-module.h @@ -1,80 +1,22 @@ -#ifndef _ASM_X86_MODULE_H -#define _ASM_X86_MODULE_H +#ifndef __ASM_GENERIC_MODULE_H +#define __ASM_GENERIC_MODULE_H -/* x86_32/64 are simple */ -struct mod_arch_specific {}; +/* + * Many architectures just need a simple module + * loader without arch specific data. 
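The conversion follows an override-then-include idiom; as a sketch, with
a hypothetical <asm/foo.h> (foo is not one of the headers in this
series):

	#ifndef _ASM_X86_FOO_H
	#define _ASM_X86_FOO_H

	/* spell out only the x86-specific deviations... */
	#define FOO_ARCH_FLAG	0x40

	/* ...and take everything else from the generic version */
	#include <asm-generic/foo.h>

	#endif /* _ASM_X86_FOO_H */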
+ */ +struct mod_arch_specific +{ +}; -#ifdef CONFIG_X86_32 -# define Elf_Shdr Elf32_Shdr -# define Elf_Sym Elf32_Sym -# define Elf_Ehdr Elf32_Ehdr +#ifdef CONFIG_64BIT +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Ehdr Elf64_Ehdr #else -# define Elf_Shdr Elf64_Shdr -# define Elf_Sym Elf64_Sym -# define Elf_Ehdr Elf64_Ehdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr #endif -#ifdef CONFIG_X86_64 -/* X86_64 does not define MODULE_PROC_FAMILY */ -#elif defined CONFIG_M386 -#define MODULE_PROC_FAMILY "386 " -#elif defined CONFIG_M486 -#define MODULE_PROC_FAMILY "486 " -#elif defined CONFIG_M586 -#define MODULE_PROC_FAMILY "586 " -#elif defined CONFIG_M586TSC -#define MODULE_PROC_FAMILY "586TSC " -#elif defined CONFIG_M586MMX -#define MODULE_PROC_FAMILY "586MMX " -#elif defined CONFIG_MCORE2 -#define MODULE_PROC_FAMILY "CORE2 " -#elif defined CONFIG_M686 -#define MODULE_PROC_FAMILY "686 " -#elif defined CONFIG_MPENTIUMII -#define MODULE_PROC_FAMILY "PENTIUMII " -#elif defined CONFIG_MPENTIUMIII -#define MODULE_PROC_FAMILY "PENTIUMIII " -#elif defined CONFIG_MPENTIUMM -#define MODULE_PROC_FAMILY "PENTIUMM " -#elif defined CONFIG_MPENTIUM4 -#define MODULE_PROC_FAMILY "PENTIUM4 " -#elif defined CONFIG_MK6 -#define MODULE_PROC_FAMILY "K6 " -#elif defined CONFIG_MK7 -#define MODULE_PROC_FAMILY "K7 " -#elif defined CONFIG_MK8 -#define MODULE_PROC_FAMILY "K8 " -#elif defined CONFIG_X86_ELAN -#define MODULE_PROC_FAMILY "ELAN " -#elif defined CONFIG_MCRUSOE -#define MODULE_PROC_FAMILY "CRUSOE " -#elif defined CONFIG_MEFFICEON -#define MODULE_PROC_FAMILY "EFFICEON " -#elif defined CONFIG_MWINCHIPC6 -#define MODULE_PROC_FAMILY "WINCHIPC6 " -#elif defined CONFIG_MWINCHIP3D -#define MODULE_PROC_FAMILY "WINCHIP3D " -#elif defined CONFIG_MCYRIXIII -#define MODULE_PROC_FAMILY "CYRIXIII " -#elif defined CONFIG_MVIAC3_2 -#define MODULE_PROC_FAMILY "VIAC3-2 " -#elif defined CONFIG_MVIAC7 -#define MODULE_PROC_FAMILY "VIAC7 " -#elif defined CONFIG_MGEODEGX1 -#define MODULE_PROC_FAMILY "GEODEGX1 " -#elif defined CONFIG_MGEODE_LX -#define MODULE_PROC_FAMILY "GEODE " -#else -#error unknown processor family -#endif - -#ifdef CONFIG_X86_32 -# ifdef CONFIG_4KSTACKS -# define MODULE_STACKSIZE "4KSTACKS " -# else -# define MODULE_STACKSIZE "" -# endif -# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE -#endif - -#endif /* _ASM_X86_MODULE_H */ +#endif /* __ASM_GENERIC_MODULE_H */ diff --git a/arch/x86/include/asm/generic-scatterlist.h b/arch/x86/include/asm/generic-scatterlist.h index 263d397..8b94544 100644 --- a/arch/x86/include/asm/generic-scatterlist.h +++ b/arch/x86/include/asm/generic-scatterlist.h @@ -1,7 +1,7 @@ -#ifndef _ASM_X86_SCATTERLIST_H -#define _ASM_X86_SCATTERLIST_H +#ifndef __ASM_GENERIC_SCATTERLIST_H +#define __ASM_GENERIC_SCATTERLIST_H -#include +#include struct scatterlist { #ifdef CONFIG_DEBUG_SG @@ -14,20 +14,30 @@ struct scatterlist { unsigned int dma_length; }; -#define ARCH_HAS_SG_CHAIN -#define ISA_DMA_THRESHOLD (0x00ffffff) - /* - * These macros should be used after a pci_map_sg call has been done + * These macros should be used after a dma_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. * You should only work with the number of sg entries pci_map_sg - * returns. + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. 
*/ #define sg_dma_address(sg) ((sg)->dma_address) -#ifdef CONFIG_X86_32 -# define sg_dma_len(sg) ((sg)->length) +#ifndef sg_dma_len +/* + * Normally, you have an iommu on 64 bit machines, but not on 32 bit + * machines. Architectures that are differnt should override this. + */ +#if __BITS_PER_LONG == 64 +#define sg_dma_len(sg) ((sg)->dma_length) #else -# define sg_dma_len(sg) ((sg)->dma_length) +#define sg_dma_len(sg) ((sg)->length) +#endif /* 64 bit */ +#endif /* sg_dma_len */ + +#ifndef ISA_DMA_THRESHOLD +#define ISA_DMA_THRESHOLD (~0UL) #endif -#endif /* _ASM_X86_SCATTERLIST_H */ +#define ARCH_HAS_SG_CHAIN + +#endif /* __ASM_GENERIC_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/generic-types.h b/arch/x86/include/asm/generic-types.h index 09b9774..fba7d33 100644 --- a/arch/x86/include/asm/generic-types.h +++ b/arch/x86/include/asm/generic-types.h @@ -1,6 +1,9 @@ -#ifndef _ASM_X86_TYPES_H -#define _ASM_X86_TYPES_H - +#ifndef _ASM_GENERIC_TYPES_H +#define _ASM_GENERIC_TYPES_H +/* + * int-ll64 is used practically everywhere now, + * so use it as a reasonable default. + */ #include #ifndef __ASSEMBLY__ @@ -13,18 +16,27 @@ typedef unsigned short umode_t; * These aren't exported outside the kernel to avoid name space clashes */ #ifdef __KERNEL__ - #ifndef __ASSEMBLY__ - -typedef u64 dma64_addr_t; -#if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G) -/* DMA addresses come in 32-bit and 64-bit flavours. */ +/* + * DMA addresses may be very different from physical addresses + * and pointers. i386 and powerpc may have 64 bit DMA on 32 bit + * systems, while sparc64 uses 32 bit DMA addresses for 64 bit + * physical addresses. + * This default defines dma_addr_t to have the same size as + * phys_addr_t, which is the most common way. + * Do not define the dma64_addr_t type, which never really + * worked. + */ +#ifndef dma_addr_t +#ifdef CONFIG_PHYS_ADDR_T_64BIT typedef u64 dma_addr_t; #else typedef u32 dma_addr_t; -#endif +#endif /* CONFIG_PHYS_ADDR_T_64BIT */ +#endif /* dma_addr_t */ #endif /* __ASSEMBLY__ */ + #endif /* __KERNEL__ */ -#endif /* _ASM_X86_TYPES_H */ +#endif /* _ASM_GENERIC_TYPES_H */ diff --git a/arch/x86/include/asm/generic-ucontext.h b/arch/x86/include/asm/generic-ucontext.h index 87324cf..ad77343 100644 --- a/arch/x86/include/asm/generic-ucontext.h +++ b/arch/x86/include/asm/generic-ucontext.h @@ -1,11 +1,5 @@ -#ifndef _ASM_X86_UCONTEXT_H -#define _ASM_X86_UCONTEXT_H - -#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state - * information in the memory layout pointed - * by the fpstate pointer in the ucontext's - * sigcontext struct (uc_mcontext). 
- */ +#ifndef __ASM_GENERIC_UCONTEXT_H +#define __ASM_GENERIC_UCONTEXT_H struct ucontext { unsigned long uc_flags; @@ -15,4 +9,4 @@ struct ucontext { sigset_t uc_sigmask; /* mask last for extensibility */ }; -#endif /* _ASM_X86_UCONTEXT_H */ +#endif /* __ASM_GENERIC_UCONTEXT_H */ diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 751af25..063d8c9 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h @@ -1,20 +1,8 @@ #ifndef _ASM_X86_MMAN_H #define _ASM_X86_MMAN_H -#include - #define MAP_32BIT 0x40 /* only give out 32bit addresses */ -#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ -#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ -#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MAP_LOCKED 0x2000 /* pages are locked */ -#define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ - -#define MCL_CURRENT 1 /* lock all current mappings */ -#define MCL_FUTURE 2 /* lock all future mappings */ +#include #endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 47d6274..4a7a192 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -1,18 +1,7 @@ #ifndef _ASM_X86_MODULE_H #define _ASM_X86_MODULE_H -/* x86_32/64 are simple */ -struct mod_arch_specific {}; - -#ifdef CONFIG_X86_32 -# define Elf_Shdr Elf32_Shdr -# define Elf_Sym Elf32_Sym -# define Elf_Ehdr Elf32_Ehdr -#else -# define Elf_Shdr Elf64_Shdr -# define Elf_Sym Elf64_Sym -# define Elf_Ehdr Elf64_Ehdr -#endif +#include #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index 263d397..2097d68 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h @@ -1,33 +1,8 @@ #ifndef _ASM_X86_SCATTERLIST_H #define _ASM_X86_SCATTERLIST_H -#include - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - unsigned int length; - dma_addr_t dma_address; - unsigned int dma_length; -}; - -#define ARCH_HAS_SG_CHAIN #define ISA_DMA_THRESHOLD (0x00ffffff) -/* - * These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns. 
- */ -#define sg_dma_address(sg) ((sg)->dma_address) -#ifdef CONFIG_X86_32 -# define sg_dma_len(sg) ((sg)->length) -#else -# define sg_dma_len(sg) ((sg)->dma_length) -#endif +#include #endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h index 09b9774..f2fe528 100644 --- a/arch/x86/include/asm/types.h +++ b/arch/x86/include/asm/types.h @@ -1,19 +1,11 @@ #ifndef _ASM_X86_TYPES_H #define _ASM_X86_TYPES_H -#include +#define dma_addr_t dma_addr_t -#ifndef __ASSEMBLY__ - -typedef unsigned short umode_t; +#include -#endif /* __ASSEMBLY__ */ - -/* - * These aren't exported outside the kernel to avoid name space clashes - */ #ifdef __KERNEL__ - #ifndef __ASSEMBLY__ typedef u64 dma64_addr_t; diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/asm/ucontext.h index 87324cf..7cfc436 100644 --- a/arch/x86/include/asm/ucontext.h +++ b/arch/x86/include/asm/ucontext.h @@ -7,12 +7,6 @@ * sigcontext struct (uc_mcontext). */ -struct ucontext { - unsigned long uc_flags; - struct ucontext *uc_link; - stack_t uc_stack; - struct sigcontext uc_mcontext; - sigset_t uc_sigmask; /* mask last for extensibility */ -}; +#include #endif /* _ASM_X86_UCONTEXT_H */ -- cgit v1.1 From 69d5ffdaad7b77b97229b55c36afb20e5bebd29e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:19 +0200 Subject: x86: convert termios.h to the asm-generic version This patch turned out more controversial than expected and may get dropped in the future. I'm including it for reference anyway. The user_termio_to_kernel_termios and kernel_termios_to_user_termio functions on x86 are lacking error checking from get_user and are not portable to big-endian systems, so the asm-generic header has to differ in this regard. Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/termios.h | 86 +++++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index c4ee805..d0922ad 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h @@ -1,5 +1,12 @@ -#ifndef _ASM_X86_TERMIOS_H -#define _ASM_X86_TERMIOS_H +#ifndef _ASM_GENERIC_TERMIOS_H +#define _ASM_GENERIC_TERMIOS_H +/* + * Most architectures have straight copies of the x86 code, with + * varying levels of bug fixes on top. Usually it's a good idea + * to use this generic version instead, but be careful to avoid + * ABI changes. + * New architectures should not provide their own version. + */ #include #include @@ -54,37 +61,57 @@ struct termio { /* * Translate a "termio" structure into a "termios". Ugh. 
*/ -#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ - unsigned short __tmp; \ - get_user(__tmp,&(termio)->x); \ - *(unsigned short *) &(termios)->x = __tmp; \ -} - static inline int user_termio_to_kernel_termios(struct ktermios *termios, - struct termio __user *termio) + const struct termio __user *termio) { - SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); - SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); - SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); - SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); - get_user(termios->c_line, &termio->c_line); - return copy_from_user(termios->c_cc, termio->c_cc, NCC); + unsigned short tmp; + + if (get_user(tmp, &termio->c_iflag) < 0) + goto fault; + termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; + + if (get_user(tmp, &termio->c_oflag) < 0) + goto fault; + termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; + + if (get_user(tmp, &termio->c_cflag) < 0) + goto fault; + termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; + + if (get_user(tmp, &termio->c_lflag) < 0) + goto fault; + termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; + + if (get_user(termios->c_line, &termio->c_line) < 0) + goto fault; + + if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) + goto fault; + + return 0; + + fault: + return -EFAULT; } /* * Translate a "termios" structure into a "termio". Ugh. */ static inline int kernel_termios_to_user_termio(struct termio __user *termio, - struct ktermios *termios) + struct ktermios *termios) { - put_user((termios)->c_iflag, &(termio)->c_iflag); - put_user((termios)->c_oflag, &(termio)->c_oflag); - put_user((termios)->c_cflag, &(termio)->c_cflag); - put_user((termios)->c_lflag, &(termio)->c_lflag); - put_user((termios)->c_line, &(termio)->c_line); - return copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); + if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || + put_user(termios->c_oflag, &termio->c_oflag) < 0 || + put_user(termios->c_cflag, &termio->c_cflag) < 0 || + put_user(termios->c_lflag, &termio->c_lflag) < 0 || + put_user(termios->c_line, &termio->c_line) < 0 || + copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) + return -EFAULT; + + return 0; } +#ifdef TCGETS2 static inline int user_termios_to_kernel_termios(struct ktermios *k, struct termios2 __user *u) { @@ -108,7 +135,20 @@ static inline int kernel_termios_to_user_termios_1(struct termios __user *u, { return copy_to_user(u, k, sizeof(struct termios)); } +#else /* TCGETS2 */ +static inline int user_termios_to_kernel_termios(struct ktermios *k, + struct termios __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios)); +} + +static inline int kernel_termios_to_user_termios(struct termios __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios)); +} +#endif /* TCGETS2 */ #endif /* __KERNEL__ */ -#endif /* _ASM_X86_TERMIOS_H */ +#endif /* _ASM_GENERIC_TERMIOS_H */ -- cgit v1.1 From 73a2d096fdf23aa841f7595d114a11ec85a85e4d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:20 +0200 Subject: x86: remove all now-duplicate header files All files that have been made identical to the asm-generic version in the previous patches can now be removed, guaranteeing that this does not introduce semantic changes. Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/generic-mman.h | 18 --- arch/x86/include/asm/generic-module.h | 22 ---- arch/x86/include/asm/generic-scatterlist.h | 43 ------- arch/x86/include/asm/generic-types.h | 42 ------ arch/x86/include/asm/generic-ucontext.h | 12 -- arch/x86/include/asm/ioctls.h | 111 +--------------- arch/x86/include/asm/ipcbuf.h | 34 +---- arch/x86/include/asm/mman.h | 2 +- arch/x86/include/asm/module.h | 2 +- arch/x86/include/asm/msgbuf.h | 48 +------ arch/x86/include/asm/param.h | 25 +--- arch/x86/include/asm/scatterlist.h | 2 +- arch/x86/include/asm/shmbuf.h | 60 +-------- arch/x86/include/asm/socket.h | 61 +-------- arch/x86/include/asm/sockios.h | 14 +- arch/x86/include/asm/termbits.h | 199 +---------------------------- arch/x86/include/asm/termios.h | 155 +--------------------- arch/x86/include/asm/types.h | 2 +- arch/x86/include/asm/ucontext.h | 2 +- 19 files changed, 14 insertions(+), 840 deletions(-) delete mode 100644 arch/x86/include/asm/generic-mman.h delete mode 100644 arch/x86/include/asm/generic-module.h delete mode 100644 arch/x86/include/asm/generic-scatterlist.h delete mode 100644 arch/x86/include/asm/generic-types.h delete mode 100644 arch/x86/include/asm/generic-ucontext.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/generic-mman.h b/arch/x86/include/asm/generic-mman.h deleted file mode 100644 index 7cab4de..0000000 --- a/arch/x86/include/asm/generic-mman.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __ASM_GENERIC_MMAN_H -#define __ASM_GENERIC_MMAN_H - -#include - -#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ -#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ -#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MAP_LOCKED 0x2000 /* pages are locked */ -#define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ - -#define MCL_CURRENT 1 /* lock all current mappings */ -#define MCL_FUTURE 2 /* lock all future mappings */ - -#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/arch/x86/include/asm/generic-module.h b/arch/x86/include/asm/generic-module.h deleted file mode 100644 index ed5b44d..0000000 --- a/arch/x86/include/asm/generic-module.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __ASM_GENERIC_MODULE_H -#define __ASM_GENERIC_MODULE_H - -/* - * Many architectures just need a simple module - * loader without arch specific data. - */ -struct mod_arch_specific -{ -}; - -#ifdef CONFIG_64BIT -#define Elf_Shdr Elf64_Shdr -#define Elf_Sym Elf64_Sym -#define Elf_Ehdr Elf64_Ehdr -#else -#define Elf_Shdr Elf32_Shdr -#define Elf_Sym Elf32_Sym -#define Elf_Ehdr Elf32_Ehdr -#endif - -#endif /* __ASM_GENERIC_MODULE_H */ diff --git a/arch/x86/include/asm/generic-scatterlist.h b/arch/x86/include/asm/generic-scatterlist.h deleted file mode 100644 index 8b94544..0000000 --- a/arch/x86/include/asm/generic-scatterlist.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __ASM_GENERIC_SCATTERLIST_H -#define __ASM_GENERIC_SCATTERLIST_H - -#include - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - unsigned int length; - dma_addr_t dma_address; - unsigned int dma_length; -}; - -/* - * These macros should be used after a dma_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. 
- * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#ifndef sg_dma_len -/* - * Normally, you have an iommu on 64 bit machines, but not on 32 bit - * machines. Architectures that are differnt should override this. - */ -#if __BITS_PER_LONG == 64 -#define sg_dma_len(sg) ((sg)->dma_length) -#else -#define sg_dma_len(sg) ((sg)->length) -#endif /* 64 bit */ -#endif /* sg_dma_len */ - -#ifndef ISA_DMA_THRESHOLD -#define ISA_DMA_THRESHOLD (~0UL) -#endif - -#define ARCH_HAS_SG_CHAIN - -#endif /* __ASM_GENERIC_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/generic-types.h b/arch/x86/include/asm/generic-types.h deleted file mode 100644 index fba7d33..0000000 --- a/arch/x86/include/asm/generic-types.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _ASM_GENERIC_TYPES_H -#define _ASM_GENERIC_TYPES_H -/* - * int-ll64 is used practically everywhere now, - * so use it as a reasonable default. - */ -#include - -#ifndef __ASSEMBLY__ - -typedef unsigned short umode_t; - -#endif /* __ASSEMBLY__ */ - -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -/* - * DMA addresses may be very different from physical addresses - * and pointers. i386 and powerpc may have 64 bit DMA on 32 bit - * systems, while sparc64 uses 32 bit DMA addresses for 64 bit - * physical addresses. - * This default defines dma_addr_t to have the same size as - * phys_addr_t, which is the most common way. - * Do not define the dma64_addr_t type, which never really - * worked. - */ -#ifndef dma_addr_t -#ifdef CONFIG_PHYS_ADDR_T_64BIT -typedef u64 dma_addr_t; -#else -typedef u32 dma_addr_t; -#endif /* CONFIG_PHYS_ADDR_T_64BIT */ -#endif /* dma_addr_t */ - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif /* _ASM_GENERIC_TYPES_H */ diff --git a/arch/x86/include/asm/generic-ucontext.h b/arch/x86/include/asm/generic-ucontext.h deleted file mode 100644 index ad77343..0000000 --- a/arch/x86/include/asm/generic-ucontext.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __ASM_GENERIC_UCONTEXT_H -#define __ASM_GENERIC_UCONTEXT_H - -struct ucontext { - unsigned long uc_flags; - struct ucontext *uc_link; - stack_t uc_stack; - struct sigcontext uc_mcontext; - sigset_t uc_sigmask; /* mask last for extensibility */ -}; - -#endif /* __ASM_GENERIC_UCONTEXT_H */ diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h index a799e20..ec34c76 100644 --- a/arch/x86/include/asm/ioctls.h +++ b/arch/x86/include/asm/ioctls.h @@ -1,110 +1 @@ -#ifndef __ASM_GENERIC_IOCTLS_H -#define __ASM_GENERIC_IOCTLS_H - -#include - -/* - * These are the most common definitions for tty ioctl numbers. - * Most of them do not use the recommended _IOC(), but there is - * probably some source code out there hardcoding the number, - * so we might as well use them for all new platforms. - * - * The architectures that use different values here typically - * try to be compatible with some Unix variants for the same - * architecture. 
- */ - -/* 0x54 is just a magic number to make these relatively unique ('T') */ - -#define TCGETS 0x5401 -#define TCSETS 0x5402 -#define TCSETSW 0x5403 -#define TCSETSF 0x5404 -#define TCGETA 0x5405 -#define TCSETA 0x5406 -#define TCSETAW 0x5407 -#define TCSETAF 0x5408 -#define TCSBRK 0x5409 -#define TCXONC 0x540A -#define TCFLSH 0x540B -#define TIOCEXCL 0x540C -#define TIOCNXCL 0x540D -#define TIOCSCTTY 0x540E -#define TIOCGPGRP 0x540F -#define TIOCSPGRP 0x5410 -#define TIOCOUTQ 0x5411 -#define TIOCSTI 0x5412 -#define TIOCGWINSZ 0x5413 -#define TIOCSWINSZ 0x5414 -#define TIOCMGET 0x5415 -#define TIOCMBIS 0x5416 -#define TIOCMBIC 0x5417 -#define TIOCMSET 0x5418 -#define TIOCGSOFTCAR 0x5419 -#define TIOCSSOFTCAR 0x541A -#define FIONREAD 0x541B -#define TIOCINQ FIONREAD -#define TIOCLINUX 0x541C -#define TIOCCONS 0x541D -#define TIOCGSERIAL 0x541E -#define TIOCSSERIAL 0x541F -#define TIOCPKT 0x5420 -#define FIONBIO 0x5421 -#define TIOCNOTTY 0x5422 -#define TIOCSETD 0x5423 -#define TIOCGETD 0x5424 -#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ -#define TIOCSBRK 0x5427 /* BSD compatibility */ -#define TIOCCBRK 0x5428 /* BSD compatibility */ -#define TIOCGSID 0x5429 /* Return the session ID of FD */ -#define TCGETS2 _IOR('T', 0x2A, struct termios2) -#define TCSETS2 _IOW('T', 0x2B, struct termios2) -#define TCSETSW2 _IOW('T', 0x2C, struct termios2) -#define TCSETSF2 _IOW('T', 0x2D, struct termios2) -#define TIOCGRS485 0x542E -#define TIOCSRS485 0x542F -#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ -#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ -#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ -#define TCSETX 0x5433 -#define TCSETXF 0x5434 -#define TCSETXW 0x5435 - -#define FIONCLEX 0x5450 -#define FIOCLEX 0x5451 -#define FIOASYNC 0x5452 -#define TIOCSERCONFIG 0x5453 -#define TIOCSERGWILD 0x5454 -#define TIOCSERSWILD 0x5455 -#define TIOCGLCKTRMIOS 0x5456 -#define TIOCSLCKTRMIOS 0x5457 -#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ -#define TIOCSERGETLSR 0x5459 /* Get line status register */ -#define TIOCSERGETMULTI 0x545A /* Get multiport config */ -#define TIOCSERSETMULTI 0x545B /* Set multiport config */ - -#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ -#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ - -/* - * some architectures define FIOQSIZE as 0x545E, which is used for - * TIOCGHAYESESP on others - */ -#ifndef FIOQSIZE -# define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ -# define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ -# define FIOQSIZE 0x5460 -#endif - -/* Used for packet mode */ -#define TIOCPKT_DATA 0 -#define TIOCPKT_FLUSHREAD 1 -#define TIOCPKT_FLUSHWRITE 2 -#define TIOCPKT_STOP 4 -#define TIOCPKT_START 8 -#define TIOCPKT_NOSTOP 16 -#define TIOCPKT_DOSTOP 32 - -#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ - -#endif /* __ASM_GENERIC_IOCTLS_H */ +#include diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h index 888ca1c..84c7e51 100644 --- a/arch/x86/include/asm/ipcbuf.h +++ b/arch/x86/include/asm/ipcbuf.h @@ -1,33 +1 @@ -#ifndef __ASM_GENERIC_IPCBUF_H -#define __ASM_GENERIC_IPCBUF_H - -/* - * The generic ipc64_perm structure: - * Note extra padding because this structure is passed back and forth - * between kernel and user space. 
- * - * ipc64_perm was originally meant to be architecture specific, but - * everyone just ended up making identical copies without specific - * optimizations, so we may just as well all use the same one. - * - * Pad space is left for: - * - 32-bit mode_t on architectures that only had 16 bit - * - 32-bit seq - * - 2 miscellaneous 32-bit values - */ - -struct ipc64_perm { - __kernel_key_t key; - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_uid32_t cuid; - __kernel_gid32_t cgid; - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned short __pad2; - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* __ASM_GENERIC_IPCBUF_H */ +#include diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 063d8c9..593e51d 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h @@ -3,6 +3,6 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ -#include +#include #endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 4a7a192..555bc12 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_MODULE_H #define _ASM_X86_MODULE_H -#include +#include #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/asm/msgbuf.h index aec850d..809134c 100644 --- a/arch/x86/include/asm/msgbuf.h +++ b/arch/x86/include/asm/msgbuf.h @@ -1,47 +1 @@ -#ifndef __ASM_GENERIC_MSGBUF_H -#define __ASM_GENERIC_MSGBUF_H - -#include -/* - * generic msqid64_ds structure. - * - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * msqid64_ds was originally meant to be architecture specific, but - * everyone just ended up making identical copies without specific - * optimizations, so we may just as well all use the same one. - * - * 64 bit architectures typically define a 64 bit __kernel_time_t, - * so they do not need the first three padding words. - * On big-endian systems, the padding is in the wrong place. 
- * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values - */ - -struct msqid64_ds { - struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused1; -#endif - __kernel_time_t msg_rtime; /* last msgrcv time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused2; -#endif - __kernel_time_t msg_ctime; /* last change time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused3; -#endif - unsigned long msg_cbytes; /* current number of bytes on queue */ - unsigned long msg_qnum; /* number of messages in queue */ - unsigned long msg_qbytes; /* max number of bytes on queue */ - __kernel_pid_t msg_lspid; /* pid of last msgsnd */ - __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned long __unused4; - unsigned long __unused5; -}; - -#endif /* __ASM_GENERIC_MSGBUF_H */ +#include diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/asm/param.h index cdf8251..965d454 100644 --- a/arch/x86/include/asm/param.h +++ b/arch/x86/include/asm/param.h @@ -1,24 +1 @@ -#ifndef __ASM_GENERIC_PARAM_H -#define __ASM_GENERIC_PARAM_H - -#ifdef __KERNEL__ -# define HZ CONFIG_HZ /* Internal kernel timer frequency */ -# define USER_HZ 100 /* some user interfaces are */ -# define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ -#endif - -#ifndef HZ -#define HZ 100 -#endif - -#ifndef EXEC_PAGESIZE -#define EXEC_PAGESIZE 4096 -#endif - -#ifndef NOGROUP -#define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ - -#endif /* __ASM_GENERIC_PARAM_H */ +#include diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index 2097d68..75af592 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h @@ -3,6 +3,6 @@ #define ISA_DMA_THRESHOLD (0x00ffffff) -#include +#include #endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/asm/shmbuf.h index 5768fa6..83c05fc 100644 --- a/arch/x86/include/asm/shmbuf.h +++ b/arch/x86/include/asm/shmbuf.h @@ -1,59 +1 @@ -#ifndef __ASM_GENERIC_SHMBUF_H -#define __ASM_GENERIC_SHMBUF_H - -#include - -/* - * The shmid64_ds structure for x86 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * shmid64_ds was originally meant to be architecture specific, but - * everyone just ended up making identical copies without specific - * optimizations, so we may just as well all use the same one. - * - * 64 bit architectures typically define a 64 bit __kernel_time_t, - * so they do not need the first two padding words. - * On big-endian systems, the padding is in the wrong place. - * - * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values - */ - -struct shmid64_ds { - struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused1; -#endif - __kernel_time_t shm_dtime; /* last detach time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused2; -#endif - __kernel_time_t shm_ctime; /* last change time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused3; -#endif - __kernel_pid_t shm_cpid; /* pid of creator */ - __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned long shm_nattch; /* no. 
of current attaches */ - unsigned long __unused4; - unsigned long __unused5; -}; - -struct shminfo64 { - unsigned long shmmax; - unsigned long shmmin; - unsigned long shmmni; - unsigned long shmseg; - unsigned long shmall; - unsigned long __unused1; - unsigned long __unused2; - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* __ASM_GENERIC_SHMBUF_H */ +#include diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index d4ae42a..6b71384 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -1,60 +1 @@ -#ifndef __ASM_GENERIC_SOCKET_H -#define __ASM_GENERIC_SOCKET_H - -#include - -/* For setsockopt(2) */ -#define SOL_SOCKET 1 - -#define SO_DEBUG 1 -#define SO_REUSEADDR 2 -#define SO_TYPE 3 -#define SO_ERROR 4 -#define SO_DONTROUTE 5 -#define SO_BROADCAST 6 -#define SO_SNDBUF 7 -#define SO_RCVBUF 8 -#define SO_SNDBUFFORCE 32 -#define SO_RCVBUFFORCE 33 -#define SO_KEEPALIVE 9 -#define SO_OOBINLINE 10 -#define SO_NO_CHECK 11 -#define SO_PRIORITY 12 -#define SO_LINGER 13 -#define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ -#define SO_PASSCRED 16 -#define SO_PEERCRED 17 -#define SO_RCVLOWAT 18 -#define SO_SNDLOWAT 19 -#define SO_RCVTIMEO 20 -#define SO_SNDTIMEO 21 - -/* Security levels - as per NRL IPv6 - don't actually do anything */ -#define SO_SECURITY_AUTHENTICATION 22 -#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 -#define SO_SECURITY_ENCRYPTION_NETWORK 24 - -#define SO_BINDTODEVICE 25 - -/* Socket filtering */ -#define SO_ATTACH_FILTER 26 -#define SO_DETACH_FILTER 27 - -#define SO_PEERNAME 28 -#define SO_TIMESTAMP 29 -#define SCM_TIMESTAMP SO_TIMESTAMP - -#define SO_ACCEPTCONN 30 - -#define SO_PEERSEC 31 -#define SO_PASSSEC 34 -#define SO_TIMESTAMPNS 35 -#define SCM_TIMESTAMPNS SO_TIMESTAMPNS - -#define SO_MARK 36 - -#define SO_TIMESTAMPING 37 -#define SCM_TIMESTAMPING SO_TIMESTAMPING - -#endif /* __ASM_GENERIC_SOCKET_H */ +#include diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/asm/sockios.h index 9a61a36..def6d47 100644 --- a/arch/x86/include/asm/sockios.h +++ b/arch/x86/include/asm/sockios.h @@ -1,13 +1 @@ -#ifndef __ASM_GENERIC_SOCKIOS_H -#define __ASM_GENERIC_SOCKIOS_H - -/* Socket-level I/O control calls. 
*/ -#define FIOSETOWN 0x8901 -#define SIOCSPGRP 0x8902 -#define FIOGETOWN 0x8903 -#define SIOCGPGRP 0x8904 -#define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ - -#endif /* __ASM_GENERIC_SOCKIOS_H */ +#include diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/asm/termbits.h index 1c9773d..3935b10 100644 --- a/arch/x86/include/asm/termbits.h +++ b/arch/x86/include/asm/termbits.h @@ -1,198 +1 @@ -#ifndef __ASM_GENERIC_TERMBITS_H -#define __ASM_GENERIC_TERMBITS_H - -#include - -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -#define NCCS 19 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ -#define VINTR 0 -#define VQUIT 1 -#define VERASE 2 -#define VKILL 3 -#define VEOF 4 -#define VTIME 5 -#define VMIN 6 -#define VSWTC 7 -#define VSTART 8 -#define VSTOP 9 -#define VSUSP 10 -#define VEOL 11 -#define VREPRINT 12 -#define VDISCARD 13 -#define VWERASE 14 -#define VLNEXT 15 -#define VEOL2 16 - -/* c_iflag bits */ -#define IGNBRK 0000001 -#define BRKINT 0000002 -#define IGNPAR 0000004 -#define PARMRK 0000010 -#define INPCK 0000020 -#define ISTRIP 0000040 -#define INLCR 0000100 -#define IGNCR 0000200 -#define ICRNL 0000400 -#define IUCLC 0001000 -#define IXON 0002000 -#define IXANY 0004000 -#define IXOFF 0010000 -#define IMAXBEL 0020000 -#define IUTF8 0040000 - -/* c_oflag bits */ -#define OPOST 0000001 -#define OLCUC 0000002 -#define ONLCR 0000004 -#define OCRNL 0000010 -#define ONOCR 0000020 -#define ONLRET 0000040 -#define OFILL 0000100 -#define OFDEL 0000200 -#define NLDLY 0000400 -#define NL0 0000000 -#define NL1 0000400 -#define CRDLY 0003000 -#define CR0 0000000 -#define CR1 0001000 -#define CR2 0002000 -#define CR3 0003000 -#define TABDLY 0014000 -#define TAB0 0000000 -#define TAB1 0004000 -#define TAB2 0010000 -#define TAB3 0014000 -#define XTABS 0014000 -#define BSDLY 0020000 -#define BS0 0000000 -#define BS1 0020000 -#define VTDLY 0040000 -#define VT0 0000000 -#define VT1 0040000 -#define FFDLY 0100000 -#define FF0 0000000 -#define FF1 0100000 - -/* c_cflag bit meaning */ -#define CBAUD 0010017 -#define B0 0000000 /* hang up */ -#define B50 0000001 -#define B75 0000002 -#define B110 0000003 -#define B134 0000004 -#define B150 0000005 -#define B200 0000006 -#define B300 0000007 -#define B600 0000010 -#define B1200 0000011 -#define B1800 0000012 -#define B2400 0000013 -#define B4800 0000014 -#define B9600 0000015 -#define B19200 0000016 -#define B38400 0000017 -#define EXTA B19200 -#define EXTB 
B38400 -#define CSIZE 0000060 -#define CS5 0000000 -#define CS6 0000020 -#define CS7 0000040 -#define CS8 0000060 -#define CSTOPB 0000100 -#define CREAD 0000200 -#define PARENB 0000400 -#define PARODD 0001000 -#define HUPCL 0002000 -#define CLOCAL 0004000 -#define CBAUDEX 0010000 -#define BOTHER 0010000 -#define B57600 0010001 -#define B115200 0010002 -#define B230400 0010003 -#define B460800 0010004 -#define B500000 0010005 -#define B576000 0010006 -#define B921600 0010007 -#define B1000000 0010010 -#define B1152000 0010011 -#define B1500000 0010012 -#define B2000000 0010013 -#define B2500000 0010014 -#define B3000000 0010015 -#define B3500000 0010016 -#define B4000000 0010017 -#define CIBAUD 002003600000 /* input baud rate */ -#define CMSPAR 010000000000 /* mark or space (stick) parity */ -#define CRTSCTS 020000000000 /* flow control */ - -#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ - -/* c_lflag bits */ -#define ISIG 0000001 -#define ICANON 0000002 -#define XCASE 0000004 -#define ECHO 0000010 -#define ECHOE 0000020 -#define ECHOK 0000040 -#define ECHONL 0000100 -#define NOFLSH 0000200 -#define TOSTOP 0000400 -#define ECHOCTL 0001000 -#define ECHOPRT 0002000 -#define ECHOKE 0004000 -#define FLUSHO 0010000 -#define PENDIN 0040000 -#define IEXTEN 0100000 - -/* tcflow() and TCXONC use these */ -#define TCOOFF 0 -#define TCOON 1 -#define TCIOFF 2 -#define TCION 3 - -/* tcflush() and TCFLSH use these */ -#define TCIFLUSH 0 -#define TCOFLUSH 1 -#define TCIOFLUSH 2 - -/* tcsetattr uses these */ -#define TCSANOW 0 -#define TCSADRAIN 1 -#define TCSAFLUSH 2 - -#endif /* __ASM_GENERIC_TERMBITS_H */ +#include diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index d0922ad..280d78a 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h @@ -1,154 +1 @@ -#ifndef _ASM_GENERIC_TERMIOS_H -#define _ASM_GENERIC_TERMIOS_H -/* - * Most architectures have straight copies of the x86 code, with - * varying levels of bug fixes on top. Usually it's a good idea - * to use this generic version instead, but be careful to avoid - * ABI changes. - * New architectures should not provide their own version. - */ - -#include -#include - -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - -#ifdef __KERNEL__ - -#include - -/* intr=^C quit=^\ erase=del kill=^U - eof=^D vtime=\0 vmin=\1 sxtc=\0 - start=^Q stop=^S susp=^Z eol=\0 - reprint=^R discard=^U werase=^W lnext=^V - eol2=\0 -*/ -#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" - -/* - * Translate a "termio" structure into a "termios". Ugh. 
- */ -static inline int user_termio_to_kernel_termios(struct ktermios *termios, - const struct termio __user *termio) -{ - unsigned short tmp; - - if (get_user(tmp, &termio->c_iflag) < 0) - goto fault; - termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; - - if (get_user(tmp, &termio->c_oflag) < 0) - goto fault; - termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; - - if (get_user(tmp, &termio->c_cflag) < 0) - goto fault; - termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; - - if (get_user(tmp, &termio->c_lflag) < 0) - goto fault; - termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; - - if (get_user(termios->c_line, &termio->c_line) < 0) - goto fault; - - if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) - goto fault; - - return 0; - - fault: - return -EFAULT; -} - -/* - * Translate a "termios" structure into a "termio". Ugh. - */ -static inline int kernel_termios_to_user_termio(struct termio __user *termio, - struct ktermios *termios) -{ - if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || - put_user(termios->c_oflag, &termio->c_oflag) < 0 || - put_user(termios->c_cflag, &termio->c_cflag) < 0 || - put_user(termios->c_lflag, &termio->c_lflag) < 0 || - put_user(termios->c_line, &termio->c_line) < 0 || - copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) - return -EFAULT; - - return 0; -} - -#ifdef TCGETS2 -static inline int user_termios_to_kernel_termios(struct ktermios *k, - struct termios2 __user *u) -{ - return copy_from_user(k, u, sizeof(struct termios2)); -} - -static inline int kernel_termios_to_user_termios(struct termios2 __user *u, - struct ktermios *k) -{ - return copy_to_user(u, k, sizeof(struct termios2)); -} - -static inline int user_termios_to_kernel_termios_1(struct ktermios *k, - struct termios __user *u) -{ - return copy_from_user(k, u, sizeof(struct termios)); -} - -static inline int kernel_termios_to_user_termios_1(struct termios __user *u, - struct ktermios *k) -{ - return copy_to_user(u, k, sizeof(struct termios)); -} -#else /* TCGETS2 */ -static inline int user_termios_to_kernel_termios(struct ktermios *k, - struct termios __user *u) -{ - return copy_from_user(k, u, sizeof(struct termios)); -} - -static inline int kernel_termios_to_user_termios(struct termios __user *u, - struct ktermios *k) -{ - return copy_to_user(u, k, sizeof(struct termios)); -} -#endif /* TCGETS2 */ - -#endif /* __KERNEL__ */ - -#endif /* _ASM_GENERIC_TERMIOS_H */ +#include diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h index f2fe528..df1da20 100644 --- a/arch/x86/include/asm/types.h +++ b/arch/x86/include/asm/types.h @@ -3,7 +3,7 @@ #define dma_addr_t dma_addr_t -#include +#include #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/asm/ucontext.h index 7cfc436..b7c29c8 100644 --- a/arch/x86/include/asm/ucontext.h +++ b/arch/x86/include/asm/ucontext.h @@ -7,6 +7,6 @@ * sigcontext struct (uc_mcontext). 
*/ -#include +#include #endif /* _ASM_X86_UCONTEXT_H */ -- cgit v1.1 From feaa0457ec8351cae855edc9a3052ac49322538e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 20 Jun 2009 16:15:40 +0530 Subject: x86: ds.c fix invalid assignment Fixes the type mixups that cause the following sparse warnings: CHECK arch/x86/kernel/ds.c arch/x86/kernel/ds.c:549:19: warning: incorrect type in argument 2 (invalid types) arch/x86/kernel/ds.c:549:19: expected bad type enum bts_field field arch/x86/kernel/ds.c:549:19: got int arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:520:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:520:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:520:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:520:7: error: invalid assignment arch/x86/kernel/ds.c:520:35: error: incompatible types for operation (*) Signed-off-by: Jaswinder Singh Rajput Cc: Markus Metzger LKML-Reference: <1245494740.8613.12.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 48bfe13..ef42a03 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -509,15 +509,15 @@ enum bts_field { bts_escape = ((unsigned long)-1 & ~bts_qual_mask) }; -static inline unsigned long bts_get(const char *base, enum bts_field field) +static inline unsigned long bts_get(const char *base, unsigned long field) { base += (ds_cfg.sizeof_ptr_field * field); return *(unsigned long *)base; } -static inline void bts_set(char 
*base, enum bts_field field, unsigned long val) +static inline void bts_set(char *base, unsigned long field, unsigned long val) { - base += (ds_cfg.sizeof_ptr_field * field);; + base += (ds_cfg.sizeof_ptr_field * field); (*(unsigned long *)base) = val; } -- cgit v1.1 From e487683990972bf9aa4e688434c46ead76748bca Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 20 Jun 2009 23:27:16 -0700 Subject: x86, mce: fix typo in comment in asm/mce.h Fix comment to match the actual declaration. Signed-off-by: Borislav Petkov Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 5cdd8d1..b50b9e9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -9,7 +9,7 @@ */ #define MCG_BANKCNT_MASK 0xff /* Number of Banks */ -#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ #define MCG_EXT_P (1ULL<<9) /* Extended registers available */ #define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ -- cgit v1.1 From a95436e44a76a32dcbe7c8df59701ddde53017c1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 20 Jun 2009 23:28:22 -0700 Subject: x86, mce: use atomic_inc_return() instead of add by 1 Use atomic_inc_return() instead of atomic_add_return() by 1. Signed-off-by: Borislav Petkov Cc: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 284d1de..7da8fec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -242,7 +242,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) /* * Make sure only one CPU runs in machine check panic */ - if (atomic_add_return(1, &mce_paniced) > 1) + if (atomic_inc_return(&mce_paniced) > 1) wait_for_panic(); barrier(); @@ -705,7 +705,7 @@ static int mce_start(int *no_way_out) * global_nwo should be updated before mce_callin */ smp_wmb(); - order = atomic_add_return(1, &mce_callin); + order = atomic_inc_return(&mce_callin); /* * Wait for everyone. -- cgit v1.1 From c9944881acf02b6f25fa62a0441a98b7dc0d7ae6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 24 Jun 2009 13:42:40 +0800 Subject: crypto: aes-ni - Don't print message with KERN_ERR on old system When the aes-intel module is loaded on a system that does not have the AES instructions, it prints Intel AES-NI instructions are not detected. at level KERN_ERR. Since aes-intel is aliased to "aes" it will be tried whenever anything uses AES and spam the console. This doesn't match existing practice for how to handle "no hardware" when initializing a module, so downgrade the message to KERN_INFO. 
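(For reference, the convention being adopted — treat missing hardware as an expected condition: note it once at KERN_INFO and back out quietly with -ENODEV — shown as a condensed sketch of the init path patched below; the real function goes on to register further algorithms and is not shown in full:)

  static int __init aesni_init(void)
  {
          if (!cpu_has_aes) {
                  /* Missing hardware is expected on older CPUs: an
                   * informational message only, then a quiet -ENODEV. */
                  printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
                  return -ENODEV;
          }
          /* hardware present: register the accelerated implementation */
          return crypto_register_alg(&aesni_alg);
  }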
Signed-off-by: Roland Dreier Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c580c5e..d3ec8d5 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -636,7 +636,7 @@ static int __init aesni_init(void) int err; if (!cpu_has_aes) { - printk(KERN_ERR "Intel AES-NI instructions are not detected.\n"); + printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); return -ENODEV; } if ((err = crypto_register_alg(&aesni_alg))) -- cgit v1.1 From e74e396204bfcb67570ba4517b08f5918e69afea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 30 Mar 2009 19:07:44 +0900 Subject: percpu: use dynamic percpu allocator as the default percpu allocator This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use dynamic percpu allocator. The first chunk is allocated using embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as long as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing range limit the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change. * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its invert - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted archs. These archs implement their own setup_per_cpu_areas() and the conversion is not trivial. * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tests on x86_32 with debug code (x86_32 doesn't use default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ] Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: David S. Miller Acked-by: Benjamin Herrenschmidt Acked-by: Martin Schwidefsky Reviewed-by: Christoph Lameter Cc: Paul Mundt Cc: Russell King Cc: Mikael Starvik Cc: Ralf Baechle Cc: Bryan Wu Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Hirokazu Takata Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Heiko Carstens Cc: Ingo Molnar --- arch/x86/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef..a48a900 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,9 +149,6 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y -config HAVE_DYNAMIC_PER_CPU_AREA - def_bool y - config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP -- cgit v1.1 From 204fba4aa303ea4a7bb726a539bf4a5b9e3203d0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:45 +0900 Subject: percpu: cleanup percpu array definitions Currently, the following three different ways to define percpu arrays are in use. 1. DEFINE_PER_CPU(elem_type[array_len], array_name); 2. 
DEFINE_PER_CPU(elem_type, array_name[array_len]); 3. DEFINE_PER_CPU(elem_type, array_name)[array_len]; Unify to #1 which correctly separates the roles of the two parameters and thus allows more flexibility in the way percpu variables are defined. [ Impact: cleanup ] Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Cc: Ingo Molnar Cc: Tony Luck Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Jeremy Fitzhardinge Cc: linux-mm@kvack.org Cc: Christoph Lameter Cc: David S. Miller --- arch/x86/kernel/cpu/cpu_debug.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- arch/x86/kernel/cpu/perf_counter.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 6b2a52d..dca325c 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -30,8 +30,8 @@ #include #include -static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); -static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); +static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr); +static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr); static DEFINE_PER_CPU(int, cpu_priv_count); static DEFINE_MUTEX(cpu_debug_lock); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ddae216..bd2a2fa 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -69,7 +69,7 @@ struct threshold_bank { struct threshold_block *blocks; cpumask_var_t cpus; }; -static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); +static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); #ifdef CONFIG_SMP static unsigned char shared_bank[NR_BANKS] = { diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 76dfef2..4946288 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -862,7 +862,7 @@ amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) x86_pmu_disable_counter(hwc, idx); } -static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); +static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], prev_left); /* * Set the next IRQ period, based on the hwc->period_left value. -- cgit v1.1 From 245b2e70eabd797932adb263a65da0bab3711753 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:48 +0900 Subject: percpu: clean up percpu variable definitions Percpu variable definition is about to be updated such that all percpu symbols including the static ones must be unique. Update percpu variable definitions accordingly. * as,cfq: rename ioc_count uniquely * cpufreq: rename cpu_dbs_info uniquely * xen: move nesting_count out of xen_evtchn_do_upcall() and rename it * mm: move ratelimits out of balance_dirty_pages_ratelimited_nr() and rename it * ipv4,6: rename cookie_scratch uniquely * x86 perf_counter: rename prev_left to pmc_prev_left, irq_entry to pmc_irq_entry and nmi_entry to pmc_nmi_entry * perf_counter: rename disable_count to perf_disable_count * ftrace: rename test_event_disable to ftrace_test_event_disable * kmemleak: rename test_pointer to kmemleak_test_pointer * mce: rename next_interval to mce_next_interval [ Impact: percpu usage cleanups, no duplicate static percpu var names ] Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Cc: Ivan Kokshaysky Cc: Jens Axboe Cc: Dave Jones Cc: Jeremy Fitzhardinge Cc: linux-mm Cc: David S. 
Miller Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Li Zefan Cc: Catalin Marinas Cc: Andi Kleen --- arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++---- arch/x86/kernel/cpu/perf_counter.c | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 284d1de..cba8cd3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1091,7 +1091,7 @@ void mce_log_therm_throt_event(__u64 status) */ static int check_interval = 5 * 60; /* 5 minutes */ -static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ +static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); static void mcheck_timer(unsigned long data) @@ -1110,7 +1110,7 @@ static void mcheck_timer(unsigned long data) * Alert userspace if needed. If we logged an MCE, reduce the * polling interval, otherwise increase the polling interval. */ - n = &__get_cpu_var(next_interval); + n = &__get_cpu_var(mce_next_interval); if (mce_notify_irq()) *n = max(*n/2, HZ/100); else @@ -1311,7 +1311,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) static void mce_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); - int *n = &__get_cpu_var(next_interval); + int *n = &__get_cpu_var(mce_next_interval); if (mce_ignore_ce) return; @@ -1914,7 +1914,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: t->expires = round_jiffies(jiffies + - __get_cpu_var(next_interval)); + __get_cpu_var(mce_next_interval)); add_timer_on(t, cpu); smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); break; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 4946288..5fdf63a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -862,7 +862,7 @@ amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) x86_pmu_disable_counter(hwc, idx); } -static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], prev_left); +static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* * Set the next IRQ period, based on the hwc->period_left value. 
@@ -901,7 +901,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, if (left > x86_pmu.max_period) left = x86_pmu.max_period; - per_cpu(prev_left[idx], smp_processor_id()) = left; + per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; /* * The hw counter starts counting from this counter offset, @@ -1089,7 +1089,7 @@ void perf_counter_print_debug(void) rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); rdmsrl(x86_pmu.perfctr + idx, pmc_count); - prev_left = per_cpu(prev_left[idx], cpu); + prev_left = per_cpu(pmc_prev_left[idx], cpu); pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", cpu, idx, pmc_ctrl); @@ -1561,8 +1561,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) entry->ip[entry->nr++] = ip; } -static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); static void @@ -1709,9 +1709,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) struct perf_callchain_entry *entry; if (in_nmi()) - entry = &__get_cpu_var(nmi_entry); + entry = &__get_cpu_var(pmc_nmi_entry); else - entry = &__get_cpu_var(irq_entry); + entry = &__get_cpu_var(pmc_irq_entry); entry->nr = 0; -- cgit v1.1 From 2495fbf7effa6868f5d74124ae9b22a57980755b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 26 Jun 2009 10:53:57 -0700 Subject: x86, setup: remove obsolete pre-Kconfig CONFIG_VIDEO_ variables There were a set of pre-Kconfig configuration variables defined in the video code. There is absolutely no evidence that they have been tweaked by anybody in modern history, so just get rid of them and hope nobody notices. If someone does complain, these should be made real Kconfig variables. Reported-by: Robert P. J. Day Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/video-vesa.c | 7 +------ arch/x86/boot/video-vga.c | 10 ---------- arch/x86/boot/video.c | 5 ----- arch/x86/boot/video.h | 20 ++------------------ 4 files changed, 3 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index c700147..d7ef26b 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -31,7 +31,6 @@ static inline void vesa_store_mode_params_graphics(void) {} static int vesa_probe(void) { -#if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID) struct biosregs ireg, oreg; u16 mode; addr_t mode_ptr; @@ -49,8 +48,7 @@ static int vesa_probe(void) vginfo.signature != VESA_MAGIC || vginfo.version < 0x0102) return 0; /* Not present */ -#endif /* CONFIG_VIDEO_VESA || CONFIG_FIRMWARE_EDID */ -#ifdef CONFIG_VIDEO_VESA + set_fs(vginfo.video_mode_ptr.seg); mode_ptr = vginfo.video_mode_ptr.off; @@ -102,9 +100,6 @@ static int vesa_probe(void) } return nmodes; -#else - return 0; -#endif /* CONFIG_VIDEO_VESA */ } static int vesa_set_mode(struct mode_info *mode) diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 8f8d827..819caa1 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -47,14 +47,6 @@ static u8 vga_set_basic_mode(void) initregs(&ireg); -#ifdef CONFIG_VIDEO_400_HACK - if (adapter >= ADAPTER_VGA) { - ireg.ax = 0x1202; - ireg.bx = 0x0030; - intcall(0x10, &ireg, NULL); - } -#endif - ax = 0x0f00; intcall(0x10, &ireg, &oreg); mode = oreg.al; @@ -62,11 +54,9 @@ static u8 vga_set_basic_mode(void) set_fs(0); rows = rdfs8(0x484); /* rows minus one */ -#ifndef CONFIG_VIDEO_400_HACK if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) && (rows == 0 || rows == 24)) return mode; -#endif if (mode != 3 && mode != 7) mode = 3; diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index bad728b..d42da38 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -221,7 +221,6 @@ static unsigned int mode_menu(void) } } -#ifdef CONFIG_VIDEO_RETAIN /* Save screen content to the heap */ static struct saved_screen { int x, y; @@ -299,10 +298,6 @@ static void restore_screen(void) ireg.dl = saved.curx; intcall(0x10, &ireg, NULL); } -#else -#define save_screen() ((void)0) -#define restore_screen() ((void)0) -#endif void set_video(void) { diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index 5bb174a..ff339c5 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -17,19 +17,8 @@ #include -/* Enable autodetection of SVGA adapters and modes. */ -#undef CONFIG_VIDEO_SVGA - -/* Enable autodetection of VESA modes */ -#define CONFIG_VIDEO_VESA - -/* Retain screen contents when switching modes */ -#define CONFIG_VIDEO_RETAIN - -/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */ -#undef CONFIG_VIDEO_400_HACK - -/* This code uses an extended set of video mode numbers. These include: +/* + * This code uses an extended set of video mode numbers. 
These include: * Aliases for standard modes * NORMAL_VGA (-1) * EXTENDED_VGA (-2) @@ -67,13 +56,8 @@ /* The "recalculate timings" flag */ #define VIDEO_RECALC 0x8000 -/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ -#ifdef CONFIG_VIDEO_RETAIN void store_screen(void); #define DO_STORE() store_screen() -#else -#define DO_STORE() ((void)0) -#endif /* CONFIG_VIDEO_RETAIN */ /* * Mode table structures -- cgit v1.1 From 087975b06b00af9bf888fab6f94ae113c5cd80bd Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 27 Jun 2009 15:35:15 +0900 Subject: x86: Clean up dump_pagetable() Use pgtable access helpers for 32-bit version dump_pagetable() and get rid of __typeof__() operators. This needs to make pmd_pfn() available for 2-level pgtable. Also, remove some casts for 64-bit version dump_pagetable(). Signed-off-by: Akinobu Mita LKML-Reference: <20090627063514.GA2834@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 10 ++++----- arch/x86/mm/fault.c | 51 +++++++++++++++++------------------------- 2 files changed, 26 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3cc06e3..af5481e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -134,6 +134,11 @@ static inline unsigned long pte_pfn(pte_t pte) return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; } +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) @@ -422,11 +427,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; -} - static inline int pud_large(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 78a5fff..9bf7e52 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -285,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address, tsk->thread.screen_bitmap |= 1 << bit; } -static void dump_pagetable(unsigned long address) +static bool low_pfn(unsigned long pfn) { - __typeof__(pte_val(__pte(0))) page; + return pfn < max_low_pfn; +} - page = read_cr3(); - page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; +static void dump_pagetable(unsigned long address) +{ + pgd_t *base = __va(read_cr3()); + pgd_t *pgd = &base[pgd_index(address)]; + pmd_t *pmd; + pte_t *pte; #ifdef CONFIG_X86_PAE - printk("*pdpt = %016Lx ", page); - if ((page >> PAGE_SHIFT) < max_low_pfn - && page & _PAGE_PRESENT) { - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) - & (PTRS_PER_PMD - 1)]; - printk(KERN_CONT "*pde = %016Lx ", page); - page &= ~_PAGE_NX; - } -#else - printk("*pde = %08lx ", page); + printk("*pdpt = %016Lx ", pgd_val(*pgd)); + if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) + goto out; #endif + pmd = pmd_offset(pud_offset(pgd, address), address); + printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); /* * We must not directly access the pte in the highpte @@ -312,16 +311,12 @@ static void dump_pagetable(unsigned long address) * And let's rather not kmap-atomic the pte, just in case * it's allocated already: */ - if ((page >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT) - && !(page & 
_PAGE_PSE)) { - - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) - & (PTRS_PER_PTE - 1)]; - printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); - } + if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) + goto out; + pte = pte_offset_kernel(pmd, address); + printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); +out: printk("\n"); } @@ -449,16 +444,12 @@ static int bad_address(void *p) static void dump_pagetable(unsigned long address) { - pgd_t *pgd; + pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd_t *pgd = base + pgd_index(address); pud_t *pud; pmd_t *pmd; pte_t *pte; - pgd = (pgd_t *)read_cr3(); - - pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); - - pgd += pgd_index(address); if (bad_address(pgd)) goto bad; -- cgit v1.1 From ce0c0f9eec2f377055e8b23c6fa192202381e022 Mon Sep 17 00:00:00 2001 From: "Figo.zhang" Date: Sun, 28 Jun 2009 18:07:39 +0800 Subject: x86, pgtable.h: Clean up types Use "unsigned long" consistently, not "unsigned". Signed-off-by: Figo.zhang LKML-Reference: <1246183659.2530.4.camel@myhost> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af5481e..9de8729 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -356,7 +356,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) * this macro returns the index of the entry in the pmd page which would * control the given virtual address */ -static inline unsigned pmd_index(unsigned long address) +static inline unsigned long pmd_index(unsigned long address) { return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } @@ -376,7 +376,7 @@ static inline unsigned pmd_index(unsigned long address) * this function returns the index of the entry in the pte page which would * control the given virtual address */ -static inline unsigned pte_index(unsigned long address) +static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } @@ -462,7 +462,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) #define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) /* to find an entry in a page-table-directory. */ -static inline unsigned pud_index(unsigned long address) +static inline unsigned long pud_index(unsigned long address) { return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); } -- cgit v1.1 From 565b0c1f100408ccbcb04ba458a14da454cb271d Mon Sep 17 00:00:00 2001 From: "Figo.zhang" Date: Mon, 29 Jun 2009 12:02:55 +0800 Subject: x86, highmem_32.c: Clean up comment Signed-off-by: Figo.zhang Cc: Andrew Morton LKML-Reference: <1246248175.5759.12.camel@myhost> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 58f621e..0c6f43c 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -24,7 +24,7 @@ void kunmap(struct page *page) * no global lock is needed and because the kmap code must perform a global TLB * invalidation when the kmap pool wraps. * - * However when holding an atomic kmap is is not legal to sleep, so atomic + * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. 
*/ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) -- cgit v1.1 From a4a874a906ae69c35df4b712fadbc35b15665355 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Thu, 18 Jun 2009 07:05:46 +0800 Subject: kmemcheck: remove duplicated #include Remove duplicated #include in arch/x86/mm/kmemcheck/shadow.c. Signed-off-by: Huang Weiyi Acked-by: Pekka Enberg Signed-off-by: Vegard Nossum --- arch/x86/mm/kmemcheck/shadow.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c index e773b6b..3f66b82 100644 --- a/arch/x86/mm/kmemcheck/shadow.c +++ b/arch/x86/mm/kmemcheck/shadow.c @@ -1,7 +1,6 @@ #include #include #include -#include #include #include -- cgit v1.1 From 414f3251aa1b4cbd1e070866971eabc004a7dc20 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 22 Jun 2009 14:31:53 +0200 Subject: kmemcheck: remove useless check This check is a left-over from ancient times. We now have the equivalent check much earlier in both the page fault handler and the debug trap handler (the calls to kmemcheck_active()). Signed-off-by: Vegard Nossum --- arch/x86/mm/kmemcheck/kmemcheck.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 2c55ed0..5b99004 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -225,9 +225,6 @@ void kmemcheck_hide(struct pt_regs *regs) BUG_ON(!irqs_disabled()); - if (data->balance == 0) - return; - if (unlikely(data->balance != 1)) { kmemcheck_show_all(); kmemcheck_error_save_bug(regs); -- cgit v1.1 From 788e5abc5441e9046dd91c995c6f1f75bbd144bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:58 +0900 Subject: percpu: drop @unit_size from embed first chunk allocator The only extra feature @unit_size provides is making dead space at the end of the first chunk which doesn't have any valid usecase. Drop the parameter. This will increase consistency with generalized 4k allocator. James Bottomley spotted missing conversion for the default setup_per_cpu_areas() which caused build breakage on all arcsh which use it. [ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: James Bottomley Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef..1472820 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -342,7 +342,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) return -EINVAL; return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); + reserve - PERCPU_FIRST_CHUNK_RESERVE); } /* -- cgit v1.1 From d4b95f80399471e4bce5e992700ff7f06ef91f6a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize 4k first chunk allocator Generalize and move x86 setup_pcpu_4k() into pcpu_4k_first_chunk(). setup_pcpu_4k() now is a simple wrapper around the generalized version. Other than taking size parameters and using arch supplied callbacks to allocate/free memory, pcpu_4k_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, s/pcpu_populate_pte_fn_t/pcpu_fc_populate_pte_fn_t/ for consistency. 
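For orientation, the arch side after this change is just a thin wrapper handing bootmem-backed callbacks to the generic code; condensed from the hunks below (pcpu_4k_first_chunk() itself lives in the generic percpu allocator and is not part of this diff):

  /* bootmem-backed helpers passed to the generic first chunk code */
  static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size)
  {
          return pcpu_alloc_bootmem(cpu, size, size);
  }

  static void __init pcpu_fc_free(void *ptr, size_t size)
  {
          free_bootmem(__pa(ptr), size);
  }

  static ssize_t __init setup_pcpu_4k(size_t static_size)
  {
          /* the generic allocator now does the page-by-page work */
          return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
                                     pcpu_fc_alloc, pcpu_fc_free,
                                     pcpu4k_populate_pte);
  }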
[ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 78 ++++++++++-------------------------------- 1 file changed, 19 insertions(+), 59 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1472820..ab896b3 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -124,6 +124,19 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, } /* + * Helpers for first chunk memory allocation + */ +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +{ + return pcpu_alloc_bootmem(cpu, size, size); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + +/* * Large page remap allocator * * This allocator uses PMD page as unit. A PMD page is allocated for @@ -346,22 +359,11 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k page allocator + * 4k allocator * - * This is the basic allocator. Static percpu area is allocated - * page-by-page and most of initialization is done by the generic - * setup function. + * Boring fallback 4k allocator. This allocator puts more pressure on + * PTE TLBs but other than that behaves nicely on both UMA and NUMA. */ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_nr_static_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_nr_static_pages) - return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; - return NULL; -} - static void __init pcpu4k_populate_pte(unsigned long addr) { populate_extra_pte(addr); @@ -369,51 +371,9 @@ static void __init pcpu4k_populate_pte(unsigned long addr) static ssize_t __init setup_pcpu_4k(size_t static_size) { - size_t pages_size; - unsigned int cpu; - int i, j; - ssize_t ret; - - pcpu4k_nr_static_pages = PFN_UP(static_size); - - /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() - * sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); - - /* allocate and copy */ - j = 0; - for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_nr_static_pages; i++) { - void *ptr; - - ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); - goto enomem; - } - - memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); - pcpu4k_pages[j++] = virt_to_page(ptr); - } - - /* we're ready, commit */ - pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", - pcpu4k_nr_static_pages, static_size); - - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, - -1, NULL, pcpu4k_populate_pte); - goto out_free_ar; - -enomem: - while (--j >= 0) - free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); - return ret; + return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpu4k_populate_pte); } /* for explicit first chunk allocator selection */ -- cgit v1.1 From 8c4bfc6e8801616ab2e01c38140b2159b388d2ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize lpage first chunk allocator Generalize and move x86 setup_pcpu_lpage() into pcpu_lpage_first_chunk(). 
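(The one genuinely arch-specific piece that remains is the PMD map callback handed to the generic allocator, quoted from the hunk below:)

  static void __init pcpul_map(void *ptr, size_t size, void *addr)
  {
          pmd_t *pmd, pmd_v;

          /* map this cpu's large page at the vmalloc-area address
           * using a single PMD entry */
          pmd = populate_extra_pmd((unsigned long)addr);
          pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE);
          set_pmd(pmd, pmd_v);
  }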
setup_pcpu_lpage() now is a simple wrapper around the generalized version. Other than taking size parameters and using arch supplied callbacks to allocate/free/map memory, pcpu_lpage_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, factor out pcpu_calc_fc_sizes() which is common to pcpu_embed_first_chunk() and pcpu_lpage_first_chunk(). [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/include/asm/percpu.h | 9 --- arch/x86/kernel/setup_percpu.c | 169 +++-------------------------------------- arch/x86/mm/pageattr.c | 1 + 3 files changed, 12 insertions(+), 167 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 103f1dd..a18c038 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -156,15 +156,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_NEED_MULTIPLE_NODES -void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ab896b3..4f2e0ac 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -137,44 +137,21 @@ static void __init pcpu_fc_free(void *ptr, size_t size) } /* - * Large page remap allocator - * - * This allocator uses PMD page as unit. A PMD page is allocated for - * each cpu and each is remapped into vmalloc area using PMD mapping. - * As PMD page is quite large, only part of it is used for the first - * chunk. Unused part is returned to the bootmem allocator. - * - * So, the PMD pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more PMD TLB entry pressure but still is much - * better than only using 4k mappings while still being NUMA friendly. + * Large page remapping allocator */ #ifdef CONFIG_NEED_MULTIPLE_NODES -struct pcpul_ent { - unsigned int cpu; - void *ptr; -}; - -static size_t pcpul_size; -static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; - -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +static void __init pcpul_map(void *ptr, size_t size, void *addr) { - size_t off = (size_t)pageno << PAGE_SHIFT; + pmd_t *pmd, pmd_v; - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); + pmd = populate_extra_pmd((unsigned long)addr); + pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE); + set_pmd(pmd, pmd_v); } static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { - size_t map_size, dyn_size; - unsigned int cpu; - int i, j; - ssize_t ret; + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; @@ -198,134 +175,10 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -EINVAL; } - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. 
- */ - pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - if (pcpul_size > PMD_SIZE) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } - dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - - /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); - pcpul_map = alloc_bootmem(map_size); - - for_each_possible_cpu(cpu) { - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, - PMD_SIZE); - if (!pcpul_map[cpu].ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The 2MB up-rounding bootmem is needed to make - * sure the partial 2MB page is still fully RAM - it's - * not well-specified to have a PAT-incompatible area - * (unmapped RAM, device memory, etc.) in that hole. - */ - free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), - PMD_SIZE - pcpul_size); - - memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); - } - - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * PMD_SIZE; - vm_area_register_early(&pcpul_vm, PMD_SIZE); - - for_each_possible_cpu(cpu) { - pmd_t *pmd, pmd_v; - - pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + - cpu * PMD_SIZE); - pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), - PAGE_KERNEL_LARGE); - set_pmd(pmd, pmd_v); - } - - /* we're ready, commit */ - pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); - - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - return ret; - -enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. This is - * used by pageattr to detect VM aliases and break up the pcpu PMD - * mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used PMD - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); - unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; - int left = 0, right = num_possible_cpus() - 1; - int pos; - - /* pcpul in use at all? 
 */
-	if (!pcpul_map)
-		return NULL;
-
-	/* okay, perform binary search */
-	while (left <= right) {
-		pos = (left + right) / 2;
-
-		if (pcpul_map[pos].ptr < pmd_addr)
-			left = pos + 1;
-		else if (pcpul_map[pos].ptr > pmd_addr)
-			right = pos - 1;
-		else {
-			/* it shouldn't be in the area for the first chunk */
-			WARN_ON(offset < pcpul_size);
-
-			return pcpul_vm.addr +
-				pcpul_map[pos].cpu * PMD_SIZE + offset;
-		}
-	}
-
-	return NULL;
+	return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
+				      reserve - PERCPU_FIRST_CHUNK_RESERVE,
+				      PMD_SIZE,
+				      pcpu_fc_alloc, pcpu_fc_free, pcpul_map);
 }
 #else
 static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1b734d7..c106f78 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
-- cgit v1.1

From a530b7958612bafe2027e21359083dba84f0b3b4 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Sat, 4 Jul 2009 08:11:00 +0900
Subject: percpu: teach large page allocator about NUMA

The large page first chunk allocator is primarily used on NUMA
machines; however, its NUMA handling is extremely simplistic.
Regardless of their proximity, each cpu is put into a separate large
page just to return most of the allocated space back, wasting a large
amount of vmalloc space and increasing the cache footprint.

This patch teaches NUMA details to the large page allocator.  Given
processor proximity information, pcpu_lpage_build_unit_map() will find
a fitting cpu -> unit mapping in which cpus in LOCAL_DISTANCE share the
same large page and not too much virtual address space is wasted.

This greatly reduces the unit and thus chunk size and wastes much less
address space for the first chunk.  For example, on a 4/4 NUMA machine,
the original code occupied 16MB of virtual space for the first chunk
while the new code only uses 4MB - one 2MB page for each node.
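[ Editorial sketch, not part of the commit: the grouping idea in spirit.
Only LOCAL_DISTANCE and the cpu_distance callback shape come from the
patch; the real pcpu_lpage_build_unit_map() additionally bounds wasted
space per large page and computes the unit and dynamic sizes. ]

#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>	/* LOCAL_DISTANCE */

/*
 * Sketch: give consecutive unit numbers to CPUs that are mutually
 * "local", so that one large (PMD) page can back a whole group.
 */
static int __init sketch_build_unit_map(int *unit_map,
		int (*cpu_distance_fn)(unsigned int from, unsigned int to))
{
	unsigned int cpu, tcpu;
	int unit = 0;

	for_each_possible_cpu(cpu)
		unit_map[cpu] = -1;

	for_each_possible_cpu(cpu) {
		if (unit_map[cpu] >= 0)
			continue;	/* already placed in an earlier group */

		/* start a new group: every CPU local to @cpu joins it */
		for_each_possible_cpu(tcpu)
			if (unit_map[tcpu] < 0 &&
			    cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE)
				unit_map[tcpu] = unit++;
	}

	return unit;	/* nr_units; group members hold adjacent units */
}

[ Because group members occupy adjacent units, one PMD page covers the
whole group, and a cpu's address becomes base + unit_map[cpu] *
unit_size - exactly the per_cpu_offset() change at the end of the diff
below. ]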
[ Impact: much better space efficiency on NUMA machines ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Jan Beulich Cc: Andi Kleen Cc: David Miller --- arch/x86/kernel/setup_percpu.c | 72 ++++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4f2e0ac..7501bb1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -149,36 +149,73 @@ static void __init pcpul_map(void *ptr, size_t size, void *addr) set_pmd(pmd, pmd_v); } +static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) +{ + if (early_cpu_to_node(from) == early_cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; + size_t unit_map_size, unit_size; + int *unit_map; + int nr_units; + ssize_t ret; + + /* on non-NUMA, embedding is better */ + if (!chosen && !pcpu_need_numa()) + return -EINVAL; + + /* need PSE */ + if (!cpu_has_pse) { + pr_warning("PERCPU: lpage allocator requires PSE\n"); + return -EINVAL; + } + /* allocate and build unit_map */ + unit_map_size = num_possible_cpus() * sizeof(int); + unit_map = alloc_bootmem_nopanic(unit_map_size); + if (!unit_map) { + pr_warning("PERCPU: failed to allocate unit_map\n"); + return -ENOMEM; + } + + ret = pcpu_lpage_build_unit_map(static_size, + PERCPU_FIRST_CHUNK_RESERVE, + &dyn_size, &unit_size, PMD_SIZE, + unit_map, pcpu_lpage_cpu_distance); + if (ret < 0) { + pr_warning("PERCPU: failed to build unit_map\n"); + goto out_free; + } + nr_units = ret; + + /* do the parameters look okay? 
*/ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = num_possible_cpus() * PMD_SIZE; - - /* on non-NUMA, embedding is better */ - if (!pcpu_need_numa()) - return -EINVAL; + size_t tot_size = nr_units * unit_size; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - return -EINVAL; + ret = -EINVAL; + goto out_free; } } - /* need PSE */ - if (!cpu_has_pse) { - pr_warning("PERCPU: lpage allocator requires PSE\n"); - return -EINVAL; - } - - return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, - PMD_SIZE, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + dyn_size, unit_size, PMD_SIZE, + unit_map, nr_units, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); +out_free: + if (ret < 0) + free_bootmem(__pa(unit_map), unit_map_size); + return ret; } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) @@ -299,7 +336,8 @@ void __init setup_per_cpu_areas(void) /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; + per_cpu_offset(cpu) = + delta + pcpu_unit_map[cpu] * pcpu_unit_size; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); -- cgit v1.1 From 42204455f160dab0c47f19e1be23f5c927af2d17 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:50:00 +0530 Subject: x86: Clean up mtrr/amd.c: Fix trivial style problems : ERROR: trailing whitespace WARNING: line over 80 characters ERROR: do not use C99 // comments arch/x86/kernel/cpu/mtrr/amd.o: text data bss dec hex filename 501 32 0 533 215 amd.o.before 501 32 0 533 215 amd.o.after md5: 62f795eb840ee2d17b03df89e789e76c amd.o.before.asm 62f795eb840ee2d17b03df89e789e76c amd.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Also restructured comments to be standard, removed stray return, converted function description to DocBook style, etc. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/amd.c | 97 ++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index ee2331b..33af141 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -7,15 +7,15 @@ static void amd_get_mtrr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) + unsigned long *size, mtrr_type *type) { unsigned long low, high; rdmsr(MSR_K6_UWCCR, low, high); - /* Upper dword is region 1, lower is region 0 */ + /* Upper dword is region 1, lower is region 0 */ if (reg == 1) low = high; - /* The base masks off on the right alignment */ + /* The base masks off on the right alignment */ *base = (low & 0xFFFE0000) >> PAGE_SHIFT; *type = 0; if (low & 1) @@ -27,74 +27,81 @@ amd_get_mtrr(unsigned int reg, unsigned long *base, return; } /* - * This needs a little explaining. The size is stored as an - * inverted mask of bits of 128K granularity 15 bits long offset - * 2 bits + * This needs a little explaining. 
The size is stored as an + * inverted mask of bits of 128K granularity 15 bits long offset + * 2 bits. * - * So to get a size we do invert the mask and add 1 to the lowest - * mask bit (4 as its 2 bits in). This gives us a size we then shift - * to turn into 128K blocks + * So to get a size we do invert the mask and add 1 to the lowest + * mask bit (4 as its 2 bits in). This gives us a size we then shift + * to turn into 128K blocks. * - * eg 111 1111 1111 1100 is 512K + * eg 111 1111 1111 1100 is 512K * - * invert 000 0000 0000 0011 - * +1 000 0000 0000 0100 - * *128K ... + * invert 000 0000 0000 0011 + * +1 000 0000 0000 0100 + * *128K ... */ low = (~low) & 0x1FFFC; *size = (low + 4) << (15 - PAGE_SHIFT); - return; } -static void amd_set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -/* [SUMMARY] Set variable MTRR register on the local CPU. - The register to set. - The base address of the region. - The size of the region. If this is 0 the region is disabled. - The type of the region. - [RETURNS] Nothing. -*/ +/** + * amd_set_mtrr - Set variable MTRR register on the local CPU. + * + * @reg The register to set. + * @base The base address of the region. + * @size The size of the region. If this is 0 the region is disabled. + * @type The type of the region. + * + * Returns nothing. + */ +static void +amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) { u32 regs[2]; /* - * Low is MTRR0 , High MTRR 1 + * Low is MTRR0, High MTRR 1 */ rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); /* - * Blank to disable + * Blank to disable */ - if (size == 0) + if (size == 0) { regs[reg] = 0; - else - /* Set the register to the base, the type (off by one) and an - inverted bitmask of the size The size is the only odd - bit. We are fed say 512K We invert this and we get 111 1111 - 1111 1011 but if you subtract one and invert you get the - desired 111 1111 1111 1100 mask - - But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ + } else { + /* + * Set the register to the base, the type (off by one) and an + * inverted bitmask of the size The size is the only odd + * bit. We are fed say 512K We invert this and we get 111 1111 + * 1111 1011 but if you subtract one and invert you get the + * desired 111 1111 1111 1100 mask + * + * But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! + */ regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) | (base << PAGE_SHIFT) | (type + 1); + } /* - * The writeback rule is quite specific. See the manual. Its - * disable local interrupts, write back the cache, set the mtrr + * The writeback rule is quite specific. See the manual. 
Its + * disable local interrupts, write back the cache, set the mtrr */ wbinvd(); wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); } -static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +static int +amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) { - /* Apply the K6 block alignment and size rules - In order - o Uncached or gathering only - o 128K or bigger block - o Power of 2 block - o base suitably aligned to the power - */ + /* + * Apply the K6 block alignment and size rules + * In order + * o Uncached or gathering only + * o 128K or bigger block + * o Power of 2 block + * o base suitably aligned to the power + */ if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) || (size & ~(size - 1)) - size || (base & (size - 1))) return -EINVAL; @@ -115,5 +122,3 @@ int __init amd_init_mtrr(void) set_mtrr_ops(&amd_mtrr_ops); return 0; } - -//arch_initcall(amd_mtrr_init); -- cgit v1.1 From 6c4caa1ab737502190e416b76e6c10d2bf24276a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:50:44 +0530 Subject: x86: Clean up mtrr/centaur.c Remove dead code and fix trivial style problems: ERROR: trailing whitespace X 2 WARNING: line over 80 characters X 3 ROR: trailing whitespace ERROR: do not use C99 // comments X 2 arch/x86/kernel/cpu/mtrr/centaur.o: text data bss dec hex filename 605 32 68 705 2c1 centaur.o.before 605 32 68 705 2c1 centaur.o.after md5: a4865ea98ce3c163bb1d376a3949b3e3 centaur.o.before.asm a4865ea98ce3c163bb1d376a3949b3e3 centaur.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Standardized comments, DocBook, curly braces, newlines. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/centaur.c | 168 ++++++++----------------------------- 1 file changed, 35 insertions(+), 133 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index cb9aa3a..de89f14 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -1,7 +1,9 @@ #include #include + #include #include + #include "mtrr.h" static struct { @@ -12,25 +14,25 @@ static struct { static u8 centaur_mcr_reserved; static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ -/* - * Report boot time MCR setups +/** + * centaur_get_free_region - Get a free MTRR. + * + * @base: The starting (base) address of the region. + * @size: The size (in bytes) of the region. + * + * Returns: the index of the region on success, else -1 on error. */ - static int centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free MTRR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. 
-*/ { - int i, max; - mtrr_type ltype; unsigned long lbase, lsize; + mtrr_type ltype; + int i, max; max = num_var_ranges; if (replace_reg >= 0 && replace_reg < max) return replace_reg; + for (i = 0; i < max; ++i) { if (centaur_mcr_reserved & (1 << i)) continue; @@ -38,11 +40,14 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) if (lsize == 0) return i; } + return -ENOSPC; } -void -mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) +/* + * Report boot time MCR setups + */ +void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { centaur_mcr[mcr].low = lo; centaur_mcr[mcr].high = hi; @@ -54,33 +59,35 @@ centaur_get_mcr(unsigned int reg, unsigned long *base, { *base = centaur_mcr[reg].high >> PAGE_SHIFT; *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; - *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ + *type = MTRR_TYPE_WRCOMB; /* write-combining */ + if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) *type = MTRR_TYPE_UNCACHABLE; if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) *type = MTRR_TYPE_WRBACK; if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) *type = MTRR_TYPE_WRBACK; - } -static void centaur_set_mcr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) +static void +centaur_set_mcr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) { unsigned long low, high; if (size == 0) { - /* Disable */ + /* Disable */ high = low = 0; } else { high = base << PAGE_SHIFT; - if (centaur_mcr_type == 0) - low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ - else { + if (centaur_mcr_type == 0) { + /* Only support write-combining... */ + low = -size << PAGE_SHIFT | 0x1f; + } else { if (type == MTRR_TYPE_UNCACHABLE) - low = -size << PAGE_SHIFT | 0x02; /* NC */ + low = -size << PAGE_SHIFT | 0x02; /* NC */ else - low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ + low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */ } } centaur_mcr[reg].high = high; @@ -88,118 +95,16 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base, wrmsr(MSR_IDT_MCR0 + reg, low, high); } -#if 0 -/* - * Initialise the later (saner) Winchip MCR variant. In this version - * the BIOS can pass us the registers it has used (but not their values) - * and the control register is read/write - */ - -static void __init -centaur_mcr1_init(void) -{ - unsigned i; - u32 lo, hi; - - /* Unfortunately, MCR's are read-only, so there is no way to - * find out what the bios might have done. - */ - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ - lo &= ~0x1C0; /* clear key */ - lo |= 0x040; /* set key to 1 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ - } - - centaur_mcr_type = 1; - - /* - * Clear any unconfigured MCR's. - */ - - for (i = 0; i < 8; ++i) { - if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { - if (!(lo & (1 << (9 + i)))) - wrmsr(MSR_IDT_MCR0 + i, 0, 0); - else - /* - * If the BIOS set up an MCR we cannot see it - * but we don't wish to obliterate it - */ - centaur_mcr_reserved |= (1 << i); - } - } - /* - * Throw the main write-combining switch... - * However if OOSTORE is enabled then people have already done far - * cleverer things and we should behave. 
- */ - - lo |= 15; /* Write combine enables */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -/* - * Initialise the original winchip with read only MCR registers - * no used bitmask for the BIOS to pass on and write only control - */ - -static void __init -centaur_mcr0_init(void) -{ - unsigned i; - - /* Unfortunately, MCR's are read-only, so there is no way to - * find out what the bios might have done. - */ - - /* Clear any unconfigured MCR's. - * This way we are sure that the centaur_mcr array contains the actual - * values. The disadvantage is that any BIOS tweaks are thus undone. - * - */ - for (i = 0; i < 8; ++i) { - if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) - wrmsr(MSR_IDT_MCR0 + i, 0, 0); - } - - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ -} - -/* - * Initialise Winchip series MCR registers - */ - -static void __init -centaur_mcr_init(void) -{ - struct set_mtrr_context ctxt; - - set_mtrr_prepare_save(&ctxt); - set_mtrr_cache_disable(&ctxt); - - if (boot_cpu_data.x86_model == 4) - centaur_mcr0_init(); - else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) - centaur_mcr1_init(); - - set_mtrr_done(&ctxt); -} -#endif - -static int centaur_validate_add_page(unsigned long base, - unsigned long size, unsigned int type) +static int +centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type) { /* - * FIXME: Winchip2 supports uncached + * FIXME: Winchip2 supports uncached */ - if (type != MTRR_TYPE_WRCOMB && + if (type != MTRR_TYPE_WRCOMB && (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { - printk(KERN_WARNING - "mtrr: only write-combining%s supported\n", - centaur_mcr_type ? " and uncacheable are" - : " is"); + pr_warning("mtrr: only write-combining%s supported\n", + centaur_mcr_type ? " and uncacheable are" : " is"); return -EINVAL; } return 0; @@ -207,7 +112,6 @@ static int centaur_validate_add_page(unsigned long base, static struct mtrr_ops centaur_mtrr_ops = { .vendor = X86_VENDOR_CENTAUR, -// .init = centaur_mcr_init, .set = centaur_set_mcr, .get = centaur_get_mcr, .get_free_region = centaur_get_free_region, @@ -220,5 +124,3 @@ int __init centaur_init_mtrr(void) set_mtrr_ops(¢aur_mtrr_ops); return 0; } - -//arch_initcall(centaur_init_mtrr); -- cgit v1.1 From 63f9600fadb10ea739108ae93e3e842d9843c58b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:51:32 +0530 Subject: x86: Clean up mtrr/cleanup.c Fix trivial style problems: WARNING: Use #include instead of WARNING: Use #include instead of Also, nr_mtrr_spare_reg should be unsigned long. arch/x86/kernel/cpu/mtrr/cleanup.o: text data bss dec hex filename 6241 8992 2056 17289 4389 cleanup.o.before 6241 8992 2056 17289 4389 cleanup.o.after The md5 has changed: 1a7a27513aef1825236daf29110fe657 cleanup.o.before.asm bcea358efa2532b6020e338e158447af cleanup.o.after.asm Because a WARN_ON()'s __LINE__ value changed by 3 lines. Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Did lots of other cleanups to make the code look more consistent. 
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 350 +++++++++++++++++++------------------ 1 file changed, 176 insertions(+), 174 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 1d584a1..b8aba81 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -1,51 +1,52 @@ -/* MTRR (Memory Type Range Register) cleanup - - Copyright (C) 2009 Yinghai Lu - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - +/* + * MTRR (Memory Type Range Register) cleanup + * + * Copyright (C) 2009 Yinghai Lu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ #include #include #include #include #include -#include #include +#include +#include +#include +#include #include #include -#include -#include #include -#include + #include "mtrr.h" -/* should be related to MTRR_VAR_RANGES nums */ +/* Should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 struct res_range { - unsigned long start; - unsigned long end; + unsigned long start; + unsigned long end; }; static int __init -add_range(struct res_range *range, int nr_range, unsigned long start, - unsigned long end) +add_range(struct res_range *range, int nr_range, + unsigned long start, unsigned long end) { - /* out of slots */ + /* Out of slots: */ if (nr_range >= RANGE_NUM) return nr_range; @@ -58,12 +59,12 @@ add_range(struct res_range *range, int nr_range, unsigned long start, } static int __init -add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, - unsigned long end) +add_range_with_merge(struct res_range *range, int nr_range, + unsigned long start, unsigned long end) { int i; - /* try to merge it with old one */ + /* Try to merge it with old one: */ for (i = 0; i < nr_range; i++) { unsigned long final_start, final_end; unsigned long common_start, common_end; @@ -84,7 +85,7 @@ add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, return nr_range; } - /* need to add that */ + /* Need to add it: */ return add_range(range, nr_range, start, end); } @@ -117,7 +118,7 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end) } if (start > range[j].start && end < range[j].end) { - /* find the new spare */ + /* Find the new spare: */ for (i = 0; i < RANGE_NUM; i++) { if (range[i].end == 0) break; @@ -147,13 +148,19 @@ static int __init cmp_range(const void *x1, const void *x2) } struct var_mtrr_range_state { - unsigned long base_pfn; - unsigned long size_pfn; - mtrr_type type; + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; }; static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; + static int __initdata debug_print; +#define Dprintk(x...) 
do { if (debug_print) printk(KERN_DEBUG x); } while (0) + + +#define BIOS_BUG_MSG KERN_WARNING \ + "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, @@ -180,7 +187,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, range[i].start, range[i].end + 1); } - /* take out UC ranges */ + /* Take out UC ranges: */ for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; if (type != MTRR_TYPE_UNCACHABLE && @@ -244,10 +251,9 @@ static int __initdata nr_range; static unsigned long __init sum_ranges(struct res_range *range, int nr_range) { - unsigned long sum; + unsigned long sum = 0; int i; - sum = 0; for (i = 0; i < nr_range; i++) sum += range[i].end + 1 - range[i].start; @@ -288,7 +294,7 @@ struct var_mtrr_state { static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned int address_bits) + unsigned char type, unsigned int address_bits) { u32 base_lo, base_hi, mask_lo, mask_hi; u64 base, mask; @@ -301,7 +307,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, mask = (1ULL << address_bits) - 1; mask &= ~((((u64)sizek) << 10) - 1); - base = ((u64)basek) << 10; + base = ((u64)basek) << 10; base |= type; mask |= 0x800; @@ -317,15 +323,14 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, static void __init save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type) + unsigned char type) { range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); range_state[reg].type = type; } -static void __init -set_var_mtrr_all(unsigned int address_bits) +static void __init set_var_mtrr_all(unsigned int address_bits) { unsigned long basek, sizek; unsigned char type; @@ -342,11 +347,11 @@ set_var_mtrr_all(unsigned int address_bits) static unsigned long to_size_factor(unsigned long sizek, char *factorp) { - char factor; unsigned long base = sizek; + char factor; if (base & ((1<<10) - 1)) { - /* not MB alignment */ + /* Not MB-aligned: */ factor = 'K'; } else if (base & ((1<<20) - 1)) { factor = 'M'; @@ -372,11 +377,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, unsigned long max_align, align; unsigned long sizek; - /* Compute the maximum size I can make a range */ + /* Compute the maximum size with which we can make a range: */ if (range_startk) max_align = ffs(range_startk) - 1; else max_align = 32; + align = fls(range_sizek) - 1; if (align > max_align) align = max_align; @@ -386,11 +392,10 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, char start_factor = 'K', size_factor = 'K'; unsigned long start_base, size_base; - start_base = to_size_factor(range_startk, - &start_factor), - size_base = to_size_factor(sizek, &size_factor), + start_base = to_size_factor(range_startk, &start_factor); + size_base = to_size_factor(sizek, &size_factor); - printk(KERN_DEBUG "Setting variable MTRR %d, " + Dprintk("Setting variable MTRR %d, " "base: %ld%cB, range: %ld%cB, type %s\n", reg, start_base, start_factor, size_base, size_factor, @@ -425,10 +430,11 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, chunk_sizek = state->chunk_sizek; gran_sizek = state->gran_sizek; - /* align with gran size, prevent small block used up MTRRs */ + /* Align with gran size, prevent small block used up MTRRs: */ range_basek = 
ALIGN(state->range_startk, gran_sizek); if ((range_basek > basek) && basek) return second_sizek; + state->range_sizek -= (range_basek - state->range_startk); range_sizek = ALIGN(state->range_sizek, gran_sizek); @@ -439,22 +445,21 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, } state->range_sizek = range_sizek; - /* try to append some small hole */ + /* Try to append some small hole: */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); - /* no increase */ + /* No increase: */ if (range0_sizek == state->range_sizek) { - if (debug_print) - printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + state->range_sizek)<<10); + Dprintk("rangeX: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + state->range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, state->range_sizek, MTRR_TYPE_WRBACK); return 0; } - /* only cut back, when it is not the last */ + /* Only cut back when it is not the last: */ if (sizek) { while (range0_basek + range0_sizek > (basek + sizek)) { if (range0_sizek >= chunk_sizek) @@ -470,16 +475,16 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, second_try: range_basek = range0_basek + range0_sizek; - /* one hole in the middle */ + /* One hole in the middle: */ if (range_basek > basek && range_basek <= (basek + sizek)) second_sizek = range_basek - basek; if (range0_sizek > state->range_sizek) { - /* one hole in middle or at end */ + /* One hole in middle or at the end: */ hole_sizek = range0_sizek - state->range_sizek - second_sizek; - /* hole size should be less than half of range0 size */ + /* Hole size should be less than half of range0 size: */ if (hole_sizek >= (range0_sizek >> 1) && range0_sizek >= chunk_sizek) { range0_sizek -= chunk_sizek; @@ -491,32 +496,30 @@ second_try: } if (range0_sizek) { - if (debug_print) - printk(KERN_DEBUG "range0: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + range0_sizek)<<10); + Dprintk("range0: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + range0_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, range0_sizek, MTRR_TYPE_WRBACK); } if (range0_sizek < state->range_sizek) { - /* need to handle left over */ + /* Need to handle left over range: */ range_sizek = state->range_sizek - range0_sizek; - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", - range_basek<<10, - (range_basek + range_sizek)<<10); + Dprintk("range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, MTRR_TYPE_WRBACK); } if (hole_sizek) { hole_basek = range_basek - hole_sizek - second_sizek; - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, - (hole_basek + hole_sizek)<<10); + Dprintk("hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, MTRR_TYPE_UNCACHABLE); } @@ -537,23 +540,23 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, basek = base_pfn << (PAGE_SHIFT - 10); sizek = size_pfn << (PAGE_SHIFT - 10); - /* See if I can merge with the last range */ + /* See if I can merge with the last range: */ if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) { unsigned long endk = basek + sizek; state->range_sizek = endk - state->range_startk; return; } - /* Write the range mtrrs */ + /* Write the range mtrrs: */ if 
(state->range_sizek != 0) second_sizek = range_to_mtrr_with_hole(state, basek, sizek); - /* Allocate an msr */ + /* Allocate an msr: */ state->range_startk = basek + second_sizek; state->range_sizek = sizek - second_sizek; } -/* mininum size of mtrr block that can take hole */ +/* Mininum size of mtrr block that can take hole: */ static u64 mtrr_chunk_size __initdata = (256ULL<<20); static int __init parse_mtrr_chunk_size_opt(char *p) @@ -565,7 +568,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) } early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); -/* granity of mtrr of block */ +/* Granularity of mtrr of block: */ static u64 mtrr_gran_size __initdata; static int __init parse_mtrr_gran_size_opt(char *p) @@ -577,7 +580,7 @@ static int __init parse_mtrr_gran_size_opt(char *p) } early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); -static int nr_mtrr_spare_reg __initdata = +static unsigned long nr_mtrr_spare_reg __initdata = CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; static int __init parse_mtrr_spare_reg(char *arg) @@ -586,7 +589,6 @@ static int __init parse_mtrr_spare_reg(char *arg) nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); return 0; } - early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); static int __init @@ -594,8 +596,8 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, u64 chunk_size, u64 gran_size) { struct var_mtrr_state var_state; - int i; int num_reg; + int i; var_state.range_startk = 0; var_state.range_sizek = 0; @@ -605,17 +607,18 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, memset(range_state, 0, sizeof(range_state)); - /* Write the range etc */ - for (i = 0; i < nr_range; i++) + /* Write the range: */ + for (i = 0; i < nr_range; i++) { set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1); + } - /* Write the last range */ + /* Write the last range: */ if (var_state.range_sizek != 0) range_to_mtrr_with_hole(&var_state, 0, 0); num_reg = var_state.reg; - /* Clear out the extra MTRR's */ + /* Clear out the extra MTRR's: */ while (var_state.reg < num_var_ranges) { save_var_mtrr(var_state.reg, 0, 0, 0); var_state.reg++; @@ -625,11 +628,11 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, } struct mtrr_cleanup_result { - unsigned long gran_sizek; - unsigned long chunk_sizek; - unsigned long lose_cover_sizek; - unsigned int num_reg; - int bad; + unsigned long gran_sizek; + unsigned long chunk_sizek; + unsigned long lose_cover_sizek; + unsigned int num_reg; + int bad; }; /* @@ -645,10 +648,10 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static void __init print_out_mtrr_range_state(void) { - int i; char start_factor = 'K', size_factor = 'K'; unsigned long start_base, size_base; mtrr_type type; + int i; for (i = 0; i < num_var_ranges; i++) { @@ -676,10 +679,10 @@ static int __init mtrr_need_cleanup(void) int i; mtrr_type type; unsigned long size; - /* extra one for all 0 */ + /* Extra one for all 0: */ int num[MTRR_NUM_TYPES + 1]; - /* check entries number */ + /* Check entries number: */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; @@ -693,88 +696,86 @@ static int __init mtrr_need_cleanup(void) num[type]++; } - /* check if we got UC entries */ + /* Check if we got UC entries: */ if (!num[MTRR_TYPE_UNCACHABLE]) return 0; - /* check if we only had WB and UC */ + /* Check if we only had WB and UC */ if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != - num_var_ranges - num[MTRR_NUM_TYPES]) + num_var_ranges - 
num[MTRR_NUM_TYPES]) return 0; return 1; } static unsigned long __initdata range_sums; -static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, - unsigned long extra_remove_base, - unsigned long extra_remove_size, - int i) + +static void __init +mtrr_calc_range_state(u64 chunk_size, u64 gran_size, + unsigned long x_remove_base, + unsigned long x_remove_size, int i) { - int num_reg; static struct res_range range_new[RANGE_NUM]; - static int nr_range_new; unsigned long range_sums_new; + static int nr_range_new; + int num_reg; - /* convert ranges to var ranges state */ - num_reg = x86_setup_var_mtrrs(range, nr_range, - chunk_size, gran_size); + /* Convert ranges to var ranges state: */ + num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); - /* we got new setting in range_state, check it */ + /* We got new setting in range_state, check it: */ memset(range_new, 0, sizeof(range_new)); nr_range_new = x86_get_mtrr_mem_range(range_new, 0, - extra_remove_base, extra_remove_size); + x_remove_base, x_remove_size); range_sums_new = sum_ranges(range_new, nr_range_new); result[i].chunk_sizek = chunk_size >> 10; result[i].gran_sizek = gran_size >> 10; result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { - result[i].lose_cover_sizek = - (range_sums_new - range_sums) << PSHIFT; + result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT; result[i].bad = 1; - } else - result[i].lose_cover_sizek = - (range_sums - range_sums_new) << PSHIFT; + } else { + result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; + } - /* double check it */ + /* Double check it: */ if (!result[i].bad && !result[i].lose_cover_sizek) { - if (nr_range_new != nr_range || - memcmp(range, range_new, sizeof(range))) - result[i].bad = 1; + if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range))) + result[i].bad = 1; } - if (!result[i].bad && (range_sums - range_sums_new < - min_loss_pfn[num_reg])) { - min_loss_pfn[num_reg] = - range_sums - range_sums_new; - } + if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg])) + min_loss_pfn[num_reg] = range_sums - range_sums_new; } static void __init mtrr_print_out_one_result(int i) { - char gran_factor, chunk_factor, lose_factor; unsigned long gran_base, chunk_base, lose_base; + char gran_factor, chunk_factor, lose_factor; gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), - printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", - result[i].bad ? "*BAD*" : " ", - gran_base, gran_factor, chunk_base, chunk_factor); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", - result[i].num_reg, result[i].bad ? "-" : "", - lose_base, lose_factor); + + pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad ? "*BAD*" : " ", + gran_base, gran_factor, chunk_base, chunk_factor); + pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n", + result[i].num_reg, result[i].bad ? 
"-" : "", + lose_base, lose_factor); } static int __init mtrr_search_optimal_index(void) { - int i; int num_reg_good; int index_good; + int i; if (nr_mtrr_spare_reg >= num_var_ranges) nr_mtrr_spare_reg = num_var_ranges - 1; + num_reg_good = -1; for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { if (!min_loss_pfn[i]) @@ -796,24 +797,24 @@ static int __init mtrr_search_optimal_index(void) return index_good; } - int __init mtrr_cleanup(unsigned address_bits) { - unsigned long extra_remove_base, extra_remove_size; + unsigned long x_remove_base, x_remove_size; unsigned long base, size, def, dummy; - mtrr_type type; u64 chunk_size, gran_size; + mtrr_type type; int index_good; int i; if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) return 0; + rdmsr(MSR_MTRRdefType, def, dummy); def &= 0xff; if (def != MTRR_TYPE_UNCACHABLE) return 0; - /* get it and store it aside */ + /* Get it and store it aside: */ memset(range_state, 0, sizeof(range_state)); for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); @@ -822,29 +823,28 @@ int __init mtrr_cleanup(unsigned address_bits) range_state[i].type = type; } - /* check if we need handle it and can handle it */ + /* Check if we need handle it and can handle it: */ if (!mtrr_need_cleanup()) return 0; - /* print original var MTRRs at first, for debugging: */ + /* Print original var MTRRs at first, for debugging: */ printk(KERN_DEBUG "original variable MTRRs\n"); print_out_mtrr_range_state(); memset(range, 0, sizeof(range)); - extra_remove_size = 0; - extra_remove_base = 1 << (32 - PAGE_SHIFT); + x_remove_size = 0; + x_remove_base = 1 << (32 - PAGE_SHIFT); if (mtrr_tom2) - extra_remove_size = - (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; - nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, - extra_remove_size); + x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; + + nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size); /* - * [0, 1M) should always be coverred by var mtrr with WB - * and fixed mtrrs should take effective before var mtrr for it + * [0, 1M) should always be covered by var mtrr with WB + * and fixed mtrrs should take effect before var mtrr for it: */ nr_range = add_range_with_merge(range, nr_range, 0, (1ULL<<(20 - PAGE_SHIFT)) - 1); - /* sort the ranges */ + /* Sort the ranges: */ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); range_sums = sum_ranges(range, nr_range); @@ -854,7 +854,7 @@ int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { i = 0; mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, - extra_remove_base, extra_remove_size, i); + x_remove_base, x_remove_size, i); mtrr_print_out_one_result(i); @@ -880,7 +880,7 @@ int __init mtrr_cleanup(unsigned address_bits) continue; mtrr_calc_range_state(chunk_size, gran_size, - extra_remove_base, extra_remove_size, i); + x_remove_base, x_remove_size, i); if (debug_print) { mtrr_print_out_one_result(i); printk(KERN_INFO "\n"); @@ -890,7 +890,7 @@ int __init mtrr_cleanup(unsigned address_bits) } } - /* try to find the optimal index */ + /* Try to find the optimal index: */ index_good = mtrr_search_optimal_index(); if (index_good != -1) { @@ -898,7 +898,7 @@ int __init mtrr_cleanup(unsigned address_bits) i = index_good; mtrr_print_out_one_result(i); - /* convert ranges to var ranges state */ + /* Convert ranges to var ranges state: */ chunk_size = result[i].chunk_sizek; chunk_size <<= 10; gran_size = result[i].gran_sizek; @@ -941,8 +941,8 @@ early_param("disable_mtrr_trim", 
disable_mtrr_trim_setup); * Note this won't check if the MTRRs < 4GB where the magic bit doesn't * apply to are wrong, but so far we don't know of any such case in the wild. */ -#define Tom2Enabled (1U << 21) -#define Tom2ForceMemTypeWB (1U << 22) +#define Tom2Enabled (1U << 21) +#define Tom2ForceMemTypeWB (1U << 22) int __init amd_special_default_mtrr(void) { @@ -952,7 +952,7 @@ int __init amd_special_default_mtrr(void) return 0; if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) return 0; - /* In case some hypervisor doesn't pass SYSCFG through */ + /* In case some hypervisor doesn't pass SYSCFG through: */ if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) return 0; /* @@ -965,19 +965,21 @@ int __init amd_special_default_mtrr(void) return 0; } -static u64 __init real_trim_memory(unsigned long start_pfn, - unsigned long limit_pfn) +static u64 __init +real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) { u64 trim_start, trim_size; + trim_start = start_pfn; trim_start <<= PAGE_SHIFT; + trim_size = limit_pfn; trim_size <<= PAGE_SHIFT; trim_size -= trim_start; - return e820_update_range(trim_start, trim_size, E820_RAM, - E820_RESERVED); + return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED); } + /** * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs * @end_pfn: ending page frame number @@ -985,7 +987,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn, * Some buggy BIOSes don't setup the MTRRs properly for systems with certain * memory configurations. This routine checks that the highest MTRR matches * the end of memory, to make sure the MTRRs having a write back type cover - * all of the memory the kernel is intending to use. If not, it'll trim any + * all of the memory the kernel is intending to use. If not, it'll trim any * memory off the end by adjusting end_pfn, removing it from the kernel's * allocation pools, warning the user with an obnoxious message. */ @@ -994,21 +996,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; u64 total_trim_size; - /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; + /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: */ if (!is_cpu(INTEL) || disable_mtrr_trim) return 0; + rdmsr(MSR_MTRRdefType, def, dummy); def &= 0xff; if (def != MTRR_TYPE_UNCACHABLE) return 0; - /* get it and store it aside */ + /* Get it and store it aside: */ memset(range_state, 0, sizeof(range_state)); for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); @@ -1017,7 +1020,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) range_state[i].type = type; } - /* Find highest cached pfn */ + /* Find highest cached pfn: */ for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; if (type != MTRR_TYPE_WRBACK) @@ -1028,13 +1031,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) highest_pfn = base + size; } - /* kvm/qemu doesn't have mtrr set right, don't trim them all */ + /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ if (!highest_pfn) { printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); return 0; } - /* check entries number */ + /* Check entries number: */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; @@ -1046,11 +1049,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) num[type]++; } - /* no entry for WB? */ + /* No entry for WB? 
*/ if (!num[MTRR_TYPE_WRBACK]) return 0; - /* check if we only had WB and UC */ + /* Check if we only had WB and UC: */ if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != num_var_ranges - num[MTRR_NUM_TYPES]) return 0; @@ -1066,31 +1069,31 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) } nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); + /* Check the head: */ total_trim_size = 0; - /* check the head */ if (range[0].start) total_trim_size += real_trim_memory(0, range[0].start); - /* check the holes */ + + /* Check the holes: */ for (i = 0; i < nr_range - 1; i++) { if (range[i].end + 1 < range[i+1].start) total_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); } - /* check the top */ + + /* Check the top: */ i = nr_range - 1; if (range[i].end + 1 < end_pfn) total_trim_size += real_trim_memory(range[i].end + 1, end_pfn); if (total_trim_size) { - printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %lluMB of RAM.\n", - total_trim_size >> 20); + pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); if (!changed_by_mtrr_cleanup) WARN_ON(1); - printk(KERN_INFO "update e820 for mtrr\n"); + pr_info("update e820 for mtrr\n"); update_e820(); return 1; @@ -1098,4 +1101,3 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) return 0; } - -- cgit v1.1 From 2311037708c170977506fbcbe0a2ba0c6d221940 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:52:08 +0530 Subject: x86: Clean up mtrr/cyrix.c Fix trivial style problems: WARNING: Use #include instead of WARNING: line over 80 characters ERROR: do not initialise statics to 0 or NULL ERROR: space prohibited after that open parenthesis '(' X 2 ERROR: space prohibited before that close parenthesis ')' X 2 ERROR: trailing whitespace X 2 ERROR: trailing statements should be on next line ERROR: do not use C99 // comments X 2 arch/x86/kernel/cpu/mtrr/cyrix.o: text data bss dec hex filename 1637 32 8 1677 68d cyrix.o.before 1637 32 8 1677 68d cyrix.o.after md5: 6f52abd06905be3f4cabb5239f9b0ff0 cyrix.o.before.asm 6f52abd06905be3f4cabb5239f9b0ff0 cyrix.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Made the code more consistent ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cyrix.c | 94 ++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index ff14c32..228d982 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -1,38 +1,40 @@ #include +#include #include -#include -#include -#include + #include #include +#include +#include + #include "mtrr.h" static void cyrix_get_arr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type * type) { - unsigned long flags; unsigned char arr, ccr3, rcr, shift; + unsigned long flags; arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ - /* Save flags and disable interrupts */ local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ((unsigned char *) base)[3] = getCx86(arr); - ((unsigned char *) base)[2] = getCx86(arr + 1); - ((unsigned char *) base)[1] = getCx86(arr + 2); + ((unsigned char *)base)[3] = getCx86(arr); + ((unsigned char *)base)[2] = getCx86(arr + 1); + 
((unsigned char *)base)[1] = getCx86(arr + 2); rcr = getCx86(CX86_RCR_BASE + reg); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - /* Enable interrupts if it was enabled previously */ local_irq_restore(flags); + shift = ((unsigned char *) base)[1] & 0x0f; *base >>= PAGE_SHIFT; - /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 + /* + * Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 * Note: shift==0xf means 4G, this is unsupported. */ if (shift) @@ -76,17 +78,20 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, } } +/* + * cyrix_get_free_region - get a free ARR. + * + * @base: the starting (base) address of the region. + * @size: the size (in bytes) of the region. + * + * Returns: the index of the region on success, else -1 on error. +*/ static int cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free ARR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. -*/ { - int i; - mtrr_type ltype; unsigned long lbase, lsize; + mtrr_type ltype; + int i; switch (replace_reg) { case 7: @@ -107,14 +112,17 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) cyrix_get_arr(7, &lbase, &lsize, <ype); if (lsize == 0) return 7; - /* Else try ARR0-ARR6 first */ + /* Else try ARR0-ARR6 first */ } else { for (i = 0; i < 7; i++) { cyrix_get_arr(i, &lbase, &lsize, <ype); if (lsize == 0) return i; } - /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ + /* + * ARR0-ARR6 isn't free + * try ARR7 but its size must be at least 256K + */ cyrix_get_arr(i, &lbase, &lsize, <ype); if ((lsize == 0) && (size >= 0x40)) return i; @@ -122,21 +130,22 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) return -ENOSPC; } -static u32 cr4 = 0; -static u32 ccr3; +static u32 cr4, ccr3; static void prepare_set(void) { u32 cr0; /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { + if (cpu_has_pge) { cr4 = read_cr4(); write_cr4(cr4 & ~X86_CR4_PGE); } - /* Disable and flush caches. Note that wbinvd flushes the TLBs as - a side-effect */ + /* + * Disable and flush caches. 
+ * Note that wbinvd flushes the TLBs as a side-effect + */ cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); @@ -147,22 +156,21 @@ static void prepare_set(void) /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); - } static void post_set(void) { - /* Flush caches and TLBs */ + /* Flush caches and TLBs */ wbinvd(); /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, ccr3); - - /* Enable caches */ + + /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); - /* Restore value of CR4 */ - if ( cpu_has_pge ) + /* Restore value of CR4 */ + if (cpu_has_pge) write_cr4(cr4); } @@ -178,7 +186,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, size >>= 6; size &= 0x7fff; /* make sure arr_size <= 14 */ - for (arr_size = 0; size; arr_size++, size >>= 1) ; + for (arr_size = 0; size; arr_size++, size >>= 1) + ; if (reg < 7) { switch (type) { @@ -215,18 +224,18 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, prepare_set(); base <<= PAGE_SHIFT; - setCx86(arr, ((unsigned char *) &base)[3]); - setCx86(arr + 1, ((unsigned char *) &base)[2]); - setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); + setCx86(arr + 0, ((unsigned char *)&base)[3]); + setCx86(arr + 1, ((unsigned char *)&base)[2]); + setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size); setCx86(CX86_RCR_BASE + reg, arr_type); post_set(); } typedef struct { - unsigned long base; - unsigned long size; - mtrr_type type; + unsigned long base; + unsigned long size; + mtrr_type type; } arr_state_t; static arr_state_t arr_state[8] = { @@ -247,16 +256,17 @@ static void cyrix_set_all(void) setCx86(CX86_CCR0 + i, ccr_state[i]); for (; i < 7; i++) setCx86(CX86_CCR4 + i, ccr_state[i]); - for (i = 0; i < 8; i++) - cyrix_set_arr(i, arr_state[i].base, + + for (i = 0; i < 8; i++) { + cyrix_set_arr(i, arr_state[i].base, arr_state[i].size, arr_state[i].type); + } post_set(); } static struct mtrr_ops cyrix_mtrr_ops = { .vendor = X86_VENDOR_CYRIX, -// .init = cyrix_arr_init, .set_all = cyrix_set_all, .set = cyrix_set_arr, .get = cyrix_get_arr, @@ -270,5 +280,3 @@ int __init cyrix_init_mtrr(void) set_mtrr_ops(&cyrix_mtrr_ops); return 0; } - -//arch_initcall(cyrix_init_mtrr); -- cgit v1.1 From a1a499a39911fcfecbebaba1f38588088909f918 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:53:00 +0530 Subject: x86: Clean up mtrr/generic.c Fix following trivial style problems: ERROR: trailing whitespace X 4 WARNING: Use #include instead of WARNING: braces {} are not necessary for single statement blocks X 3 ERROR: "foo * bar" should be "foo *bar" WARNING: line over 80 characters X 6 ERROR: "foo * bar" should be "foo *bar" ERROR: spaces required around that '=' (ctx:VxO) ERROR: space required before that '-' (ctx:OxV) WARNING: suspect code indent for conditional statements (8, 12) ERROR: spaces required around that '=' (ctx:VxV) ERROR: do not initialise statics to 0 or NULL ERROR: space prohibited after that open parenthesis '(' X 2 ERROR: space prohibited before that close parenthesis ')' X 2 ERROR: trailing statements should be on next line ERROR: return is not a function, parentheses are not required Also use pr_debug and pr_warning where possible. 
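[ Reviewer's note, editorial and not part of the commit: the
printk(KERN_DEBUG ...) -> pr_debug() conversion only stays audible
because the hunk below also adds "#define DEBUG" ahead of the includes
in generic.c; without that define (and without CONFIG_DYNAMIC_DEBUG)
pr_debug() is compiled out.  A minimal sketch of the pattern, with an
illustrative message string: ]

/* DEBUG must be defined before the printk machinery is pulled in */
#define DEBUG

#include <linux/kernel.h>

static void sketch_debug_print(void)
{
	/*
	 * With DEBUG defined this emits at KERN_DEBUG level; without it
	 * (and without dynamic debug) the call is compiled out.
	 */
	pr_debug("MTRR default type: %s\n", "uncachable");
}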
arch/x86/kernel/cpu/mtrr/generic.o: text data bss dec hex filename 5652 77 4224 9953 26e1 generic.o.before 5652 77 4220 9949 26dd generic.o.after The md5 changed: b34d6c045f06daa4ed092b90cc760e8f generic.o.before.asm a490c6251cfd8442fbffecc0e09a573d generic.o.after.asm Because mtrr_state moved from data to bss, changing its offsets - and also because __LINE__ numbers changed. Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Further cleanups to make the code more consistent ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 304 +++++++++++++++++++++---------------- 1 file changed, 169 insertions(+), 135 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0543f69..55da0c5 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -1,28 +1,34 @@ -/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong - because MTRRs can span upto 40 bits (36bits on most modern x86) */ +/* + * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong + * because MTRRs can span upto 40 bits (36bits on most modern x86) + */ +#define DEBUG + +#include #include #include +#include #include -#include -#include -#include -#include -#include -#include + #include +#include #include +#include +#include +#include #include + #include "mtrr.h" struct fixed_range_block { - int base_msr; /* start address of an MTRR block */ - int ranges; /* number of MTRRs in this block */ + int base_msr; /* start address of an MTRR block */ + int ranges; /* number of MTRRs in this block */ }; static struct fixed_range_block fixed_range_blocks[] = { - { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ - { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ - { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ + { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ + { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ + { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ {} }; @@ -30,10 +36,10 @@ static unsigned long smp_changes_mask; static int mtrr_state_set; u64 mtrr_tom2; -struct mtrr_state_type mtrr_state = {}; +struct mtrr_state_type mtrr_state; EXPORT_SYMBOL_GPL(mtrr_state); -/** +/* * BIOS is expected to clear MtrrFixDramModEn bit, see for example * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD * Opteron Processors" (26094 Rev. 
3.30 February 2006), section @@ -104,9 +110,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) * Look of multiple ranges matching this address and pick type * as per MTRR precedence */ - if (!(mtrr_state.enabled & 2)) { + if (!(mtrr_state.enabled & 2)) return mtrr_state.def_type; - } prev_match = 0xFF; for (i = 0; i < num_var_ranges; ++i) { @@ -125,9 +130,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) if (start_state != end_state) return 0xFE; - if ((start & mask) != (base & mask)) { + if ((start & mask) != (base & mask)) continue; - } curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; if (prev_match == 0xFF) { @@ -148,9 +152,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) curr_match = MTRR_TYPE_WRTHROUGH; } - if (prev_match != curr_match) { + if (prev_match != curr_match) return MTRR_TYPE_UNCACHABLE; - } } if (mtrr_tom2) { @@ -164,7 +167,7 @@ u8 mtrr_type_lookup(u64 start, u64 end) return mtrr_state.def_type; } -/* Get the MSR pair relating to a var range */ +/* Get the MSR pair relating to a var range */ static void get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) { @@ -172,7 +175,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); } -/* fill the MSR pair relating to a var range */ +/* Fill the MSR pair relating to a var range */ void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) { @@ -186,10 +189,9 @@ void fill_mtrr_var_range(unsigned int index, vr[index].mask_hi = mask_hi; } -static void -get_fixed_ranges(mtrr_type * frs) +static void get_fixed_ranges(mtrr_type *frs) { - unsigned int *p = (unsigned int *) frs; + unsigned int *p = (unsigned int *)frs; int i; k8_check_syscfg_dram_mod_en(); @@ -217,22 +219,22 @@ static void __init print_fixed_last(void) if (!last_fixed_end) return; - printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start, - last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); + pr_debug(" %05X-%05X %s\n", last_fixed_start, + last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); last_fixed_end = 0; } static void __init update_fixed_last(unsigned base, unsigned end, - mtrr_type type) + mtrr_type type) { last_fixed_start = base; last_fixed_end = end; last_fixed_type = type; } -static void __init print_fixed(unsigned base, unsigned step, - const mtrr_type *types) +static void __init +print_fixed(unsigned base, unsigned step, const mtrr_type *types) { unsigned i; @@ -259,54 +261,55 @@ static void __init print_mtrr_state(void) unsigned int i; int high_width; - printk(KERN_DEBUG "MTRR default type: %s\n", - mtrr_attrib_to_str(mtrr_state.def_type)); + pr_debug("MTRR default type: %s\n", + mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { - printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? "en" : "dis"); + pr_debug("MTRR fixed ranges %sabled:\n", + mtrr_state.enabled & 1 ? "en" : "dis"); print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); for (i = 0; i < 2; ++i) - print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); + print_fixed(0x80000 + i * 0x20000, 0x04000, + mtrr_state.fixed_ranges + (i + 1) * 8); for (i = 0; i < 8; ++i) - print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); + print_fixed(0xC0000 + i * 0x08000, 0x01000, + mtrr_state.fixed_ranges + (i + 3) * 8); /* tail */ print_fixed_last(); } - printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? 
"en" : "dis"); + pr_debug("MTRR variable ranges %sabled:\n", + mtrr_state.enabled & 2 ? "en" : "dis"); if (size_or_mask & 0xffffffffUL) high_width = ffs(size_or_mask & 0xffffffffUL) - 1; else high_width = ffs(size_or_mask>>32) + 32 - 1; high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; + for (i = 0; i < num_var_ranges; ++i) { if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", - i, - high_width, - mtrr_state.var_ranges[i].base_hi, - mtrr_state.var_ranges[i].base_lo >> 12, - high_width, - mtrr_state.var_ranges[i].mask_hi, - mtrr_state.var_ranges[i].mask_lo >> 12, - mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); else - printk(KERN_DEBUG " %u disabled\n", i); - } - if (mtrr_tom2) { - printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n", - mtrr_tom2, mtrr_tom2>>20); + pr_debug(" %u disabled\n", i); } + if (mtrr_tom2) + pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); } -/* Grab all of the MTRR state for this CPU into *state */ +/* Grab all of the MTRR state for this CPU into *state */ void __init get_mtrr_state(void) { - unsigned int i; struct mtrr_var_range *vrs; - unsigned lo, dummy; unsigned long flags; + unsigned lo, dummy; + unsigned int i; vrs = mtrr_state.var_ranges; @@ -324,6 +327,7 @@ void __init get_mtrr_state(void) if (amd_special_default_mtrr()) { unsigned low, high; + /* TOP_MEM2 */ rdmsr(MSR_K8_TOP_MEM2, low, high); mtrr_tom2 = high; @@ -344,10 +348,9 @@ void __init get_mtrr_state(void) post_set(); local_irq_restore(flags); - } -/* Some BIOS's are fucked and don't set all MTRRs the same! */ +/* Some BIOS's are messed up and don't set all MTRRs the same! */ void __init mtrr_state_warn(void) { unsigned long mask = smp_changes_mask; @@ -355,28 +358,33 @@ void __init mtrr_state_warn(void) if (!mask) return; if (mask & MTRR_CHANGE_MASK_FIXED) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_VARIABLE) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); + pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_DEFTYPE) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); printk(KERN_INFO "mtrr: corrected configuration.\n"); } -/* Doesn't attempt to pass an error out to MTRR users - because it's quite complicated in some cases and probably not - worth it because the best error handling is to ignore it. */ +/* + * Doesn't attempt to pass an error out to MTRR users + * because it's quite complicated in some cases and probably not + * worth it because the best error handling is to ignore it. 
+ */ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) { - if (wrmsr_safe(msr, a, b) < 0) + if (wrmsr_safe(msr, a, b) < 0) { printk(KERN_ERR "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", smp_processor_id(), msr, a, b); + } } /** - * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have + * set_fixed_range - checks & updates a fixed-range MTRR if it + * differs from the value it should have * @msr: MSR address of the MTTR which should be checked and updated * @changed: pointer which indicates whether the MTRR needed to be changed * @msrwords: pointer to the MSR values which the MSR should have @@ -401,20 +409,23 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) * * Returns: The index of the region on success, else negative on error. */ -int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) +int +generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) { - int i, max; - mtrr_type ltype; unsigned long lbase, lsize; + mtrr_type ltype; + int i, max; max = num_var_ranges; if (replace_reg >= 0 && replace_reg < max) return replace_reg; + for (i = 0; i < max; ++i) { mtrr_if->get(i, &lbase, &lsize, <ype); if (lsize == 0) return i; } + return -ENOSPC; } @@ -434,7 +445,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { - /* Invalid (i.e. free) range */ + /* Invalid (i.e. free) range */ *base = 0; *size = 0; *type = 0; @@ -471,27 +482,31 @@ out_put_cpu: } /** - * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set + * set_fixed_ranges - checks & updates the fixed-range MTRRs if they + * differ from the saved set * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() */ -static int set_fixed_ranges(mtrr_type * frs) +static int set_fixed_ranges(mtrr_type *frs) { - unsigned long long *saved = (unsigned long long *) frs; + unsigned long long *saved = (unsigned long long *)frs; bool changed = false; - int block=-1, range; + int block = -1, range; k8_check_syscfg_dram_mod_en(); - while (fixed_range_blocks[++block].ranges) - for (range=0; range < fixed_range_blocks[block].ranges; range++) - set_fixed_range(fixed_range_blocks[block].base_msr + range, - &changed, (unsigned int *) saved++); + while (fixed_range_blocks[++block].ranges) { + for (range = 0; range < fixed_range_blocks[block].ranges; range++) + set_fixed_range(fixed_range_blocks[block].base_msr + range, + &changed, (unsigned int *)saved++); + } return changed; } -/* Set the MSR pair relating to a var range. Returns TRUE if - changes are made */ +/* + * Set the MSR pair relating to a var range. + * Returns true if changes are made. 
+ */ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) { unsigned int lo, hi; @@ -501,6 +516,7 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); changed = true; } @@ -526,21 +542,26 @@ static u32 deftype_lo, deftype_hi; */ static unsigned long set_mtrr_state(void) { - unsigned int i; unsigned long change_mask = 0; + unsigned int i; - for (i = 0; i < num_var_ranges; i++) + for (i = 0; i < num_var_ranges; i++) { if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) change_mask |= MTRR_CHANGE_MASK_VARIABLE; + } if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) change_mask |= MTRR_CHANGE_MASK_FIXED; - /* Set_mtrr_restore restores the old value of MTRRdefType, - so to set it we fiddle with the saved value */ + /* + * Set_mtrr_restore restores the old value of MTRRdefType, + * so to set it we fiddle with the saved value: + */ if ((deftype_lo & 0xff) != mtrr_state.def_type || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { - deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); + + deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | + (mtrr_state.enabled << 10); change_mask |= MTRR_CHANGE_MASK_DEFTYPE; } @@ -548,33 +569,36 @@ static unsigned long set_mtrr_state(void) } -static unsigned long cr4 = 0; +static unsigned long cr4; static DEFINE_SPINLOCK(set_atomicity_lock); /* - * Since we are disabling the cache don't allow any interrupts - they - * would run extremely slow and would only increase the pain. The caller must - * ensure that local interrupts are disabled and are reenabled after post_set() - * has been called. + * Since we are disabling the cache don't allow any interrupts, + * they would run extremely slow and would only increase the pain. + * + * The caller must ensure that local interrupts are disabled and + * are reenabled after post_set() has been called. */ - static void prepare_set(void) __acquires(set_atomicity_lock) { unsigned long cr0; - /* Note that this is not ideal, since the cache is only flushed/disabled - for this CPU while the MTRRs are changed, but changing this requires - more invasive changes to the way the kernel boots */ + /* + * Note that this is not ideal + * since the cache is only flushed/disabled for this CPU while the + * MTRRs are changed, but changing this requires more invasive + * changes to the way the kernel boots + */ spin_lock(&set_atomicity_lock); - /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ + /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. 
*/ cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); wbinvd(); - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if (cpu_has_pge) { cr4 = read_cr4(); write_cr4(cr4 & ~X86_CR4_PGE); } @@ -582,26 +606,26 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ __flush_tlb(); - /* Save MTRR state */ + /* Save MTRR state */ rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); - /* Disable MTRRs, and set the default type to uncached */ + /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); } static void post_set(void) __releases(set_atomicity_lock) { - /* Flush TLBs (no need to flush caches - they are disabled) */ + /* Flush TLBs (no need to flush caches - they are disabled) */ __flush_tlb(); /* Intel (P6) standard MTRRs */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); - - /* Enable caches */ + + /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); - /* Restore value of CR4 */ - if ( cpu_has_pge ) + /* Restore value of CR4 */ + if (cpu_has_pge) write_cr4(cr4); spin_unlock(&set_atomicity_lock); } @@ -623,24 +647,27 @@ static void generic_set_all(void) post_set(); local_irq_restore(flags); - /* Use the atomic bitops to update the global mask */ + /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof mask * 8; ++count) { if (mask & 0x01) set_bit(count, &smp_changes_mask); mask >>= 1; } - + } +/** + * generic_set_mtrr - set variable MTRR register on the local CPU. + * + * @reg: The register to set. + * @base: The base address of the region. + * @size: The size of the region. If this is 0 the region is disabled. + * @type: The type of the region. + * + * Returns nothing. + */ static void generic_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) -/* [SUMMARY] Set variable MTRR register on the local CPU. - The register to set. - The base address of the region. - The size of the region. If this is 0 the region is disabled. - The type of the region. - [RETURNS] Nothing. -*/ { unsigned long flags; struct mtrr_var_range *vr; @@ -651,8 +678,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, prepare_set(); if (size == 0) { - /* The invalid bit is kept in the mask, so we simply clear the - relevant mask register to disable a range. */ + /* + * The invalid bit is kept in the mask, so we simply + * clear the relevant mask register to disable a range. 
+ */ mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); memset(vr, 0, sizeof(struct mtrr_var_range)); } else { @@ -669,46 +698,50 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, local_irq_restore(flags); } -int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +int generic_validate_add_page(unsigned long base, unsigned long size, + unsigned int type) { unsigned long lbase, last; - /* For Intel PPro stepping <= 7, must be 4 MiB aligned - and not touch 0x70000000->0x7003FFFF */ + /* + * For Intel PPro stepping <= 7 + * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF + */ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; } if (!(base + size < 0x70000 || base > 0x7003F) && (type == MTRR_TYPE_WRCOMB || type == MTRR_TYPE_WRBACK)) { - printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); + pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); return -EINVAL; } } - /* Check upper bits of base and last are equal and lower bits are 0 - for base and 1 for last */ + /* + * Check upper bits of base and last are equal and lower bits are 0 + * for base and 1 for last + */ last = base + size - 1; for (lbase = base; !(lbase & 1) && (last & 1); - lbase = lbase >> 1, last = last >> 1) ; + lbase = lbase >> 1, last = last >> 1) + ; if (lbase != last) { - printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", - base, size); + pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); return -EINVAL; } return 0; } - static int generic_have_wrcomb(void) { unsigned long config, dummy; rdmsr(MSR_MTRRcap, config, dummy); - return (config & (1 << 10)); + return config & (1 << 10); } int positive_have_wrcomb(void) @@ -716,14 +749,15 @@ int positive_have_wrcomb(void) return 1; } -/* generic structure... +/* + * Generic structure... */ struct mtrr_ops generic_mtrr_ops = { - .use_intel_if = 1, - .set_all = generic_set_all, - .get = generic_get_mtrr, - .get_free_region = generic_get_free_region, - .set = generic_set_mtrr, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = generic_have_wrcomb, + .use_intel_if = 1, + .set_all = generic_set_all, + .get = generic_get_mtrr, + .get_free_region = generic_get_free_region, + .set = generic_set_mtrr, + .validate_add_page = generic_validate_add_page, + .have_wrcomb = generic_have_wrcomb, }; -- cgit v1.1 From 26dc67eda19beafb7e5ef2770cec5b3ee5995a8e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:53:40 +0530 Subject: x86: Clean up mtrr/if.c Fix: WARNING: Use #include instead of ERROR: trailing whitespace X 7 ERROR: trailing statements should be on next line X 3 WARNING: line over 80 characters X 5 ERROR: space required before the open parenthesis '(' arch/x86/kernel/cpu/mtrr/if.o: text data bss dec hex filename 2239 4 0 2243 8c3 if.o.before 2239 4 0 2243 8c3 if.o.after md5: 78d1f2aa4843ec6509c18e2dee54bc7f if.o.before.asm 78d1f2aa4843ec6509c18e2dee54bc7f if.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ More cleanups to make the code more consistent. 
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 135 ++++++++++++++++++++++++------------------ 1 file changed, 76 insertions(+), 59 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index fb73a52..08b6ea4 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -1,27 +1,28 @@ -#include -#include #include -#include -#include #include -#include +#include +#include +#include +#include +#include #define LINE_SIZE 80 #include + #include "mtrr.h" #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) static const char *const mtrr_strings[MTRR_NUM_TYPES] = { - "uncachable", /* 0 */ - "write-combining", /* 1 */ - "?", /* 2 */ - "?", /* 3 */ - "write-through", /* 4 */ - "write-protect", /* 5 */ - "write-back", /* 6 */ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ }; const char *mtrr_attrib_to_str(int x) @@ -35,8 +36,8 @@ static int mtrr_file_add(unsigned long base, unsigned long size, unsigned int type, bool increment, struct file *file, int page) { + unsigned int *fcount = FILE_FCOUNT(file); int reg, max; - unsigned int *fcount = FILE_FCOUNT(file); max = num_var_ranges; if (fcount == NULL) { @@ -61,8 +62,8 @@ static int mtrr_file_del(unsigned long base, unsigned long size, struct file *file, int page) { - int reg; unsigned int *fcount = FILE_FCOUNT(file); + int reg; if (!page) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) @@ -81,13 +82,14 @@ mtrr_file_del(unsigned long base, unsigned long size, return reg; } -/* RED-PEN: seq_file can seek now. this is ignored. */ +/* + * seq_file can seek but we ignore it. + * + * Format of control line: + * "base=%Lx size=%Lx type=%s" or "disable=%d" + */ static ssize_t mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) -/* Format of control line: - "base=%Lx size=%Lx type=%s" OR: - "disable=%d" -*/ { int i, err; unsigned long reg; @@ -100,15 +102,18 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EPERM; if (!len) return -EINVAL; + memset(line, 0, LINE_SIZE); if (len > LINE_SIZE) len = LINE_SIZE; if (copy_from_user(line, buf, len - 1)) return -EFAULT; + linelen = strlen(line); ptr = line + linelen - 1; if (linelen && *ptr == '\n') *ptr = '\0'; + if (!strncmp(line, "disable=", 8)) { reg = simple_strtoul(line + 8, &ptr, 0); err = mtrr_del_page(reg, 0, 0); @@ -116,28 +121,35 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return err; return len; } + if (strncmp(line, "base=", 5)) return -EINVAL; + base = simple_strtoull(line + 5, &ptr, 0); - for (; isspace(*ptr); ++ptr) ; + for (; isspace(*ptr); ++ptr) + ; + if (strncmp(ptr, "size=", 5)) return -EINVAL; + size = simple_strtoull(ptr + 5, &ptr, 0); if ((base & 0xfff) || (size & 0xfff)) return -EINVAL; - for (; isspace(*ptr); ++ptr) ; + for (; isspace(*ptr); ++ptr) + ; + if (strncmp(ptr, "type=", 5)) return -EINVAL; ptr += 5; - for (; isspace(*ptr); ++ptr) ; + for (; isspace(*ptr); ++ptr) + ; + for (i = 0; i < MTRR_NUM_TYPES; ++i) { if (strcmp(ptr, mtrr_strings[i])) continue; base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; - err = - mtrr_add_page((unsigned long) base, (unsigned long) size, i, - true); + err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); if (err < 0) return err; return len; @@ -181,7 +193,9 @@ mtrr_ioctl(struct file *file, unsigned 
int cmd, unsigned long __arg) case MTRRIOC32_SET_PAGE_ENTRY: case MTRRIOC32_DEL_PAGE_ENTRY: case MTRRIOC32_KILL_PAGE_ENTRY: { - struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; + struct mtrr_sentry32 __user *s32; + + s32 = (struct mtrr_sentry32 __user *)__arg; err = get_user(sentry.base, &s32->base); err |= get_user(sentry.size, &s32->size); err |= get_user(sentry.type, &s32->type); @@ -191,7 +205,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) } case MTRRIOC32_GET_ENTRY: case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + struct mtrr_gentry32 __user *g32; + + g32 = (struct mtrr_gentry32 __user *)__arg; err = get_user(gentry.regnum, &g32->regnum); err |= get_user(gentry.base, &g32->base); err |= get_user(gentry.size, &g32->size); @@ -314,7 +330,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) if (err) return err; - switch(cmd) { + switch (cmd) { case MTRRIOC_GET_ENTRY: case MTRRIOC_GET_PAGE_ENTRY: if (copy_to_user(arg, &gentry, sizeof gentry)) @@ -323,7 +339,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_GET_ENTRY: case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + struct mtrr_gentry32 __user *g32; + + g32 = (struct mtrr_gentry32 __user *)__arg; err = put_user(gentry.base, &g32->base); err |= put_user(gentry.size, &g32->size); err |= put_user(gentry.regnum, &g32->regnum); @@ -335,11 +353,10 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) return err; } -static int -mtrr_close(struct inode *ino, struct file *file) +static int mtrr_close(struct inode *ino, struct file *file) { - int i, max; unsigned int *fcount = FILE_FCOUNT(file); + int i, max; if (fcount != NULL) { max = num_var_ranges; @@ -359,22 +376,22 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset); static int mtrr_open(struct inode *inode, struct file *file) { - if (!mtrr_if) + if (!mtrr_if) return -EIO; - if (!mtrr_if->get) - return -ENXIO; + if (!mtrr_if->get) + return -ENXIO; return single_open(file, mtrr_seq_show, NULL); } static const struct file_operations mtrr_fops = { - .owner = THIS_MODULE, - .open = mtrr_open, - .read = seq_read, - .llseek = seq_lseek, - .write = mtrr_write, - .unlocked_ioctl = mtrr_ioctl, - .compat_ioctl = mtrr_ioctl, - .release = mtrr_close, + .owner = THIS_MODULE, + .open = mtrr_open, + .read = seq_read, + .llseek = seq_lseek, + .write = mtrr_write, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, + .release = mtrr_close, }; static int mtrr_seq_show(struct seq_file *seq, void *offset) @@ -388,23 +405,24 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) max = num_var_ranges; for (i = 0; i < max; i++) { mtrr_if->get(i, &base, &size, &type); - if (size == 0) + if (size == 0) { mtrr_usage_table[i] = 0; - else { - if (size < (0x100000 >> PAGE_SHIFT)) { - /* less than 1MB */ - factor = 'K'; - size <<= PAGE_SHIFT - 10; - } else { - factor = 'M'; - size >>= 20 - PAGE_SHIFT; - } - /* RED-PEN: base can be > 32bit */ - len += seq_printf(seq, - "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", - i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_usage_table[i], mtrr_attrib_to_str(type)); + continue; } + if (size < (0x100000 >> PAGE_SHIFT)) { + /* less than 1MB */ + factor = 'K'; + size <<= PAGE_SHIFT - 10; + } else { + factor = 'M'; + size >>= 20 - PAGE_SHIFT; + } + /* Base can 
be > 32bit */ + len += seq_printf(seq, "reg%02i: base=0x%06lx000 " + "(%5luMB), size=%5lu%cB, count=%d: %s\n", + i, base, base >> (20 - PAGE_SHIFT), size, + factor, mtrr_usage_table[i], + mtrr_attrib_to_str(type)); } return 0; } @@ -422,6 +440,5 @@ static int __init mtrr_if_init(void) proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); return 0; } - arch_initcall(mtrr_if_init); #endif /* CONFIG_PROC_FS */ -- cgit v1.1 From 3ec8dbcb09bb6df83993ca03e88cb85e3aaa8edb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:54:16 +0530 Subject: x86: Clean up mtrr/mtrr.h Fix: ERROR: do not use C99 // comments ERROR: "foo * bar" should be "foo *bar" X 2 Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ More tidyups ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/mtrr.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 7538b76..a501dee 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -1,5 +1,5 @@ /* - * local mtrr defines. + * local MTRR defines. */ #include @@ -14,13 +14,12 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { u32 vendor; u32 use_intel_if; -// void (*init)(void); void (*set)(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); void (*set_all)(void); void (*get)(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type); + unsigned long *size, mtrr_type *type); int (*get_free_region)(unsigned long base, unsigned long size, int replace_reg); int (*validate_add_page)(unsigned long base, unsigned long size, @@ -39,11 +38,11 @@ extern int positive_have_wrcomb(void); /* library functions for processor-specific routines */ struct set_mtrr_context { - unsigned long flags; - unsigned long cr4val; - u32 deftype_lo; - u32 deftype_hi; - u32 ccr3; + unsigned long flags; + unsigned long cr4val; + u32 deftype_lo; + u32 deftype_hi; + u32 ccr3; }; void set_mtrr_done(struct set_mtrr_context *ctxt); @@ -54,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); void get_mtrr_state(void); -extern void set_mtrr_ops(struct mtrr_ops * ops); +extern void set_mtrr_ops(struct mtrr_ops *ops); extern u64 size_or_mask, size_and_mask; -extern struct mtrr_ops * mtrr_if; +extern struct mtrr_ops *mtrr_if; #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) -- cgit v1.1 From 09b22c85d59dd935fdfa71655a443785e3f99c18 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:54:53 +0530 Subject: x86: Clean up mtrr/state.c Fix: WARNING: Use #include instead of WARNING: line over 80 characters X 4 arch/x86/kernel/cpu/mtrr/state.o: text data bss dec hex filename 864 0 0 864 360 state.o.before 864 0 0 864 360 state.o.after md5: c5c4364b9aeac74d70111e1e49667a2c state.o.before.asm c5c4364b9aeac74d70111e1e49667a2c state.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ More cleanups ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/state.c | 68 +++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git 
a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 1f5fb15..dfc80b4 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -1,24 +1,25 @@ -#include #include -#include -#include -#include +#include +#include + #include #include -#include "mtrr.h" +#include +#include +#include "mtrr.h" -/* Put the processor into a state where MTRRs can be safely set */ +/* Put the processor into a state where MTRRs can be safely set */ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) { unsigned int cr0; - /* Disable interrupts locally */ + /* Disable interrupts locally */ local_irq_save(ctxt->flags); if (use_intel() || is_cpu(CYRIX)) { - /* Save value of CR4 and clear Page Global Enable (bit 7) */ + /* Save value of CR4 and clear Page Global Enable (bit 7) */ if (cpu_has_pge) { ctxt->cr4val = read_cr4(); write_cr4(ctxt->cr4val & ~X86_CR4_PGE); @@ -33,50 +34,61 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) write_cr0(cr0); wbinvd(); - if (use_intel()) - /* Save MTRR state */ + if (use_intel()) { + /* Save MTRR state */ rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); - else - /* Cyrix ARRs - everything else were excluded at the top */ + } else { + /* + * Cyrix ARRs - + * everything else were excluded at the top + */ ctxt->ccr3 = getCx86(CX86_CCR3); + } } } void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) { - if (use_intel()) - /* Disable MTRRs, and set the default type to uncached */ + if (use_intel()) { + /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); - else if (is_cpu(CYRIX)) - /* Cyrix ARRs - everything else were excluded at the top */ - setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); + } else { + if (is_cpu(CYRIX)) { + /* Cyrix ARRs - everything else were excluded at the top */ + setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); + } + } } -/* Restore the processor after a set_mtrr_prepare */ +/* Restore the processor after a set_mtrr_prepare */ void set_mtrr_done(struct set_mtrr_context *ctxt) { if (use_intel() || is_cpu(CYRIX)) { - /* Flush caches and TLBs */ + /* Flush caches and TLBs */ wbinvd(); - /* Restore MTRRdefType */ - if (use_intel()) + /* Restore MTRRdefType */ + if (use_intel()) { /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); - else - /* Cyrix ARRs - everything else was excluded at the top */ + mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, + ctxt->deftype_hi); + } else { + /* + * Cyrix ARRs - + * everything else was excluded at the top + */ setCx86(CX86_CCR3, ctxt->ccr3); + } - /* Enable caches */ + /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); - /* Restore value of CR4 */ + /* Restore value of CR4 */ if (cpu_has_pge) write_cr4(ctxt->cr4val); } - /* Re-enable interrupts locally (if enabled previously) */ + /* Re-enable interrupts locally (if enabled previously) */ local_irq_restore(ctxt->flags); } - -- cgit v1.1 From dbd51be026eaf84088fdee7fab9f38fa92eef26d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:56:28 +0530 Subject: x86: Clean up mtrr/main.c Fix following trivial style problems: ERROR: trailing whitespace X 25 WARNING: Use #include instead of WARNING: Use #include instead of ERROR: do not initialise externals to 0 or NULL X 2 ERROR: "foo * bar" should be "foo *bar" X 5 ERROR: do not use assignment in if condition X 2 WARNING: line over 80 characters X 8 ERROR: return is not a function, parentheses are not 
required WARNING: braces {} are not necessary for any arm of this statement ERROR: space required before the open parenthesis '(' X 2 ERROR: open brace '{' following function declarations go on the next line ERROR: space required after that ',' (ctx:VxV) X 8 ERROR: space required before the open parenthesis '(' X 3 ERROR: else should follow close brace '}' WARNING: space prohibited between function name and open parenthesis '(' WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable X 2 Also use pr_debug and pr_warning where possible. total: 50 errors, 14 warnings arch/x86/kernel/cpu/mtrr/main.o: text data bss dec hex filename 3668 116 4156 7940 1f04 main.o.before 3668 116 4156 7940 1f04 main.o.after md5: e01af2fd28deef77c8d01e71acfbd365 main.o.before.asm e01af2fd28deef77c8d01e71acfbd365 main.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> Cc: Avi Kivity # Avi, please have a look at the kvm_para.h bit [ More cleanups ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 455 +++++++++++++++++++++------------------- 1 file changed, 242 insertions(+), 213 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8fc248b..7af0f88 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -25,43 +25,48 @@ Operating System Writer's Guide" (Intel document number 242692), section 11.11.7 - This was cleaned and made readable by Patrick Mochel - on 6-7 March 2002. - Source: Intel Architecture Software Developers Manual, Volume 3: + This was cleaned and made readable by Patrick Mochel + on 6-7 March 2002. + Source: Intel Architecture Software Developers Manual, Volume 3: System Programming Guide; Section 9.11. (1997 edition - PPro). */ +#define DEBUG + +#include /* FIXME: kvm_para.h needs this */ + +#include +#include #include +#include #include +#include +#include #include #include -#include -#include -#include +#include #include #include -#include -#include #include -#include + #include "mtrr.h" -u32 num_var_ranges = 0; +u32 num_var_ranges; unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; -static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; +static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; -struct mtrr_ops * mtrr_if = NULL; +struct mtrr_ops *mtrr_if; static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); -void set_mtrr_ops(struct mtrr_ops * ops) +void set_mtrr_ops(struct mtrr_ops *ops) { if (ops->vendor && ops->vendor < X86_VENDOR_NUM) mtrr_ops[ops->vendor] = ops; @@ -72,30 +77,36 @@ static int have_wrcomb(void) { struct pci_dev *dev; u8 rev; - - if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { - /* ServerWorks LE chipsets < rev 6 have problems with write-combining - Don't allow it and leave room for other chipsets to be tagged */ + + dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); + if (dev != NULL) { + /* + * ServerWorks LE chipsets < rev 6 have problems with + * write-combining. Don't allow it and leave room for other + * chipsets to be tagged + */ if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); if (rev <= 5) { - printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. 
Write-combining disabled.\n"); + pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); pci_dev_put(dev); return 0; } } - /* Intel 450NX errata # 23. Non ascending cacheline evictions to - write combining memory may resulting in data corruption */ + /* + * Intel 450NX errata # 23. Non ascending cacheline evictions to + * write combining memory may resulting in data corruption + */ if (dev->vendor == PCI_VENDOR_ID_INTEL && dev->device == PCI_DEVICE_ID_INTEL_82451NX) { - printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); + pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); pci_dev_put(dev); return 0; } pci_dev_put(dev); - } - return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); + } + return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; } /* This function returns the number of variable MTRRs */ @@ -103,12 +114,13 @@ static void __init set_num_var_ranges(void) { unsigned long config = 0, dummy; - if (use_intel()) { + if (use_intel()) rdmsr(MSR_MTRRcap, config, dummy); - } else if (is_cpu(AMD)) + else if (is_cpu(AMD)) config = 2; else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) config = 8; + num_var_ranges = config & 0xff; } @@ -130,10 +142,12 @@ struct set_mtrr_data { mtrr_type smp_type; }; +/** + * ipi_handler - Synchronisation handler. Executed by "other" CPUs. + * + * Returns nothing. + */ static void ipi_handler(void *info) -/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. - [RETURNS] Nothing. -*/ { #ifdef CONFIG_SMP struct set_mtrr_data *data = info; @@ -142,18 +156,19 @@ static void ipi_handler(void *info) local_irq_save(flags); atomic_dec(&data->count); - while(!atomic_read(&data->gate)) + while (!atomic_read(&data->gate)) cpu_relax(); /* The master has cleared me to execute */ - if (data->smp_reg != ~0U) - mtrr_if->set(data->smp_reg, data->smp_base, + if (data->smp_reg != ~0U) { + mtrr_if->set(data->smp_reg, data->smp_base, data->smp_size, data->smp_type); - else + } else { mtrr_if->set_all(); + } atomic_dec(&data->count); - while(atomic_read(&data->gate)) + while (atomic_read(&data->gate)) cpu_relax(); atomic_dec(&data->count); @@ -161,7 +176,8 @@ static void ipi_handler(void *info) #endif } -static inline int types_compatible(mtrr_type type1, mtrr_type type2) { +static inline int types_compatible(mtrr_type type1, mtrr_type type2) +{ return type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE || (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || @@ -176,10 +192,10 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { * @type: mtrr type * * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: - * + * * 1. Send IPI to do the following: * 2. Disable Interrupts - * 3. Wait for all procs to do so + * 3. Wait for all procs to do so * 4. Enter no-fill cache mode * 5. Flush caches * 6. Clear PGE bit @@ -189,26 +205,27 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { * 10. Enable all range registers * 11. Flush all TLBs and caches again * 12. Enter normal cache mode and reenable caching - * 13. Set PGE + * 13. Set PGE * 14. Wait for buddies to catch up * 15. Enable interrupts. - * + * * What does that mean for us? Well, first we set data.count to the number * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait * until it hits 0 and proceed. We set the data.gate flag and reset data.count. - * Meanwhile, they are waiting for that flag to be set. 
Once it's set, each - * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it - * differently, so we call mtrr_if->set() callback and let them take care of it. - * When they're done, they again decrement data->count and wait for data.gate to - * be reset. - * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. + * Meanwhile, they are waiting for that flag to be set. Once it's set, each + * CPU goes through the transition of updating MTRRs. + * The CPU vendors may each do it differently, + * so we call mtrr_if->set() callback and let them take care of it. + * When they're done, they again decrement data->count and wait for data.gate + * to be reset. + * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag * Everyone then enables interrupts and we all continue on. * * Note that the mechanism is the same for UP systems, too; all the SMP stuff * becomes nops. */ -static void set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) +static void +set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) { struct set_mtrr_data data; unsigned long flags; @@ -218,121 +235,122 @@ static void set_mtrr(unsigned int reg, unsigned long base, data.smp_size = size; data.smp_type = type; atomic_set(&data.count, num_booting_cpus() - 1); - /* make sure data.count is visible before unleashing other CPUs */ + + /* Make sure data.count is visible before unleashing other CPUs */ smp_wmb(); - atomic_set(&data.gate,0); + atomic_set(&data.gate, 0); - /* Start the ball rolling on other CPUs */ + /* Start the ball rolling on other CPUs */ if (smp_call_function(ipi_handler, &data, 0) != 0) panic("mtrr: timed out waiting for other CPUs\n"); local_irq_save(flags); - while(atomic_read(&data.count)) + while (atomic_read(&data.count)) cpu_relax(); - /* ok, reset count and toggle gate */ + /* Ok, reset count and toggle gate */ atomic_set(&data.count, num_booting_cpus() - 1); smp_wmb(); - atomic_set(&data.gate,1); + atomic_set(&data.gate, 1); - /* do our MTRR business */ + /* Do our MTRR business */ - /* HACK! + /* + * HACK! * We use this same function to initialize the mtrrs on boot. * The state of the boot cpu's mtrrs has been saved, and we want - * to replicate across all the APs. + * to replicate across all the APs. * If we're doing that @reg is set to something special... */ - if (reg != ~0U) - mtrr_if->set(reg,base,size,type); + if (reg != ~0U) + mtrr_if->set(reg, base, size, type); - /* wait for the others */ - while(atomic_read(&data.count)) + /* Wait for the others */ + while (atomic_read(&data.count)) cpu_relax(); atomic_set(&data.count, num_booting_cpus() - 1); smp_wmb(); - atomic_set(&data.gate,0); + atomic_set(&data.gate, 0); /* * Wait here for everyone to have seen the gate change * So we're the last ones to touch 'data' */ - while(atomic_read(&data.count)) + while (atomic_read(&data.count)) cpu_relax(); local_irq_restore(flags); } /** - * mtrr_add_page - Add a memory type region - * @base: Physical base address of region in pages (in units of 4 kB!) - * @size: Physical size of region in pages (4 kB) - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region + * mtrr_add_page - Add a memory type region + * @base: Physical base address of region in pages (in units of 4 kB!) 
+ * @size: Physical size of region in pages (4 kB) + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. + * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. * - * The available types are + * The available types are * - * %MTRR_TYPE_UNCACHABLE - No caching + * %MTRR_TYPE_UNCACHABLE - No caching * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. 
*/ - -int mtrr_add_page(unsigned long base, unsigned long size, +int mtrr_add_page(unsigned long base, unsigned long size, unsigned int type, bool increment) { + unsigned long lbase, lsize; int i, replace, error; mtrr_type ltype; - unsigned long lbase, lsize; if (!mtrr_if) return -ENXIO; - - if ((error = mtrr_if->validate_add_page(base,size,type))) + + error = mtrr_if->validate_add_page(base, size, type); + if (error) return error; if (type >= MTRR_NUM_TYPES) { - printk(KERN_WARNING "mtrr: type: %u invalid\n", type); + pr_warning("mtrr: type: %u invalid\n", type); return -EINVAL; } - /* If the type is WC, check that this processor supports it */ + /* If the type is WC, check that this processor supports it */ if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - printk(KERN_WARNING - "mtrr: your processor doesn't support write-combining\n"); + pr_warning("mtrr: your processor doesn't support write-combining\n"); return -ENOSYS; } if (!size) { - printk(KERN_WARNING "mtrr: zero sized request\n"); + pr_warning("mtrr: zero sized request\n"); return -EINVAL; } if (base & size_or_mask || size & size_or_mask) { - printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); + pr_warning("mtrr: base or size exceeds the MTRR width\n"); return -EINVAL; } @@ -341,36 +359,40 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* No CPU hotplug when we change MTRR entries */ get_online_cpus(); - /* Search for existing MTRR */ + + /* Search for existing MTRR */ mutex_lock(&mtrr_mutex); for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, <ype); - if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) + if (!lsize || base > lbase + lsize - 1 || + base + size - 1 < lbase) continue; - /* At this point we know there is some kind of overlap/enclosure */ + /* + * At this point we know there is some kind of + * overlap/enclosure + */ if (base < lbase || base + size - 1 > lbase + lsize - 1) { - if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { + if (base <= lbase && + base + size - 1 >= lbase + lsize - 1) { /* New region encloses an existing region */ if (type == ltype) { replace = replace == -1 ? 
i : -2; continue; - } - else if (types_compatible(type, ltype)) + } else if (types_compatible(type, ltype)) continue; } - printk(KERN_WARNING - "mtrr: 0x%lx000,0x%lx000 overlaps existing" - " 0x%lx000,0x%lx000\n", base, size, lbase, - lsize); + pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing" + " 0x%lx000,0x%lx000\n", base, size, lbase, + lsize); goto out; } - /* New region is enclosed by an existing region */ + /* New region is enclosed by an existing region */ if (ltype != type) { if (types_compatible(type, ltype)) continue; - printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", - base, size, mtrr_attrib_to_str(ltype), - mtrr_attrib_to_str(type)); + pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + base, size, mtrr_attrib_to_str(ltype), + mtrr_attrib_to_str(type)); goto out; } if (increment) @@ -378,7 +400,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; goto out; } - /* Search for an empty MTRR */ + /* Search for an empty MTRR */ i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { set_mtrr(i, base, size, type); @@ -393,8 +415,9 @@ int mtrr_add_page(unsigned long base, unsigned long size, mtrr_usage_table[replace] = 0; } } - } else - printk(KERN_INFO "mtrr: no more MTRRs available\n"); + } else { + pr_info("mtrr: no more MTRRs available\n"); + } error = i; out: mutex_unlock(&mtrr_mutex); @@ -405,10 +428,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, static int mtrr_check(unsigned long base, unsigned long size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING - "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG - "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + pr_warning("mtrr: size and base must be multiples of 4 kiB\n"); + pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); dump_stack(); return -1; } @@ -416,66 +437,64 @@ static int mtrr_check(unsigned long base, unsigned long size) } /** - * mtrr_add - Add a memory type region - * @base: Physical base address of region - * @size: Physical size of region - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region + * mtrr_add - Add a memory type region + * @base: Physical base address of region + * @size: Physical size of region + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. 
+ * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. * - * The available types are + * The available types are * - * %MTRR_TYPE_UNCACHABLE - No caching + * %MTRR_TYPE_UNCACHABLE - No caching * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. */ - -int -mtrr_add(unsigned long base, unsigned long size, unsigned int type, - bool increment) +int mtrr_add(unsigned long base, unsigned long size, unsigned int type, + bool increment) { if (mtrr_check(base, size)) return -EINVAL; return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment); } +EXPORT_SYMBOL(mtrr_add); /** - * mtrr_del_page - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region + * mtrr_del_page - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. + * If register is supplied then base and size are ignored. This is + * how drivers should call it. * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. 
*/ - int mtrr_del_page(int reg, unsigned long base, unsigned long size) { int i, max; @@ -500,22 +519,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) } } if (reg < 0) { - printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, - size); + pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n", + base, size); goto out; } } if (reg >= max) { - printk(KERN_WARNING "mtrr: register: %d too big\n", reg); + pr_warning("mtrr: register: %d too big\n", reg); goto out; } mtrr_if->get(reg, &lbase, &lsize, <ype); if (lsize < 1) { - printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); + pr_warning("mtrr: MTRR %d not used\n", reg); goto out; } if (mtrr_usage_table[reg] < 1) { - printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); + pr_warning("mtrr: reg: %d has count=0\n", reg); goto out; } if (--mtrr_usage_table[reg] < 1) @@ -526,33 +545,31 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) put_online_cpus(); return error; } + /** - * mtrr_del - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region + * mtrr_del - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. + * If register is supplied then base and size are ignored. This is + * how drivers should call it. * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. */ - -int -mtrr_del(int reg, unsigned long base, unsigned long size) +int mtrr_del(int reg, unsigned long base, unsigned long size) { if (mtrr_check(base, size)) return -EINVAL; return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } - -EXPORT_SYMBOL(mtrr_add); EXPORT_SYMBOL(mtrr_del); -/* HACK ALERT! +/* + * HACK ALERT! * These should be called implicitly, but we can't yet until all the initcall * stuff is done... */ @@ -576,29 +593,28 @@ struct mtrr_value { static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; -static int mtrr_save(struct sys_device * sysdev, pm_message_t state) +static int mtrr_save(struct sys_device *sysdev, pm_message_t state) { int i; for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, - &mtrr_value[i].lbase, - &mtrr_value[i].lsize, - &mtrr_value[i].ltype); + mtrr_if->get(i, &mtrr_value[i].lbase, + &mtrr_value[i].lsize, + &mtrr_value[i].ltype); } return 0; } -static int mtrr_restore(struct sys_device * sysdev) +static int mtrr_restore(struct sys_device *sysdev) { int i; for (i = 0; i < num_var_ranges; i++) { - if (mtrr_value[i].lsize) - set_mtrr(i, - mtrr_value[i].lbase, - mtrr_value[i].lsize, - mtrr_value[i].ltype); + if (mtrr_value[i].lsize) { + set_mtrr(i, mtrr_value[i].lbase, + mtrr_value[i].lsize, + mtrr_value[i].ltype); + } } return 0; } @@ -615,26 +631,29 @@ int __initdata changed_by_mtrr_cleanup; /** * mtrr_bp_init - initialize mtrrs on the boot CPU * - * This needs to be called early; before any of the other CPUs are + * This needs to be called early; before any of the other CPUs are * initialized (i.e. before smp_init()). 
- * + * */ void __init mtrr_bp_init(void) { u32 phys_addr; + init_ifs(); phys_addr = 32; if (cpu_has_mtrr) { mtrr_if = &generic_mtrr_ops; - size_or_mask = 0xff000000; /* 36 bits */ + size_or_mask = 0xff000000; /* 36 bits */ size_and_mask = 0x00f00000; phys_addr = 36; - /* This is an AMD specific MSR, but we assume(hope?) that - Intel will implement it to when they extend the address - bus of the Xeon. */ + /* + * This is an AMD specific MSR, but we assume(hope?) that + * Intel will implement it to when they extend the address + * bus of the Xeon. + */ if (cpuid_eax(0x80000000) >= 0x80000008) { phys_addr = cpuid_eax(0x80000008) & 0xff; /* CPUID workaround for Intel 0F33/0F34 CPU */ @@ -649,9 +668,11 @@ void __init mtrr_bp_init(void) size_and_mask = ~size_or_mask & 0xfffff00000ULL; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && boot_cpu_data.x86 == 6) { - /* VIA C* family have Intel style MTRRs, but - don't support PAE */ - size_or_mask = 0xfff00000; /* 32 bits */ + /* + * VIA C* family have Intel style MTRRs, + * but don't support PAE + */ + size_or_mask = 0xfff00000; /* 32 bits */ size_and_mask = 0; phys_addr = 32; } @@ -694,7 +715,6 @@ void __init mtrr_bp_init(void) changed_by_mtrr_cleanup = 1; mtrr_if->set_all(); } - } } } @@ -706,12 +726,17 @@ void mtrr_ap_init(void) if (!mtrr_if || !use_intel()) return; /* - * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, - * but this routine will be called in cpu boot time, holding the lock - * breaks it. This routine is called in two cases: 1.very earily time - * of software resume, when there absolutely isn't mtrr entry changes; - * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to - * prevent mtrr entry changes + * Ideally we should hold mtrr_mutex here to avoid mtrr entries + * changed, but this routine will be called in cpu boot time, + * holding the lock breaks it. + * + * This routine is called in two cases: + * + * 1. very earily time of software resume, when there absolutely + * isn't mtrr entry changes; + * + * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug + * lock to prevent mtrr entry changes */ local_irq_save(flags); @@ -732,19 +757,23 @@ static int __init mtrr_init_finialize(void) { if (!mtrr_if) return 0; + if (use_intel()) { if (!changed_by_mtrr_cleanup) mtrr_state_warn(); - } else { - /* The CPUs haven't MTRR and seem to not support SMP. They have - * specific drivers, we use a tricky method to support - * suspend/resume for them. - * TBD: is there any system with such CPU which supports - * suspend/resume? if no, we should remove the code. - */ - sysdev_driver_register(&cpu_sysdev_class, - &mtrr_sysdev_driver); + return 0; } + + /* + * The CPU has no MTRR and seems to not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * + * TBD: is there any system with such CPU which supports + * suspend/resume? If no, we should remove the code. + */ + sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver); + return 0; } subsys_initcall(mtrr_init_finialize); -- cgit v1.1 From e3d0e69268dffb9676bf0800a60fb3573a723480 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Jul 2009 09:44:11 +0200 Subject: x86: Further clean up of mtrr/generic.c Yinghai noticed that i defined BIOS_BUG_MSG but added no usage for it. 
The usage is to clean up this turd in generic.c: printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " "contains strange UC entry under 1M, check " "with your system vendor!\n", i); Breaking printk lines in the middle looks ugly, is hard to read and breaks 'git grep'. Use the BIOS_BUG_MSG instead. Also complete the moving of structure definitions and variables to the top of the file. Reported-by: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 56 ++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index b8aba81..315738c 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -34,14 +34,37 @@ #include "mtrr.h" -/* Should be related to MTRR_VAR_RANGES nums */ -#define RANGE_NUM 256 - struct res_range { unsigned long start; unsigned long end; }; +struct var_mtrr_range_state { + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; +}; + +struct var_mtrr_state { + unsigned long range_startk; + unsigned long range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; +}; + +/* Should be related to MTRR_VAR_RANGES nums */ +#define RANGE_NUM 256 + +static struct res_range __initdata range[RANGE_NUM]; +static int __initdata nr_range; + +static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; + +static int __initdata debug_print; +#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) + + static int __init add_range(struct res_range *range, int nr_range, unsigned long start, unsigned long end) @@ -147,18 +170,6 @@ static int __init cmp_range(const void *x1, const void *x2) return start1 - start2; } -struct var_mtrr_range_state { - unsigned long base_pfn; - unsigned long size_pfn; - mtrr_type type; -}; - -static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; - -static int __initdata debug_print; -#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) - - #define BIOS_BUG_MSG KERN_WARNING \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" @@ -200,9 +211,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && (mtrr_state.enabled & 1)) { /* Var MTRR contains UC entry below 1M? 
Skip it: */ - printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " - "contains strange UC entry under 1M, check " - "with your system vendor!\n", i); + printk(BIOS_BUG_MSG, i); if (base + size <= (1<<(20-PAGE_SHIFT))) continue; size -= (1<<(20-PAGE_SHIFT)) - base; @@ -244,9 +253,6 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, return nr_range; } -static struct res_range __initdata range[RANGE_NUM]; -static int __initdata nr_range; - #ifdef CONFIG_MTRR_SANITIZER static unsigned long __init sum_ranges(struct res_range *range, int nr_range) @@ -284,14 +290,6 @@ static int __init mtrr_cleanup_debug_setup(char *str) } early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); -struct var_mtrr_state { - unsigned long range_startk; - unsigned long range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - unsigned int reg; -}; - static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, unsigned char type, unsigned int address_bits) -- cgit v1.1 From 023bf6f1b8bf58dc4da7f0dc1cf4787b0d5297c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2009 11:27:40 +0900 Subject: linker script: unify usage of discard definition Discarded sections in different archs share some commonality but have considerable differences. This led to each arch's linker script implementing its own /DISCARD/ definition, which makes maintenance tedious and adding new entries error-prone. This patch makes all linker scripts move discard definitions to the end of the linker script and use the common DISCARDS macro. As ld uses the first matching section definition, archs can include default discarded sections by including them earlier in the linker script. ia64 is notable because it first throws away some ia64 specific subsections and then includes the rest of the sections into the final image, so those sections must be discarded before the inclusion. defconfig compile tested for x86, x86-64, powerpc, powerpc64, ia64, alpha, sparc, sparc64 and s390. Michal Simek tested microblaze. Signed-off-by: Tejun Heo Acked-by: Paul Mundt Acked-by: Mike Frysinger Tested-by: Michal Simek Cc: linux-arch@vger.kernel.org Cc: Michal Simek Cc: microblaze-uclinux@itee.uq.edu.au Cc: Sam Ravnborg Cc: Tony Luck --- arch/x86/kernel/vmlinux.lds.S | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 367e878..b600c84 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -387,15 +387,12 @@ SECTIONS _end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.eh_frame) } } -- cgit v1.1 From c31d96338a6041520ba5f1b6a4a5012ef00686b3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jul 2009 00:31:37 +0200 Subject: x86: mce: Make CONFIG_X86_ANCIENT_MCE dependent on CONFIG_X86_MCE Add a missing dependency for ANCIENT_MCE. It didn't matter in practice because the ANCIENT code wasn't compiled without X86_MCE, but it's better to express that clearly in Kconfig. Signed-off-by: Andi Kleen Signed-off-by: H.
Peter Anvin --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 356d2ec..5962b87 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -823,7 +823,7 @@ config X86_MCE_AMD config X86_ANCIENT_MCE def_bool n - depends on X86_32 + depends on X86_32 && X86_MCE prompt "Support for old Pentium 5 / WinChip machine checks" ---help--- Include support for machine check handling on old Pentium 5 or WinChip -- cgit v1.1 From bab9bc6583fe6c1660d6ed36dd14bbb4edfaf393 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jul 2009 00:31:38 +0200 Subject: x86: mce: Update X86_MCE description in x86/Kconfig - Clarify that this config controls thermal throttling reporting too - Clarify the types of errors reported by machine checks - Drop references to ancient CPUs. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5962b87..134a8c0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -774,20 +774,12 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS increased on these systems. config X86_MCE - bool "Machine Check Exception" + bool "Machine Check / overheating reporting" ---help--- - Machine Check Exception support allows the processor to notify the - kernel if it detects a problem (e.g. overheating, component failure). + Machine Check support allows the processor to notify the + kernel if it detects a problem (e.g. overheating, data corruption). The action the kernel takes depends on the severity of the problem, - ranging from a warning message on the console, to halting the machine. - Your processor must be a Pentium or newer to support this - check the - flags in /proc/cpuinfo for mce. Note that some older Pentium systems - have a design flaw which leads to false MCE events - hence MCE is - disabled on all P5 processors, unless explicitly enabled with "mce" - as a boot argument. Similarly, if MCE is built in and creates a - problem on some new non-standard machine, you can boot with "nomce" - to disable it. MCE support simply ignores non-MCE processors like - the 386 and 486, so nearly everyone can say Y here. + ranging from warning messages to halting the machine. config X86_OLD_MCE depends on X86_32 && X86_MCE -- cgit v1.1 From 5bb38adcb54cf7192b154368ad62982caa11ca0b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jul 2009 00:31:39 +0200 Subject: x86: mce: Remove old i386 machine check code As announced in feature-removal-schedule.txt, remove CONFIG_X86_OLD_MCE. This patch only removes code. The ancient machine check code for very old systems that are not supported by CONFIG_X86_NEW_MCE is still kept. Signed-off-by: Andi Kleen Signed-off-by: H.
Peter Anvin --- arch/x86/Kconfig | 35 +------ arch/x86/include/asm/mce.h | 11 --- arch/x86/kernel/cpu/mcheck/Makefile | 2 - arch/x86/kernel/cpu/mcheck/k7.c | 116 ----------------------- arch/x86/kernel/cpu/mcheck/mce.c | 47 ---------- arch/x86/kernel/cpu/mcheck/non-fatal.c | 94 ------------------- arch/x86/kernel/cpu/mcheck/p4.c | 163 --------------------------------- arch/x86/kernel/cpu/mcheck/p6.c | 127 ------------------------- 8 files changed, 2 insertions(+), 593 deletions(-) delete mode 100644 arch/x86/kernel/cpu/mcheck/k7.c delete mode 100644 arch/x86/kernel/cpu/mcheck/non-fatal.c delete mode 100644 arch/x86/kernel/cpu/mcheck/p4.c delete mode 100644 arch/x86/kernel/cpu/mcheck/p6.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 134a8c0..d986769 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -781,21 +781,10 @@ config X86_MCE The action the kernel takes depends on the severity of the problem, ranging from warning messages to halting the machine. -config X86_OLD_MCE - depends on X86_32 && X86_MCE - bool "Use legacy machine check code (will go away)" - default n - select X86_ANCIENT_MCE - ---help--- - Use the old i386 machine check code. This is merely intended for - testing in a transition period. Try this if you run into any machine - check related software problems, but report the problem to - linux-kernel. When in doubt say no. - config X86_NEW_MCE depends on X86_MCE bool - default y if (!X86_OLD_MCE && X86_32) || X86_64 + default y config X86_MCE_INTEL def_bool y @@ -835,29 +824,9 @@ config X86_MCE_INJECT If you don't know what a machine check is and you don't do kernel QA it is safe to say n. -config X86_MCE_NONFATAL - tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" - depends on X86_OLD_MCE - ---help--- - Enabling this feature starts a timer that triggers every 5 seconds which - will look at the machine check registers to see if anything happened. - Non-fatal problems automatically get corrected (but still logged). - Disable this if you don't want to see these messages. - Seeing the messages this option prints out may be indicative of dying - or out-of-spec (ie, overclocked) hardware. - This option only does something on certain CPUs. - (AMD Athlon/Duron and Intel Pentium 4) - -config X86_MCE_P4THERMAL - bool "check for P4 thermal throttling interrupt." - depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP) - ---help--- - Enabling this feature will cause a message to be printed when the P4 - enters thermal throttling. 
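For anyone carrying an old .config across this removal, the migration is roughly this mapping (assuming the common cases):

	# CONFIG_X86_OLD_MCE=y       -> gone; the unified MCE core is always used
	# CONFIG_X86_MCE_NONFATAL=m  -> gone; periodic polling is built into the core
	# CONFIG_X86_MCE_P4THERMAL=y -> covered by CONFIG_X86_MCE_INTEL=y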
- config X86_THERMAL_VECTOR def_bool y - depends on X86_MCE_P4THERMAL || X86_MCE_INTEL + depends on X86_MCE_INTEL config VM86 bool "Enable VM86 support" if EMBEDDED diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b50b9e9..6b8a974 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -115,13 +115,6 @@ void mcheck_init(struct cpuinfo_x86 *c); static inline void mcheck_init(struct cpuinfo_x86 *c) {} #endif -#ifdef CONFIG_X86_OLD_MCE -extern int nr_mce_banks; -void amd_mcheck_init(struct cpuinfo_x86 *c); -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -#endif - #ifdef CONFIG_X86_ANCIENT_MCE void intel_p5_mcheck_init(struct cpuinfo_x86 *c); void winchip_mcheck_init(struct cpuinfo_x86 *c); @@ -208,11 +201,7 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); void intel_init_thermal(struct cpuinfo_x86 *c); -#ifdef CONFIG_X86_NEW_MCE void mce_log_therm_throt_event(__u64 status); -#else -static inline void mce_log_therm_throt_event(__u64 status) {} -#endif #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 188a1ca..022a036 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,11 +1,9 @@ obj-y = mce.o obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o -obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o -obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c deleted file mode 100644 index b945d5d..0000000 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Athlon specific Machine Check Exception Reporting - * (C) Copyright 2002 Dave Jones - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Machine Check Handler For AMD Athlon/Duron: */ -static void k7_machine_check(struct pt_regs *regs, long error_code) -{ - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int recover = 1; - int i; - - rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? 
*/ - recover = 0; - - printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - for (i = 1; i < nr_mce_banks; i++) { - rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); - if (high & (1<<31)) { - char misc[20]; - char addr[24]; - - misc[0] = '\0'; - addr[0] = '\0'; - - if (high & (1<<29)) - recover |= 1; - if (high & (1<<25)) - recover |= 2; - high &= ~(1<<31); - - if (high & (1<<27)) { - rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); - snprintf(misc, 20, "[%08x%08x]", ahigh, alow); - } - if (high & (1<<26)) { - rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - snprintf(addr, 24, " at %08x%08x", ahigh, alow); - } - - printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", - smp_processor_id(), i, high, low, misc, addr); - - /* Clear it: */ - wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); - /* Serialize: */ - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } - } - - if (recover & 2) - panic("CPU context corrupt"); - if (recover & 1) - panic("Unable to continue"); - - printk(KERN_EMERG "Attempting to continue.\n"); - - mcgstl &= ~(1<<2); - wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); -} - - -/* AMD K7 machine check is Intel like: */ -void amd_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 l, h; - int i; - - if (!cpu_has(c, X86_FEATURE_MCE)) - return; - - machine_check_vector = k7_machine_check; - /* Make sure the vector pointer is visible before we enable MCEs: */ - wmb(); - - printk(KERN_INFO "Intel machine check architecture supported.\n"); - - rdmsr(MSR_IA32_MCG_CAP, l, h); - if (l & (1<<8)) /* Control register present ? */ - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - nr_mce_banks = l & 0xff; - - /* - * Clear status for MC index 0 separately, we don't touch CTL, - * as some K7 Athlons cause spurious MCEs when its enabled: - */ - if (boot_cpu_data.x86 == 6) { - wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); - i = 1; - } else - i = 0; - - for (; i < nr_mce_banks; i++) { - wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); - } - - set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); -} diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7da8fec..5ff6362 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -58,8 +58,6 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = int mce_disabled __read_mostly; -#ifdef CONFIG_X86_NEW_MCE - #define MISC_MCELOG_MINOR 227 #define SPINUNIT 100 /* 100ns */ @@ -1993,51 +1991,6 @@ static __init int mce_init_device(void) device_initcall(mce_init_device); -#else /* CONFIG_X86_OLD_MCE: */ - -int nr_mce_banks; -EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ - -/* This has to be run for each processor */ -void mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled) - return; - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - if (c->x86 == 5) - intel_p5_mcheck_init(c); - if (c->x86 == 6) - intel_p6_mcheck_init(c); - if (c->x86 == 15) - intel_p4_mcheck_init(c); - break; - - case X86_VENDOR_CENTAUR: - if (c->x86 == 5) - winchip_mcheck_init(c); - break; - - default: - break; - } - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); -} - -static int __init mcheck_enable(char *str) -{ - mce_p5_enabled = 1; - return 1; -} -__setup("mce", mcheck_enable); - -#endif /* CONFIG_X86_OLD_MCE */ - /* * Old style boot options parsing. Only for compatibility. 
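A decoding aid for the (1<<n) tests that recur through the deleted handlers here and below: the old code reads the 64-bit MCi_STATUS register as a low/high u32 pair, so bit n of `high` is architectural bit 32+n. The fields being open-coded are, per the SDM:

	high & (1 << 31)	/* bit 63: VAL   - bank holds a valid error        */
	high & (1 << 29)	/* bit 61: UC    - uncorrected (drives the panics)  */
	high & (1 << 27)	/* bit 59: MISCV - MCi_MISC holds extra info        */
	high & (1 << 26)	/* bit 58: ADDRV - MCi_ADDR holds a valid address   */
	high & (1 << 25)	/* bit 57: PCC   - processor context corrupt        */

and mcgstl & (1<<0) is MCG_STATUS.RIPV (restart IP valid), which is why the handlers comment it as "Recoverable ?".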
*/ diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c deleted file mode 100644 index f5f2d6f..0000000 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Non Fatal Machine Check Exception Reporting - * - * (C) Copyright 2002 Dave Jones. - * - * This file contains routines to check for non-fatal MCEs every 15s - * - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static int firstbank; - -#define MCE_RATE (15*HZ) /* timer rate is 15s */ - -static void mce_checkregs(void *info) -{ - u32 low, high; - int i; - - for (i = firstbank; i < nr_mce_banks; i++) { - rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); - - if (!(high & (1<<31))) - continue; - - printk(KERN_INFO "MCE: The hardware reports a non fatal, " - "correctable incident occurred on CPU %d.\n", - smp_processor_id()); - - printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); - - /* - * Scrub the error so we don't pick it up in MCE_RATE - * seconds time: - */ - wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); - - /* Serialize: */ - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } -} - -static void mce_work_fn(struct work_struct *work); -static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); - -static void mce_work_fn(struct work_struct *work) -{ - on_each_cpu(mce_checkregs, NULL, 1); - schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); -} - -static int __init init_nonfatal_mce_checker(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - /* Check for MCE support */ - if (!cpu_has(c, X86_FEATURE_MCE)) - return -ENODEV; - - /* Check for PPro style MCA */ - if (!cpu_has(c, X86_FEATURE_MCA)) - return -ENODEV; - - /* Some Athlons misbehave when we frob bank 0 */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 6) - firstbank = 1; - else - firstbank = 0; - - /* - * Check for non-fatal errors every MCE_RATE s - */ - schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); - printk(KERN_INFO "Machine check exception polling timer started.\n"); - - return 0; -} -module_init(init_nonfatal_mce_checker); - -MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c deleted file mode 100644 index 4482aea..0000000 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * P4 specific Machine Check Exception Reporting - */ -#include -#include -#include -#include - -#include -#include -#include - -/* as supported by the P4/Xeon family */ -struct intel_mce_extended_msrs { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - u32 edi; - u32 ebp; - u32 esp; - u32 eflags; - u32 eip; - /* u32 *reserved[]; */ -}; - -static int mce_num_extended_msrs; - -/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ -static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) -{ - u32 h; - - rdmsr(MSR_IA32_MCG_EAX, r->eax, h); - rdmsr(MSR_IA32_MCG_EBX, r->ebx, h); - rdmsr(MSR_IA32_MCG_ECX, r->ecx, h); - rdmsr(MSR_IA32_MCG_EDX, r->edx, h); - rdmsr(MSR_IA32_MCG_ESI, r->esi, h); - rdmsr(MSR_IA32_MCG_EDI, r->edi, h); - rdmsr(MSR_IA32_MCG_EBP, r->ebp, h); - rdmsr(MSR_IA32_MCG_ESP, r->esp, h); - rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h); - rdmsr(MSR_IA32_MCG_EIP, r->eip, h); -} - -static void intel_machine_check(struct pt_regs *regs, long error_code) -{ - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int recover = 1; - int i; - - rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* 
Recoverable ? */ - recover = 0; - - printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - if (mce_num_extended_msrs > 0) { - struct intel_mce_extended_msrs dbg; - - intel_get_extended_msrs(&dbg); - - printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" - "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" - "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", - smp_processor_id(), dbg.eip, dbg.eflags, - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx, - dbg.esi, dbg.edi, dbg.ebp, dbg.esp); - } - - for (i = 0; i < nr_mce_banks; i++) { - rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); - if (high & (1<<31)) { - char misc[20]; - char addr[24]; - - misc[0] = addr[0] = '\0'; - if (high & (1<<29)) - recover |= 1; - if (high & (1<<25)) - recover |= 2; - high &= ~(1<<31); - if (high & (1<<27)) { - rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); - snprintf(misc, 20, "[%08x%08x]", ahigh, alow); - } - if (high & (1<<26)) { - rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - snprintf(addr, 24, " at %08x%08x", ahigh, alow); - } - printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", - smp_processor_id(), i, high, low, misc, addr); - } - } - - if (recover & 2) - panic("CPU context corrupt"); - if (recover & 1) - panic("Unable to continue"); - - printk(KERN_EMERG "Attempting to continue.\n"); - - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not - * recoverable/continuable.This will allow BIOS to look at the MSRs - * for errors if the OS could not log the error. - */ - for (i = 0; i < nr_mce_banks; i++) { - u32 msr; - msr = MSR_IA32_MC0_STATUS+i*4; - rdmsr(msr, low, high); - if (high&(1<<31)) { - /* Clear it */ - wrmsr(msr, 0UL, 0UL); - /* Serialize */ - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } - } - mcgstl &= ~(1<<2); - wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); -} - -void intel_p4_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 l, h; - int i; - - machine_check_vector = intel_machine_check; - wmb(); - - printk(KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr(MSR_IA32_MCG_CAP, l, h); - if (l & (1<<8)) /* Control register present ? */ - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - nr_mce_banks = l & 0xff; - - for (i = 0; i < nr_mce_banks; i++) { - wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); - } - - set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); - - /* Check for P4/Xeon extended MCE MSRs */ - rdmsr(MSR_IA32_MCG_CAP, l, h); - if (l & (1<<9)) {/* MCG_EXT_P */ - mce_num_extended_msrs = (l >> 16) & 0xff; - printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" - " available\n", - smp_processor_id(), mce_num_extended_msrs); - -#ifdef CONFIG_X86_MCE_P4THERMAL - /* Check for P4/Xeon Thermal monitor */ - intel_init_thermal(c); -#endif - } -} diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c deleted file mode 100644 index 01e4f81..0000000 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * P6 specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Machine Check Handler For PII/PIII */ -static void intel_machine_check(struct pt_regs *regs, long error_code) -{ - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int recover = 1; - int i; - - rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? 
*/ - recover = 0; - - printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - for (i = 0; i < nr_mce_banks; i++) { - rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); - if (high & (1<<31)) { - char misc[20]; - char addr[24]; - - misc[0] = '\0'; - addr[0] = '\0'; - - if (high & (1<<29)) - recover |= 1; - if (high & (1<<25)) - recover |= 2; - high &= ~(1<<31); - - if (high & (1<<27)) { - rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); - snprintf(misc, 20, "[%08x%08x]", ahigh, alow); - } - if (high & (1<<26)) { - rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - snprintf(addr, 24, " at %08x%08x", ahigh, alow); - } - - printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", - smp_processor_id(), i, high, low, misc, addr); - } - } - - if (recover & 2) - panic("CPU context corrupt"); - if (recover & 1) - panic("Unable to continue"); - - printk(KERN_EMERG "Attempting to continue.\n"); - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not - * recoverable/continuable.This will allow BIOS to look at the MSRs - * for errors if the OS could not log the error: - */ - for (i = 0; i < nr_mce_banks; i++) { - unsigned int msr; - - msr = MSR_IA32_MC0_STATUS+i*4; - rdmsr(msr, low, high); - if (high & (1<<31)) { - /* Clear it: */ - wrmsr(msr, 0UL, 0UL); - /* Serialize: */ - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } - } - mcgstl &= ~(1<<2); - wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); -} - -/* Set up machine check reporting for processors with Intel style MCE: */ -void intel_p6_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 l, h; - int i; - - /* Check for MCE support */ - if (!cpu_has(c, X86_FEATURE_MCE)) - return; - - /* Check for PPro style MCA */ - if (!cpu_has(c, X86_FEATURE_MCA)) - return; - - /* Ok machine check is available */ - machine_check_vector = intel_machine_check; - /* Make sure the vector pointer is visible before we enable MCEs: */ - wmb(); - - printk(KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr(MSR_IA32_MCG_CAP, l, h); - if (l & (1<<8)) /* Control register present ? */ - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - nr_mce_banks = l & 0xff; - - /* - * Following the example in IA-32 SDM Vol 3: - * - MC0_CTL should not be written - * - Status registers on all banks should be cleared on reset - */ - for (i = 1; i < nr_mce_banks; i++) - wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - - for (i = 0; i < nr_mce_banks; i++) - wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); - - set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); -} -- cgit v1.1 From c1ebf835617035b1f08f734247dcb981e17aac6b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jul 2009 00:31:41 +0200 Subject: x86: mce: Rename CONFIG_X86_NEW_MCE to CONFIG_X86_MCE Drop the CONFIG_X86_NEW_MCE symbol and change all references to it to check for CONFIG_X86_MCE directly. No code changes Signed-off-by: Andi Kleen Signed-off-by: H. 
Peter Anvin --- arch/x86/Kconfig | 11 +++-------- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/kernel/apic/nmi.c | 2 +- arch/x86/kernel/cpu/mcheck/Makefile | 3 +-- arch/x86/kernel/irq.c | 4 ++-- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/signal.c | 2 +- 7 files changed, 10 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d986769..06880ca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -781,15 +781,10 @@ config X86_MCE The action the kernel takes depends on the severity of the problem, ranging from warning messages to halting the machine. -config X86_NEW_MCE - depends on X86_MCE - bool - default y - config X86_MCE_INTEL def_bool y prompt "Intel MCE features" - depends on X86_NEW_MCE && X86_LOCAL_APIC + depends on X86_MCE && X86_LOCAL_APIC ---help--- Additional support for intel specific MCE features such as the thermal monitor. @@ -797,7 +792,7 @@ config X86_MCE_INTEL config X86_MCE_AMD def_bool y prompt "AMD MCE features" - depends on X86_NEW_MCE && X86_LOCAL_APIC + depends on X86_MCE && X86_LOCAL_APIC ---help--- Additional support for AMD specific MCE features such as the DRAM Error Threshold. @@ -817,7 +812,7 @@ config X86_MCE_THRESHOLD default y config X86_MCE_INJECT - depends on X86_NEW_MCE + depends on X86_MCE tristate "Machine check injector support" ---help--- Provide support for injecting machine checks for testing purposes. diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index ff8cbfa..5e3f204 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -61,7 +61,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) #endif -#ifdef CONFIG_X86_NEW_MCE +#ifdef CONFIG_X86_MCE BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b4..f422728 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu) static inline int mce_in_progress(void) { -#if defined(CONFIG_X86_NEW_MCE) +#if defined(CONFIG_X86_MCE) return atomic_read(&mce_entry) > 0; #endif return 0; diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 022a036..4ac6d48 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,6 +1,5 @@ -obj-y = mce.o +obj-y = mce.o mce-severity.o -obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b0cdde6..74656d1 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, " Threshold APIC interrupts\n"); # endif #endif -#ifdef CONFIG_X86_NEW_MCE +#ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); @@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_threshold_count; # endif #endif -#ifdef CONFIG_X86_NEW_MCE +#ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); sum += per_cpu(mce_poll_count, cpu); #endif diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 696f0e4..8a194ad 100644 --- a/arch/x86/kernel/irqinit.c +++ 
b/arch/x86/kernel/irqinit.c @@ -190,7 +190,7 @@ static void __init apic_intr_init(void) #ifdef CONFIG_X86_THRESHOLD alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif -#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) +#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC) alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); #endif diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c57875..cc26ad4c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -856,7 +856,7 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { -#ifdef CONFIG_X86_NEW_MCE +#ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) mce_notify_process(); -- cgit v1.1 From 9eda8cb3ac235217e4ffa01cb9cedee1c1550599 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jul 2009 00:31:42 +0200 Subject: x86: mce: Move code in mce.c Now that the X86_OLD_MCE ifdefs are gone move some code that used to be outside the big ifdef to a more natural place near its user. No code change. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5ff6362..e16271f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -45,17 +45,6 @@ #include "mce-internal.h" -/* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", - smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -1322,6 +1311,17 @@ static void mce_init_timer(void) add_timer(t); } +/* Handle unconfigured int18 (should never happen) */ +static void unexpected_machine_check(struct pt_regs *regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", + smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void (*machine_check_vector)(struct pt_regs *, long error_code) = + unexpected_machine_check; + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: -- cgit v1.1 From cebe182033f156b430952370fb0f9dbe6e89b081 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jul 2009 00:31:43 +0200 Subject: x86: mce: Move per bank data in a single datastructure This addresses one of the leftover review comments. Move the per bank data into a single structure. This avoids several separate variables and also separate allocation of sysfs objects. I didn't move the CMCI ownership information so far because that would have needed some non trivial changes in the algorithms. Signed-off-by: Andi Kleen Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce-internal.h | 14 ++++ arch/x86/kernel/cpu/mcheck/mce.c | 109 +++++++++++++++--------------- 2 files changed, 67 insertions(+), 56 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 54dcb8f..6bd51e7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -1,3 +1,4 @@ +#include #include enum severity_level { @@ -10,6 +11,19 @@ enum severity_level { MCE_PANIC_SEVERITY, }; +#define ATTR_LEN 16 + +/* One object for each MCE bank, shared by all CPUs */ +struct mce_bank { + u64 ctl; /* subevents to enable */ + unsigned char init; /* initialise bank? */ + struct sysdev_attribute attr; /* sysdev attribute */ + char attrname[ATTR_LEN]; /* attribute name */ +}; + int mce_severity(struct mce *a, int tolerant, char **msg); extern int mce_ser; + +extern struct mce_bank *mce_banks; + diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e16271f..a04806e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -64,7 +64,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); */ static int tolerant __read_mostly = 1; static int banks __read_mostly; -static u64 *bank __read_mostly; static int rip_msr __read_mostly; static int mce_bootlog __read_mostly = -1; static int monarch_timeout __read_mostly = -1; @@ -74,13 +73,13 @@ int mce_cmci_disabled __read_mostly; int mce_ignore_ce __read_mostly; int mce_ser __read_mostly; +struct mce_bank *mce_banks __read_mostly; + /* User mode helper program triggered by machine check event */ static unsigned long mce_need_notify; static char mce_helper[128]; static char *mce_helper_argv[2] = { mce_helper, NULL }; -static unsigned long dont_init_banks; - static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; @@ -91,11 +90,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL }; -static inline int skip_bank_init(int i) -{ - return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); -} - static DEFINE_PER_CPU(struct work_struct, mce_work); /* Do initial initialization of a struct mce */ @@ -482,7 +476,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); for (i = 0; i < banks; i++) { - if (!bank[i] || !test_bit(i, *b)) + if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; m.misc = 0; @@ -903,7 +897,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) order = mce_start(&no_way_out); for (i = 0; i < banks; i++) { __clear_bit(i, toclear); - if (!bank[i]) + if (!mce_banks[i].ctl) continue; m.misc = 0; @@ -1146,6 +1140,21 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); +static int mce_banks_init(void) +{ + int i; + + mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); + if (!mce_banks) + return -ENOMEM; + for (i = 0; i < banks; i++) { + struct mce_bank *b = &mce_banks[i]; + b->ctl = -1ULL; + b->init = 1; + } + return 0; +} + /* * Initialize Machine Checks for a CPU. 
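For context on the mce_cap_init() hunk that follows: the `banks` count that sizes the mce_banks[] allocation above is read out of the MCG_CAP MSR; a rough decode (consistent with the bit tests visible in the deleted handlers earlier):

	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	banks = cap & 0xff;	/* MCG_CAP[7:0]: number of banks          */
	/* cap & (1 << 8) : MCG_CTL_P, global MCG_CTL register present  */
	/* cap & (1 << 9) : MCG_EXT_P, extended state MSRs implemented  */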
*/ @@ -1169,11 +1178,10 @@ static int mce_cap_init(void) /* Don't support asymmetric configurations today */ WARN_ON(banks != 0 && b != banks); banks = b; - if (!bank) { - bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); - if (!bank) - return -ENOMEM; - memset(bank, 0xff, banks * sizeof(u64)); + if (!mce_banks) { + int err = mce_banks_init(); + if (err) + return err; } /* Use accurate RIP reporting if available. */ @@ -1205,9 +1213,10 @@ static void mce_init(void) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); for (i = 0; i < banks; i++) { - if (skip_bank_init(i)) + struct mce_bank *b = &mce_banks[i]; + if (!b->init) continue; - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + wrmsrl(MSR_IA32_MC0_CTL+4*i, b->ctl); wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } } @@ -1223,7 +1232,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) * trips off incorrectly with the IOMMU & 3ware * & Cerberus: */ - clear_bit(10, (unsigned long *)&bank[4]); + clear_bit(10, (unsigned long *)&mce_banks[4].ctl); } if (c->x86 <= 17 && mce_bootlog < 0) { /* @@ -1237,7 +1246,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) * by default. */ if (c->x86 == 6 && banks > 0) - bank[0] = 0; + mce_banks[0].ctl = 0; } if (c->x86_vendor == X86_VENDOR_INTEL) { @@ -1250,8 +1259,8 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) * valid event later, merely don't write CTL0. */ - if (c->x86 == 6 && c->x86_model < 0x1A) - __set_bit(0, &dont_init_banks); + if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) + mce_banks[0].init = 0; /* * All newer Intel systems support MCE broadcasting. Enable @@ -1578,7 +1587,8 @@ static int mce_disable(void) int i; for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) + struct mce_bank *b = &mce_banks[i]; + if (b->init) wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); } return 0; @@ -1654,14 +1664,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev); __cpuinitdata void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); -static struct sysdev_attribute *bank_attrs; +static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) +{ + return container_of(attr, struct mce_bank, attr); +} static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, char *buf) { - u64 b = bank[attr - bank_attrs]; - - return sprintf(buf, "%llx\n", b); + return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); } static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, @@ -1672,7 +1683,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, if (strict_strtoull(buf, 0, &new) < 0) return -EINVAL; - bank[attr - bank_attrs] = new; + attr_to_bank(attr)->ctl = new; mce_restart(); return size; @@ -1816,7 +1827,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) } for (j = 0; j < banks; j++) { err = sysdev_create_file(&per_cpu(mce_dev, cpu), - &bank_attrs[j]); + &mce_banks[j].attr); if (err) goto error2; } @@ -1825,10 +1836,10 @@ static __cpuinit int mce_create_device(unsigned int cpu) return 0; error2: while (--j >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); + sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); error: while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); + sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); sysdev_unregister(&per_cpu(mce_dev, cpu)); @@ -1846,7 +1857,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); for (i = 0; i < banks; i++) - sysdev_remove_file(&per_cpu(mce_dev, cpu), 
&bank_attrs[i]); + sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); sysdev_unregister(&per_cpu(mce_dev, cpu)); cpumask_clear_cpu(cpu, mce_dev_initialized); @@ -1863,7 +1874,8 @@ static void mce_disable_cpu(void *h) if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) + struct mce_bank *b = &mce_banks[i]; + if (b->init) wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); } } @@ -1879,8 +1891,9 @@ static void mce_reenable_cpu(void *h) if (!(action & CPU_TASKS_FROZEN)) cmci_reenable(); for (i = 0; i < banks; i++) { - if (!skip_bank_init(i)) - wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); + struct mce_bank *b = &mce_banks[i]; + if (b->init) + wrmsrl(MSR_IA32_MC0_CTL + i*4, b->ctl); } } @@ -1928,35 +1941,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { .notifier_call = mce_cpu_callback, }; -static __init int mce_init_banks(void) +static __init void mce_init_banks(void) { int i; - bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, - GFP_KERNEL); - if (!bank_attrs) - return -ENOMEM; - for (i = 0; i < banks; i++) { - struct sysdev_attribute *a = &bank_attrs[i]; + struct mce_bank *b = &mce_banks[i]; + struct sysdev_attribute *a = &b->attr; - a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); - if (!a->attr.name) - goto nomem; + a->attr.name = b->attrname; + snprintf(b->attrname, ATTR_LEN, "bank%d", i); a->attr.mode = 0644; a->show = show_bank; a->store = set_bank; } - return 0; - -nomem: - while (--i >= 0) - kfree(bank_attrs[i].attr.name); - kfree(bank_attrs); - bank_attrs = NULL; - - return -ENOMEM; } static __init int mce_init_device(void) @@ -1969,9 +1968,7 @@ static __init int mce_init_device(void) zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); - err = mce_init_banks(); - if (err) - return err; + mce_init_banks(); err = sysdev_class_register(&mce_sysclass); if (err) -- cgit v1.1 From a2d32bcbc008aa0f9c301a7c6f3494cb23e6af54 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jul 2009 00:31:44 +0200 Subject: x86: mce: macros to compute banks MSRs Instead of open coded calculations for bank MSRs hide the indexing of higher banks MCE register MSRs in new macros. No semantic changes. Signed-off-by: Andi Kleen Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/msr-index.h | 7 +++++++ arch/x86/kernel/cpu/mcheck/mce.c | 34 +++++++++++++++++----------------- arch/x86/kernel/cpu/mcheck/mce_intel.c | 10 +++++----- 3 files changed, 29 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1692fb5..3d1ce09 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -81,8 +81,15 @@ #define MSR_IA32_MC0_ADDR 0x00000402 #define MSR_IA32_MC0_MISC 0x00000403 +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + /* These are consecutive and not in the normal 4er MCE bank block */ #define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + #define CMCI_EN (1ULL << 30) #define CMCI_THRESHOLD_MASK 0xffffULL diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a04806e..07139a0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -267,11 +267,11 @@ static int msr_to_offset(u32 msr) unsigned bank = __get_cpu_var(injectm.bank); if (msr == rip_msr) return offsetof(struct mce, ip); - if (msr == MSR_IA32_MC0_STATUS + bank*4) + if (msr == MSR_IA32_MCx_STATUS(bank)) return offsetof(struct mce, status); - if (msr == MSR_IA32_MC0_ADDR + bank*4) + if (msr == MSR_IA32_MCx_ADDR(bank)) return offsetof(struct mce, addr); - if (msr == MSR_IA32_MC0_MISC + bank*4) + if (msr == MSR_IA32_MCx_MISC(bank)) return offsetof(struct mce, misc); if (msr == MSR_IA32_MCG_STATUS) return offsetof(struct mce, mcgstatus); @@ -485,7 +485,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) m.tsc = 0; barrier(); - m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); + m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); if (!(m.status & MCI_STATUS_VAL)) continue; @@ -500,9 +500,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) continue; if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); + m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); + m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); if (!(flags & MCP_TIMESTAMP)) m.tsc = 0; @@ -518,7 +518,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) /* * Clear state for this bank. 
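A worked example of the new accessors for bank 3 (constants per msr-index.h: MC0_CTL sits at 0x400 and the per-bank block is 4 MSRs wide, while the CTL2 block is 1 MSR wide):

	MSR_IA32_MCx_CTL(3)	/* 0x400 + 4*3 = 0x40c */
	MSR_IA32_MCx_STATUS(3)	/* 0x401 + 4*3 = 0x40d */
	MSR_IA32_MCx_ADDR(3)	/* 0x402 + 4*3 = 0x40e */
	MSR_IA32_MCx_CTL2(3)	/* 0x280 + 3   = 0x283 */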
*/ - mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); } /* @@ -539,7 +539,7 @@ static int mce_no_way_out(struct mce *m, char **msg) int i; for (i = 0; i < banks; i++) { - m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); + m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) return 1; } @@ -823,7 +823,7 @@ static void mce_clear_state(unsigned long *toclear) for (i = 0; i < banks; i++) { if (test_bit(i, toclear)) - mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); } } @@ -904,7 +904,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) m.addr = 0; m.bank = i; - m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); + m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); if ((m.status & MCI_STATUS_VAL) == 0) continue; @@ -945,9 +945,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) kill_it = 1; if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); + m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); + m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); /* * Action optional error. Queue address for later processing. @@ -1216,8 +1216,8 @@ static void mce_init(void) struct mce_bank *b = &mce_banks[i]; if (!b->init) continue; - wrmsrl(MSR_IA32_MC0_CTL+4*i, b->ctl); - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); + wrmsrl(MSR_IA32_MCx_STATUS(i), 0); } } @@ -1589,7 +1589,7 @@ static int mce_disable(void) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); + wrmsrl(MSR_IA32_MCx_CTL(i), 0); } return 0; } @@ -1876,7 +1876,7 @@ static void mce_disable_cpu(void *h) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); + wrmsrl(MSR_IA32_MCx_CTL(i), 0); } } @@ -1893,7 +1893,7 @@ static void mce_reenable_cpu(void *h) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) - wrmsrl(MSR_IA32_MC0_CTL + i*4, b->ctl); + wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index e1acec0f..889f665 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot) if (test_bit(i, owned)) continue; - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); + rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Already owned by someone else? */ if (val & CMCI_EN) { @@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot) } val |= CMCI_EN | CMCI_THRESHOLD; - wrmsrl(MSR_IA32_MC0_CTL2 + i, val); - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); + wrmsrl(MSR_IA32_MCx_CTL2(i), val); + rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Did the enable bit stick? 
-- the bank supports CMCI */ if (val & CMCI_EN) { @@ -152,9 +152,9 @@ void cmci_clear(void) if (!test_bit(i, __get_cpu_var(mce_banks_owned))) continue; /* Disable CMCI */ - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); + rdmsrl(MSR_IA32_MCx_CTL2(i), val); val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); - wrmsrl(MSR_IA32_MC0_CTL2 + i, val); + wrmsrl(MSR_IA32_MCx_CTL2(i), val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } spin_unlock_irqrestore(&cmci_discover_lock, flags); -- cgit v1.1 From 3ccdccfadbd2548abe38682b587f4ba27eac2fc9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jul 2009 00:31:45 +0200 Subject: x86: mce: Lower maximum number of banks to architecture limit The Intel x86 architecture right now only supports 32 machine check banks; more would bump into other MSRs. So lower the max define to 32. This only affects a few bitmaps; most data structures are dynamically sized anyway. Signed-off-by: Andi Kleen Signed-off-by: H.
Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090705160154.GB4791@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/ipi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index dbf5445..e6b4f51 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -150,7 +150,7 @@ int safe_smp_processor_id(void) { int apicid, cpuid; - if (!boot_cpu_has(X86_FEATURE_APIC)) + if (!cpu_has_apic) return 0; apicid = hard_smp_processor_id(); -- cgit v1.1 From e90476d3bab4322070c0afb3e3b55671de8664ea Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sat, 11 Jul 2009 09:32:46 +0800 Subject: x86: Remove duplicated #include Remove duplicated #include in: arch/x86/kernel/dumpstack.c Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c8405718..2d8a371 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -15,7 +15,6 @@ #include #include #include -#include #include -- cgit v1.1 From 8bdbd962ecfcbdd96f9dbb02d780b4553afd2543 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sat, 4 Jul 2009 00:35:45 +0100 Subject: x86/cpu: Clean up various files a bit No code changes except printk levels (although some of the K6 mtrr code might be clearer if there were a few as would splitting out some of the intel cache code). Signed-off-by: Alan Cox LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 37 ++++++----- arch/x86/kernel/cpu/bugs.c | 10 +-- arch/x86/kernel/cpu/bugs_64.c | 2 +- arch/x86/kernel/cpu/common.c | 8 +-- arch/x86/kernel/cpu/cyrix.c | 19 ++++-- arch/x86/kernel/cpu/hypervisor.c | 5 +- arch/x86/kernel/cpu/intel.c | 11 ++-- arch/x86/kernel/cpu/intel_cacheinfo.c | 116 +++++++++++++++++---------------- arch/x86/kernel/cpu/perfctr-watchdog.c | 45 ++++++------- arch/x86/kernel/cpu/proc.c | 2 +- arch/x86/kernel/cpu/vmware.c | 18 ++--- 11 files changed, 144 insertions(+), 129 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 28e5f59..c6eb02e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -45,8 +45,8 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) #define CBAR_ENB (0x80000000) #define CBAR_KEY (0X000000CB) if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); + if (inl(CBAR) & CBAR_ENB) + outl(0 | CBAR_KEY, CBAR); } } @@ -87,9 +87,10 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) d = d2-d; if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); + printk(KERN_CONT + "system stability may be impaired when more than 32 MB are used.\n"); else - printk("probably OK (after B9730xxxx).\n"); + printk(KERN_CONT "probably OK (after B9730xxxx).\n"); printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); } @@ -219,8 +220,9 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); + printk(KERN_INFO + "CPU: CLK_CTL MSR was %x. 
Reprogramming to %x\n", + l, ((l & 0x000fffff)|0x20000000)); wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); } } @@ -398,7 +400,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) u32 level; level = cpuid_eax(1); - if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } if (c->x86 == 0x10 || c->x86 == 0x11) @@ -487,27 +489,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * benefit in doing so. */ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if ((tseg>>PMD_SHIFT) < (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < + ((tseg>>PMD_SHIFT) < (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + set_memory_4k((unsigned long)__va(tseg), 1); } } #endif } #ifdef CONFIG_X86_32 -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, + unsigned int size) { /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + /* Duron Rev A0 */ + if (c->x86_model == 3 && c->x86_mask == 0) size = 64; + /* Tbird rev A1/A2 */ if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ + (c->x86_mask == 0 || c->x86_mask == 1)) size = 256; } return size; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c8e315f..01a2652 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -81,7 +81,7 @@ static void __init check_fpu(void) boot_cpu_data.fdiv_bug = fdiv_bug; if (boot_cpu_data.fdiv_bug) - printk("Hmm, FPU with FDIV bug.\n"); + printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); } static void __init check_hlt(void) @@ -98,7 +98,7 @@ static void __init check_hlt(void) halt(); halt(); halt(); - printk("OK.\n"); + printk(KERN_CONT "OK.\n"); } /* @@ -122,9 +122,9 @@ static void __init check_popad(void) * CPU hard. Too bad. 
*/ if (res != 12345678) - printk("Buggy.\n"); + printk(KERN_CONT "Buggy.\n"); else - printk("OK.\n"); + printk(KERN_CONT "OK.\n"); #endif } @@ -156,7 +156,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #ifndef CONFIG_SMP - printk("CPU: "); + printk(KERN_INFO "CPU: "); print_cpu_info(&boot_cpu_data); #endif check_config(); diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 9a3ed06..04f0fe5 100644 --- a/arch/x86/kernel/cpu/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c @@ -15,7 +15,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #if !defined(CONFIG_SMP) - printk("CPU: "); + printk(KERN_INFO "CPU: "); print_cpu_info(&boot_cpu_data); #endif alternative_instructions(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d6f27c9..c96ea44 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -18,8 +18,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -28,13 +28,13 @@ #include #include #include -#include +#include #include #include #include #include #include -#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 593171e..19807b8 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -3,10 +3,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include #include @@ -282,7 +282,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) * The 5510/5520 companion chips have a funky PIT. */ if (vendor == PCI_VENDOR_ID_CYRIX && - (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) + (device == PCI_DEVICE_ID_CYRIX_5510 || + device == PCI_DEVICE_ID_CYRIX_5520)) mark_tsc_unstable("cyrix 5510/5520 detected"); } #endif @@ -299,7 +300,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) * ? 
: 0x7x * GX1 : 0x8x GX1 datasheet 56 */ - if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) + if ((0x30 <= dir1 && dir1 <= 0x6f) || + (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); return; } else { /* MediaGX */ @@ -427,9 +429,12 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + /* enable MAPEN */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); + /* enable cpuid */ + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); + /* disable MAPEN */ + setCx86(CX86_CCR3, ccr3); local_irq_restore(flags); } } diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index fb5b86a..93ba8ee 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -28,11 +28,10 @@ static inline void __cpuinit detect_hypervisor_vendor(struct cpuinfo_x86 *c) { - if (vmware_platform()) { + if (vmware_platform()) c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; - } else { + else c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; - } } unsigned long get_hypervisor_tsc_freq(void) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3260ab0..80a722a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -7,17 +7,17 @@ #include #include #include +#include #include #include #include -#include #include #include #include #ifdef CONFIG_X86_64 -#include +#include #include #endif @@ -174,7 +174,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_F00F_BUG /* * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. + * have the F0 0F bug, which lets nonprivileged users lock up the + * system. * Note that the workaround only should be initialized once... */ c->f00f_bug = 0; @@ -207,7 +208,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + wrmsr(MSR_IA32_MISC_ENABLE, lo, hi); } } @@ -283,7 +284,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) /* Intel has a non-standard dependency on %ecx for this CPUID level. */ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); if (eax & 0x1f) - return ((eax >> 26) + 1); + return (eax >> 26) + 1; else return 1; } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 789efe2..306bf0d 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -3,7 +3,7 @@ * * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) - * Ashok Raj : Work with CPU hotplug infrastructure. + * Ashok Raj : Work with CPU hotplug infrastructure. * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 
*/ @@ -16,7 +16,7 @@ #include #include -#include +#include #include #define LVL_1_INST 1 @@ -25,14 +25,15 @@ #define LVL_3 4 #define LVL_TRACE 5 -struct _cache_table -{ +struct _cache_table { unsigned char descriptor; char cache_type; short size; }; -/* all the cache descriptor types we care about (no TLB or trace cache entries) */ +/* All the cache descriptor types we care about (no TLB or + trace cache entries) */ + static const struct _cache_table __cpuinitconst cache_table[] = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ @@ -105,8 +106,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = }; -enum _cache_type -{ +enum _cache_type { CACHE_TYPE_NULL = 0, CACHE_TYPE_DATA = 1, CACHE_TYPE_INST = 2, @@ -170,31 +170,31 @@ unsigned short num_cache_leaves; Maybe later */ union l1_cache { struct { - unsigned line_size : 8; - unsigned lines_per_tag : 8; - unsigned assoc : 8; - unsigned size_in_kb : 8; + unsigned line_size:8; + unsigned lines_per_tag:8; + unsigned assoc:8; + unsigned size_in_kb:8; }; unsigned val; }; union l2_cache { struct { - unsigned line_size : 8; - unsigned lines_per_tag : 4; - unsigned assoc : 4; - unsigned size_in_kb : 16; + unsigned line_size:8; + unsigned lines_per_tag:4; + unsigned assoc:4; + unsigned size_in_kb:16; }; unsigned val; }; union l3_cache { struct { - unsigned line_size : 8; - unsigned lines_per_tag : 4; - unsigned assoc : 4; - unsigned res : 2; - unsigned size_encoded : 14; + unsigned line_size:8; + unsigned lines_per_tag:4; + unsigned assoc:4; + unsigned res:2; + unsigned size_encoded:14; }; unsigned val; }; @@ -350,7 +350,8 @@ static int __cpuinit find_num_cache_leaves(void) unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + /* Cache sizes */ + unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; @@ -377,8 +378,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) retval = cpuid4_cache_lookup_regs(i, &this_leaf); if (retval >= 0) { - switch(this_leaf.eax.split.level) { - case 1: + switch (this_leaf.eax.split.level) { + case 1: if (this_leaf.eax.split.type == CACHE_TYPE_DATA) new_l1d = this_leaf.size/1024; @@ -386,19 +387,20 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) CACHE_TYPE_INST) new_l1i = this_leaf.size/1024; break; - case 2: + case 2: new_l2 = this_leaf.size/1024; num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); l2_id = c->apicid >> index_msb; break; - case 3: + case 3: new_l3 = this_leaf.size/1024; num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); + index_msb = get_count_order( + num_threads_sharing); l3_id = c->apicid >> index_msb; break; - default: + default: break; } } @@ -421,22 +423,21 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; - for ( i = 0 ; i < n ; i++ ) { + for (i = 0 ; i < n ; i++) { cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); /* If bit 31 is set, this is an unknown format */ - for ( j = 0 ; j < 3 ; j++ ) { - if (regs[j] & (1 << 31)) regs[j] = 0; - } + for (j = 0 ; j < 3 ; j++) + if (regs[j] & (1 << 31)) + regs[j] = 0; /* Byte 0 is level 
count, not a descriptor */ - for ( j = 1 ; j < 16 ; j++ ) { + for (j = 1 ; j < 16 ; j++) { unsigned char des = dp[j]; unsigned char k = 0; /* look up this descriptor in the table */ - while (cache_table[k].descriptor != 0) - { + while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { if (only_trace && cache_table[k].cache_type != LVL_TRACE) break; @@ -488,14 +489,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } if (trace) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if (l1i) + printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); if (l1d) - printk(", L1 D cache: %dK\n", l1d); + printk(KERN_CONT ", L1 D cache: %dK\n", l1d); else - printk("\n"); + printk(KERN_CONT "\n"); if (l2) printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); @@ -558,8 +559,13 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) } } #else -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} -static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +{ +} + +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ +} #endif static void __cpuinit free_cache_attributes(unsigned int cpu) @@ -645,7 +651,7 @@ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); static ssize_t show_##file_name \ (struct _cpuid4_info *this_leaf, char *buf) \ { \ - return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ + return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ } show_one_plus(level, eax.split.level, 0); @@ -656,7 +662,7 @@ show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) { - return sprintf (buf, "%luK\n", this_leaf->size / 1024); + return sprintf(buf, "%luK\n", this_leaf->size / 1024); } static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, @@ -669,7 +675,7 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, const struct cpumask *mask; mask = to_cpumask(this_leaf->shared_cpu_map); - n = type? + n = type ? 
cpulist_scnprintf(buf, len-2, mask) : cpumask_scnprintf(buf, len-2, mask); buf[n++] = '\n'; @@ -800,7 +806,7 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1); -static struct attribute * default_attrs[] = { +static struct attribute *default_attrs[] = { &type.attr, &level.attr, &coherency_line_size.attr, @@ -815,7 +821,7 @@ static struct attribute * default_attrs[] = { NULL }; -static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) +static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { struct _cache_attr *fattr = to_attr(attr); struct _index_kobject *this_leaf = to_object(kobj); @@ -828,8 +834,8 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) return ret; } -static ssize_t store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) { struct _cache_attr *fattr = to_attr(attr); struct _index_kobject *this_leaf = to_object(kobj); @@ -883,7 +889,7 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) goto err_out; per_cpu(index_kobject, cpu) = kzalloc( - sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); + sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); if (unlikely(per_cpu(index_kobject, cpu) == NULL)) goto err_out; @@ -917,7 +923,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } for (i = 0; i < num_cache_leaves; i++) { - this_object = INDEX_KOBJECT_PTR(cpu,i); + this_object = INDEX_KOBJECT_PTR(cpu, i); this_object->cpu = cpu; this_object->index = i; retval = kobject_init_and_add(&(this_object->kobj), @@ -925,9 +931,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) per_cpu(cache_kobject, cpu), "index%1lu", i); if (unlikely(retval)) { - for (j = 0; j < i; j++) { - kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); - } + for (j = 0; j < i; j++) + kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); return retval; @@ -952,7 +957,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); for (i = 0; i < num_cache_leaves; i++) - kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); } @@ -977,8 +982,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = -{ +static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { .notifier_call = cacheinfo_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 5c481f6..8100a29 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -68,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) /* returns the bit offset of the performance counter register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - return (msr - MSR_K7_PERFCTR0); + return msr - MSR_K7_PERFCTR0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return (msr - MSR_ARCH_PERFMON_PERFCTR0); + return msr - MSR_ARCH_PERFMON_PERFCTR0; switch 
(boot_cpu_data.x86) { case 6: - return (msr - MSR_P6_PERFCTR0); + return msr - MSR_P6_PERFCTR0; case 15: - return (msr - MSR_P4_BPU_PERFCTR0); + return msr - MSR_P4_BPU_PERFCTR0; } } return 0; @@ -92,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) /* returns the bit offset of the event selection register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - return (msr - MSR_K7_EVNTSEL0); + return msr - MSR_K7_EVNTSEL0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return (msr - MSR_ARCH_PERFMON_EVENTSEL0); + return msr - MSR_ARCH_PERFMON_EVENTSEL0; switch (boot_cpu_data.x86) { case 6: - return (msr - MSR_P6_EVNTSEL0); + return msr - MSR_P6_EVNTSEL0; case 15: - return (msr - MSR_P4_BSU_ESCR0); + return msr - MSR_P4_BSU_ESCR0; } } return 0; @@ -113,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) { BUG_ON(counter > NMI_MAX_COUNTER_BITS); - return (!test_bit(counter, perfctr_nmi_owner)); + return !test_bit(counter, perfctr_nmi_owner); } /* checks the an msr for availability */ @@ -124,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr) counter = nmi_perfctr_msr_to_bit(msr); BUG_ON(counter > NMI_MAX_COUNTER_BITS); - return (!test_bit(counter, perfctr_nmi_owner)); + return !test_bit(counter, perfctr_nmi_owner); } EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); @@ -237,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz) */ counter_val = (u64)cpu_khz * 1000; do_div(counter_val, retval); - if (counter_val > 0x7fffffffULL) { + if (counter_val > 0x7fffffffULL) { u64 count = (u64)cpu_khz * 1000; do_div(count, 0x7fffffffUL); retval = count + 1; @@ -251,7 +251,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr, u64 count = (u64)cpu_khz * 1000; do_div(count, nmi_hz); - if(descr) + if (descr) pr_debug("setting %s to -0x%08Lx\n", descr, count); wrmsrl(perfctr_msr, 0 - count); } @@ -262,7 +262,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, u64 count = (u64)cpu_khz * 1000; do_div(count, nmi_hz); - if(descr) + if (descr) pr_debug("setting %s to -0x%08Lx\n", descr, count); wrmsr(perfctr_msr, (u32)(-count), 0); } @@ -296,7 +296,7 @@ static int setup_k7_watchdog(unsigned nmi_hz) /* setup the timer */ wrmsr(evntsel_msr, evntsel, 0); - write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; @@ -387,7 +387,7 @@ static int setup_p6_watchdog(unsigned nmi_hz) /* setup the timer */ wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); + write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; @@ -415,7 +415,7 @@ static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) apic_write(APIC_LVTPC, APIC_DM_NMI); /* P6/ARCH_PERFMON has 32 bit counter write */ - write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); + write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); } static const struct wd_ops p6_wd_ops = { @@ -490,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz) if (smp_num_siblings == 2) { unsigned int ebx, apicid; - ebx = cpuid_ebx(1); - apicid = (ebx >> 24) & 0xff; - ht_num = apicid & 1; + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; } else #endif ht_num = 0; @@ -544,7 +544,7 @@ static int 
setup_p4_watchdog(unsigned nmi_hz) } evntsel = P4_ESCR_EVENT_SELECT(0x3F) - | P4_ESCR_OS + | P4_ESCR_OS | P4_ESCR_USR; cccr_val |= P4_CCCR_THRESHOLD(15) @@ -612,7 +612,7 @@ static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { unsigned dummy; /* - * P4 quirks: + * P4 quirks: * - An overflown perfctr will assert its interrupt * until the OVF flag in its CCCR is cleared. * - LVTPC is masked on interrupt and must be @@ -662,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) * NOTE: Corresponding bit = 0 in ebx indicates event present. */ cpuid(10, &(eax.full), &ebx, &unused, &unused); - if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + if ((eax.split.mask_length < + (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) return 0; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index d5e3039..1e90434 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -128,7 +128,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (i < ARRAY_SIZE(x86_power_flags) && x86_power_flags[i]) seq_printf(m, "%s%s", - x86_power_flags[i][0]?" ":"", + x86_power_flags[i][0] ? " " : "", x86_power_flags[i]); else seq_printf(m, " [%d]", i); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 284c399..bc24f51 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -49,17 +49,17 @@ static inline int __vmware_platform(void) static unsigned long __vmware_get_tsc_khz(void) { - uint64_t tsc_hz; - uint32_t eax, ebx, ecx, edx; + uint64_t tsc_hz; + uint32_t eax, ebx, ecx, edx; - VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); + VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); - if (ebx == UINT_MAX) - return 0; - tsc_hz = eax | (((uint64_t)ebx) << 32); - do_div(tsc_hz, 1000); - BUG_ON(tsc_hz >> 32); - return tsc_hz; + if (ebx == UINT_MAX) + return 0; + tsc_hz = eax | (((uint64_t)ebx) << 32); + do_div(tsc_hz, 1000); + BUG_ON(tsc_hz >> 32); + return tsc_hz; } /* -- cgit v1.1 From 8045a4c293d36c61656a20d581b11f7f0cd7acd5 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Jul 2009 19:30:25 +0200 Subject: x86/oprofile: Fix cast of counter value When casting the counter value to a 64 bit value in 32 bit mode, sign extension may lead to broken counter values. This patch fixes this by casting to (u64) instead of (s64). 
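A standalone illustration of the hazard (not from the patch -- the kernel's
count field is an unsigned long; a signed 32-bit source is used here to make
the sign extension visible):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int32_t count = (int32_t)0x80000000u;	/* top bit set */

		/* Sign-extends to 0xffffffff80000000, then negates to
		 * 0x0000000080000000 -- a small positive reload value,
		 * so the counter would overflow far too late. */
		uint64_t via_s64 = (uint64_t)-(int64_t)count;

		/* Zero-extends first, giving the intended "negative"
		 * two's-complement reload value. */
		uint64_t via_u64 = -(uint64_t)(uint32_t)count;

		printf("-(s64): %#018llx\n", (unsigned long long)via_s64);
		printf("-(u64): %#018llx\n", (unsigned long long)via_u64);
		return 0;
	}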
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 4 ++-- arch/x86/oprofile/op_model_p4.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index e95268e..7ca8306 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -111,7 +111,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; wrmsrl(msrs->counters[i].addr, - -(s64)counter_config[i].count); + -(u64)counter_config[i].count); rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; val |= op_x86_get_ctrl(model, &counter_config[i]); @@ -237,7 +237,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, if (val & OP_CTR_OVERFLOW) continue; oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[i]); } op_amd_handle_ibs(regs, msrs); diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index f01e53b..9db9e36 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -580,7 +580,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, - -(s64)counter_config[i].count); + -(u64)counter_config[i].count); } else { reset_value[i] = 0; } @@ -625,11 +625,11 @@ static int p4_check_ctrs(struct pt_regs * const regs, if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); wrmsrl(p4_counters[real].counter_address, - -(s64)reset_value[i]); + -(u64)reset_value[i]); CCCR_CLEAR_OVF(low); wrmsr(p4_counters[real].cccr_address, low, high); wrmsrl(p4_counters[real].counter_address, - -(s64)reset_value[i]); + -(u64)reset_value[i]); } } -- cgit v1.1 From 44ab9a6b0e909145d42615493952fe986b1ce5c2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 18:33:02 +0200 Subject: x86/oprofile: Rework and simplify nmi_cpu_setup() This patch removes the function nmi_save_registers(). Per-cpu code is now executed only in the function nmi_cpu_setup(). Also, it renames the per-cpu function nmi_restore_registers() to nmi_cpu_restore_registers(). 
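The shape of the change, reduced to its essentials (a paraphrase of the diff
below, not new code):

	/* Before: two cross-CPU broadcasts to set up profiling */
	on_each_cpu(nmi_save_registers, NULL, 1);
	on_each_cpu(nmi_cpu_setup, NULL, 1);

	/* After: one broadcast; nmi_cpu_setup() now saves the MSRs itself
	 * before programming them, halving the IPI round-trips */
	on_each_cpu(nmi_cpu_setup, NULL, 1);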
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 93df76d..25da1e17 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -87,13 +87,6 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) } } -static void nmi_save_registers(void *dummy) -{ - int cpu = smp_processor_id(); - struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - nmi_cpu_save_registers(msrs); -} - static void free_msrs(void) { int i; @@ -137,6 +130,7 @@ static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); spin_unlock(&oprofilefs_lock); @@ -182,13 +176,12 @@ static int nmi_setup(void) } } - on_each_cpu(nmi_save_registers, NULL, 1); on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; return 0; } -static void nmi_restore_registers(struct op_msrs *msrs) +static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; @@ -220,7 +213,7 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); - nmi_restore_registers(msrs); + nmi_cpu_restore_registers(msrs); } static void nmi_shutdown(void) -- cgit v1.1 From 6e63ea4b0b14ff5fb8a3ca704fcda7d28b95f079 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Jul 2009 19:25:39 +0200 Subject: x86/oprofile: Whitespaces changes only This patch fixes whitespace changes of code that will be touched in follow-on patches. 
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 12 ++++++------ arch/x86/oprofile/op_model_amd.c | 12 ++++++------ arch/x86/oprofile/op_model_p4.c | 8 ++++---- arch/x86/oprofile/op_model_ppro.c | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 25da1e17..fca8dc9 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -516,12 +516,12 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ - ops->create_files = nmi_create_files; - ops->setup = nmi_setup; - ops->shutdown = nmi_shutdown; - ops->start = nmi_start; - ops->stop = nmi_stop; - ops->cpu_type = cpu_type; + ops->create_files = nmi_create_files; + ops->setup = nmi_setup; + ops->shutdown = nmi_shutdown; + ops->start = nmi_start; + ops->stop = nmi_stop; + ops->cpu_type = cpu_type; if (model->init) ret = model->init(ops); diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 7ca8306..f676f88 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -91,7 +91,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, int i; /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0; i < NUM_CONTROLS; ++i) { if (unlikely(!msrs->controls[i].addr)) continue; rdmsrl(msrs->controls[i].addr, val); @@ -229,7 +229,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, u64 val; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) continue; rdmsrl(msrs->counters[i].addr, val); @@ -250,7 +250,7 @@ static void op_amd_start(struct op_msrs const * const msrs) { u64 val; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (reset_value[i]) { rdmsrl(msrs->controls[i].addr, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -270,7 +270,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) * Subtle: stop on all counters to avoid race with setting our * pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) continue; rdmsrl(msrs->controls[i].addr, val); @@ -285,11 +285,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0; i < NUM_CONTROLS; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 9db9e36..5921b7f 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -558,7 +558,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, } /* clear the cccrs we will use */ - for (i = 0 ; i < num_counters ; i++) { + for (i = 0; i < num_counters; i++) { if (unlikely(!msrs->controls[i].addr)) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); @@ -575,7 +575,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, } /* setup all counters */ - for (i = 0 ; i < num_counters ; ++i) { + for (i = 0; i < num_counters; ++i) { if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); @@ 
-678,7 +678,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < num_counters ; ++i) { + for (i = 0; i < num_counters; ++i) { if (msrs->counters[i].addr) release_perfctr_nmi(msrs->counters[i].addr); } @@ -687,7 +687,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) * conjunction with the counter registers (hence the starting offset). * This saves a few bits. */ - for (i = num_counters ; i < num_controls ; ++i) { + for (i = num_counters; i < num_controls; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(msrs->controls[i].addr); } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index cd72d5c..570d717 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -81,7 +81,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, } /* clear all counters */ - for (i = 0 ; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) { if (unlikely(!msrs->controls[i].addr)) continue; rdmsrl(msrs->controls[i].addr, val); @@ -125,7 +125,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs, if (unlikely(!reset_value)) goto out; - for (i = 0 ; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; rdmsrl(msrs->counters[i].addr, val); @@ -188,11 +188,11 @@ static void ppro_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < num_counters ; ++i) { + for (i = 0; i < num_counters; ++i) { if (msrs->counters[i].addr) release_perfctr_nmi(MSR_P6_PERFCTR0 + i); } - for (i = 0 ; i < num_counters ; ++i) { + for (i = 0; i < num_counters; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } -- cgit v1.1 From 6b2b171a774af256082635b53ac387b1613b7b4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 02:53:50 -0700 Subject: x86/acpi: acpi_parse_madt_ioapic_entries: remove redundant braces We don't put braces around a single statement. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/acpi/boot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6b8ca3a..ce31c1a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1179,9 +1179,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) * If MPS is present, it will handle them, * otherwise the system will stay in PIC mode */ - if (acpi_disabled || acpi_noirq) { + if (acpi_disabled || acpi_noirq) return -ENODEV; - } if (!cpu_has_apic) return -ENODEV; -- cgit v1.1 From 2f210deba9887dd9143b63b217506f1ac152e91c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 02:55:22 -0700 Subject: x86/ioapic.c: ioapic_modify_irq is too large to inline If ioapic_modify_irq() is marked inline, it gets inlined several times. Un-inlining it saves around 200 bytes in .text for me. 
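A generic sketch of the effect (hypothetical function, nothing to do with the
I/O APIC code itself):

	/* Marked 'static inline', the body may be pasted at every call
	 * site, so three callers can mean three copies in .text: */
	static inline void modify_reg(int reg, int clear, int set)
	{
		/* imagine a few dozen statements here */
	}

	void a(void) { modify_reg(0, ~0, 1); }
	void b(void) { modify_reg(1, ~1, 2); }
	void c(void) { modify_reg(2, ~2, 4); }

	/* For a large enough body, dropping 'inline' lets the compiler
	 * emit a single out-of-line copy instead; comparing 'size foo.o'
	 * before and after shows the .text difference. */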
Signed-off-by: Jeremy Fitzhardinge
---
 arch/x86/kernel/apic/io_apic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 90b5e6e..82271eb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -556,9 +556,9 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
 		add_pin_to_irq_node(cfg, node, newapic, newpin);
 }

-static inline void io_apic_modify_irq(struct irq_cfg *cfg,
-				      int mask_and, int mask_or,
-				      void (*final)(struct irq_pin_list *entry))
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+			       int mask_and, int mask_or,
+			       void (*final)(struct irq_pin_list *entry))
 {
 	int pin;
 	struct irq_pin_list *entry;
--
cgit v1.1

From 890aeacf64c55a7ada7054a140d249ab13899f2d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Mon, 8 Jun 2009 02:57:43 -0700
Subject: x86/ioapic.c: unify __mask_IO_APIC_irq()

The main difference between the 32 and 64-bit versions of
__mask_IO_APIC_irq() is that the 64-bit one does a readback from the
I/O APIC to synchronize it. If there's a hardware requirement to do a
readback sync after updating an APIC register, then it will be a
hardware requirement regardless of whether the kernel is compiled 32
or 64-bit.

Unify __mask_IO_APIC_irq() using the 64-bit version which always syncs
with io_apic_sync().

Signed-off-by: Jeremy Fitzhardinge
---
 arch/x86/kernel/apic/io_apic.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 82271eb..f8aa546 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -580,7 +580,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
 	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }

-#ifdef CONFIG_X86_64
 static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
@@ -596,12 +595,8 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
 	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
-#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
-}

+#ifdef CONFIG_X86_32
 static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
 {
 	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
--
cgit v1.1

From 916a0fe739f151664f7f07b42543ae6fd4caec49 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Mon, 8 Jun 2009 03:00:22 -0700
Subject: x86/ioapic.c: remove #ifdef for 82093AA workaround

While no 64-bit hardware will have a version 0x11 I/O APIC which needs
the level/edge bug workaround, that's not a particular reason to use
CONFIG_X86_32 to #ifdef the code out. Most 32-bit machines will no
longer need the workaround either, so the test to see whether it is
necessary should be more fine-grained than "32-bit=yes, 64-bit=no".

(Also fix formatting of block comment.)
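One hypothetical shape such a finer-grained test could take (io_apic_read()
and union IO_APIC_reg_01 are real definitions in this file; this helper is
not in the patch):

	/* Caller would need to hold ioapic_lock. */
	static bool ioapic_has_eoi_erratum(int apic)
	{
		union IO_APIC_reg_01 reg_01;

		/* Register 1 carries the I/O APIC version in its low
		 * byte; only version 0x11 parts need the level/edge
		 * workaround. */
		reg_01.raw = io_apic_read(apic, 1);
		return reg_01.bits.version == 0x11;
	}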
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 47 +++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f8aa546..1a34144 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -596,7 +596,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -#ifdef CONFIG_X86_32 static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, @@ -608,7 +607,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } -#endif /* CONFIG_X86_32 */ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { @@ -2510,11 +2508,8 @@ atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - -#ifdef CONFIG_X86_32 unsigned long v; int i; -#endif struct irq_cfg *cfg; int do_unmask_irq = 0; @@ -2527,31 +2522,28 @@ static void ack_apic_level(unsigned int irq) } #endif -#ifdef CONFIG_X86_32 /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. 
--macro + */ cfg = desc->chip_data; i = cfg->vector; - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); -#endif /* * We must acknowledge the irq before we move it or the acknowledge will @@ -2593,7 +2585,7 @@ static void ack_apic_level(unsigned int irq) unmask_IO_APIC_irq_desc(desc); } -#ifdef CONFIG_X86_32 + /* Tail end of version 0x11 I/O APIC bug workaround */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); @@ -2601,7 +2593,6 @@ static void ack_apic_level(unsigned int irq) __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } -#endif } #ifdef CONFIG_INTR_REMAP -- cgit v1.1 From 83c21bedf63ce92a2dd82ae2c7a96179b0aa4372 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:13:04 -0700 Subject: x86/ioapic.c: remove redundant declaration of irq_pin_list The structure is defined immediately below, so there's no need to forward declare it. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1a34144..ec52e0c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -116,8 +116,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -struct irq_pin_list; - /* * This is performance-critical, we want to do it O(1) * -- cgit v1.1 From 8e13d697febc1ba17e70ed88789255c8bc25aa41 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:14:59 -0700 Subject: x86/ioapic.c: move lost comment to what seems like appropriate place The comment got separated from its subject, so move it to what appears to be the right place, and update to describe the current structure. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ec52e0c..a097a77 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -116,13 +116,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - struct irq_pin_list { int apic, pin; struct irq_pin_list *next; @@ -137,6 +130,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ struct irq_cfg { struct irq_pin_list *irq_2_pin; cpumask_var_t domain; -- cgit v1.1 From d8c52063ed85dda61b70bc05b90711478db5dc17 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:17:58 -0700 Subject: x86/ioapic.c: convert io_apic_level_ack_pending loop to normal for() loop Convert the unconventional loop in io_apic_level_ack_pending() to a conventional for() loop. 
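In miniature, the transformation (taken from the diff below; the same pattern
recurs in the next few entries):

	/* Open-coded walk with two exit tests: */
	entry = cfg->irq_2_pin;
	for (;;) {
		if (!entry)
			break;
		/* ...body... */
		if (!entry->next)
			break;
		entry = entry->next;
	}

	/* Equivalent conventional form: */
	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
		/* ...body... */
	}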
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a097a77..0d04018 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -410,13 +410,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - entry = cfg->irq_2_pin; - for (;;) { + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; int pin; - if (!entry) - break; pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ @@ -424,9 +421,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) spin_unlock_irqrestore(&ioapic_lock, flags); return true; } - if (!entry->next) - break; - entry = entry->next; } spin_unlock_irqrestore(&ioapic_lock, flags); -- cgit v1.1 From 875e68ec32fc5495f3edf987aaae1c52306184b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:24:11 -0700 Subject: x86/ioapic.c: simplify add_pin_to_irq_node() Rather than duplicating the same alloc/init code twice, restructure the function to look for duplicates and then add an entry if none is found. This function is not performance critical; all but one of its callers are __init functions, and the non-__init caller is for PCI device setup. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0d04018..d9e8f19 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -490,34 +490,22 @@ static void ioapic_mask_entry(int apic, int pin) */ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { - struct irq_pin_list *entry; + struct irq_pin_list **entryp, *entry; - entry = cfg->irq_2_pin; - if (!entry) { - entry = get_one_free_irq_2_pin(node); - if (!entry) { - printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", - apic, pin); - return; - } - cfg->irq_2_pin = entry; - entry->apic = apic; - entry->pin = pin; - return; - } - - while (entry->next) { + for (entryp = &cfg->irq_2_pin; + *entryp != NULL; + entryp = &(*entryp)->next) { + entry = *entryp; /* not again, please */ if (entry->apic == apic && entry->pin == pin) return; - - entry = entry->next; } - entry->next = get_one_free_irq_2_pin(node); - entry = entry->next; + entry = get_one_free_irq_2_pin(node); entry->apic = apic; entry->pin = pin; + + *entryp = entry; } /* -- cgit v1.1 From 535b64291a9d1ff8bc54642494a5fce27e1e1170 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:29:26 -0700 Subject: x86/ioapic.c: convert replace_pin_at_irq_node to conventional for() loop Use a conventional for() loop in replace_pin_at_irq_node(). 
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d9e8f19..9386976 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -515,10 +515,10 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_pin_list *entry = cfg->irq_2_pin; + struct irq_pin_list *entry; int replaced = 0; - while (entry) { + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; @@ -526,7 +526,6 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, /* every one is different, right? */ break; } - entry = entry->next; } /* why? call replace before add? */ -- cgit v1.1 From 4eea6fff612f54380dd642b045bf03ac0613fe3e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:32:15 -0700 Subject: x86/ioapic.c: clean up replace_pin_at_irq_node logic and comments There's no need for a control variable in replace_pin_at_irq_node(); it can just return if it finds the old apic/pin to replace. If the loop terminates, then it didn't find the old apic/pin, so it can add the new ones. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9386976..8245e62 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -512,25 +512,22 @@ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin * Reroute an IRQ to a different pin. */ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, - int oldapic, int oldpin, - int newapic, int newpin) + int oldapic, int oldpin, + int newapic, int newpin) { struct irq_pin_list *entry; - int replaced = 0; for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - replaced = 1; /* every one is different, right? */ - break; + return; } } - /* why? call replace before add? */ - if (!replaced) - add_pin_to_irq_node(cfg, node, newapic, newpin); + /* old apic/pin didn't exist, so just add new ones */ + add_pin_to_irq_node(cfg, node, newapic, newpin); } static void io_apic_modify_irq(struct irq_cfg *cfg, -- cgit v1.1 From 638f2f8c52a92c15ebda9e50d84c1ab56fc42e42 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:37:52 -0700 Subject: x86/ioapic.c: convert __target_IO_APIC_irq to conventional for() loop Use a normal for() loop in __target_IO_APIC_irq(). 
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8245e62..17883cd 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2236,13 +2236,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq struct irq_pin_list *entry; u8 vector = cfg->vector; - entry = cfg->irq_2_pin; - for (;;) { + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; - if (!entry) - break; - apic = entry->apic; pin = entry->pin; /* @@ -2255,9 +2251,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; } } -- cgit v1.1 From e25371d60cb06a44d7a32d7966ab9bfbeacb9390 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:49:01 -0700 Subject: x86/ioapic.c: unify ioapic_retrigger_irq() The 32 and 64-bit versions of ioapic_retrigger_irq() are identical except the 64-bit one takes vector_lock. vector_lock is defined and used on 32-bit too, so just use a common ioapic_retrigger_irq(). Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 17883cd..cf51b0b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2178,7 +2178,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } -#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { @@ -2191,14 +2190,6 @@ static int ioapic_retrigger_irq(unsigned int irq) return 1; } -#else -static int ioapic_retrigger_irq(unsigned int irq) -{ - apic->send_IPI_self(irq_cfg(irq)->vector); - - return 1; -} -#endif /* * Level and edge triggered IO-APIC interrupts need different handling, -- cgit v1.1 From 254e0a6bff87ab8b22293c4bd1443507df698407 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 19 Jul 2009 00:08:54 +0900 Subject: x86: Use get_desc_base() Use get_desc_base() to get the base address in desc_struct Signed-off-by: Akinobu Mita LKML-Reference: <20090718150853.GA11294@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/doublefault_32.c | 4 +--- arch/x86/kernel/step.c | 9 ++++----- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index b4f14c6..37250fe 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -27,9 +27,7 @@ static void doublefault_fn(void) if (ptr_ok(gdt)) { gdt += GDT_ENTRY_TSS << 3; - tss = *(u16 *)(gdt+2); - tss += *(u8 *)(gdt+4) << 16; - tss += *(u8 *)(gdt+7) << 24; + tss = get_desc_base((struct desc_struct *)gdt); printk(KERN_EMERG "double fault, tss at %08lx\n", tss); if (ptr_ok(tss)) { diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index e8b9863..3149032 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -4,6 +4,7 @@ #include #include #include +#include unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { @@ -23,7 +24,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re * and APM bios ones we just ignore here. 
*/ if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { - u32 *desc; + struct desc_struct *desc; unsigned long base; seg &= ~7UL; @@ -33,12 +34,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re addr = -1L; /* bogus selector, access would fault */ else { desc = child->mm->context.ldt + seg; - base = ((desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000)); + base = get_desc_base(desc); /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) + if (!desc->d) addr &= 0xffff; addr += base; } -- cgit v1.1 From fde0312d01b60a3fd5dc56e69a9613defbbc7097 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 19 Jul 2009 00:09:56 +0900 Subject: x86: Remove unused patch_espfix_desc() patch_espfix_desc() is not used after commit dc4c2a0aed3b09f6e255bd5c3faa50fe6e0b2ded Signed-off-by: Akinobu Mita LKML-Reference: <20090718150955.GB11294@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/traps.h | 4 +--- arch/x86/kernel/traps.c | 21 --------------------- 2 files changed, 1 insertion(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index bfd74c0..4da91ad 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -81,9 +81,7 @@ extern int panic_on_unrecovered_nmi; void math_error(void __user *); void math_emulate(struct math_emu_info *); -#ifdef CONFIG_X86_32 -unsigned long patch_espfix_desc(unsigned long, unsigned long); -#else +#ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); asmlinkage void mce_threshold_interrupt(void); #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5204332..2367941 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -786,27 +786,6 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) #endif } -#ifdef CONFIG_X86_32 -unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) -{ - struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); - unsigned long base = (kesp - uesp) & -THREAD_SIZE; - unsigned long new_kesp = kesp - base; - unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; - __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; - - /* Set up base for espfix segment */ - desc &= 0x00f0ff0000000000ULL; - desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | - ((((__u64)base) << 32) & 0xff00000000000000ULL) | - ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | - (lim_pages & 0xffff); - *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; - - return new_kesp; -} -#endif - asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } -- cgit v1.1 From 57594742a2b545f8f114cda34f15650be8ae976d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 19 Jul 2009 00:11:06 +0900 Subject: x86: Introduce set_desc_base() and set_desc_limit() Rename set_base()/set_limit to set_desc_base()/set_desc_limit() and rewrite them in C. These are naturally introduced by the idea of get_desc_base()/get_desc_limit(). The conversion actually found the bug in apm_32.c: bad_bios_desc is written at run-time, but it is defined const variable. 
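Why the const mattered, in isolation (a sketch; set_desc_base() is the helper
introduced below, and the initializer is the one from apm_32.c):

	static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } };

	/* The old set_base() macro cast the const away behind the
	 * caller's back; writing through it is undefined behaviour, and
	 * faults outright if the object lands in a read-only section: */
	set_desc_base((struct desc_struct *)&bad_bios_desc,
		      (unsigned long)__va(0x40UL << 4));

Hence the patch also drops the const qualifier from bad_bios_desc.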
Signed-off-by: Akinobu Mita LKML-Reference: <20090718151105.GC11294@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 13 +++++++++++++ arch/x86/include/asm/stackprotector.h | 4 +--- arch/x86/include/asm/system.h | 27 --------------------------- arch/x86/kernel/apm_32.c | 18 +++++++++--------- 4 files changed, 23 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index c993e9e..e8de2f6 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -291,11 +291,24 @@ static inline unsigned long get_desc_base(const struct desc_struct *desc) return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); } +static inline void set_desc_base(struct desc_struct *desc, unsigned long base) +{ + desc->base0 = base & 0xffff; + desc->base1 = (base >> 16) & 0xff; + desc->base2 = (base >> 24) & 0xff; +} + static inline unsigned long get_desc_limit(const struct desc_struct *desc) { return desc->limit0 | (desc->limit << 16); } +static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) +{ + desc->limit0 = limit & 0xffff; + desc->limit = (limit >> 16) & 0xf; +} + static inline void _set_gate(int gate, unsigned type, void *addr, unsigned dpl, unsigned ist, unsigned seg) { diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c2d742c..cdc5e0b 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -90,9 +90,7 @@ static inline void setup_stack_canary_segment(int cpu) struct desc_struct desc; desc = gdt_table[GDT_ENTRY_STACK_CANARY]; - desc.base0 = canary & 0xffff; - desc.base1 = (canary >> 16) & 0xff; - desc.base2 = (canary >> 24) & 0xff; + set_desc_base(&desc, canary); write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); #endif } diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 643c59b..75c49c7 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -150,33 +150,6 @@ do { \ #endif #ifdef __KERNEL__ -#define _set_base(addr, base) do { unsigned long __pr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %%dl,%2\n\t" \ - "movb %%dh,%3" \ - :"=&d" (__pr) \ - :"m" (*((addr)+2)), \ - "m" (*((addr)+4)), \ - "m" (*((addr)+7)), \ - "0" (base) \ - ); } while (0) - -#define _set_limit(addr, limit) do { unsigned long __lr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %2,%%dh\n\t" \ - "andb $0xf0,%%dh\n\t" \ - "orb %%dh,%%dl\n\t" \ - "movb %%dl,%2" \ - :"=&d" (__lr) \ - :"m" (*(addr)), \ - "m" (*((addr)+6)), \ - "0" (limit) \ - ); } while (0) - -#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) -#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) extern void native_load_gs_index(unsigned); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 79302e9..b5e841b 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -403,7 +403,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); -static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; +static struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; static const char driver_version[] = "1.16ac"; /* no spaces */ @@ -2337,8 +2337,8 @@ static int __init apm_init(void) * This is for buggy 
BIOS's that refer to (real mode) segment 0x40 * even though they are called in protected mode. */ - set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); - _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); + set_desc_base(&bad_bios_desc, (unsigned long)__va(0x40UL << 4)); + set_desc_limit(&bad_bios_desc, 4095 - (0x40 << 4)); /* * Set up the long jump entry point to the APM BIOS, which is called @@ -2358,12 +2358,12 @@ static int __init apm_init(void) * code to that CPU. */ gdt = get_cpu_gdt_table(0); - set_base(gdt[APM_CS >> 3], - __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(gdt[APM_CS_16 >> 3], - __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(gdt[APM_DS >> 3], - __va((unsigned long)apm_info.bios.dseg << 4)); + set_desc_base(&gdt[APM_CS >> 3], + (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4)); + set_desc_base(&gdt[APM_CS_16 >> 3], + (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4)); + set_desc_base(&gdt[APM_DS >> 3], + (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); proc_create("apm", 0, NULL, &apm_file_ops); -- cgit v1.1 From 4d4036e0e7299c6cbb2d2421b4b30b7a409ce61a Mon Sep 17 00:00:00 2001 From: Jason Yeh Date: Wed, 8 Jul 2009 13:49:38 +0200 Subject: oprofile: Implement performance counter multiplexing The number of hardware counters is limited. The multiplexing feature enables OProfile to gather more events than the hardware provides counters for. This is realized by switching between events at a user-specified time interval. A new file (/dev/oprofile/time_slice) is added for the user to specify the timer interval in ms. If the number of events to profile is higher than the number of hardware counters available, the patch schedules a work queue that switches the event counter and rewrites the different sets of values into it. The switching mechanism needs to be implemented for each architecture to support multiplexing. This patch only implements AMD CPU support, but multiplexing can be easily extended for other models and architectures. There are follow-on patches that rework parts of this patch.
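Conceptually, multiplexing time-slices the hardware: a switch index selects which contiguous set of configured events currently occupies the physical counters, and each timer expiry advances the index. A minimal standalone sketch of that rotation, with illustrative constants rather than the kernel's data structures:

#include <stdio.h>

#define NUM_HW_COUNTERS   4	/* physical counters */
#define NUM_VIRT_COUNTERS 8	/* events configured by the user */

static int switch_index;	/* first virtual counter currently on hw */

static void switch_events(void)
{
	switch_index += NUM_HW_COUNTERS;
	if (switch_index >= NUM_VIRT_COUNTERS)
		switch_index = 0;	/* wrap to the first set */
}

int main(void)
{
	int tick, i;

	for (tick = 0; tick < 4; tick++) {	/* four time slices */
		printf("slice %d maps hw->virt:", tick);
		for (i = 0; i < NUM_HW_COUNTERS; i++)
			printf(" %d->%d", i, switch_index + i);
		printf("\n");
		switch_events();
	}
	return 0;
}

Each event is only resident for a fraction of the profiling run, so multiplexing trades sampling accuracy for event coverage.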
Signed-off-by: Jason Yeh Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 162 ++++++++++++++++++++++++++++++++++++-- arch/x86/oprofile/op_counter.h | 2 +- arch/x86/oprofile/op_model_amd.c | 110 ++++++++++++++++++++++---- arch/x86/oprofile/op_model_p4.c | 4 + arch/x86/oprofile/op_model_ppro.c | 2 + arch/x86/oprofile/op_x86_model.h | 7 ++ 6 files changed, 267 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index fca8dc9..e54f6a0 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -1,11 +1,14 @@ /** * @file nmi_int.c * - * @remark Copyright 2002-2008 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon * @author Robert Richter + * @author Barry Kasindorf + * @author Jason Yeh + * @author Suravee Suthikulpanit */ #include @@ -24,6 +27,12 @@ #include "op_counter.h" #include "op_x86_model.h" + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +DEFINE_PER_CPU(int, switch_index); +#endif + + static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); @@ -31,6 +40,13 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +extern atomic_t multiplex_counter; +#endif + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + /* common functions */ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, @@ -95,6 +111,11 @@ static void free_msrs(void) per_cpu(cpu_msrs, i).counters = NULL; kfree(per_cpu(cpu_msrs, i).controls); per_cpu(cpu_msrs, i).controls = NULL; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + kfree(per_cpu(cpu_msrs, i).multiplex); + per_cpu(cpu_msrs, i).multiplex = NULL; +#endif } } @@ -103,6 +124,9 @@ static int allocate_msrs(void) int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters; +#endif int i; for_each_possible_cpu(i) { @@ -118,6 +142,14 @@ static int allocate_msrs(void) success = 0; break; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + per_cpu(cpu_msrs, i).multiplex = + kmalloc(multiplex_size, GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).multiplex) { + success = 0; + break; + } +#endif } if (!success) @@ -126,6 +158,25 @@ static int allocate_msrs(void) return success; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_setup_cpu_mux(struct op_msrs const * const msrs) +{ + int i; + struct op_msr *multiplex = msrs->multiplex; + + for (i = 0; i < model->num_virt_counters; ++i) { + if (counter_config[i].enabled) { + multiplex[i].saved = -(u64)counter_config[i].count; + } else { + multiplex[i].addr = 0; + multiplex[i].saved = 0; + } + } +} + +#endif + static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); @@ -133,6 +184,9 @@ static void nmi_cpu_setup(void *dummy) nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + nmi_setup_cpu_mux(msrs); +#endif spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); @@ -173,14 +227,52 @@ static int nmi_setup(void) memcpy(per_cpu(cpu_msrs, cpu).controls, per_cpu(cpu_msrs, 0).controls, sizeof(struct 
op_msr) * model->num_controls); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + memcpy(per_cpu(cpu_msrs, cpu).multiplex, + per_cpu(cpu_msrs, 0).multiplex, + sizeof(struct op_msr) * model->num_virt_counters); +#endif } - } on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; return 0; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + unsigned int si = __get_cpu_var(switch_index); + struct op_msr *multiplex = msrs->multiplex; + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + int offset = i + si; + if (multiplex[offset].addr) { + rdmsrl(multiplex[offset].addr, + multiplex[offset].saved); + } + } +} + +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + unsigned int si = __get_cpu_var(switch_index); + struct op_msr *multiplex = msrs->multiplex; + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + int offset = i + si; + if (multiplex[offset].addr) { + wrmsrl(multiplex[offset].addr, + multiplex[offset].saved); + } + } +} + +#endif + static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -214,6 +306,9 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + __get_cpu_var(switch_index) = 0; +#endif } static void nmi_shutdown(void) @@ -252,16 +347,15 @@ static void nmi_stop(void) on_each_cpu(nmi_cpu_stop, NULL, 1); } -struct op_counter_config counter_config[OP_MAX_COUNTER]; - static int nmi_create_files(struct super_block *sb, struct dentry *root) { unsigned int i; - for (i = 0; i < model->num_counters; ++i) { + for (i = 0; i < model->num_virt_counters; ++i) { struct dentry *dir; char buf[4]; +#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX /* quick little hack to _not_ expose a counter if it is not * available for use. This should protect userspace app. * NOTE: assumes 1:1 mapping here (that counters are organized @@ -269,6 +363,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) */ if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) continue; +#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */ snprintf(buf, sizeof(buf), "%d", i); dir = oprofilefs_mkdir(sb, root, buf); @@ -283,6 +378,57 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) return 0; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_cpu_switch(void *dummy) +{ + int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_counters; + if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, cpu) = 0; + else + per_cpu(switch_index, cpu) = si; + + model->switch_ctrl(model, msrs); + nmi_cpu_restore_mpx_registers(msrs); + + nmi_cpu_start(NULL); +} + + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in ordre. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_counters].count ? 
0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (!model->switch_ctrl) + return -ENOSYS; /* not implemented */ + if (nmi_multiplex_on() < 0) + return -EINVAL; /* not necessary */ + + on_each_cpu(nmi_cpu_switch, NULL, 1); + + atomic_inc(&multiplex_counter); + + return 0; +} + +#endif + #ifdef CONFIG_SMP static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, void *data) @@ -516,12 +662,18 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + __raw_get_cpu_var(switch_index) = 0; +#endif ops->create_files = nmi_create_files; ops->setup = nmi_setup; ops->shutdown = nmi_shutdown; ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + ops->switch_events = nmi_switch_event; +#endif if (model->init) ret = model->init(ops); diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 91b6a11..e28398d 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,7 +10,7 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index f676f88..fdbed3a 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -9,12 +9,15 @@ * @author Philippe Elie * @author Graydon Hoare * @author Robert Richter - * @author Barry Kasindorf + * @author Barry Kasindorf + * @author Jason Yeh + * @author Suravee Suthikulpanit */ #include #include #include +#include #include #include @@ -25,12 +28,23 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +#define NUM_VIRT_COUNTERS 32 +#define NUM_VIRT_CONTROLS 32 +#else +#define NUM_VIRT_COUNTERS NUM_COUNTERS +#define NUM_VIRT_CONTROLS NUM_CONTROLS +#endif + #define OP_EVENT_MASK 0x0FFF #define OP_CTR_OVERFLOW (1ULL<<31) #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) -static unsigned long reset_value[NUM_COUNTERS]; +static unsigned long reset_value[NUM_VIRT_COUNTERS]; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +DECLARE_PER_CPU(int, switch_index); +#endif #ifdef CONFIG_OPROFILE_IBS @@ -82,6 +96,16 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) else msrs->controls[i].addr = 0; } + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + for (i = 0; i < NUM_VIRT_COUNTERS; i++) { + int hw_counter = i % NUM_CONTROLS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; + else + msrs->multiplex[i].addr = 0; + } +#endif } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, @@ -90,6 +114,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, u64 val; int i; + /* setup reset_value */ + for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + } else { + reset_value[i] = 0; + } + } + /* clear all counters */ for (i = 0; i < NUM_CONTROLS; ++i) { if (unlikely(!msrs->controls[i].addr)) @@ -108,20 +141,49 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (counter_config[i].enabled && msrs->counters[i].addr) { - reset_value[i] = counter_config[i].count; - wrmsrl(msrs->counters[i].addr, - 
-(u64)counter_config[i].count); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (counter_config[offset].enabled && msrs->counters[i].addr) { + /* setup counter registers */ + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); + + /* setup control registers */ rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[i]); + val |= op_x86_get_ctrl(model, &counter_config[offset]); + wrmsrl(msrs->controls[i].addr, val); + } + } +} + + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void op_amd_switch_ctrl(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) +{ + u64 val; + int i; + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (counter_config[offset].enabled) { + /* setup control registers */ + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[offset]); wrmsrl(msrs->controls[i].addr, val); - } else { - reset_value[i] = 0; } } } +#endif + + #ifdef CONFIG_OPROFILE_IBS static inline int @@ -230,14 +292,19 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, int i; for (i = 0; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (!reset_value[offset]) continue; rdmsrl(msrs->counters[i].addr, val); /* bit is clear if overflowed: */ if (val & OP_CTR_OVERFLOW) continue; - oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[i]); + oprofile_add_sample(regs, offset); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); } op_amd_handle_ibs(regs, msrs); @@ -250,8 +317,14 @@ static void op_amd_start(struct op_msrs const * const msrs) { u64 val; int i; + for (i = 0; i < NUM_COUNTERS; ++i) { - if (reset_value[i]) { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (reset_value[offset]) { rdmsrl(msrs->controls[i].addr, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(msrs->controls[i].addr, val); @@ -271,7 +344,11 @@ static void op_amd_stop(struct op_msrs const * const msrs) * pm callback */ for (i = 0; i < NUM_COUNTERS; ++i) { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) +#else if (!reset_value[i]) +#endif continue; rdmsrl(msrs->controls[i].addr, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -289,7 +366,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0; i < NUM_CONTROLS; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -463,6 +540,8 @@ static void op_amd_exit(void) {} struct op_x86_model_spec const op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .num_virt_counters = NUM_VIRT_COUNTERS, + .num_virt_controls = NUM_VIRT_CONTROLS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, @@ -473,4 +552,7 @@ struct op_x86_model_spec const op_amd_spec = { .start = &op_amd_start, .stop = &op_amd_stop, .shutdown = &op_amd_shutdown, +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + .switch_ctrl = &op_amd_switch_ctrl, +#endif }; diff --git a/arch/x86/oprofile/op_model_p4.c 
b/arch/x86/oprofile/op_model_p4.c index 5921b7f..65b9237 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -698,6 +698,8 @@ static void p4_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, + .num_virt_counters = NUM_COUNTERS_HT2, + .num_virt_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -710,6 +712,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { struct op_x86_model_spec const op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, + .num_virt_counters = NUM_COUNTERS_NON_HT, + .num_virt_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 570d717..098cbca 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -206,6 +206,8 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { .num_counters = 2, .num_controls = 2, + .num_virt_counters = 2, + .num_virt_controls = 2, .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 5054898..0d07d23 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -23,6 +23,7 @@ struct op_msr { struct op_msrs { struct op_msr *counters; struct op_msr *controls; + struct op_msr *multiplex; }; struct pt_regs; @@ -35,6 +36,8 @@ struct oprofile_operations; struct op_x86_model_spec { unsigned int num_counters; unsigned int num_controls; + unsigned int num_virt_counters; + unsigned int num_virt_controls; u64 reserved; u16 event_mask; int (*init)(struct oprofile_operations *ops); @@ -47,6 +50,10 @@ struct op_x86_model_spec { void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); void (*shutdown)(struct op_msrs const * const msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + void (*switch_ctrl)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); +#endif }; struct op_counter_config; -- cgit v1.1 From 5e766e3e433fa2d5d2fdfd8e2432804c91393387 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 8 Jul 2009 14:54:17 +0200 Subject: x86/oprofile: Fix usage of NUM_CONTROLS/NUM_COUNTERS macros Use the corresponding macros when iterating over counter and control registers. Since NUM_CONTROLS and NUM_COUNTERS are equal for AMD CPUs, the fix is more of a cosmetic change.
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index fdbed3a..dcfd450 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -99,7 +99,7 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX for (i = 0; i < NUM_VIRT_COUNTERS; i++) { - int hw_counter = i % NUM_CONTROLS; + int hw_counter = i % NUM_COUNTERS; if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; else @@ -366,7 +366,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_CONTROLS; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } -- cgit v1.1 From 82a225283fb0d9438549595d9e6f3ecc42b42ad6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 16:29:34 +0200 Subject: x86/oprofile: Use per_cpu() instead of __get_cpu_var() __get_cpu_var() calls smp_processor_id(). When the cpu id is already known, instead use per_cpu() to avoid generating the id again. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index e54f6a0..8cd4658 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -294,7 +294,7 @@ static void nmi_cpu_shutdown(void *dummy) { unsigned int v; int cpu = smp_processor_id(); - struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); /* restoring APIC_LVTPC can trigger an apic error because the delivery * mode and vector nr combination can be illegal. That's by design: on @@ -307,7 +307,7 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - __get_cpu_var(switch_index) = 0; + per_cpu(switch_index, cpu) = 0; #endif } -- cgit v1.1 From 6bfccd099c2841e1c42530f1b6d2553bfa13be3a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 19:23:50 +0200 Subject: x86/oprofile: Fix initialization of switch_index Variable switch_index must be initialized for each cpu. This patch fixes the initialization by moving it to the per-cpu init function nmi_cpu_setup(). 
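The fix also shows the stub convention this series relies on: the multiplexing variant of a function is compiled only under CONFIG_OPROFILE_EVENT_MULTIPLEX, with an empty inline stub in the #else branch so call sites stay free of #ifdefs. A hedged standalone sketch of the pattern, with FEATURE_MUX standing in for the real config option:

#include <stdio.h>

#define FEATURE_MUX 1	/* flip to 0 to build the stub variant */

#if FEATURE_MUX

static void setup_mux(int cpu)
{
	printf("cpu %d: multiplexing state initialized\n", cpu);
}

#else

/* empty stub: the common path below compiles unchanged */
static inline void setup_mux(int cpu) { }

#endif

static void cpu_setup(int cpu)
{
	/* common per-cpu setup path, identical in both builds */
	setup_mux(cpu);
}

int main(void)
{
	cpu_setup(0);
	return 0;
}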
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 8cd4658..b211d33 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -160,7 +160,7 @@ static int allocate_msrs(void) #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -static void nmi_setup_cpu_mux(struct op_msrs const * const msrs) +static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { int i; struct op_msr *multiplex = msrs->multiplex; @@ -173,8 +173,15 @@ static void nmi_setup_cpu_mux(struct op_msrs const * const msrs) multiplex[i].saved = 0; } } + + per_cpu(switch_index, cpu) = 0; } +#else + +static inline void +nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } + #endif static void nmi_cpu_setup(void *dummy) @@ -184,9 +191,7 @@ static void nmi_cpu_setup(void *dummy) nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - nmi_setup_cpu_mux(msrs); -#endif + nmi_cpu_setup_mux(cpu, msrs); spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); @@ -662,9 +667,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - __raw_get_cpu_var(switch_index) = 0; -#endif ops->create_files = nmi_create_files; ops->setup = nmi_setup; ops->shutdown = nmi_shutdown; -- cgit v1.1 From d8471ad3ab613a1ba7abd3aad46659de39a2871c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Jul 2009 13:04:43 +0200 Subject: oprofile: Introduce op_x86_phys_to_virt() This new function translates physical to virtual counter numbers. 
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 43 +++++++++++---------- arch/x86/oprofile/op_model_amd.c | 80 ++++++++++++++++------------------------ arch/x86/oprofile/op_x86_model.h | 1 + 3 files changed, 55 insertions(+), 69 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b211d33..02b57b8 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -27,12 +27,6 @@ #include "op_counter.h" #include "op_x86_model.h" - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -DEFINE_PER_CPU(int, switch_index); -#endif - - static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); @@ -103,6 +97,21 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) } } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static DEFINE_PER_CPU(int, switch_index); + +inline int op_x86_phys_to_virt(int phys) +{ + return __get_cpu_var(switch_index) + phys; +} + +#else + +inline int op_x86_phys_to_virt(int phys) { return phys; } + +#endif + static void free_msrs(void) { int i; @@ -248,31 +257,25 @@ static int nmi_setup(void) static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) { - unsigned int si = __get_cpu_var(switch_index); struct op_msr *multiplex = msrs->multiplex; - unsigned int i; + int i; for (i = 0; i < model->num_counters; ++i) { - int offset = i + si; - if (multiplex[offset].addr) { - rdmsrl(multiplex[offset].addr, - multiplex[offset].saved); - } + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + rdmsrl(multiplex[virt].addr, multiplex[virt].saved); } } static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) { - unsigned int si = __get_cpu_var(switch_index); struct op_msr *multiplex = msrs->multiplex; - unsigned int i; + int i; for (i = 0; i < model->num_counters; ++i) { - int offset = i + si; - if (multiplex[offset].addr) { - wrmsrl(multiplex[offset].addr, - multiplex[offset].saved); - } + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + wrmsrl(multiplex[virt].addr, multiplex[virt].saved); } } diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index dcfd450..67f830d 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -42,9 +42,6 @@ #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) static unsigned long reset_value[NUM_VIRT_COUNTERS]; -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -DECLARE_PER_CPU(int, switch_index); -#endif #ifdef CONFIG_OPROFILE_IBS @@ -141,21 +138,20 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - int offset = i + __get_cpu_var(switch_index); -#else - int offset = i; -#endif - if (counter_config[offset].enabled && msrs->counters[i].addr) { - /* setup counter registers */ - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); - - /* setup control registers */ - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[offset]); - wrmsrl(msrs->controls[i].addr, val); - } + int virt = op_x86_phys_to_virt(i); + if (!counter_config[virt].enabled) + continue; + if (!msrs->counters[i].addr) + continue; + + /* setup counter registers */ + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); + + /* setup control registers */ + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= 
op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); } } @@ -170,14 +166,13 @@ static void op_amd_switch_ctrl(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - int offset = i + __get_cpu_var(switch_index); - if (counter_config[offset].enabled) { - /* setup control registers */ - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[offset]); - wrmsrl(msrs->controls[i].addr, val); - } + int virt = op_x86_phys_to_virt(i); + if (!counter_config[virt].enabled) + continue; + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); } } @@ -292,19 +287,15 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, int i; for (i = 0; i < NUM_COUNTERS; ++i) { -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - int offset = i + __get_cpu_var(switch_index); -#else - int offset = i; -#endif - if (!reset_value[offset]) + int virt = op_x86_phys_to_virt(i); + if (!reset_value[virt]) continue; rdmsrl(msrs->counters[i].addr, val); /* bit is clear if overflowed: */ if (val & OP_CTR_OVERFLOW) continue; - oprofile_add_sample(regs, offset); - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); + oprofile_add_sample(regs, virt); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); } op_amd_handle_ibs(regs, msrs); @@ -319,16 +310,11 @@ static void op_amd_start(struct op_msrs const * const msrs) int i; for (i = 0; i < NUM_COUNTERS; ++i) { -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - int offset = i + __get_cpu_var(switch_index); -#else - int offset = i; -#endif - if (reset_value[offset]) { - rdmsrl(msrs->controls[i].addr, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(msrs->controls[i].addr, val); - } + if (!reset_value[op_x86_phys_to_virt(i)]) + continue; + rdmsrl(msrs->controls[i].addr, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } op_amd_start_ibs(); @@ -344,11 +330,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) * pm callback */ for (i = 0; i < NUM_COUNTERS; ++i) { -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) -#else - if (!reset_value[i]) -#endif + if (!reset_value[op_x86_phys_to_virt(i)]) continue; rdmsrl(msrs->controls[i].addr, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 0d07d23..e874dc3 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -60,6 +60,7 @@ struct op_counter_config; extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); +extern int op_x86_phys_to_virt(int phys); extern struct op_x86_model_spec const op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; -- cgit v1.1 From 7e7478c6bc0e011d2854b21f190cc3a1dba89905 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Jul 2009 13:09:53 +0200 Subject: oprofile: Grouping multiplexing code in op_model_amd.c This patch moves some multiplexing code to the new function op_mux_fill_in_addresses(). Also, the whole multiplexing code is now at a single location. 
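op_mux_fill_in_addresses() builds the table that backs this grouping: every virtual counter records the MSR address of the physical counter it rotates onto, a modulo mapping into the hardware bank. A standalone sketch of that table; the MSR base below is a placeholder value for illustration, not a definitive register address:

#include <stdio.h>

#define NUM_HW       4
#define NUM_VIRT     8
#define MSR_PERFCTR0 0xc0010004UL	/* placeholder perfctr base */

int main(void)
{
	unsigned long addr[NUM_VIRT];
	int i;

	for (i = 0; i < NUM_VIRT; i++) {
		int hw_counter = i % NUM_HW;	/* virtual -> physical */
		addr[i] = MSR_PERFCTR0 + hw_counter;
		printf("virt %d -> MSR %#lx (hw %d)\n", i, addr[i], hw_counter);
	}
	return 0;
}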
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 75 ++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 67f830d..644980f 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -74,6 +74,45 @@ static struct op_ibs_config ibs_config; #endif +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void op_mux_fill_in_addresses(struct op_msrs * const msrs) +{ + int i; + + for (i = 0; i < NUM_VIRT_COUNTERS; i++) { + int hw_counter = i % NUM_COUNTERS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; + else + msrs->multiplex[i].addr = 0; + } +} + +static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) +{ + u64 val; + int i; + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + int virt = op_x86_phys_to_virt(i); + if (!counter_config[virt].enabled) + continue; + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); + } +} + +#else + +static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } + +#endif + /* functions for op_amd_spec */ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) @@ -94,15 +133,7 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) msrs->controls[i].addr = 0; } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - for (i = 0; i < NUM_VIRT_COUNTERS; i++) { - int hw_counter = i % NUM_COUNTERS; - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; - else - msrs->multiplex[i].addr = 0; - } -#endif + op_mux_fill_in_addresses(msrs); } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, @@ -155,30 +186,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, } } - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void op_amd_switch_ctrl(struct op_x86_model_spec const *model, - struct op_msrs const * const msrs) -{ - u64 val; - int i; - - /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { - int virt = op_x86_phys_to_virt(i); - if (!counter_config[virt].enabled) - continue; - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[virt]); - wrmsrl(msrs->controls[i].addr, val); - } -} - -#endif - - #ifdef CONFIG_OPROFILE_IBS static inline int @@ -535,6 +542,6 @@ struct op_x86_model_spec const op_amd_spec = { .stop = &op_amd_stop, .shutdown = &op_amd_shutdown, #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - .switch_ctrl = &op_amd_switch_ctrl, + .switch_ctrl = &op_mux_switch_ctrl, #endif }; -- cgit v1.1 From 6ab82f958a5dca591a6ea17a3ca6f2aca06f4f2f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:40:04 +0200 Subject: x86/oprofile: Implement multiplexing setup/shutdown functions This patch implements nmi_setup_mux() and nmi_shutdown_mux() functions to setup/shutdown multiplexing. Multiplexing code in nmi_int.c is now much more separated. 
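The pair follows a common allocate/rollback idiom: nmi_setup_mux() allocates a buffer per possible CPU and, on the first failure, the caller simply runs nmi_shutdown_mux(), which releases whatever was already allocated (freeing NULL is a no-op). A minimal userspace model of the idiom, with malloc/free standing in for kmalloc/kfree:

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

static void *mux_buf[NR_CPUS];

static void shutdown_mux(void)
{
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		free(mux_buf[i]);	/* free(NULL) is harmless */
		mux_buf[i] = NULL;
	}
}

static int setup_mux(size_t size)
{
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		mux_buf[i] = malloc(size);
		if (!mux_buf[i])
			return 0;	/* caller runs shutdown_mux() */
	}
	return 1;
}

int main(void)
{
	if (!setup_mux(128)) {
		shutdown_mux();
		return 1;
	}
	puts("per-cpu buffers allocated");
	shutdown_mux();
	return 0;
}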
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 76 ++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 02b57b8..674fa37 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -106,9 +106,35 @@ inline int op_x86_phys_to_virt(int phys) return __get_cpu_var(switch_index) + phys; } +static void nmi_shutdown_mux(void) +{ + int i; + for_each_possible_cpu(i) { + kfree(per_cpu(cpu_msrs, i).multiplex); + per_cpu(cpu_msrs, i).multiplex = NULL; + per_cpu(switch_index, i) = 0; + } +} + +static int nmi_setup_mux(void) +{ + size_t multiplex_size = + sizeof(struct op_msr) * model->num_virt_counters; + int i; + for_each_possible_cpu(i) { + per_cpu(cpu_msrs, i).multiplex = + kmalloc(multiplex_size, GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).multiplex) + return 0; + } + return 1; +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } +static inline void nmi_shutdown_mux(void) { } +static inline int nmi_setup_mux(void) { return 1; } #endif @@ -120,51 +146,27 @@ static void free_msrs(void) per_cpu(cpu_msrs, i).counters = NULL; kfree(per_cpu(cpu_msrs, i).controls); per_cpu(cpu_msrs, i).controls = NULL; - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - kfree(per_cpu(cpu_msrs, i).multiplex); - per_cpu(cpu_msrs, i).multiplex = NULL; -#endif } } static int allocate_msrs(void) { - int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters; -#endif int i; for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).counters) { - success = 0; - break; - } + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).counters) + return 0; per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).controls) { - success = 0; - break; - } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - per_cpu(cpu_msrs, i).multiplex = - kmalloc(multiplex_size, GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).multiplex) { - success = 0; - break; - } -#endif + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).controls) + return 0; } - if (!success) - free_msrs(); - - return success; + return 1; } #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX @@ -218,11 +220,15 @@ static int nmi_setup(void) int cpu; if (!allocate_msrs()) - return -ENOMEM; + err = -ENOMEM; + else if (!nmi_setup_mux()) + err = -ENOMEM; + else + err = register_die_notifier(&profile_exceptions_nb); - err = register_die_notifier(&profile_exceptions_nb); if (err) { free_msrs(); + nmi_shutdown_mux(); return err; } @@ -314,9 +320,6 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - per_cpu(switch_index, cpu) = 0; -#endif } static void nmi_shutdown(void) @@ -326,6 +329,7 @@ static void nmi_shutdown(void) nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 1); unregister_die_notifier(&profile_exceptions_nb); + nmi_shutdown_mux(); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); free_msrs(); -- cgit v1.1 From 48fb4b46712c7d3e8adc79826311abd9ccbf7f1d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:38:49 +0200 Subject: x86/oprofile: Moving nmi_setup_cpu_mux() in 
nmi_int.c This patch moves some code in nmi_int.c to get a single separate multiplexing code section. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 674fa37..b1edfc9 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -130,11 +130,30 @@ static int nmi_setup_mux(void) return 1; } +static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) +{ + int i; + struct op_msr *multiplex = msrs->multiplex; + + for (i = 0; i < model->num_virt_counters; ++i) { + if (counter_config[i].enabled) { + multiplex[i].saved = -(u64)counter_config[i].count; + } else { + multiplex[i].addr = 0; + multiplex[i].saved = 0; + } + } + + per_cpu(switch_index, cpu) = 0; +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } static inline void nmi_shutdown_mux(void) { } static inline int nmi_setup_mux(void) { return 1; } +static inline void +nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } #endif @@ -169,32 +188,6 @@ static int allocate_msrs(void) return 1; } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) -{ - int i; - struct op_msr *multiplex = msrs->multiplex; - - for (i = 0; i < model->num_virt_counters; ++i) { - if (counter_config[i].enabled) { - multiplex[i].saved = -(u64)counter_config[i].count; - } else { - multiplex[i].addr = 0; - multiplex[i].saved = 0; - } - } - - per_cpu(switch_index, cpu) = 0; -} - -#else - -static inline void -nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } - -#endif - static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); -- cgit v1.1 From d0f585dd20010f8479e56b5c6f391ef18e26877e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:38:49 +0200 Subject: x86/oprofile: Moving nmi_cpu_save/restore_mpx_registers() in nmi_int.c This patch moves some code in nmi_int.c to get a single separate multiplexing code section. 
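The save/restore pair exists because a physical counter holds whatever value its current event has reached: before rotating, each counter is parked in its virtual counter's saved slot, and the incoming set's saved values are written back. A standalone model with an array standing in for the MSRs, so rdmsrl/wrmsrl become plain reads and writes:

#include <stdio.h>

#define NUM_HW   2
#define NUM_VIRT 4

static unsigned long long hw_ctr[NUM_HW];	/* stands in for the MSRs */
static unsigned long long saved[NUM_VIRT];	/* per-virtual-counter state */
static int switch_index;

static void save_regs(void)
{
	int i;

	for (i = 0; i < NUM_HW; i++)
		saved[switch_index + i] = hw_ctr[i];	/* "rdmsrl" */
}

static void restore_regs(void)
{
	int i;

	for (i = 0; i < NUM_HW; i++)
		hw_ctr[i] = saved[switch_index + i];	/* "wrmsrl" */
}

int main(void)
{
	hw_ctr[0] = 100;	/* virtual counters 0 and 1 have */
	hw_ctr[1] = 200;	/* been counting so far */
	save_regs();		/* park their progress */
	switch_index = NUM_HW;	/* rotate to virtual counters 2 and 3 */
	restore_regs();		/* load their saved state */
	printf("hw now holds virt %d/%d: %llu %llu\n",
	       switch_index, switch_index + 1, hw_ctr[0], hw_ctr[1]);
	return 0;
}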
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 52 +++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b1edfc9..f38c5cf 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -147,6 +147,30 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) per_cpu(switch_index, cpu) = 0; } +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + struct op_msr *multiplex = msrs->multiplex; + int i; + + for (i = 0; i < model->num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + rdmsrl(multiplex[virt].addr, multiplex[virt].saved); + } +} + +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + struct op_msr *multiplex = msrs->multiplex; + int i; + + for (i = 0; i < model->num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + wrmsrl(multiplex[virt].addr, multiplex[virt].saved); + } +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } @@ -252,34 +276,6 @@ static int nmi_setup(void) return 0; } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) -{ - struct op_msr *multiplex = msrs->multiplex; - int i; - - for (i = 0; i < model->num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if (multiplex[virt].addr) - rdmsrl(multiplex[virt].addr, multiplex[virt].saved); - } -} - -static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) -{ - struct op_msr *multiplex = msrs->multiplex; - int i; - - for (i = 0; i < model->num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if (multiplex[virt].addr) - wrmsrl(multiplex[virt].addr, multiplex[virt].saved); - } -} - -#endif - static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; -- cgit v1.1 From b28d1b923ab52d535c0719155dccf3b3d98bab9f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:38:49 +0200 Subject: x86/oprofile: Moving nmi_cpu_switch() in nmi_int.c This patch moves some code in nmi_int.c to get a single separate multiplexing code section. 
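The interesting detail in nmi_cpu_switch() is the index-advance rule: it steps by num_counters but wraps back to zero when the next slot is past the end or unconfigured, so sparse configurations do not burn time slices on empty sets. A standalone sketch of just that rule; counts[] is an illustrative stand-in for counter_config, and the bounds check is written defensively here:

#include <stdio.h>

#define NUM_HW   2
#define NUM_VIRT 8

/* only the first six virtual counters are configured */
static const unsigned long counts[NUM_VIRT] = { 1, 1, 1, 1, 1, 1, 0, 0 };

static int next_index(int si)
{
	si += NUM_HW;
	/* wrap when past the end or when the next set is unused */
	if (si >= NUM_VIRT || counts[si] == 0)
		return 0;
	return si;
}

int main(void)
{
	int si = 0, tick;

	for (tick = 0; tick < 6; tick++) {	/* prints 0 2 4 0 2 4 */
		printf("slice %d: set starts at virt %d\n", tick, si);
		si = next_index(si);
	}
	return 0;
}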
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 144 +++++++++++++++++++++----------------------- 1 file changed, 70 insertions(+), 74 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index f38c5cf..998c7dc 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -97,6 +97,29 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) } } +static void nmi_cpu_start(void *dummy) +{ + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + model->start(msrs); +} + +static int nmi_start(void) +{ + on_each_cpu(nmi_cpu_start, NULL, 1); + return 0; +} + +static void nmi_cpu_stop(void *dummy) +{ + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + model->stop(msrs); +} + +static void nmi_stop(void) +{ + on_each_cpu(nmi_cpu_stop, NULL, 1); +} + #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static DEFINE_PER_CPU(int, switch_index); @@ -171,6 +194,53 @@ static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) } } +static void nmi_cpu_switch(void *dummy) +{ + int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_counters; + if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, cpu) = 0; + else + per_cpu(switch_index, cpu) = si; + + model->switch_ctrl(model, msrs); + nmi_cpu_restore_mpx_registers(msrs); + + nmi_cpu_start(NULL); +} + + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in ordre. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_counters].count ? 0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (!model->switch_ctrl) + return -ENOSYS; /* not implemented */ + if (nmi_multiplex_on() < 0) + return -EINVAL; /* not necessary */ + + on_each_cpu(nmi_cpu_switch, NULL, 1); + + atomic_inc(&multiplex_counter); + + return 0; +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } @@ -325,29 +395,6 @@ static void nmi_shutdown(void) put_cpu_var(cpu_msrs); } -static void nmi_cpu_start(void *dummy) -{ - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); - model->start(msrs); -} - -static int nmi_start(void) -{ - on_each_cpu(nmi_cpu_start, NULL, 1); - return 0; -} - -static void nmi_cpu_stop(void *dummy) -{ - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); - model->stop(msrs); -} - -static void nmi_stop(void) -{ - on_each_cpu(nmi_cpu_stop, NULL, 1); -} - static int nmi_create_files(struct super_block *sb, struct dentry *root) { unsigned int i; @@ -379,57 +426,6 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) return 0; } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void nmi_cpu_switch(void *dummy) -{ - int cpu = smp_processor_id(); - int si = per_cpu(switch_index, cpu); - struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - - nmi_cpu_stop(NULL); - nmi_cpu_save_mpx_registers(msrs); - - /* move to next set */ - si += model->num_counters; - if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) - per_cpu(switch_index, cpu) = 0; - else - per_cpu(switch_index, cpu) = si; - - model->switch_ctrl(model, msrs); - nmi_cpu_restore_mpx_registers(msrs); - - nmi_cpu_start(NULL); -} - - -/* - * Quick check to see if multiplexing is necessary. - * The check should be sufficient since counters are used - * in ordre. 
- */ -static int nmi_multiplex_on(void) -{ - return counter_config[model->num_counters].count ? 0 : -EINVAL; -} - -static int nmi_switch_event(void) -{ - if (!model->switch_ctrl) - return -ENOSYS; /* not implemented */ - if (nmi_multiplex_on() < 0) - return -EINVAL; /* not necessary */ - - on_each_cpu(nmi_cpu_switch, NULL, 1); - - atomic_inc(&multiplex_counter); - - return 0; -} - -#endif - #ifdef CONFIG_SMP static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, void *data) -- cgit v1.1 From 259a83a8abdb9d2664819ec80ad12ebaeb251e32 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 15:12:35 +0200 Subject: x86/oprofile: Remove const qualifier from struct op_x86_model_spec This patch removes the const qualifier from struct op_x86_model_spec to make model parameters changeable. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 4 ++-- arch/x86/oprofile/op_model_amd.c | 2 +- arch/x86/oprofile/op_model_p4.c | 4 ++-- arch/x86/oprofile/op_model_ppro.c | 2 +- arch/x86/oprofile/op_x86_model.h | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 998c7dc..826f391 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -27,7 +27,7 @@ #include "op_counter.h" #include "op_x86_model.h" -static struct op_x86_model_spec const *model; +static struct op_x86_model_spec *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); @@ -542,7 +542,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; - struct op_x86_model_spec const *spec = &op_ppro_spec; /* default */ + struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ if (force_arch_perfmon && cpu_has_arch_perfmon) return 0; diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 644980f..39604b4 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -526,7 +526,7 @@ static void op_amd_exit(void) {} #endif /* CONFIG_OPROFILE_IBS */ -struct op_x86_model_spec const op_amd_spec = { +struct op_x86_model_spec op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, .num_virt_counters = NUM_VIRT_COUNTERS, diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 65b9237..40df028 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -695,7 +695,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) #ifdef CONFIG_SMP -struct op_x86_model_spec const op_p4_ht2_spec = { +struct op_x86_model_spec op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, .num_virt_counters = NUM_COUNTERS_HT2, @@ -709,7 +709,7 @@ struct op_x86_model_spec const op_p4_ht2_spec = { }; #endif -struct op_x86_model_spec const op_p4_spec = { +struct op_x86_model_spec op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, .num_virt_counters = NUM_COUNTERS_NON_HT, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 098cbca..659f3b6 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -203,7 +203,7 @@ static void ppro_shutdown(struct op_msrs const * const msrs) } -struct op_x86_model_spec const op_ppro_spec = { +struct op_x86_model_spec op_ppro_spec = { .num_counters = 2,
.num_controls = 2, .num_virt_counters = 2, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index e874dc3..0c886fa 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -62,10 +62,10 @@ extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); extern int op_x86_phys_to_virt(int phys); -extern struct op_x86_model_spec const op_ppro_spec; -extern struct op_x86_model_spec const op_p4_spec; -extern struct op_x86_model_spec const op_p4_ht2_spec; -extern struct op_x86_model_spec const op_amd_spec; +extern struct op_x86_model_spec op_ppro_spec; +extern struct op_x86_model_spec op_p4_spec; +extern struct op_x86_model_spec op_p4_ht2_spec; +extern struct op_x86_model_spec op_amd_spec; extern struct op_x86_model_spec op_arch_perfmon_spec; #endif /* OP_X86_MODEL_H */ -- cgit v1.1 From 2904a527575344a804fdd82b1f8d09a8731d8d49 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 12:33:41 +0200 Subject: x86/oprofile: Remove unused num_virt_controls from struct op_x86_model_spec The member num_virt_controls of struct op_x86_model_spec is not used. This patch removes it. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 1 - arch/x86/oprofile/op_model_p4.c | 2 -- arch/x86/oprofile/op_model_ppro.c | 1 - arch/x86/oprofile/op_x86_model.h | 1 - 4 files changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 39604b4..dce69b5 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -530,7 +530,6 @@ struct op_x86_model_spec op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, .num_virt_counters = NUM_VIRT_COUNTERS, - .num_virt_controls = NUM_VIRT_CONTROLS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 40df028..0a4f2de 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -699,7 +699,6 @@ struct op_x86_model_spec op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, .num_virt_counters = NUM_COUNTERS_HT2, - .num_virt_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -713,7 +712,6 @@ struct op_x86_model_spec op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, .num_virt_counters = NUM_COUNTERS_NON_HT, - .num_virt_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 659f3b6..753a02a 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -207,7 +207,6 @@ struct op_x86_model_spec op_ppro_spec = { .num_counters = 2, .num_controls = 2, .num_virt_counters = 2, - .num_virt_controls = 2, .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 0c886fa..4e2e7c2 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -37,7 +37,6 @@ struct op_x86_model_spec { unsigned int num_counters; unsigned int num_controls; unsigned int num_virt_counters; - unsigned int 
num_virt_controls; u64 reserved; u16 event_mask; int (*init)(struct oprofile_operations *ops); -- cgit v1.1 From 52471c67ee2fa5ed6f700ef57bf27833c63b2192 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 6 Jul 2009 14:43:55 +0200 Subject: x86/oprofile: Modify initialization of num_virt_counters Models that do not yet support counter multiplexing have to setup num_virt_counters. This patch implements the setup from num_counters if num_virt_counters is not set. Thus, num_virt_counters must be setup only for multiplexing support. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 3 +++ arch/x86/oprofile/op_model_p4.c | 2 -- arch/x86/oprofile/op_model_ppro.c | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 826f391..82ee2951 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -674,6 +674,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (ret) return ret; + if (!model->num_virt_counters) + model->num_virt_counters = model->num_counters; + init_sysfs(); using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 0a4f2de..ac6b354 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -698,7 +698,6 @@ static void p4_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, - .num_virt_counters = NUM_COUNTERS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -711,7 +710,6 @@ struct op_x86_model_spec op_p4_ht2_spec = { struct op_x86_model_spec op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, - .num_virt_counters = NUM_COUNTERS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 753a02a..4899215 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -206,7 +206,6 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec op_ppro_spec = { .num_counters = 2, .num_controls = 2, - .num_virt_counters = 2, .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, -- cgit v1.1 From 39e97f40c3a5e71de0532368deaa683e09b74ba2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 15:11:45 +0200 Subject: x86/oprofile: Add function has_mux() to check multiplexing support The check is used to prevent running multiplexing code for models not supporting multiplexing. Before, the code was running but without effect. 
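has_mux() keys support off the presence of the model's switch_ctrl callback, so every multiplexing path can bail out early on models that never set it. A minimal sketch of the guard pattern; the structs and model instances are illustrative, not the kernel definitions:

#include <stdio.h>

struct model_spec {
	const char *name;
	void (*switch_ctrl)(void);	/* NULL: no multiplexing support */
};

static const struct model_spec *model;

static int has_mux(void)
{
	return !!model->switch_ctrl;
}

static void do_switch(void)
{
	puts("switching counter sets");
}

static void setup_mux(void)
{
	if (!has_mux())
		return;		/* nothing to do on this model */
	puts("allocating multiplexing state");
}

static const struct model_spec amd  = { "amd", do_switch };
static const struct model_spec ppro = { "ppro", NULL };

int main(void)
{
	model = &ppro;
	setup_mux();	/* silently skipped */
	model = &amd;
	setup_mux();	/* runs */
	return 0;
}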
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 82ee2951..dca7240 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -124,6 +124,11 @@ static void nmi_stop(void) static DEFINE_PER_CPU(int, switch_index); +static inline int has_mux(void) +{ + return !!model->switch_ctrl; +} + inline int op_x86_phys_to_virt(int phys) { return __get_cpu_var(switch_index) + phys; @@ -132,6 +137,10 @@ inline int op_x86_phys_to_virt(int phys) static void nmi_shutdown_mux(void) { int i; + + if (!has_mux()) + return; + for_each_possible_cpu(i) { kfree(per_cpu(cpu_msrs, i).multiplex); per_cpu(cpu_msrs, i).multiplex = NULL; @@ -144,12 +153,17 @@ static int nmi_setup_mux(void) size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters; int i; + + if (!has_mux()) + return 1; + for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).multiplex = kmalloc(multiplex_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).multiplex) return 0; } + return 1; } @@ -158,6 +172,9 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) int i; struct op_msr *multiplex = msrs->multiplex; + if (!has_mux()) + return; + for (i = 0; i < model->num_virt_counters; ++i) { if (counter_config[i].enabled) { multiplex[i].saved = -(u64)counter_config[i].count; @@ -229,7 +246,7 @@ static int nmi_multiplex_on(void) static int nmi_switch_event(void) { - if (!model->switch_ctrl) + if (!has_mux()) return -ENOSYS; /* not implemented */ if (nmi_multiplex_on() < 0) return -EINVAL; /* not necessary */ -- cgit v1.1 From 5280514471c2803776701c43c027038decac1103 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 16:02:44 +0200 Subject: x86/oprofile: Enable multiplexing only if the model supports it This patch checks if the model supports multiplexing. Only then multiplexing will be enabled. The code is added to the common x86 initialization. 
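Two pieces of common init logic come together here: the optional switch_events operation is wired into the ops struct only when the model supports multiplexing, and num_virt_counters defaults to num_counters for models that never set it. A standalone sketch of that initialization; field names follow the patches above, the rest is illustrative:

#include <stdio.h>

struct ops {
	int (*switch_events)(void);	/* stays NULL without mux support */
};

struct model_spec {
	unsigned int num_counters;
	unsigned int num_virt_counters;	/* 0 means: not set by the model */
	void (*switch_ctrl)(void);
};

static int nmi_switch_event(void)
{
	return 0;
}

static void mux_init(struct ops *ops, const struct model_spec *model)
{
	if (model->switch_ctrl)
		ops->switch_events = nmi_switch_event;
}

int main(void)
{
	struct ops ops = { 0 };
	struct model_spec model = { .num_counters = 2 };	/* no mux */

	if (!model.num_virt_counters)
		model.num_virt_counters = model.num_counters;
	mux_init(&ops, &model);

	printf("virt counters: %u, switch op wired: %s\n",
	       model.num_virt_counters,
	       ops.switch_events ? "yes" : "no");
	return 0;
}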
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index dca7240..f0fb447 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -258,6 +258,12 @@ static int nmi_switch_event(void) return 0; } +static inline void mux_init(struct oprofile_operations *ops) +{ + if (has_mux()) + ops->switch_events = nmi_switch_event; +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } @@ -265,6 +271,7 @@ static inline void nmi_shutdown_mux(void) { } static inline int nmi_setup_mux(void) { return 1; } static inline void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } +static inline void mux_init(struct oprofile_operations *ops) { } #endif @@ -682,9 +689,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - ops->switch_events = nmi_switch_event; -#endif if (model->init) ret = model->init(ops); @@ -694,6 +698,8 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (!model->num_virt_counters) model->num_virt_counters = model->num_counters; + mux_init(ops); + init_sysfs(); using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); -- cgit v1.1 From 4d015f79e972cea1761cfee8872b1c0992ccd8b2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 21:42:51 +0200 Subject: x86/oprofile: Implement mux_clone() To setup a counter for all cpus, its structure is cloned from cpu 0. This patch implements mux_clone() to do this part for multiplexing data. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index f0fb447..da6d2ab 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -264,6 +264,16 @@ static inline void mux_init(struct oprofile_operations *ops) ops->switch_events = nmi_switch_event; } +static void mux_clone(int cpu) +{ + if (!has_mux()) + return; + + memcpy(per_cpu(cpu_msrs, cpu).multiplex, + per_cpu(cpu_msrs, 0).multiplex, + sizeof(struct op_msr) * model->num_virt_counters); +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } @@ -272,6 +282,7 @@ static inline int nmi_setup_mux(void) { return 1; } static inline void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } static inline void mux_init(struct oprofile_operations *ops) { } +static void mux_clone(int cpu) { } #endif @@ -350,20 +361,18 @@ static int nmi_setup(void) /* Assume saved/restored counters are the same on all CPUs */ model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); for_each_possible_cpu(cpu) { - if (cpu != 0) { - memcpy(per_cpu(cpu_msrs, cpu).counters, - per_cpu(cpu_msrs, 0).counters, - sizeof(struct op_msr) * model->num_counters); - - memcpy(per_cpu(cpu_msrs, cpu).controls, - per_cpu(cpu_msrs, 0).controls, - sizeof(struct op_msr) * model->num_controls); -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - memcpy(per_cpu(cpu_msrs, cpu).multiplex, - per_cpu(cpu_msrs, 0).multiplex, - sizeof(struct op_msr) * model->num_virt_counters); -#endif - } + if (!cpu) + continue; + + memcpy(per_cpu(cpu_msrs, cpu).counters, + per_cpu(cpu_msrs, 0).counters, + sizeof(struct op_msr) * model->num_counters); + + memcpy(per_cpu(cpu_msrs, cpu).controls, + 
per_cpu(cpu_msrs, 0).controls, + sizeof(struct op_msr) * model->num_controls); + + mux_clone(cpu); } on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; -- cgit v1.1 From 1b294f5960cd89e49eeb3e797860c552b03f2272 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:56:25 +0200 Subject: oprofile: Adding switch counter to oprofile statistic variables This patch moves the multiplexing switch counter from x86 code to common oprofile statistic variables. Now the value will be available and usable for all architectures. The initialization and incrementation also moved to common code. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index da6d2ab..7b3362f 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -34,11 +34,6 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -extern atomic_t multiplex_counter; -#endif - struct op_counter_config counter_config[OP_MAX_COUNTER]; /* common functions */ @@ -253,8 +248,6 @@ static int nmi_switch_event(void) on_each_cpu(nmi_cpu_switch, NULL, 1); - atomic_inc(&multiplex_counter); - return 0; } -- cgit v1.1 From 61d149d5248ad7428801cdede0f5fcc2b90cd61c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 10 Jul 2009 15:47:17 +0200 Subject: x86/oprofile: Implement op_x86_virt_to_phys() This patch implements a common x86 function to convert virtual counter numbers to physical. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 6 ++++++ arch/x86/oprofile/op_model_amd.c | 2 +- arch/x86/oprofile/op_x86_model.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 7b3362f..5856e61 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -129,6 +129,11 @@ inline int op_x86_phys_to_virt(int phys) return __get_cpu_var(switch_index) + phys; } +inline int op_x86_virt_to_phys(int virt) +{ + return virt % model->num_counters; +} + static void nmi_shutdown_mux(void) { int i; @@ -270,6 +275,7 @@ static void mux_clone(int cpu) #else inline int op_x86_phys_to_virt(int phys) { return phys; } +inline int op_x86_virt_to_phys(int virt) { return virt; } static inline void nmi_shutdown_mux(void) { } static inline int nmi_setup_mux(void) { return 1; } static inline void diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index dce69b5..1ea1982 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -81,7 +81,7 @@ static void op_mux_fill_in_addresses(struct op_msrs * const msrs) int i; for (i = 0; i < NUM_VIRT_COUNTERS; i++) { - int hw_counter = i % NUM_COUNTERS; + int hw_counter = op_x86_virt_to_phys(i); if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; else diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 4e2e7c2..b837761 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -60,6 +60,7 @@ struct op_counter_config; extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); extern int op_x86_phys_to_virt(int phys); +extern int op_x86_virt_to_phys(int virt); extern struct op_x86_model_spec 
op_ppro_spec; extern struct op_x86_model_spec op_p4_spec; -- cgit v1.1 From 11be1a7b54283021777f409aa983ce125945e67c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 10 Jul 2009 18:15:21 +0200 Subject: x86/oprofile: Add counter reservation check for virtual counters This patch adds a check for the availability of a counter. A virtual counter is used only if its physical counter is not reserved. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 5856e61..cb88b1a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -435,15 +435,13 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) struct dentry *dir; char buf[4]; -#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX /* quick little hack to _not_ expose a counter if it is not * available for use. This should protect userspace app. * NOTE: assumes 1:1 mapping here (that counters are organized * sequentially in their struct assignment). */ - if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) + if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i))) continue; -#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */ snprintf(buf, sizeof(buf), "%d", i); dir = oprofilefs_mkdir(sb, root, buf); -- cgit v1.1 From c550091edd6fac2ed9dac1b30d986b6c58b216fa Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Jul 2009 13:11:16 +0200 Subject: x86/oprofile: Small coding style fixes Some small coding style fixes. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 1ea1982..827beec 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -144,11 +144,10 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* setup reset_value */ for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { - if (counter_config[i].enabled) { + if (counter_config[i].enabled) reset_value[i] = counter_config[i].count; - } else { + else reset_value[i] = 0; - } } /* clear all counters */ -- cgit v1.1 From 3162534069597e34dd0ac9eb711be8dc23835ae7 Mon Sep 17 00:00:00 2001 From: Joseph Cihula Date: Tue, 30 Jun 2009 19:30:59 -0700 Subject: x86, intel_txt: Intel TXT boot support This patch adds kernel configuration and boot support for Intel Trusted Execution Technology (Intel TXT). Intel's technology for safer computing, Intel Trusted Execution Technology (Intel TXT), defines platform-level enhancements that provide the building blocks for creating trusted platforms. Intel TXT was formerly known by the code name LaGrande Technology (LT). Intel TXT in Brief: o Provides dynamic root of trust for measurement (DRTM) o Data protection in case of improper shutdown o Measurement and verification of launched environment Intel TXT is part of the vPro(TM) brand and is also available on some non-vPro systems. It is currently available on desktop systems based on the Q35, X38, Q45, and Q43 Express chipsets (e.g. Dell Optiplex 755, HP dc7800, etc.) and mobile systems based on the GM45, PM45, and GS45 Express chipsets. For more information, see http://www.intel.com/technology/security/. This site also has a link to the Intel TXT MLE Developers Manual, which has been updated for the newly released platforms.
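[ A sketch of the handoff this patch implements: tboot records the physical address of its shared data page in boot_params.tboot_addr before jumping to the kernel, and tboot_probe() later validates and maps that page. Condensed from the tboot.c code added below (error paths and the e820/paravirt sanity checks omitted):

	if (!boot_params.tboot_addr)
		return;				/* not launched via tboot */
	set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
	tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
	if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid)))
		tboot = NULL;			/* wrong UUID, ignore the page */
]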
A much more complete description of how these patches support TXT, how to configure a system for it, etc. is in the Documentation/intel_txt.txt file in this patch. This patch provides the TXT support routines for complete functionality, documentation for TXT support and for the changes to the boot_params structure, and boot detection of a TXT launch. Attempts to shutdown (reboot, Sx) the system will result in platform resets; subsequent patches will support these shutdown modes properly. Documentation/intel_txt.txt | 210 +++++++++++++++++++++ Documentation/x86/zero-page.txt | 1 arch/x86/include/asm/bootparam.h | 3 arch/x86/include/asm/fixmap.h | 3 arch/x86/include/asm/tboot.h | 197 ++++++++++++++++++++ arch/x86/kernel/Makefile | 1 arch/x86/kernel/setup.c | 4 arch/x86/kernel/tboot.c | 379 +++++++++++++++++++++++++++++++++++++++ security/Kconfig | 30 +++ 9 files changed, 827 insertions(+), 1 deletion(-) Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang Signed-off-by: Gang Wei Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/bootparam.h | 3 +- arch/x86/include/asm/fixmap.h | 3 + arch/x86/include/asm/tboot.h | 197 ++++++++++++++++++++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/setup.c | 4 + arch/x86/kernel/tboot.c | 379 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 586 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/tboot.h create mode 100644 arch/x86/kernel/tboot.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index 1724e8d..6ca2021 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h @@ -85,7 +85,8 @@ struct efi_info { struct boot_params { struct screen_info screen_info; /* 0x000 */ struct apm_bios_info apm_bios_info; /* 0x040 */ - __u8 _pad2[12]; /* 0x054 */ + __u8 _pad2[4]; /* 0x054 */ + __u64 tboot_addr; /* 0x058 */ struct ist_info ist_info; /* 0x060 */ __u8 _pad3[16]; /* 0x070 */ __u8 hd0_info[16]; /* obsolete! */ /* 0x080 */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 7b2d71d..14f9890 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -132,6 +132,9 @@ enum fixed_addresses { #ifdef CONFIG_X86_32 FIX_WP_TEST, #endif +#ifdef CONFIG_INTEL_TXT + FIX_TBOOT_BASE, +#endif __end_of_fixed_addresses }; diff --git a/arch/x86/include/asm/tboot.h b/arch/x86/include/asm/tboot.h new file mode 100644 index 0000000..b13929d --- /dev/null +++ b/arch/x86/include/asm/tboot.h @@ -0,0 +1,197 @@ +/* + * tboot.h: shared data structure with tboot and kernel and functions + * used by kernel for runtime support of Intel(R) Trusted + * Execution Technology + * + * Copyright (c) 2006-2009, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +#ifndef _ASM_TBOOT_H +#define _ASM_TBOOT_H + +#include + +/* these must have the values from 0-5 in this order */ +enum { + TB_SHUTDOWN_REBOOT = 0, + TB_SHUTDOWN_S5, + TB_SHUTDOWN_S4, + TB_SHUTDOWN_S3, + TB_SHUTDOWN_HALT, + TB_SHUTDOWN_WFS +}; + +#ifdef CONFIG_INTEL_TXT + +/* used to communicate between tboot and the launched kernel */ + +#define TB_KEY_SIZE 64 /* 512 bits */ + +#define MAX_TB_MAC_REGIONS 32 + +struct tboot_mac_region { + u64 start; /* must be 64 byte -aligned */ + u32 size; /* must be 64 byte -granular */ +} __packed; + +/* GAS - Generic Address Structure (ACPI 2.0+) */ +struct tboot_acpi_generic_address { + u8 space_id; + u8 bit_width; + u8 bit_offset; + u8 access_width; + u64 address; +} __packed; + +/* + * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec + * (http://www.acpi.info/) + */ +struct tboot_acpi_sleep_info { + struct tboot_acpi_generic_address pm1a_cnt_blk; + struct tboot_acpi_generic_address pm1b_cnt_blk; + struct tboot_acpi_generic_address pm1a_evt_blk; + struct tboot_acpi_generic_address pm1b_evt_blk; + u16 pm1a_cnt_val; + u16 pm1b_cnt_val; + u64 wakeup_vector; + u32 vector_width; + u64 kernel_s3_resume_vector; +} __packed; + +/* + * shared memory page used for communication between tboot and kernel + */ +struct tboot { + /* + * version 3+ fields: + */ + + /* TBOOT_UUID */ + u8 uuid[16]; + + /* version number: 5 is current */ + u32 version; + + /* physical addr of tb_log_t log */ + u32 log_addr; + + /* + * physical addr of entry point for tboot shutdown and + * type of shutdown (TB_SHUTDOWN_*) being requested + */ + u32 shutdown_entry; + u32 shutdown_type; + + /* kernel-specified ACPI info for Sx shutdown */ + struct tboot_acpi_sleep_info acpi_sinfo; + + /* tboot location in memory (physical) */ + u32 tboot_base; + u32 tboot_size; + + /* memory regions (phys addrs) for tboot to MAC on S3 */ + u8 num_mac_regions; + struct tboot_mac_region mac_regions[MAX_TB_MAC_REGIONS]; + + + /* + * version 4+ fields: + */ + + /* symmetric key for use by kernel; will be encrypted on S3 */ + u8 s3_key[TB_KEY_SIZE]; + + + /* + * version 5+ fields: + */ + + /* used to 4byte-align num_in_wfs */ + u8 reserved_align[3]; + + /* number of processors in wait-for-SIPI */ + u32 num_in_wfs; +} __packed; + +/* + * UUID for tboot data struct to facilitate matching + * defined as {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} by tboot, which is + * represented as {} in the char array used here + */ +#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\ + 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8} + +extern struct tboot *tboot; + +static inline int tboot_enabled(void) +{ + return tboot != NULL; +} + +extern void tboot_probe(void); +extern void tboot_create_trampoline(void); +extern void tboot_shutdown(u32 shutdown_type); +extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); +extern int tboot_wait_for_aps(int num_aps); +extern struct acpi_table_header *tboot_get_dmar_table( + struct acpi_table_header *dmar_tbl); +extern int tboot_force_iommu(void); + +#else /* CONFIG_INTEL_TXT */ + +static inline int tboot_enabled(void) +{ + return 0; +} + +static inline void tboot_probe(void) +{ +} + +static inline void tboot_create_trampoline(void) +{ +} + +static inline void tboot_shutdown(u32 shutdown_type) +{ +} + +static inline void tboot_sleep(u8 sleep_state, u32 pm1a_control, + u32 pm1b_control) +{ +} + +static inline int tboot_wait_for_aps(int num_aps) +{ + return 0; +} + +static inline struct acpi_table_header *tboot_get_dmar_table( + 
struct acpi_table_header *dmar_tbl) +{ + return dmar_tbl; +} + +static inline int tboot_force_iommu(void) +{ + return 0; +} + +#endif /* !CONFIG_INTEL_TXT */ + +#endif /* _ASM_TBOOT_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 430d5b2..832cb83 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o +obj-$(CONFIG_INTEL_TXT) += tboot.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index de2cab1..80d6e9e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -145,6 +145,8 @@ struct boot_params __initdata boot_params; struct boot_params boot_params; #endif +#include + /* * Machine setup.. */ @@ -964,6 +966,8 @@ void __init setup_arch(char **cmdline_p) paravirt_pagetable_setup_done(swapper_pg_dir); paravirt_post_allocator_init(); + tboot_probe(); + #ifdef CONFIG_X86_64 map_vsyscall(); #endif diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c new file mode 100644 index 0000000..263591a --- /dev/null +++ b/arch/x86/kernel/tboot.c @@ -0,0 +1,379 @@ +/* + * tboot.c: main implementation of helper functions used by kernel for + * runtime support of Intel(R) Trusted Execution Technology + * + * Copyright (c) 2006-2009, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acpi/realmode/wakeup.h" + +/* Global pointer to shared data; NULL means no measured launch. */ +struct tboot *tboot __read_mostly; + +/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ +#define AP_WAIT_TIMEOUT 1 + +#undef pr_fmt +#define pr_fmt(fmt) "tboot: " fmt + +static u8 tboot_uuid[16] __initdata = TBOOT_UUID; + +void __init tboot_probe(void) +{ + /* Look for valid page-aligned address for shared page. */ + if (!boot_params.tboot_addr) + return; + /* + * also verify that it is mapped as we expect it before calling + * set_fixmap(), to reduce chance of garbage value causing crash + */ + if (!e820_any_mapped(boot_params.tboot_addr, + boot_params.tboot_addr, E820_RESERVED)) { + pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n"); + return; + } + + /* only a natively booted kernel should be using TXT */ + if (paravirt_enabled()) { + pr_warning("non-0 tboot_addr but pv_ops is enabled\n"); + return; + } + + /* Map and check for tboot UUID. 
*/ + set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); + tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); + if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) { + pr_warning("tboot at 0x%llx is invalid\n", + boot_params.tboot_addr); + tboot = NULL; + return; + } + if (tboot->version < 5) { + pr_warning("tboot version is invalid: %u\n", tboot->version); + tboot = NULL; + return; + } + + pr_info("found shared page at phys addr 0x%llx:\n", + boot_params.tboot_addr); + pr_debug("version: %d\n", tboot->version); + pr_debug("log_addr: 0x%08x\n", tboot->log_addr); + pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry); + pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base); + pr_debug("tboot_size: 0x%x\n", tboot->tboot_size); +} + +static pgd_t *tboot_pg_dir; +static struct mm_struct tboot_mm = { + .mm_rb = RB_ROOT, + .pgd = swapper_pg_dir, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(init_mm.mmlist), + .cpu_vm_mask = CPU_MASK_ALL, +}; + +static inline void switch_to_tboot_pt(void) +{ + write_cr3(virt_to_phys(tboot_pg_dir)); +} + +static int map_tboot_page(unsigned long vaddr, unsigned long pfn, + pgprot_t prot) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset(&tboot_mm, vaddr); + pud = pud_alloc(&tboot_mm, pgd, vaddr); + if (!pud) + return -1; + pmd = pmd_alloc(&tboot_mm, pud, vaddr); + if (!pmd) + return -1; + pte = pte_alloc_map(&tboot_mm, pmd, vaddr); + if (!pte) + return -1; + set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); + pte_unmap(pte); + return 0; +} + +static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn, + unsigned long nr) +{ + /* Reuse the original kernel mapping */ + tboot_pg_dir = pgd_alloc(&tboot_mm); + if (!tboot_pg_dir) + return -1; + + for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) { + if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC)) + return -1; + } + + return 0; +} + +void tboot_create_trampoline(void) +{ + u32 map_base, map_size; + + if (!tboot_enabled()) + return; + + /* Create identity map for tboot shutdown code. 
*/ + map_base = PFN_DOWN(tboot->tboot_base); + map_size = PFN_UP(tboot->tboot_size); + if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size)) + panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n", map_base, map_size); +} + +static void set_mac_regions(void) +{ + tboot->num_mac_regions = 3; + /* S3 resume code */ + tboot->mac_regions[0].start = PFN_PHYS(PFN_DOWN(acpi_wakeup_address)); + tboot->mac_regions[0].size = PFN_UP(WAKEUP_SIZE) << PAGE_SHIFT; + /* AP trampoline code */ + tboot->mac_regions[1].start = + PFN_PHYS(PFN_DOWN(virt_to_phys(trampoline_base))); + tboot->mac_regions[1].size = PFN_UP(TRAMPOLINE_SIZE) << PAGE_SHIFT; + /* kernel code + data + bss */ + tboot->mac_regions[2].start = PFN_PHYS(PFN_DOWN(virt_to_phys(&_text))); + tboot->mac_regions[2].size = PFN_PHYS(PFN_UP(virt_to_phys(&_end))) - + PFN_PHYS(PFN_DOWN(virt_to_phys(&_text))); +} + +void tboot_shutdown(u32 shutdown_type) +{ + void (*shutdown)(void); + + if (!tboot_enabled()) + return; + + /* + * if we're being called before the 1:1 mapping is set up then just + * return and let the normal shutdown happen; this should only be + * due to very early panic() + */ + if (!tboot_pg_dir) + return; + + /* if this is S3 then set regions to MAC */ + if (shutdown_type == TB_SHUTDOWN_S3) + set_mac_regions(); + + tboot->shutdown_type = shutdown_type; + + switch_to_tboot_pt(); + + shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry; + shutdown(); + + /* should not reach here */ + while (1) + halt(); +} + +static void tboot_copy_fadt(const struct acpi_table_fadt *fadt) +{ +#define TB_COPY_GAS(tbg, g) \ + tbg.space_id = g.space_id; \ + tbg.bit_width = g.bit_width; \ + tbg.bit_offset = g.bit_offset; \ + tbg.access_width = g.access_width; \ + tbg.address = g.address; + + TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block); + TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block); + TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block); + TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block); + + /* + * We need phys addr of waking vector, but can't use virt_to_phys() on + * &acpi_gbl_FACS because it is ioremap'ed, so calc from FACS phys + * addr. + */ + tboot->acpi_sinfo.wakeup_vector = fadt->facs + + offsetof(struct acpi_table_facs, firmware_waking_vector); +} + +void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) +{ + static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = { + /* S0,1,2: */ -1, -1, -1, + /* S3: */ TB_SHUTDOWN_S3, + /* S4: */ TB_SHUTDOWN_S4, + /* S5: */ TB_SHUTDOWN_S5 }; + + if (!tboot_enabled()) + return; + + tboot_copy_fadt(&acpi_gbl_FADT); + tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control; + tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control; + /* we always use the 32b wakeup vector */ + tboot->acpi_sinfo.vector_width = 32; + tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; + + if (sleep_state >= ACPI_S_STATE_COUNT || + acpi_shutdown_map[sleep_state] == -1) { + pr_warning("unsupported sleep state 0x%x\n", sleep_state); + return; + } + + tboot_shutdown(acpi_shutdown_map[sleep_state]); +} + +int tboot_wait_for_aps(int num_aps) +{ + unsigned long timeout; + + if (!tboot_enabled()) + return 0; + + timeout = jiffies + AP_WAIT_TIMEOUT*HZ; + while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps && + time_before(jiffies, timeout)) + cpu_relax(); + + return time_before(jiffies, timeout) ? 
0 : 1; +} + +/* + * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE) + */ + +#define TXT_PUB_CONFIG_REGS_BASE 0xfed30000 +#define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000 + +/* # pages for each config regs space - used by fixmap */ +#define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \ + TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT) + +/* offsets from pub/priv config space */ +#define TXTCR_HEAP_BASE 0x0300 +#define TXTCR_HEAP_SIZE 0x0308 + +#define SHA1_SIZE 20 + +struct sha1_hash { + u8 hash[SHA1_SIZE]; +}; + +struct sinit_mle_data { + u32 version; /* currently 6 */ + struct sha1_hash bios_acm_id; + u32 edx_senter_flags; + u64 mseg_valid; + struct sha1_hash sinit_hash; + struct sha1_hash mle_hash; + struct sha1_hash stm_hash; + struct sha1_hash lcp_policy_hash; + u32 lcp_policy_control; + u32 rlp_wakeup_addr; + u32 reserved; + u32 num_mdrs; + u32 mdrs_off; + u32 num_vtd_dmars; + u32 vtd_dmars_off; +} __packed; + +struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl) +{ + void *heap_base, *heap_ptr, *config; + + if (!tboot_enabled()) + return dmar_tbl; + + /* + * ACPI tables may not be DMA protected by tboot, so use DMAR copy + * SINIT saved in SinitMleData in TXT heap (which is DMA protected) + */ + + /* map config space in order to get heap addr */ + config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES * + PAGE_SIZE); + if (!config) + return NULL; + + /* now map TXT heap */ + heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE), + *(u64 *)(config + TXTCR_HEAP_SIZE)); + iounmap(config); + if (!heap_base) + return NULL; + + /* walk heap to SinitMleData */ + /* skip BiosData */ + heap_ptr = heap_base + *(u64 *)heap_base; + /* skip OsMleData */ + heap_ptr += *(u64 *)heap_ptr; + /* skip OsSinitData */ + heap_ptr += *(u64 *)heap_ptr; + /* now points to SinitMleDataSize; set to SinitMleData */ + heap_ptr += sizeof(u64); + /* get addr of DMAR table */ + dmar_tbl = (struct acpi_table_header *)(heap_ptr + + ((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off - + sizeof(u64)); + + /* don't unmap heap because dmar.c needs access to this */ + + return dmar_tbl; +} + +int tboot_force_iommu(void) +{ + if (!tboot_enabled()) + return 0; + + if (no_iommu || swiotlb || dmar_disabled) + pr_warning("Forcing Intel-IOMMU to enabled\n"); + + dmar_disabled = 0; +#ifdef CONFIG_SWIOTLB + swiotlb = 0; +#endif + no_iommu = 0; + + return 1; +} -- cgit v1.1 From 840c2baf2d4cdf35ecc3b7fcbba7740f97de30a4 Mon Sep 17 00:00:00 2001 From: Joseph Cihula Date: Tue, 30 Jun 2009 19:31:02 -0700 Subject: x86, intel_txt: Intel TXT reboot/halt shutdown support Support for graceful handling of kernel reboots after an Intel(R) TXT launch. Without this patch, attempting to reboot or halt the system will cause the TXT hardware to lock memory upon system restart because the secrets-in-memory flag that was set on launch was never cleared. This will in turn cause BIOS to execute a TXT Authenticated Code Module (ACM) that will scrub all of memory and then unlock it. Depending on the amount of memory in the system and its type, this may take some time. This patch creates a 1:1 address mapping to the tboot module and then calls back into tboot so that it may properly and securely clean up system state and clear the secrets-in-memory flag. When it has completed these steps, the tboot module will reboot or halt the system. 
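[ Note that the hook is cheap on ordinary systems: tboot_shutdown(), added in the previous patch, returns immediately when tboot_enabled() is false, so the reboot/halt paths below only pay for the callback after a measured launch. The placement used in the diff below is simply:

	/* first thing in the restart/halt paths, before touching the BIOS */
	tboot_shutdown(TB_SHUTDOWN_REBOOT);	/* no-op unless launched via tboot */
]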
arch/x86/kernel/reboot.c | 8 ++++++++ init/main.c | 3 +++ 2 files changed, 11 insertions(+) Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d2d1ce8..9de01c5 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,6 +24,8 @@ # include #endif +#include + /* * Power off function, if any */ @@ -460,6 +462,8 @@ static void native_machine_emergency_restart(void) if (reboot_emergency) emergency_vmx_disable_all(); + tboot_shutdown(TB_SHUTDOWN_REBOOT); + /* Tell the BIOS if we want cold or warm reboot */ *((unsigned short *)__va(0x472)) = reboot_mode; @@ -586,6 +590,8 @@ static void native_machine_halt(void) /* stop other cpus and apics */ machine_shutdown(); + tboot_shutdown(TB_SHUTDOWN_HALT); + /* stop this cpu */ stop_this_cpu(NULL); } @@ -597,6 +603,8 @@ static void native_machine_power_off(void) machine_shutdown(); pm_power_off(); } + /* a fallback in case there is no PM info available */ + tboot_shutdown(TB_SHUTDOWN_HALT); } struct machine_ops machine_ops = { -- cgit v1.1 From 86886e55b273f565935491816c7c96b82469d4f8 Mon Sep 17 00:00:00 2001 From: Joseph Cihula Date: Tue, 30 Jun 2009 19:31:07 -0700 Subject: x86, intel_txt: Intel TXT Sx shutdown support Support for graceful handling of sleep states (S3/S4/S5) after an Intel(R) TXT launch. Without this patch, attempting to place the system in one of the ACPI sleep states (S3/S4/S5) will cause the TXT hardware to treat this as an attack and will cause a system reset, with memory locked. Not only may the subsequent memory scrub take some time, but the platform will be unable to enter the requested power state. This patch calls back into tboot so that it may properly and securely clean up system state and clear the secrets-in-memory flag, after which it will place the system into the requested sleep state using ACPI information passed by the kernel. arch/x86/kernel/smpboot.c | 2 ++ drivers/acpi/acpica/hwsleep.c | 3 +++ kernel/cpu.c | 7 ++++++- 3 files changed, 11 insertions(+), 1 deletion(-) Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang Signed-off-by: H. Peter Anvin --- arch/x86/kernel/smpboot.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda6..61cc408 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -1317,6 +1318,7 @@ void play_dead_common(void) void native_play_dead(void) { play_dead_common(); + tboot_shutdown(TB_SHUTDOWN_WFS); wbinvd_halt(); } -- cgit v1.1 From d7aacaddcac3971e33cf52d7e610c06696cb347f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 8 Jul 2009 13:21:31 +0200 Subject: Driver Core: Add platform device arch data V3 Allow architecture specific data in struct platform_device V3. With this patch struct pdev_archdata is added to struct platform_device, similar to struct dev_archdata found in struct device. Useful for architecture code that needs to keep extra data associated with each platform device. Struct pdev_archdata is different from dev.platform_data; the convention is that dev.platform_data points to driver-specific data. It may or may not be required by the driver. The format of this depends on the driver but is the same across architectures.
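[ To make the distinction concrete, the generic half of this change (which lies outside the arch/x86-limited diff shown below) adds the new field to struct platform_device in include/linux/platform_device.h, roughly:

	struct platform_device {
		const char		*name;
		int			id;
		struct device		dev;	/* dev.platform_data: driver-specific */
		u32			num_resources;
		struct resource		*resource;
		struct pdev_archdata	archdata;	/* arch-specific, not for drivers */
	};

The field layout above is a sketch based on the description, not copied from the patch. ]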
The structure pdev_archdata is a place for architecture specific data. This data is handled by architecture specific code (for example runtime PM), and since it is architecture specific it should _never_ be touched by device driver code. Exactly like struct dev_archdata but for platform devices. [rjw: This change is for power management mostly and that's why it goes through the suspend tree.] Signed-off-by: Magnus Damm Acked-by: Kevin Hilman Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 4994a20..cee34e9 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -13,4 +13,7 @@ struct dma_map_ops *dma_ops; #endif }; +struct pdev_archdata { +}; + #endif /* _ASM_X86_DEVICE_H */ -- cgit v1.1 From 3885123da8335dc6b67387e5e626acbffc56f664 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:50 +0900 Subject: swiotlb: remove unused swiotlb_alloc_boot() Nobody uses swiotlb_alloc_boot(). Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee..0ac7cd5 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,11 +13,6 @@ int swiotlb __read_mostly; -void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - void *swiotlb_alloc(unsigned order, unsigned long nslabs) { return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -- cgit v1.1 From bb52196be37ce154ddc50b1f39496146d181cbe7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:51 +0900 Subject: swiotlb: remove unused swiotlb_alloc() Nobody uses swiotlb_alloc(). Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0ac7cd5..ea675cf 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,11 +13,6 @@ int swiotlb __read_mostly; -void *swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) { return paddr; -- cgit v1.1 From cf56e3f2e8a8d5b7bc719980869b0e7985c256f3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:52 +0900 Subject: swiotlb: remove swiotlb_arch_range_needs_mapping Nobody uses swiotlb_arch_range_needs_mapping(). 
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index ea675cf..165bd7f 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -23,11 +23,6 @@ phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) return baddr; } -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { -- cgit v1.1 From 99becaca86d184a4433e9fde879ff97303d7669f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:54 +0900 Subject: x86: add dma_capable() to replace is_buffer_dma_capable() dma_capable() eventually replaces is_buffer_dma_capable(), which tells if a memory area is dma-capable or not. The problem of is_buffer_dma_capable() is that it doesn't take a pointer to struct device so it doesn't work for POWERPC. Signed-off-by: FUJITA Tomonori --- arch/x86/include/asm/dma-mapping.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 1c3f943..adac59c 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -55,6 +55,14 @@ extern int dma_set_mask(struct device *dev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag); +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) -- cgit v1.1 From a4c2baa6e148adfb27beaf16b6fb6d465b5b3acb Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:55 +0900 Subject: x86: replace is_buffer_dma_capable() with dma_capable Signed-off-by: FUJITA Tomonori --- arch/x86/kernel/pci-dma.c | 2 +- arch/x86/kernel/pci-gart_64.c | 5 ++--- arch/x86/kernel/pci-nommu.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bc..3c945c0 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -147,7 +147,7 @@ again: return NULL; addr = page_to_phys(page); - if (!is_buffer_dma_capable(dma_mask, addr, size)) { + if (addr + size > dma_mask) { __free_pages(page, get_order(size)); if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d2e56b8..98a827e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir) static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) { - return force_iommu || - !is_buffer_dma_capable(*dev->dma_mask, addr, size); + return force_iommu || !dma_capable(dev, addr, size); } static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) { - return !is_buffer_dma_capable(*dev->dma_mask, addr, size); + return !dma_capable(dev, addr, size); } /* Map a single continuous physical area into the IOMMU. 
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 71d412a..c0a4222 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -14,7 +14,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { - if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { + if (hwdev && !dma_capable(hwdev, bus, size)) { if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", -- cgit v1.1 From 8d4f5339d1ee4027c07e6b2a1cfa9dc41b0d383b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:01 +0900 Subject: x86, IA64, powerpc: add phys_to_dma() and dma_to_phys() This adds two functions, phys_to_dma() and dma_to_phys() to x86, IA64 and powerpc. swiotlb uses them. phys_to_dma() converts a physical address to a dma address. dma_to_phys() does the opposite. Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/include/asm/dma-mapping.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index adac59c..0ee770d 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -63,6 +63,16 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size <= *dev->dma_mask; } +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) -- cgit v1.1 From b683d42693c4e92b838117f5c6f7b90bfa1525c9 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:04 +0900 Subject: x86: remove unused swiotlb_phys_to_bus() and swiotlb_bus_to_phys() phys_to_dma() and dma_to_phys() are used instead of swiotlb_phys_to_bus() and swiotlb_bus_to_phys(). Signed-off-by: FUJITA Tomonori --- arch/x86/kernel/pci-swiotlb.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 165bd7f..e8a3501 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,16 +13,6 @@ int swiotlb __read_mostly; -dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr; -} - -phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) -{ - return baddr; -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { -- cgit v1.1 From 94699b04eddd4b247d871930431d6fa1a46c175e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Jul 2009 23:52:54 +0200 Subject: x86, mce: don't log boot MCEs on Pentium M (model == 13) CPUs On my legacy Pentium M laptop (Acer Extensa 2900) I get bogus MCE on a cold boot with CONFIG_X86_NEW_MCE enabled, i.e. (after decoding it with mcelog): MCE 0 HARDWARE ERROR. This is *NOT* a software problem! Please contact your hardware vendor CPU 0 BANK 1 MCG status: MCi status: Error overflow Uncorrected error Error enabled Processor context corrupt MCA: Data CACHE Level-1 UNKNOWN Error STATUS f200000000000195 MCGSTATUS 0 [ The other STATUS values observed: f2000000000001b5 (... UNKNOWN error) and f200000000000115 (... READ Error). 
To verify that this is not a CONFIG_X86_NEW_MCE bug I also modified the CONFIG_X86_OLD_MCE code (which doesn't log any MCEs) to dump the content of the STATUS MSR before it is cleared during initialization. ] Since the bogus MCE results in a kernel taint (which in turn disables lockdep support) don't log boot MCEs on Pentium M (model == 13) CPUs by default ("mce=bootlog" boot parameter can be used to get the old behavior). Signed-off-by: Bartlomiej Zolnierkiewicz Reviewed-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 07139a0..7bd19c7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1269,6 +1269,10 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && monarch_timeout < 0) monarch_timeout = USEC_PER_SEC; + + /* There are also broken BIOSes on some Pentium M systems. */ + if (c->x86 == 6 && c->x86_model == 13 && mce_bootlog < 0) + mce_bootlog = 0; } if (monarch_timeout < 0) monarch_timeout = 0; -- cgit v1.1 From e3346fc48204d780f92527d06df8bf6f28d603ec Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Jul 2009 23:55:09 +0200 Subject: x86, mce: fix "mce" boot option handling for CONFIG_X86_NEW_MCE "mce argument mce ignored. Please use /sys" message shouldn't be printed when using "mce" boot option. Signed-off-by: Bartlomiej Zolnierkiewicz Reviewed-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7bd19c7..7591944 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1549,8 +1549,10 @@ static struct miscdevice mce_log_device = { */ static int __init mcheck_enable(char *str) { - if (*str == 0) + if (*str == 0) { enable_p5_mce(); + return 1; + } if (*str == '=') str++; if (!strcmp(str, "off")) -- cgit v1.1 From 419d6162c0c0103fa2f44f6691dff9cac14c650d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Jul 2009 23:56:00 +0200 Subject: x86, mce: add missing __cpuinit tags mce_cap_init() and mce_cpu_quirks() can be tagged with __cpuinit. Signed-off-by: Bartlomiej Zolnierkiewicz Reviewed-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7591944..1ce6db1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1158,7 +1158,7 @@ static int mce_banks_init(void) /* * Initialize Machine Checks for a CPU.
*/ -static int mce_cap_init(void) +static int __cpuinit mce_cap_init(void) { unsigned b; u64 cap; @@ -1222,7 +1222,7 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) +static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { -- cgit v1.1 From d0c87d1f61704ed589fc0788bedd753632340e98 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Jul 2009 23:56:37 +0200 Subject: x86, mce: remove never executed code fseverities_coverage is never NULL in the err_out code path. Signed-off-by: Bartlomiej Zolnierkiewicz Reviewed-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce-severity.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index ff0807f..51f7c72 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -209,8 +209,6 @@ static int __init severities_debugfs_init(void) return 0; err_out: - if (fseverities_coverage) - debugfs_remove(fseverities_coverage); if (dmce) debugfs_remove(dmce); return -ENOMEM; -- cgit v1.1 From f3a0867b12e0cf1512c0bd0665f2339fc75ed2a8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 29 Jul 2009 00:04:59 +0200 Subject: x86, mce: fix reporting of Thermal Monitoring mechanism enabled Early Pentium M models use a different method for enabling TM2 (per paragraph 13.5.2.3 of the "Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3A: System Programming Guide, Part 1"). Tested on the affected Pentium M variant (model == 13). Signed-off-by: Bartlomiej Zolnierkiewicz Cc: Andi Kleen Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/msr-index.h | 4 ++++ arch/x86/kernel/cpu/mcheck/therm_throt.c | 13 ++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3d1ce09..cbec06d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -222,6 +222,10 @@ #define THERM_STATUS_PROCHOT (1 << 0) +#define MSR_THERM2_CTL 0x0000019d + +#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) + #define MSR_IA32_MISC_ENABLE 0x000001a0 /* MISC_ENABLE bits: architectural */ diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index bff8dd1..15f2bc0 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -253,9 +253,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) return; } - if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) - tm2 = 1; - /* Check whether a vector already exists */ if (h & APIC_VECTOR_MASK) { printk(KERN_DEBUG @@ -264,6 +261,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c) return; } + /* early Pentium M models use different method for enabling TM2 */ + if (cpu_has(c, X86_FEATURE_TM2)) { + if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { + rdmsr(MSR_THERM2_CTL, l, h); + if (l & MSR_THERM2_CTL_TM_SELECT) + tm2 = 1; + } else if (l & MSR_IA32_MISC_ENABLE_TM2) + tm2 = 1; + } + /* We'll mask the thermal vector in the lapic till we're ready: */ h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; apic_write(APIC_LVTTHMR, h); -- cgit v1.1 From c1dc0b9c0c8979ce4d411caadff5c0d79dee58bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 2 Aug 2009 11:28:21 +0200 Subject: debug lockups: Improve lockup detection When debugging a recent lockup bug I found various deficiencies in how our current lockup detection helpers work: - SysRq-L is not very efficient as it uses a workqueue, hence it cannot punch through hard lockups and cannot see through most soft lockups either. - The SysRq-L code depends on the NMI watchdog - which is off by default. - We don't print backtraces from the RCU code's built-in 'RCU state machine is stuck' debug code. This debug code tends to be one of the first (and only) mechanisms that show that a lockup has occurred. This patch changes the code so that we: - Trigger the NMI backtrace code from SysRq-L instead of using a workqueue (which cannot punch through hard lockups) - Trigger print-all-CPU-backtraces from the RCU lockup detection code Also decouple the backtrace printing code from the NMI watchdog: - Don't use variable size cpumasks (it might not be initialized and they are a bit more fragile anyway) - Trigger an NMI immediately via an IPI, instead of waiting for the NMI tick to occur. This is a lot faster and can produce more relevant backtraces. It will also work if the NMI watchdog is disabled. - Don't print the 'dazed and confused' message when we print a backtrace from the NMI - Do a show_regs() plus a dump_stack() to get maximum info out of the dump. Worst-case we get two stacktraces - which is not a big deal. Sometimes, if register content is corrupted, the precise stack walker in show_regs() won't give us a full backtrace - in this case dump_stack() will do it. Cc: Paul E.
McKenney Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/nmi.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b4..1bb1ac2 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,7 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_var_t backtrace_mask; +static cpumask_t backtrace_mask __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void) if (!prev_nmi_count) goto error; - alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); printk(KERN_INFO "Testing NMI watchdog ... "); #ifdef CONFIG_SMP @@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { + if (cpumask_test_cpu(cpu, &backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); + show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, backtrace_mask); + cpumask_clear_cpu(cpu, &backtrace_mask); + + rc = 1; } /* Could check oops_in_progress here too, but it's safer not to */ @@ -556,10 +558,14 @@ void __trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(backtrace_mask, cpu_online_mask); + cpumask_copy(&backtrace_mask, cpu_online_mask); + + printk(KERN_INFO "sending NMI to all CPUs:\n"); + apic->send_IPI_all(NMI_VECTOR); + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(backtrace_mask)) + if (cpumask_empty(&backtrace_mask)) break; mdelay(1); } -- cgit v1.1 From 25f6e89bedd29cc49bfa0d55497e91a671b9ae6e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 30 Jul 2009 23:21:18 +0200 Subject: x86: Remove superfluous NULL pointer check in destroy_irq() This takes care of the following entry from Dan's list: arch/x86/kernel/apic/io_apic.c +3241 destroy_irq(11) warning: variable derefenced before check 'desc' Reported-by: Dan Carpenter Signed-off-by: Bartlomiej Zolnierkiewicz Cc: Jonathan Corbet Cc: Eugene Teo Cc: Julia Lawall LKML-Reference: <200907302321.19086.bzolnier@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index cf51b0b..7e92a92 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3185,8 +3185,7 @@ void destroy_irq(unsigned int irq) cfg = desc->chip_data; dynamic_irq_cleanup(irq); /* connect back irq_cfg */ - if (desc) - desc->chip_data = cfg; + desc->chip_data = cfg; free_irte(irq); spin_lock_irqsave(&vector_lock, flags); -- cgit v1.1 From 47cab6a722d44c71c4f8224017ef548522243cf4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Aug 2009 09:31:54 +0200 Subject: debug lockups: Improve lockup detection, fix generic arch fallback As Andrew noted, my previous patch ("debug lockups: Improve lockup detection") broke/removed SysRq-L support from architectures that do not provide a __trigger_all_cpu_backtrace implementation.
Restore a fallback path and clean up the SysRq-L machinery a bit: - Rename the arch method to arch_trigger_all_cpu_backtrace() - Simplify the define - Document the method a bit - in the hope of more architectures adding support for it. [ The patch touches Sparc code for the rename. ] Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: "David S. Miller" LKML-Reference: <20090802140809.7ec4bb6b.akpm@linux-foundation.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 4 ++-- arch/x86/kernel/apic/nmi.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c86e5ed4..e63cf7d 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -45,8 +45,8 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace static inline void localise_nmi_watchdog(void) { diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 1bb1ac2..db72202 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -554,7 +554,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) return 0; } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { int i; -- cgit v1.1 From 6a12235c7d2d75c7d94b9afcaaecd422ff845ce0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 29 Jul 2009 10:25:58 +0100 Subject: agp: kill phys_to_gart() and gart_to_phys() There seems to be no reason for these -- they're a 1:1 mapping on all platforms. Signed-off-by: David Woodhouse --- arch/x86/include/asm/agp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h index 9825cd6..eec2a70 100644 --- a/arch/x86/include/asm/agp.h +++ b/arch/x86/include/asm/agp.h @@ -22,10 +22,6 @@ */ #define flush_agp_cache() wbinvd() -/* Convert a physical address to an address suitable for the GART. */ -#define phys_to_gart(x) (x) -#define gart_to_phys(x) (x) - /* GATT allocation. Returns/accepts GATT kernel virtual address. */ #define alloc_gatt_pages(order) \ ((char *)__get_free_pages(GFP_KERNEL, (order))) -- cgit v1.1 From cfc65dd57967f2e0c7b3a8b73e6d12470b1cf1c1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 30 Jul 2009 16:15:18 -0600 Subject: iommu=pt is a valid early param This avoids a "Malformed early option 'iommu'" warning on boot when trying to use pass-through mode. 
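[ Background on why the early return caused the warning: early_param() handlers signal success by returning 0, and do_early_param() in init/main.c prints "Malformed early option '<name>'" for any non-zero return. The previous code returned 1 after accepting "pt", which the early-param machinery read as a parse failure. The convention, sketched with a hypothetical option name:

	static int __init foo_setup(char *p)
	{
		if (!p)
			return -EINVAL;	/* non-zero return triggers the warning */
		/* parse p here */
		return 0;		/* parsed OK */
	}
	early_param("foo", foo_setup);
]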
Signed-off-by: Alex Williamson Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-dma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bc..ae13e34 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -212,10 +212,8 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "soft", 4)) swiotlb = 1; #endif - if (!strncmp(p, "pt", 2)) { + if (!strncmp(p, "pt", 2)) iommu_pass_through = 1; - return 1; - } gart_parse_options(p); -- cgit v1.1 From ed8d9adf357ec331603fa1049510399812cea7e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Aug 2009 14:08:48 +0900 Subject: x86, percpu: Add 'percpu_read_stable()' interface for cacheable accesses This is very useful for some common things like 'get_current()' and 'get_thread_info()', which can be used multiple times in a function, and where the result is cacheable. tj: Added the magical undocumented "P" modifier to UP __percpu_arg() to force gcc to dereference the pointer value passed in via the "p" input constraint. Without this, percpu_read_stable() returns the address of the percpu variable. Also added comment explaining the difference between percpu_read() and percpu_read_stable(). Signed-off-by: Linus Torvalds Signed-off-by: Tejun Heo Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/current.h | 2 +- arch/x86/include/asm/percpu.h | 26 +++++++++++++++++++------- arch/x86/include/asm/thread_info.h | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index c68c361..4d447b7 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task); static __always_inline struct task_struct *get_current(void) { - return percpu_read(current_task); + return percpu_read_stable(current_task); } #define current get_current() diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 103f1dd..04eacef 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -49,7 +49,7 @@ #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) #else -#define __percpu_arg(x) "%" #x +#define __percpu_arg(x) "%P" #x #endif /* @@ -104,36 +104,48 @@ do { \ } \ } while (0) -#define percpu_from_op(op, var) \ +#define percpu_from_op(op, var, constraint) \ ({ \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_arg(1)",%0" \ : "=q" (ret__) \ - : "m" (var)); \ + : constraint); \ break; \ case 2: \ asm(op "w "__percpu_arg(1)",%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : constraint); \ break; \ case 4: \ asm(op "l "__percpu_arg(1)",%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : constraint); \ break; \ case 8: \ asm(op "q "__percpu_arg(1)",%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : constraint); \ break; \ default: __bad_percpu_size(); \ } \ ret__; \ }) -#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +/* + * percpu_read() makes gcc load the percpu variable every time it is + * accessed while percpu_read_stable() allows the value to be cached. + * percpu_read_stable() is more efficient and can be used if its value + * is guaranteed to be valid across cpus. 
The current users include + * get_current() and get_thread_info() both of which are actually + * per-thread variables implemented as per-cpu variables and thus + * stable for the duration of the respective task. + */ +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ + "m" (per_cpu__##var)) +#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ + "p" (&per_cpu__##var)) #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index fad7d40..a1bb5a11 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -213,7 +213,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack); static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - ti = (void *)(percpu_read(kernel_stack) + + ti = (void *)(percpu_read_stable(kernel_stack) + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } -- cgit v1.1 From 3e352aa8ee2bd48f1a19c7742810b3a4a7ba605e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2009 14:10:11 +0900 Subject: x86, percpu: Fix DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() put percpu variables in the .page_aligned section without adding any alignment restrictions. Currently, this doesn't cause any problem because all users of the macros have explicit page alignment and are page-sized, but it's much safer to enforce page alignment from the macros. After all, it's what they claim to do. Add __aligned(PAGE_SIZE) to DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() and drop the explicit alignment from its users. Signed-off-by: Tejun Heo Cc: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c0..12493c5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1008,8 +1008,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { }; static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) - __aligned(PAGE_SIZE); + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); /* May not be marked __init: used by software suspend */ void syscall_init(void) -- cgit v1.1 From bdf977b37418cdf8a2252504779a7e12a09b7575 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2009 14:12:19 +0900 Subject: x86, percpu: Collect hot percpu variables into one cacheline On x86_64, the percpu variables current_task and kernel_stack are used for get_current() and current_thread_info() respectively and thus are often used close to each other. Move the definition of current_task to kernel/cpu/common.c right above the kernel_stack definition and align it to cacheline size so that they always fall into the same cacheline. Two percpu variables defined there together - irq_stack_ptr and irq_count - are also pretty hot and will benefit from sharing the cacheline. For consistency, the current_task definition for x86_32 is also moved to kernel/cpu/common.c. Putting current_task and kernel_stack into the same cacheline was suggested by Linus Torvalds. Signed-off-by: Tejun Heo Cc: Linus Torvalds Cc: Ingo Molnar Signed-off-by: H.
Peter Anvin --- arch/x86/kernel/cpu/common.c | 15 +++++++++++++-- arch/x86/kernel/process_32.c | 3 --- arch/x86/kernel/process_64.c | 3 --- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 12493c5..1bd88ed 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -987,13 +987,21 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); -DEFINE_PER_CPU(char *, irq_stack_ptr) = - init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; +/* + * The following four percpu variables are hot. Align current_task to + * cacheline size such that all four fall in the same cacheline. + */ +DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = + &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; EXPORT_PER_CPU_SYMBOL(kernel_stack); +DEFINE_PER_CPU(char *, irq_stack_ptr) = + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; + DEFINE_PER_CPU(unsigned int, irq_count) = -1; /* @@ -1041,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); #else /* CONFIG_X86_64 */ +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, stack_canary); #endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524..daa4107 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -61,9 +61,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - /* * Return saved PC of a blocked thread. */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb54..c4c675d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -55,9 +55,6 @@ asmlinkage extern void ret_from_fork(void); -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - DEFINE_PER_CPU(unsigned long, old_rsp); static DEFINE_PER_CPU(unsigned char, is_idle); -- cgit v1.1 From c7bd0414d681706a32105895cae20fb9090db52e Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Thu, 23 Jul 2009 20:56:27 +0200 Subject: x86: Simplify the Makefile in a minor way through use of cc-ifversion Signed-off-by: Frans Pop Acked-by: Sam Ravnborg Reviewed-by: WANG Cong LKML-Reference: <200907232056.28635.elendil@planet.nl> Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659..1f3851a 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -32,8 +32,8 @@ ifeq ($(CONFIG_X86_32),y) # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use # a lot more stack due to the lack of sharing of stacklots: - KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ - echo $(call cc-option,-fno-unit-at-a-time); fi ;) + KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \ + $(call cc-option,-fno-unit-at-a-time)) # CPU-specific tuning. Anything which can be shared with UML should go here. 
include $(srctree)/arch/x86/Makefile_32.cpu -- cgit v1.1 From 54a0bf3c2cad3fd118ea725f26a493aece6ea01d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 4 Aug 2009 15:52:38 +0200 Subject: Revert "x86: oprofile/op_model_amd.c set return values for op_amd_handle_ibs()" This reverts commit 21e70878215f620fe99ea7d7c74bc641aeec932f. Instead, the patch Andrew posted at the same time will be applied. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 827beec..37d19c7 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -195,7 +195,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, struct op_entry entry; if (!has_ibs) - return 0; + return 1; if (ibs_config.fetch_enabled) { rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); @@ -277,10 +277,7 @@ static void op_amd_stop_ibs(void) #else static inline int op_amd_handle_ibs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - return 0; -} + struct op_msrs const * const msrs) { } static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } -- cgit v1.1 From 4680e64a88c4ce2c4e736dade99233e3def13fa7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 23 Jun 2009 12:36:08 -0700 Subject: arch/x86/oprofile/op_model_amd.c: fix op_amd_handle_ibs() return type arch/x86/oprofile/op_model_amd.c: In function 'op_amd_handle_ibs': arch/x86/oprofile/op_model_amd.c:217: warning: no return statement in function returning non-void Fix this by making op_amd_handle_ibs() return void. Cc: Robert Richter Signed-off-by: Andrew Morton Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 37d19c7..39686c2 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -187,7 +187,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, #ifdef CONFIG_OPROFILE_IBS -static inline int +static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { @@ -195,7 +195,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, struct op_entry entry; if (!has_ibs) - return 1; + return; if (ibs_config.fetch_enabled) { rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); @@ -241,8 +241,6 @@ op_amd_handle_ibs(struct pt_regs * const regs, wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } - - return 1; } static inline void op_amd_start_ibs(void) @@ -276,7 +274,7 @@ static void op_amd_stop_ibs(void) #else -static inline int op_amd_handle_ibs(struct pt_regs * const regs, +static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { } static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } -- cgit v1.1 From 19943b0e30b05d42e494ae6fef78156ebc8c637e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 4 Aug 2009 16:19:20 +0100 Subject: intel-iommu: Unify hardware and software passthrough support This makes the hardware passthrough mode work a lot more like the software version, so that the behaviour of a kernel with 'iommu=pt' is the same whether the hardware supports passthrough or not. In particular: - We use a single si_domain for the pass-through devices.
- 32-bit devices can be taken out of the pass-through domain so that they don't have to use swiotlb. - Devices will work again after being removed from a KVM guest. - A potential oops on OOM (in init_context_pass_through()) is fixed. Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-swiotlb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee..1e66b18 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -71,9 +71,8 @@ void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || - iommu_pass_through) - swiotlb = 1; + if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) + swiotlb = 1; #endif if (swiotlb_force) swiotlb = 1; -- cgit v1.1 From 2977fb3ffc8493a2f4f0a362e8660a6cde9f1bb9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 1 Aug 2009 11:47:59 +0400 Subject: x86, ioapic: Introduce for_each_irq_pin() helper This allows us to save a few lines of code. Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org LKML-Reference: <20090801075435.597863129@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 43 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7e92a92..ffd8fdf 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,8 @@ #include #define __apicdebuginit(type) static type __init +#define for_each_irq_pin(entry, head) \ + for (entry = head; entry; entry = entry->next) /* * Is the SiS APIC rmw bug present ?
@@ -410,7 +412,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; int pin; @@ -490,22 +492,21 @@ static void ioapic_mask_entry(int apic, int pin) */ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { - struct irq_pin_list **entryp, *entry; + struct irq_pin_list **last, *entry; - for (entryp = &cfg->irq_2_pin; - *entryp != NULL; - entryp = &(*entryp)->next) { - entry = *entryp; - /* not again, please */ + /* don't allow duplicates */ + last = &cfg->irq_2_pin; + for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == apic && entry->pin == pin) return; + last = &entry->next; } entry = get_one_free_irq_2_pin(node); entry->apic = apic; entry->pin = pin; - *entryp = entry; + *last = entry; } /* @@ -517,7 +518,7 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, { struct irq_pin_list *entry; - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; @@ -537,7 +538,7 @@ static void io_apic_modify_irq(struct irq_cfg *cfg, int pin; struct irq_pin_list *entry; - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin * 2); @@ -1669,12 +1670,8 @@ __apicdebuginit(void) print_IO_APIC(void) if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); - for (;;) { + for_each_irq_pin(entry, cfg->irq_2_pin) printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = entry->next; - } printk("\n"); } @@ -2227,7 +2224,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq struct irq_pin_list *entry; u8 vector = cfg->vector; - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; apic = entry->apic; @@ -2556,20 +2553,10 @@ static void ack_apic_level(unsigned int irq) #ifdef CONFIG_INTR_REMAP static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { - int apic, pin; struct irq_pin_list *entry; - entry = cfg->irq_2_pin; - for (;;) { - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - io_apic_eoi(apic, pin); - entry = entry->next; - } + for_each_irq_pin(entry, cfg->irq_2_pin) + io_apic_eoi(entry->apic, entry->pin); } static void -- cgit v1.1 From a7428cd2ef77734465e36bceb43290e37e2a97c6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 1 Aug 2009 11:48:00 +0400 Subject: x86, ioapic: Throw BUG instead of NULL dereference Instead of a plain NULL dereference we'd better throw an error message with a backtrace. Actually we need more graceful error handling here. Meanwhile leave it as is.
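[ For illustration: since BUG_ON(1) fires unconditionally, the check added below is equivalent to this sketch (not the patch itself):

	entry = get_one_free_irq_2_pin(node);
	if (!entry) {
		printk(KERN_ERR "can not alloc irq_pin_list\n");
		BUG();
	}
]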
Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org LKML-Reference: <20090801075435.769301745@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ffd8fdf..2a145d3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -503,6 +503,10 @@ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin } entry = get_one_free_irq_2_pin(node); + if (!entry) { + printk(KERN_ERR "can not alloc irq_pin_list\n"); + BUG_ON(1); + } entry->apic = apic; entry->pin = pin; -- cgit v1.1 From 9910887af84e33ba98fd6792029470ae80166208 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 23 Jul 2009 00:52:59 +0400 Subject: x86, apic: Drop redundant bit assignment cpu_has_apic has already verified that the boot_cpu_data X86_FEATURE_APIC bit is clear when this condition triggers. So there is no need to clear the bit a second time. Signed-off-by: Cyrill Gorcunov Cc: "Maciej W. Rozycki" LKML-Reference: <20090722205259.GE15805@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0a1c283..0b021c56 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1651,7 +1651,6 @@ int __init APIC_init_uniprocessor(void) APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { pr_err("BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } #endif -- cgit v1.1 From ce69a784504222c3ab6f1b3c357d09ec5772127a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 20 Jul 2009 15:24:17 +0300 Subject: x86/apic: Enable x2APIC without interrupt remapping under KVM KVM would like to provide an x2APIC interface to a guest without emulating an interrupt remapping device. The reason KVM prefers the guest to use x2APIC is that the x2APIC interface is easier to virtualize and provides better performance than the mmio xAPIC interface: - msr exits are faster than mmio (no page table walk, emulation) - no need to read back ICR to look at the busy bit - one 64 bit ICR write instead of two 32 bit writes - shared code with the Hyper-V paravirt interface The included patch changes the x2APIC enabling logic to enable it even if IR initialization failed, provided the kernel runs under KVM and no APIC ID is greater than 255 (if there is one, the spec requires the BIOS to move to x2apic mode before starting an OS).
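[ For illustration of the ICR point above, a rough sketch assuming the usual xAPIC/x2APIC register layout, not code from this patch:

	/* xAPIC: two 32-bit mmio writes, plus reading back the busy bit */
	apic_write(APIC_ICR2, dest << 24);
	apic_write(APIC_ICR, APIC_DM_FIXED | vector);

	/* x2APIC: a single 64-bit MSR write, no busy bit to poll */
	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4),
	       ((u64)dest << 32) | APIC_DM_FIXED | vector);
]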
-v2: fix build -v3: fix bug causing compiler warning Signed-off-by: Gleb Natapov Acked-by: Suresh Siddha Cc: Sheng Yang Cc: "avi@redhat.com" LKML-Reference: <20090720122417.GR5638@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 7 ++++ arch/x86/kernel/apic/apic.c | 83 +++++++++++++++++++++++------------------ arch/x86/kernel/apic/probe_64.c | 6 +-- 3 files changed, 56 insertions(+), 40 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bb7d479..586b7ad 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -183,6 +183,10 @@ static inline int x2apic_enabled(void) } #define x2apic_supported() (cpu_has_x2apic) +static inline void x2apic_force_phys(void) +{ + x2apic_phys = 1; +} #else static inline void check_x2apic(void) { @@ -194,6 +198,9 @@ static inline int x2apic_enabled(void) { return 0; } +static inline void x2apic_force_phys(void) +{ +} #define x2apic_preenabled 0 #define x2apic_supported() 0 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0b021c56..de039fc 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -49,6 +49,7 @@ #include #include #include +#include unsigned int num_processors; @@ -1361,52 +1362,76 @@ void enable_x2apic(void) } #endif /* CONFIG_X86_X2APIC */ -void __init enable_IR_x2apic(void) +int __init enable_IR(void) { #ifdef CONFIG_INTR_REMAP int ret; - unsigned long flags; - struct IO_APIC_route_entry **ioapic_entries = NULL; ret = dmar_table_init(); if (ret) { pr_debug("dmar_table_init() failed with %d:\n", ret); - goto ir_failed; + return 0; } if (!intr_remapping_supported()) { pr_debug("intr-remapping not supported\n"); - goto ir_failed; + return 0; } - if (!x2apic_preenabled && skip_ioapic_setup) { pr_info("Skipped enabling intr-remap because of skipping " "io-apic setup\n"); - return; + return 0; } + if (enable_intr_remapping(x2apic_supported())) + return 0; + + pr_info("Enabled Interrupt-remapping\n"); + + return 1; + +#endif + return 0; +} + +void __init enable_IR_x2apic(void) +{ + unsigned long flags; + struct IO_APIC_route_entry **ioapic_entries = NULL; + int ret, x2apic_enabled = 0; + ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { - pr_info("Allocate ioapic_entries failed: %d\n", ret); - goto end; + pr_err("Allocate ioapic_entries failed\n"); + goto out; } ret = save_IO_APIC_setup(ioapic_entries); if (ret) { pr_info("Saving IO-APIC state failed: %d\n", ret); - goto end; + goto out; } local_irq_save(flags); - mask_IO_APIC_setup(ioapic_entries); mask_8259A(); + mask_IO_APIC_setup(ioapic_entries); - ret = enable_intr_remapping(x2apic_supported()); - if (ret) - goto end_restore; + ret = enable_IR(); + if (!ret) { + /* IR is required if there is APIC ID > 255 even when running + * under KVM + */ + if (max_physical_apicid > 255 || !kvm_para_available()) + goto nox2apic; + /* + * without IR all CPUs can be addressed by IOAPIC/MSI + * only in physical mode + */ + x2apic_force_phys(); + } - pr_info("Enabled Interrupt-remapping\n"); + x2apic_enabled = 1; if (x2apic_supported() && !x2apic_mode) { x2apic_mode = 1; @@ -1414,41 +1439,25 @@ void __init enable_IR_x2apic(void) pr_info("Enabled x2apic\n"); } -end_restore: - if (ret) - /* - * IR enabling failed - */ +nox2apic: + if (!ret) /* IR enabling failed */ restore_IO_APIC_setup(ioapic_entries); - unmask_8259A(); local_irq_restore(flags); -end: +out: if (ioapic_entries) free_ioapic_entries(ioapic_entries); - if (!ret) + if (x2apic_enabled) 
return; -ir_failed: if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); + panic("x2apic: enabled by BIOS but kernel init failed."); else if (cpu_has_x2apic) - pr_info("Not enabling x2apic,Intr-remapping\n"); -#else - if (!cpu_has_x2apic) - return; - - if (x2apic_preenabled) - panic("x2apic enabled prior OS handover," - " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); -#endif - - return; + pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); } - #ifdef CONFIG_X86_64 /* * Detect and enable local APICs on non-SMP boards. diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index bc3e880..f3b1037 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -50,11 +50,11 @@ static struct apic *apic_probe[] __initdata = { void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (x2apic_mode && (apic != &apic_x2apic_phys && + if (x2apic_mode #ifdef CONFIG_X86_UV - apic != &apic_x2apic_uv_x && + && apic != &apic_x2apic_uv_x #endif - apic != &apic_x2apic_cluster)) { + ) { if (x2apic_phys) apic = &apic_x2apic_phys; else -- cgit v1.1 From f3d1915a8623b9248572d3ee44e19a80b7a3520b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 6 Aug 2009 00:09:31 +0400 Subject: x86, ioapic: Panic on irq-pin binding only if needed Though most of the time we have to panic when irq-pin allocation fails, for PCI interrupts that is not the case and we can continue operating even if the irq-pin allocation failed. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090805200931.GB5319@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2a145d3..2999f3d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -490,7 +490,8 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int +add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list **last, *entry; @@ -498,19 +499,27 @@ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin last = &cfg->irq_2_pin; for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == apic && entry->pin == pin) - return; + return 0; last = &entry->next; } entry = get_one_free_irq_2_pin(node); if (!entry) { - printk(KERN_ERR "can not alloc irq_pin_list\n"); - BUG_ON(1); + printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", + node, apic, pin); + return -ENOMEM; } entry->apic = apic; entry->pin = pin; *last = entry; + return 0; +} + +static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +{ + if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) + panic("IO-APIC: failed to add irq-pin.
Can not proceed\n"); } /* @@ -3843,7 +3852,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, */ if (irq >= NR_IRQS_LEGACY) { cfg = desc->chip_data; - add_pin_to_irq_node(cfg, node, ioapic, pin); + if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { + printk(KERN_INFO "can not add pin %d for irq %d\n", + pin, irq); + return 0; + } } setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); -- cgit v1.1 From 1e5de18278e6862f4198412b5059a03770fa816a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 19 Jul 2009 00:12:20 +0900 Subject: x86: Introduce GDT_ENTRY_INIT() GDT_ENTRY_INIT is a static initializer for desc_struct. We already have a similar macro, GDT_ENTRY(), but it is a static initializer for u64 and cannot be used for desc_struct. Signed-off-by: Akinobu Mita LKML-Reference: <20090718151219.GD11294@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc_defs.h | 6 ++++++ arch/x86/include/asm/lguest.h | 5 +++-- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/kernel/apm_32.c | 2 +- arch/x86/kernel/cpu/common.c | 40 +++++++++++++++++------------------ 5 files changed, 31 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index a6adefa..9d66848 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -34,6 +34,12 @@ struct desc_struct { }; } __attribute__((packed)); +#define GDT_ENTRY_INIT(flags, base, limit) { { { \ + .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \ + .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \ + ((limit) & 0xf0000) | ((base) & 0xff000000), \ + } } } + enum { GATE_INTERRUPT = 0xE, GATE_TRAP = 0xF, diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 313389c..94cd698 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -91,8 +91,9 @@ static inline void lguest_set_ts(void) } /* Full 4G segment descriptors, suitable for CS and DS. */ -#define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } }) -#define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } }) +#define FULL_EXEC_SEGMENT \ + ((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff)) +#define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff)) #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index cdc5e0b..44efdff 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -48,7 +48,7 @@ * head_32 for boot CPU and setup_per_cpu_areas() for others. */ #define GDT_STACK_CANARY_INIT \ - [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, + [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18), /* * Initialize the stackprotector canary value.
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index b5e841b..febb2da 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -403,7 +403,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); -static struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; +static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, 0, 0); static const char driver_version[] = "1.16ac"; /* no spaces */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c0..8c9bc28 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -71,45 +71,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { * TLS descriptors are currently at a different place compared to i386. * Hopefully nobody expects them at a fixed place (Wine?) */ - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), #else - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), + [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), /* * Segments used for calling PnP BIOS have byte granularity. * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. */ /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. 
*/ /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), - [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), GDT_STACK_CANARY_INIT #endif } }; -- cgit v1.1 From 72c4d8530244264317a662de9a55cc47e6c8e9df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Aug 2009 08:47:07 +0200 Subject: x86: Introduce GDT_ENTRY_INIT(), fix APM This crash: [ 0.891983] calling cache_sysfs_init+0x0/0x1ee @ 1 [ 0.897251] initcall cache_sysfs_init+0x0/0x1ee returned 0 after 405 usecs [ 0.904019] calling mce_init_device+0x0/0x242 @ 1 [ 0.909124] initcall mce_init_device+0x0/0x242 returned 0 after 347 usecs [ 0.915815] calling apm_init+0x0/0x38d @ 1 [ 0.919967] apm: BIOS version 1.2 Flags 0x07 (Driver version 1.16ac) [ 0.926813] general protection fault: 0000 [#1] [ 0.927269] last sysfs file: [ 0.927269] Modules linked in: [ 0.927269] [ 0.927269] Pid: 271, comm: kapmd Not tainted (2.6.31-rc3-00100-gd520da1-dirty #311) System Product Name [ 0.927269] EIP: 00c0:[<000082b2>] EFLAGS: 00010002 CPU: 0 [ 0.927269] EIP is at 0x82b2 [ 0.927269] EAX: 0000530e EBX: 00000000 ECX: 00000102 EDX: 00000000 [ 0.927269] ESI: 00000000 EDI: f6a4bf44 EBP: 67890000 ESP: f6a4beec [ 0.927269] DS: 00c8 ES: 0000 FS: 0000 GS: 0000 SS: 0068 [ 0.927269] Process kapmd (pid: 271, ti=f6a4a000 task=f7142280 task.ti=f6a4a000) [ 0.927269] Stack: [ 0.927269] 0000828d 02160000 00b88092 f6a4bf3c c102a63d 00000060 f6a4bf3c f6a4bf44 [ 0.927269] <0> 0000007b 0000007b 00000000 00000000 00000000 00000000 560aae9e 00000000 [ 0.927269] <0> 00000200 f705fd74 00000000 c102af70 f6a4bf60 c102a6ec 0000530e 00000000 [ 0.927269] Call Trace: [ 0.927269] [] ? __apm_bios_call_simple+0x7d/0x110 [ 0.927269] [] ? apm+0x0/0x6a0 [ 0.927269] [] ? apm_bios_call_simple+0x1c/0x50 [ 0.927269] [] ? apm+0x485/0x6a0 [ 0.927269] [] ? finish_task_switch+0x2a/0xb0 [ 0.927269] [] ? schedule+0x31e/0x480 [ 0.927269] [] ? apm+0x0/0x6a0 [ 0.927269] [] ? apm+0x0/0x6a0 [ 0.927269] [] ? kthread+0x74/0x80 [ 0.927269] [] ? kthread+0x0/0x80 [ 0.927269] [] ? kernel_thread_helper+0x7/0x10 [ 0.927269] Code: Bad EIP value. [ 0.927269] EIP: [<000082b2>] 0x82b2 SS:ESP 0068:f6a4beec [ 0.927269] ---[ end trace a7919e7f17c0a725 ]--- [ 0.927269] Kernel panic - not syncing: Fatal exception [ 0.927269] Pid: 271, comm: kapmd Tainted: G D 2.6.31-rc3-00100-gd520da1-dirty #311 Is caused by an incorrect GDT_ENTRY_INIT() conversion in the apm code, as noticed by hpa. Reported-by: Ingo Molnar Noticed-by: "H. 
Peter Anvin" Signed-off-by: Akinobu Mita LKML-Reference: <20090808094905.GA2954@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8c9bc28..b5764a2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -106,7 +106,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { /* 16-bit code */ [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), + [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), -- cgit v1.1 From 4c711576b90cc36c13b94816a953a8de6a53d03c Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 6 Aug 2009 15:58:12 -0700 Subject: x86, 32-bit: Use generic sys_pipe() As suggested by Al, it's better to use the generic sys_pipe() for ia32. Signed-off-by: WANG Cong Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/ia32/sys_ia32.c | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index e590261..ba331bf 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -537,7 +537,7 @@ ia32_sys_call_table: .quad sys_mkdir .quad sys_rmdir /* 40 */ .quad sys_dup - .quad sys32_pipe + .quad sys_pipe .quad compat_sys_times .quad quiet_ni_syscall /* old prof syscall holder */ .quad sys_brk /* 45 */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 085a8c3..9f55271 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -189,20 +189,6 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len, return sys_mprotect(start, len, prot); } -asmlinkage long sys32_pipe(int __user *fd) -{ - int retval; - int fds[2]; - - retval = do_pipe_flags(fds, 0); - if (retval) - goto out; - if (copy_to_user(fd, fds, sizeof(fds))) - retval = -EFAULT; -out: - return retval; -} - asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, struct sigaction32 __user *oact, unsigned int sigsetsize) -- cgit v1.1 From 30dd568c912602b7dbd609a45d053e01b13422bb Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 21 Jul 2009 15:56:48 +0200 Subject: x86, perf_counter, bts: Add BTS support to perfcounters Implement a performance counter with: attr.type = PERF_TYPE_HARDWARE attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS attr.sample_period = 1 Using branch trace store (BTS) on x86 hardware, if available. 
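[ For illustration, a sketch of the user-visible counter setup described above, using the perf_counter ABI of this tree (hypothetical user code, not part of the patch):

	struct perf_counter_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
		.sample_period	= 1,	/* sample every branch */
		.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_ADDR,
	};
]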
The from and to address for each branch can be sampled using: PERF_SAMPLE_IP for the from address PERF_SAMPLE_ADDR for the to address [ v2: address review feedback, fix bugs ] Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 10 ++ arch/x86/kernel/cpu/perf_counter.c | 325 +++++++++++++++++++++++++++++++++++- 2 files changed, 329 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index fa64e40..e7b7c93 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,16 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose a value in the middle of the fixed counter range, since lower + * values are used by actual fixed counters and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. + */ +#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) + + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a7aa8f9..b237c18 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -6,6 +6,7 @@ * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, * * For licencing details see kernel-base/COPYING */ @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -27,12 +29,52 @@ static u64 perf_counter_mask __read_mostly; +/* The maximal number of PEBS counters: */ +#define MAX_PEBS_COUNTERS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +/* The size of a per-cpu BTS buffer in bytes: */ +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) + +/* The BTS overflow threshold in bytes from the end of the buffer: */ +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) + + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. 
+ */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; +}; + struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; + struct debug_store *ds; }; /* @@ -57,6 +99,8 @@ struct x86_pmu { u64 counter_mask; u64 max_period; u64 intel_ctrl; + void (*enable_bts)(u64 config); + void (*disable_bts)(void); }; static struct x86_pmu x86_pmu __read_mostly; @@ -576,6 +620,9 @@ x86_perf_counter_update(struct perf_counter *counter, u64 prev_raw_count, new_raw_count; s64 delta; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * Careful: an NMI might modify the previous counter value. * @@ -659,10 +706,109 @@ static void release_pmc_hardware(void) enable_lapic_nmi_watchdog(); } +static inline bool bts_available(void) +{ + return x86_pmu.enable_bts != NULL; +} + +static inline void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(long)ds), (u32)((u64)(long)ds >> 32)); +} + +static inline void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_counters, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_bts_hardware(void) +{ + int cpu; + + if (!bts_available()) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_counters, cpu).ds = NULL; + + kfree((void *)(long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_bts_hardware(void) +{ + int cpu, err = 0; + + if (!bts_available()) + return -EOPNOTSUPP; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + + err = -ENOMEM; + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + + ds->bts_buffer_base = (u64)(long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = + ds->bts_buffer_base + BTS_BUFFER_SIZE; + ds->bts_interrupt_threshold = + ds->bts_absolute_maximum - BTS_OVFL_TH; + + per_cpu(cpu_hw_counters, cpu).ds = ds; + err = 0; + } + + if (err) + release_bts_hardware(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + static void hw_perf_counter_destroy(struct perf_counter *counter) { if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { release_pmc_hardware(); + release_bts_hardware(); mutex_unlock(&pmc_reserve_mutex); } } @@ -705,6 +851,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) return 0; } +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= 
X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + /* * Setup the hardware configuration for a given attr_type */ @@ -721,9 +903,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) err = 0; if (!atomic_inc_not_zero(&active_counters)) { mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) - err = -EBUSY; - else + if (atomic_read(&active_counters) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_bts_hardware(); + } + if (!err) atomic_inc(&active_counters); mutex_unlock(&pmc_reserve_mutex); } @@ -801,7 +987,18 @@ static void p6_pmu_disable_all(void) static void intel_pmu_disable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); } static void amd_pmu_disable_all(void) @@ -859,7 +1056,25 @@ static void p6_pmu_enable_all(void) static void intel_pmu_enable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_counter *counter = + cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!counter)) + return; + + intel_pmu_enable_bts(counter->hw.config); + } } static void amd_pmu_enable_all(void) @@ -946,6 +1161,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) static inline void intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + intel_pmu_disable_bts(); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc, idx); return; @@ -974,6 +1194,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, s64 period = hwc->sample_period; int err, ret = 0; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * If we are way outside a reasoable range then just skip forward: */ @@ -1056,6 +1279,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + if (!__get_cpu_var(cpu_hw_counters).enabled) + return; + + intel_pmu_enable_bts(hwc->config); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc, idx); return; @@ -1077,11 +1308,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { unsigned int event; + event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely((event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (hwc->sample_period == 1))) + return X86_PMC_IDX_FIXED_BTS; + if (!x86_pmu.num_counters_fixed) return -1; - event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(event 
== x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1102,7 +1338,25 @@ static int x86_pmu_enable(struct perf_counter *counter) int idx; idx = fixed_mode_idx(counter, hwc); - if (idx >= 0) { + if (idx == X86_PMC_IDX_FIXED_BTS) { + /* + * Try to use BTS for branch tracing. If that is not + * available, try to get a generic counter. + */ + if (unlikely(!cpuc->ds)) + goto try_generic; + + /* + * Try to get the fixed counter, if that is already taken + * then try to get a generic counter: + */ + if (test_and_set_bit(idx, cpuc->used_mask)) + goto try_generic; + + hwc->config_base = 0; + hwc->counter_base = 0; + hwc->idx = idx; + } else if (idx >= 0) { /* * Try to get the fixed counter, if that is already taken * then try to get a generic counter: @@ -1213,6 +1467,45 @@ void perf_counter_print_debug(void) local_irq_restore(flags); } +static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, + struct perf_sample_data *data) +{ + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + unsigned long orig_ip = data->regs->ip; + u64 at; + + if (!counter) + return; + + if (!ds) + return; + + for (at = ds->bts_buffer_base; + at < ds->bts_index; + at += sizeof(struct bts_record)) { + struct bts_record *rec = (struct bts_record *)(long)at; + + data->regs->ip = rec->from; + data->addr = rec->to; + + perf_counter_output(counter, 1, data); + } + + ds->bts_index = ds->bts_buffer_base; + + data->regs->ip = orig_ip; + data->addr = 0; + + /* There's new data available. */ + counter->pending_kill = POLL_IN; +} + static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -1237,6 +1530,15 @@ static void x86_pmu_disable(struct perf_counter *counter) * that we are disabling: */ x86_perf_counter_update(counter, hwc, idx); + + /* Drain the remaining BTS records. */ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + struct perf_sample_data data; + struct pt_regs regs; + + data.regs = ®s; + intel_pmu_drain_bts_buffer(cpuc, &data); + } cpuc->counters[idx] = NULL; clear_bit(idx, cpuc->used_mask); @@ -1264,6 +1566,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) static void intel_pmu_reset(void) { + struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; unsigned long flags; int idx; @@ -1281,6 +1584,8 @@ static void intel_pmu_reset(void) for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); } + if (ds) + ds->bts_index = ds->bts_buffer_base; local_irq_restore(flags); } @@ -1346,6 +1651,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); + intel_pmu_drain_bts_buffer(cpuc, &data); status = intel_pmu_get_status(); if (!status) { perf_enable(); @@ -1547,6 +1853,8 @@ static struct x86_pmu intel_pmu = { * the generic counter period: */ .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, }; static struct x86_pmu amd_pmu = { @@ -1936,3 +2244,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } + +void hw_perf_counter_setup_online(int cpu) +{ + init_debug_store_on_cpu(cpu); +} -- cgit v1.1 From 9f51e24ee8b5a1595b6a5ac0c2be278a16488e75 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 9 Aug 2009 21:54:00 +0200 Subject: x86: Use printk_once() Signed-off-by: Marcin Slusarz Cc: "H. 
Peter Anvin" LKML-Reference: <1249847649-11631-6-git-send-email-marcin.slusarz@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 5 ++--- arch/x86/kvm/x86.c | 7 +------ 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 3b09634..7d35d0f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) void fixup_irqs(void) { unsigned int irq; - static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { @@ -236,8 +235,8 @@ void fixup_irqs(void) } if (desc->chip->set_affinity) desc->chip->set_affinity(irq, affinity); - else if (desc->action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); + else if (desc->action) + printk_once("Cannot set affinity for irq %i\n", irq); } #if 0 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fe5474a..0572c90 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2261,12 +2261,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, unsigned int bytes, struct kvm_vcpu *vcpu) { - static int reported; - - if (!reported) { - reported = 1; - printk(KERN_WARNING "kvm: emulating exchange as write\n"); - } + printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); #ifndef CONFIG_X86_64 /* guests cmpxchg8b have to be emulated atomically */ if (bytes == 8) { -- cgit v1.1 From a8ad568dd8ca122aa8048ea067d3599820d1c1b4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Aug 2009 11:53:10 +0900 Subject: dma-ops: Remove flush_write_buffers() in dma-mapping-common.h This moves flush_write_buffers() from asm-generic/dma-mapping-common.h to arch/x86/kernel/pci-nommu.c. The purpose of this patch is that we can avoid defining a NULL flush_write_buffers() on IA64 and SPARC. dma-mapping-common.h is used by X86 and IA64 (and SPARC soon) but only X86 with CONFIG_X86_OOSTORE or CONFIG_X86_PPRO_FENCE actually uses flush_write_buffers(). CONFIG_X86_OOSTORE or CONFIG_X86_PPRO_FENCE is usable only with kernel/pci-nommu.c (that is, not usable with other X86 IOMMU implementations such as SWIOTLB, VT-d, etc) so we can safely move flush_write_buffers() from asm-generic/dma-mapping-common.h to arch/x86/kernel/pci-nommu.c.
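[ For illustration, a sketch of how a driver reaches the new hooks through the generic DMA API (hypothetical driver code; dev, buf and len are placeholders):

	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	/* ... CPU fills buf ... */
	dma_sync_single_for_device(dev, handle, len, DMA_TO_DEVICE);
	/* on nommu this lands in nommu_sync_single_for_device(), which
	 * calls flush_write_buffers() where the CPU needs it */
]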
Further discussion: http://lkml.org/lkml/2009/6/28/104 Signed-off-by: Arnd Bergmann Acked-by: FUJITA Tomonori Cc: davem@davemloft.net Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index c0a4222..a3933d4 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static void nommu_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + + +static void nommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + struct dma_map_ops nommu_dma_ops = { - .alloc_coherent = dma_generic_alloc_coherent, - .free_coherent = nommu_free_coherent, - .map_sg = nommu_map_sg, - .map_page = nommu_map_page, - .is_phys = 1, + .alloc_coherent = dma_generic_alloc_coherent, + .free_coherent = nommu_free_coherent, + .map_sg = nommu_map_sg, + .map_page = nommu_map_page, + .sync_single_for_device = nommu_sync_single_for_device, + .sync_sg_for_device = nommu_sync_sg_for_device, + .is_phys = 1, }; void __init no_iommu_init(void) -- cgit v1.1 From c7425314c755d5f94da7c978205c85a7c6201212 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 9 Aug 2009 17:03:52 +0900 Subject: x86: Introduce GDT_ENTRY_INIT(), initialize bad_bios_desc statically Fully initialize bad_bios_desc statically instead of doing some fields statically and some dynamically. Suggested-by: "H. Peter Anvin" Signed-off-by: Akinobu Mita LKML-Reference: <20090809080350.GA4765@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index febb2da..39a4462 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -403,7 +403,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); -static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, 0, 0); +/* + * Set up a segment that references the real mode segment 0x40 + * that extends up to the end of page zero (that we have reserved). + * This is for buggy BIOS's that refer to (real mode) segment 0x40 + * even though they are called in protected mode. + */ +static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, + (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1); static const char driver_version[] = "1.16ac"; /* no spaces */ @@ -2332,15 +2340,6 @@ static int __init apm_init(void) pm_flags |= PM_APM; /* - * Set up a segment that references the real mode segment 0x40 - * that extends up to the end of page zero (that we have reserved). - * This is for buggy BIOS's that refer to (real mode) segment 0x40 - * even though they are called in protected mode.
- */ - set_desc_base(&bad_bios_desc, (unsigned long)__va(0x40UL << 4)); - set_desc_limit(&bad_bios_desc, 4095 - (0x40 << 4)); - - /* * Set up the long jump entry point to the APM BIOS, which is called * from inline assembly. */ -- cgit v1.1 From 5b7e88edc6193f36941bccbfd5ed9ed5fe27d2e1 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 31 Jul 2009 09:41:40 +0800 Subject: x86, mce: Support specifying context for software mce injection The cpu context is specified via the new mce.inject_flags field. This allows more realistic machine check testing in different situations. "RANDOM" context is implemented via NMI broadcasting to add randomization to testing. AK: Fix NMI broadcasting check. Fix 32-bit building. Some race fixes. Move to module. Various changes ChangeLog: v3: - Re-based on latest x86-tip.git/mce4 - Fix 32-bit building v2: - Re-base on latest x86-tip.git/mce3 Signed-off-by: Huang Ying Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 11 ++- arch/x86/kernel/cpu/mcheck/mce-inject.c | 156 ++++++++++++++++++++++++++------ 2 files changed, 135 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ad75353..8945be9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -38,6 +38,13 @@ #define MCM_ADDR_MEM 3 /* memory address */ #define MCM_ADDR_GENERIC 7 /* generic */ +#define MCJ_CTX_MASK 3 +#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) +#define MCJ_CTX_RANDOM 0 /* inject context: random */ +#define MCJ_CTX_PROCESS 1 /* inject context: process */ +#define MCJ_CTX_IRQ 2 /* inject context: IRQ */ +#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */ + /* Fields are zero when not available */ struct mce { __u64 status; @@ -48,8 +55,8 @@ struct mce { __u64 tsc; /* cpu time stamp counter */ __u64 time; /* wall time_t when error was detected */ __u8 cpuvendor; /* cpu vendor as encoded in system.h */ - __u8 pad1; - __u16 pad2; + __u8 inject_flags; /* software inject flags */ + __u16 pad; __u32 cpuid; /* CPUID 1 EAX */ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index a3a235a..ad5d927 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -18,7 +18,12 @@ #include #include #include +#include +#include +#include +#include #include +#include /* Update fake mce registers on current CPU.
*/ static void inject_mce(struct mce *m) @@ -39,44 +44,141 @@ static void inject_mce(struct mce *m) i->finished = 1; } -struct delayed_mce { - struct timer_list timer; - struct mce m; +static void raise_corrected(struct mce *m) +{ + unsigned long flags; + mce_banks_t b; + + memset(&b, 0xff, sizeof(mce_banks_t)); + local_irq_save(flags); + machine_check_poll(0, &b); + local_irq_restore(flags); + m->finished = 0; +} + +static void raise_uncorrected(struct mce *m, struct pt_regs *pregs) +{ + struct pt_regs regs; + unsigned long flags; + + if (!pregs) { + memset(®s, 0, sizeof(struct pt_regs)); + regs.ip = m->ip; + regs.cs = m->cs; + pregs = ®s; + } + /* in mcheck exeception handler, irq will be disabled */ + local_irq_save(flags); + do_machine_check(pregs, 0); + local_irq_restore(flags); + m->finished = 0; +} + +static cpumask_t mce_inject_cpumask; + +static int mce_raise_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int cpu = smp_processor_id(); + struct mce *m = &__get_cpu_var(injectm); + if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) + return NOTIFY_DONE; + cpu_clear(cpu, mce_inject_cpumask); + if (m->status & MCI_STATUS_UC) + raise_uncorrected(m, args->regs); + else if (m->status) + raise_corrected(m); + return NOTIFY_STOP; +} + +static struct notifier_block mce_raise_nb = { + .notifier_call = mce_raise_notify, + .priority = 1000, }; /* Inject mce on current CPU */ -static void raise_mce(unsigned long data) +static int raise_local(struct mce *m) { - struct delayed_mce *dm = (struct delayed_mce *)data; - struct mce *m = &dm->m; + int context = MCJ_CTX(m->inject_flags); + int ret = 0; int cpu = m->extcpu; - inject_mce(m); if (m->status & MCI_STATUS_UC) { - struct pt_regs regs; - memset(®s, 0, sizeof(struct pt_regs)); - regs.ip = m->ip; - regs.cs = m->cs; printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); - do_machine_check(®s, 0); + switch (context) { + case MCJ_CTX_IRQ: + /* + * Could do more to fake interrupts like + * calling irq_enter, but the necessary + * machinery isn't exported currently. 
+ */ + /*FALL THROUGH*/ + case MCJ_CTX_PROCESS: + raise_uncorrected(m, NULL); + break; + default: + printk(KERN_INFO "Invalid MCE context\n"); + ret = -EINVAL; + } printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); - } else { - mce_banks_t b; - memset(&b, 0xff, sizeof(mce_banks_t)); + } else if (m->status) { printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); - machine_check_poll(0, &b); + raise_corrected(m); mce_notify_irq(); - printk(KERN_INFO "Finished machine check poll on CPU %d\n", - cpu); - } - kfree(dm); + printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); + } else + m->finished = 0; + + return ret; +} + +static void raise_mce(struct mce *m) +{ + int context = MCJ_CTX(m->inject_flags); + + inject_mce(m); + + if (context == MCJ_CTX_RANDOM) + return; + +#ifdef CONFIG_X86_LOCAL_APIC + if (m->inject_flags & MCJ_NMI_BROADCAST) { + unsigned long start; + int cpu; + get_online_cpus(); + mce_inject_cpumask = cpu_online_map; + cpu_clear(get_cpu(), mce_inject_cpumask); + for_each_online_cpu(cpu) { + struct mce *mcpu = &per_cpu(injectm, cpu); + if (!mcpu->finished || + MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) + cpu_clear(cpu, mce_inject_cpumask); + } + if (!cpus_empty(mce_inject_cpumask)) + apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); + start = jiffies; + while (!cpus_empty(mce_inject_cpumask)) { + if (!time_before(jiffies, start + 2*HZ)) { + printk(KERN_ERR + "Timeout waiting for mce inject NMI %lx\n", + *cpus_addr(mce_inject_cpumask)); + break; + } + cpu_relax(); + } + raise_local(m); + put_cpu(); + put_online_cpus(); + } else +#endif + raise_local(m); } /* Error injection interface */ static ssize_t mce_write(struct file *filp, const char __user *ubuf, size_t usize, loff_t *off) { - struct delayed_mce *dm; struct mce m; if (!capable(CAP_SYS_ADMIN)) @@ -96,19 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) return -EINVAL; - dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); - if (!dm) - return -ENOMEM; - /* * Need to give user space some time to set everything up, * so do it a jiffie or two later everywhere. - * Should we use a hrtimer here for better synchronization? */ - memcpy(&dm->m, &m, sizeof(struct mce)); - setup_timer(&dm->timer, raise_mce, (unsigned long)dm); - dm->timer.expires = jiffies + 2; - add_timer_on(&dm->timer, m.extcpu); + schedule_timeout(2); + raise_mce(&m); return usize; } @@ -116,6 +211,7 @@ static int inject_init(void) { printk(KERN_INFO "Machine check injector initialized\n"); mce_chrdev_ops.write = mce_write; + register_die_notifier(&mce_raise_nb); return 0; } -- cgit v1.1 From 0dcc66851f1091af421416c28a9458836885f522 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 31 Jul 2009 09:41:41 +0800 Subject: x86, mce: Support specifying raise mode for software MCE injection Raise mode include raising as exception or raising as poll, it is specified via the mce.inject_flags field. This can be used to specify raise mode of UCNA, which is UC error but raised not as exception. And this can be used to test the filter code of poll handler or exception handler too. For example, enforce a poll raise mode for a fatal MCE. ChangeLog: v2: - Re-base on latest x86-tip.git/mce3 Signed-off-by: Huang Ying Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce-inject.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 8945be9..b608a64 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -44,6 +44,7 @@ #define MCJ_CTX_PROCESS 1 /* inject context: process */ #define MCJ_CTX_IRQ 2 /* inject context: IRQ */ #define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */ +#define MCJ_EXCEPTION 8 /* raise as exception */ /* Fields are zero when not available */ struct mce { diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index ad5d927..7029f0e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -44,7 +44,7 @@ static void inject_mce(struct mce *m) i->finished = 1; } -static void raise_corrected(struct mce *m) +static void raise_poll(struct mce *m) { unsigned long flags; mce_banks_t b; @@ -56,7 +56,7 @@ static void raise_corrected(struct mce *m) m->finished = 0; } -static void raise_uncorrected(struct mce *m, struct pt_regs *pregs) +static void raise_exception(struct mce *m, struct pt_regs *pregs) { struct pt_regs regs; unsigned long flags; @@ -85,10 +85,10 @@ static int mce_raise_notify(struct notifier_block *self, if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) return NOTIFY_DONE; cpu_clear(cpu, mce_inject_cpumask); - if (m->status & MCI_STATUS_UC) - raise_uncorrected(m, args->regs); + if (m->inject_flags & MCJ_EXCEPTION) + raise_exception(m, args->regs); else if (m->status) - raise_corrected(m); + raise_poll(m); return NOTIFY_STOP; } @@ -104,7 +104,7 @@ static int raise_local(struct mce *m) int ret = 0; int cpu = m->extcpu; - if (m->status & MCI_STATUS_UC) { + if (m->inject_flags & MCJ_EXCEPTION) { printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); switch (context) { case MCJ_CTX_IRQ: @@ -115,7 +115,7 @@ static int raise_local(struct mce *m) */ /*FALL THROUGH*/ case MCJ_CTX_PROCESS: - raise_uncorrected(m, NULL); + raise_exception(m, NULL); break; default: printk(KERN_INFO "Invalid MCE context\n"); @@ -124,7 +124,7 @@ static int raise_local(struct mce *m) printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); } else if (m->status) { printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); - raise_corrected(m); + raise_poll(m); mce_notify_irq(); printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); } else -- cgit v1.1 From 5be9ed251f58881dfc3dd6742a81ff9ad1a7bb04 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 31 Jul 2009 09:41:42 +0800 Subject: x86, mce: Move debugfs mce dir creating to mce.c Because more debugfs files under the mce dir will be created in mce.c. ChangeLog: v5: - Rebased on x86-tip.git/mce Signed-off-by: Huang Ying Signed-off-by: H.
Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce-internal.h | 1 + arch/x86/kernel/cpu/mcheck/mce-severity.c | 4 +--- arch/x86/kernel/cpu/mcheck/mce.c | 13 +++++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 6bd51e7..32996f9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -22,6 +22,7 @@ struct mce_bank { }; int mce_severity(struct mce *a, int tolerant, char **msg); +struct dentry *mce_get_debugfs_dir(void); extern int mce_ser; diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 51f7c72..bc35a07 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -197,7 +197,7 @@ static int __init severities_debugfs_init(void) { struct dentry *dmce = NULL, *fseverities_coverage = NULL; - dmce = debugfs_create_dir("mce", NULL); + dmce = mce_get_debugfs_dir(); if (dmce == NULL) goto err_out; fseverities_coverage = debugfs_create_file("severities-coverage", @@ -209,8 +209,6 @@ static int __init severities_debugfs_init(void) return 0; err_out: - if (dmce) - debugfs_remove(dmce); return -ENOMEM; } late_initcall(severities_debugfs_init); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1ce6db1..9c7419e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -2003,3 +2004,15 @@ static int __init mcheck_disable(char *str) return 1; } __setup("nomce", mcheck_disable); + +#ifdef CONFIG_DEBUG_FS +struct dentry *mce_get_debugfs_dir(void) +{ + static struct dentry *dmce; + + if (!dmce) + dmce = debugfs_create_dir("mce", NULL); + + return dmce; +} +#endif -- cgit v1.1 From bf783f9f7d33576815bc89f9f1856a7309ea2f17 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 31 Jul 2009 09:41:43 +0800 Subject: x86, mce: Fake panic support for MCE testing If "fake panic" mode is turned on, just log the panic message instead of going into a real panic. This is used for testing only, so that the test suite can check for the correct panic message and do regression testing for MCEs that would panic. This patch is based on x86-tip.git/mce. ChangeLog: v5: - Rebased on x86-tip.git/mce v4: - Move config file from sysfs to debugfs Signed-off-by: Huang Ying Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 75 ++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9c7419e..54bd1b2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -204,6 +204,9 @@ static void print_mce_tail(void) static atomic_t mce_paniced; +static int fake_panic; +static atomic_t mce_fake_paniced; /* Panic in progress.
Enable interrupts and wait for final IPI */ static void wait_for_panic(void) { @@ -221,15 +224,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp) { int i; - /* - * Make sure only one CPU runs in machine check panic - */ - if (atomic_inc_return(&mce_paniced) > 1) - wait_for_panic(); - barrier(); + if (!fake_panic) { + /* + * Make sure only one CPU runs in machine check panic + */ + if (atomic_inc_return(&mce_paniced) > 1) + wait_for_panic(); + barrier(); - bust_spinlocks(1); - console_verbose(); + bust_spinlocks(1); + console_verbose(); + } else { + /* Don't log too much for fake panic */ + if (atomic_inc_return(&mce_fake_paniced) > 1) + return; + } print_mce_head(); /* First print corrected ones that are still unlogged */ for (i = 0; i < MCE_LOG_LEN; i++) { @@ -256,9 +265,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp) print_mce_tail(); if (exp) printk(KERN_EMERG "Machine check: %s\n", exp); - if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; - panic(msg); + if (!fake_panic) { + if (panic_timeout == 0) + panic_timeout = mce_panic_timeout; + panic(msg); + } else + printk(KERN_EMERG "Fake kernel panic: %s\n", msg); } /* Support code for software error injection */ @@ -2015,4 +2027,45 @@ struct dentry *mce_get_debugfs_dir(void) return dmce; } + +static void mce_reset(void) +{ + cpu_missing = 0; + atomic_set(&mce_fake_paniced, 0); + atomic_set(&mce_executing, 0); + atomic_set(&mce_callin, 0); + atomic_set(&global_nwo, 0); +} + +static int fake_panic_get(void *data, u64 *val) +{ + *val = fake_panic; + return 0; +} + +static int fake_panic_set(void *data, u64 val) +{ + mce_reset(); + fake_panic = val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, + fake_panic_set, "%llu\n"); + +static int __init mce_debugfs_init(void) +{ + struct dentry *dmce, *ffake_panic; + + dmce = mce_get_debugfs_dir(); + if (!dmce) + return -ENOMEM; + ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, + &fake_panic_fops); + if (!ffake_panic) + return -ENOMEM; + + return 0; +} +late_initcall(mce_debugfs_init); #endif -- cgit v1.1 From 81e2d7b30d718824434725a4a24d5864a71b1d30 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 12 Aug 2009 05:45:34 -0700 Subject: x86, intel_txt: tboot.c needs arch/x86/kernel/tboot.c needs . In most configurations that ends up getting implicitly included, but not in all, causing build failures in some configurations. Reported-by: Ingo Molnar Signed-off-by: H. Peter Anvin Cc: Joseph Cihula Cc: Shane Wang --- arch/x86/kernel/tboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 263591a..1ab8012 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include -- cgit v1.1 From 00ae4064b1445524752575dd84df227c0687c99d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: rename 4k first chunk allocator to page Page size isn't always 4k depending on arch and configuration. Rename 4k first chunk allocator to page. 
Signed-off-by: Tejun Heo Cc: David Howells --- arch/x86/kernel/setup_percpu.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a26ff61..1e17711 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -249,21 +249,22 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k allocator + * Page allocator * - * Boring fallback 4k allocator. This allocator puts more pressure on - * PTE TLBs but other than that behaves nicely on both UMA and NUMA. + * Boring fallback 4k page allocator. This allocator puts more + * pressure on PTE TLBs but other than that behaves nicely on both UMA + * and NUMA. */ -static void __init pcpu4k_populate_pte(unsigned long addr) +static void __init pcpup_populate_pte(unsigned long addr) { populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_4k(size_t static_size) +static ssize_t __init setup_pcpu_page(size_t static_size) { - return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - pcpu_fc_alloc, pcpu_fc_free, - pcpu4k_populate_pte); + return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpup_populate_pte); } /* for explicit first chunk allocator selection */ @@ -307,7 +308,7 @@ void __init setup_per_cpu_areas(void) */ ret = -EINVAL; if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "4k")) { + if (strcmp(pcpu_chosen_alloc, "page")) { if (!strcmp(pcpu_chosen_alloc, "lpage")) ret = setup_pcpu_lpage(static_size, true); else if (!strcmp(pcpu_chosen_alloc, "embed")) @@ -317,7 +318,7 @@ void __init setup_per_cpu_areas(void) "specified\n", pcpu_chosen_alloc); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " - "falling back to 4k\n", + "falling back to page size\n", pcpu_chosen_alloc, ret); } } else { @@ -326,7 +327,7 @@ void __init setup_per_cpu_areas(void) ret = setup_pcpu_embed(static_size, false); } if (ret < 0) - ret = setup_pcpu_4k(static_size); + ret = setup_pcpu_page(static_size); if (ret < 0) panic("cannot allocate static percpu area (%zu bytes, err=%zd)", static_size, ret); -- cgit v1.1 From 08fc45806103e59a37418e84719b878f9bb32540 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: build first chunk allocators selectively There's no need to build unused first chunk allocators in. Define CONFIG_NEED_PER_CPU_*_FIRST_CHUNK and let archs enable them selectively. Signed-off-by: Tejun Heo --- arch/x86/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e06b2ee..f7ac272 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -150,6 +150,16 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_LPAGE_FIRST_CHUNK + def_bool y + depends on NEED_MULTIPLE_NODES + config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP -- cgit v1.1 From f58dc01ba2ca9fe3ab2ba4ca43d9c8a735cf62d8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: generalize first chunk allocator selection Now that all first chunk allocators are in mm/percpu.c, it makes sense to make generalize percpu_alloc kernel parameter. Define PCPU_FC_* and set pcpu_chosen_fc using early_param() in mm/percpu.c. 
Arch code can use the set value to determine which first chunk allocator to use. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1e17711..b961d99 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -267,16 +267,6 @@ static ssize_t __init setup_pcpu_page(size_t static_size) pcpup_populate_pte); } -/* for explicit first chunk allocator selection */ -static char pcpu_chosen_alloc[16] __initdata; - -static int __init percpu_alloc_setup(char *str) -{ - strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); - return 0; -} -early_param("percpu_alloc", percpu_alloc_setup); - static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -307,19 +297,17 @@ void __init setup_per_cpu_areas(void) * each allocator for details. */ ret = -EINVAL; - if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "page")) { - if (!strcmp(pcpu_chosen_alloc, "lpage")) + if (pcpu_chosen_fc != PCPU_FC_AUTO) { + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + if (pcpu_chosen_fc == PCPU_FC_LPAGE) ret = setup_pcpu_lpage(static_size, true); - else if (!strcmp(pcpu_chosen_alloc, "embed")) - ret = setup_pcpu_embed(static_size, true); else - pr_warning("PERCPU: unknown allocator %s " - "specified\n", pcpu_chosen_alloc); + ret = setup_pcpu_embed(static_size, true); + if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " "falling back to page size\n", - pcpu_chosen_alloc, ret); + pcpu_fc_names[pcpu_chosen_fc], ret); } } else { ret = setup_pcpu_lpage(static_size, false); -- cgit v1.1 From 9a7737691e90d3cce0e5248f91826c50e5aa3fcf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: drop @static_size from first chunk allocators First chunk allocators assume percpu areas have been linked using one of PERCPU_*() macros and depend on __per_cpu_load symbol defined by those macros, so there isn't much point in passing in static area size explicitly when it can be easily calculated from __per_cpu_start and __per_cpu_end. Drop @static_size from all percpu first chunk allocators and helpers. 
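The size being dropped as a parameter is trivially recomputed from the linker-provided section bounds wherever it is needed. A minimal sketch of the calculation, assuming the usual PERCPU_*() linker script symbols; the helper name is hypothetical:

        extern char __per_cpu_start[], __per_cpu_end[];

        /* static percpu area is everything between the section bounds */
        static inline size_t pcpu_static_size(void)
        {
                return __per_cpu_end - __per_cpu_start;
        }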
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b961d99..8aad486 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,7 +157,7 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; @@ -184,8 +184,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -ENOMEM; } - ret = pcpu_lpage_build_unit_map(static_size, - PERCPU_FIRST_CHUNK_RESERVE, + ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, &dyn_size, &unit_size, PMD_SIZE, unit_map, pcpu_lpage_cpu_distance); if (ret < 0) { @@ -208,9 +207,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) } } - ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - dyn_size, unit_size, PMD_SIZE, - unit_map, nr_units, + ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + unit_size, PMD_SIZE, unit_map, nr_units, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: if (ret < 0) @@ -218,7 +216,7 @@ out_free: return ret; } #else -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -232,7 +230,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. 
*/ -static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -244,7 +242,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) if (!chosen && (!cpu_has_pse || pcpu_need_numa())) return -EINVAL; - return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, reserve - PERCPU_FIRST_CHUNK_RESERVE); } @@ -260,9 +258,9 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(size_t static_size) +static ssize_t __init setup_pcpu_page(void) { - return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, pcpup_populate_pte); } @@ -282,7 +280,6 @@ static inline void setup_percpu_segment(int cpu) void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; unsigned int cpu; unsigned long delta; size_t pcpu_unit_size; @@ -300,9 +297,9 @@ void __init setup_per_cpu_areas(void) if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(static_size, true); + ret = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(static_size, true); + ret = setup_pcpu_embed(true); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " @@ -310,15 +307,14 @@ void __init setup_per_cpu_areas(void) pcpu_fc_names[pcpu_chosen_fc], ret); } } else { - ret = setup_pcpu_lpage(static_size, false); + ret = setup_pcpu_lpage(false); if (ret < 0) - ret = setup_pcpu_embed(static_size, false); + ret = setup_pcpu_embed(false); } if (ret < 0) - ret = setup_pcpu_page(static_size); + ret = setup_pcpu_page(); if (ret < 0) - panic("cannot allocate static percpu area (%zu bytes, err=%zd)", - static_size, ret); + panic("cannot initialize percpu area (err=%zd)", ret); pcpu_unit_size = ret; -- cgit v1.1 From 3cbc85652767c38b252c8de55f9fd180b29e4c0d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: add @align to pcpu_fc_alloc_fn_t pcpu_fc_alloc_fn_t is about to see more interesting usage, add @align parameter. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8aad486..660cde1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -126,9 +126,9 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, /* * Helpers for first chunk memory allocation */ -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return pcpu_alloc_bootmem(cpu, size, size); + return pcpu_alloc_bootmem(cpu, size, align); } static void __init pcpu_fc_free(void *ptr, size_t size) -- cgit v1.1 From fd1e8a1fe2b54df6c185b4fa65f181f50b9c4d4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: introduce pcpu_alloc_info and pcpu_group_info Till now, non-linear cpu->unit map was expressed using an integer array which maps each cpu to a unit and used only by lpage allocator. 
Although how many units have been placed in a single contiguous area (group) is known while building unit_map, the information is lost when the result is recorded into the unit_map array. For the lpage allocator, as all allocations are done by lpages and whether two adjacent lpages are in the same group or not is irrelevant, this didn't cause any problem. Non-linear cpu->unit mapping will be used for sparse embedding, and this grouping information is necessary for that. This patch introduces pcpu_alloc_info which contains all the information necessary for initializing the percpu allocator. pcpu_alloc_info contains an array of pcpu_group_info which describes how units are grouped and mapped to cpus. pcpu_group_info also has a base_offset field to specify its offset from the chunk's base address. pcpu_build_alloc_info() initializes this field as if all groups are allocated back-to-back, as is currently done, but this will be used to sparsely place groups. pcpu_alloc_info is a rather complex data structure which contains a flexible array which in turn points to nested cpu_map arrays. * pcpu_alloc_alloc_info() and pcpu_free_alloc_info() are provided to help deal with pcpu_alloc_info. * pcpu_lpage_build_unit_map() is updated to build pcpu_alloc_info, generalized and renamed to pcpu_build_alloc_info(). @cpu_distance_fn may be NULL, indicating that all cpus are of LOCAL_DISTANCE. * pcpul_lpage_dump_cfg() is updated to process pcpu_alloc_info, generalized and renamed to pcpu_dump_alloc_info(). It now also prints which group each alloc unit belongs to. * pcpu_setup_first_chunk() now takes pcpu_alloc_info instead of the separate parameters. All first chunk allocators are updated to use pcpu_build_alloc_info() to build alloc_info and call pcpu_setup_first_chunk() with it. This has the side effect of packing units for sparse possible cpus, i.e. if cpus 0, 2 and 4 are possible, they'll be assigned units 0, 1 and 2 instead of 0, 2 and 4. * x86 setup_pcpu_lpage() is updated to deal with alloc_info. * sparc64 setup_per_cpu_areas() is updated to build alloc_info. Although the changes made by this patch are pretty pervasive, it doesn't cause any behavior difference other than the packing of sparse cpus. It mostly changes how information is passed among initialization functions and makes room for more flexibility.
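For orientation, the two structures described above have roughly the following shape. The field list is a sketch reconstructed from this description and from the accessors visible in the diff that follows (ai->unit_size, ai->nr_groups, ai->groups[group].nr_units), not an authoritative copy of the header:

        struct pcpu_group_info {
                int                     nr_units;       /* number of units in the group */
                unsigned long           base_offset;    /* offset from the chunk base address */
                unsigned int            *cpu_map;       /* unit -> cpu map for the group */
        };

        struct pcpu_alloc_info {
                size_t                  static_size;    /* static percpu area */
                size_t                  reserved_size;  /* reserved (module) area */
                size_t                  dyn_size;       /* dynamic reserve */
                size_t                  unit_size;      /* size of one unit */
                size_t                  atom_size;      /* allocation atom: page or large page */
                int                     nr_groups;      /* number of entries in groups[] */
                struct pcpu_group_info  groups[];       /* flexible array; cpu_map nests per group */
        };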
Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/x86/kernel/setup_percpu.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 660cde1..db5f9c4 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -161,9 +161,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; - size_t unit_map_size, unit_size; - int *unit_map; - int nr_units; + struct pcpu_alloc_info *ai; ssize_t ret; /* on non-NUMA, embedding is better */ @@ -177,26 +175,22 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } /* allocate and build unit_map */ - unit_map_size = nr_cpu_ids * sizeof(int); - unit_map = alloc_bootmem_nopanic(unit_map_size); - if (!unit_map) { - pr_warning("PERCPU: failed to allocate unit_map\n"); - return -ENOMEM; + ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + PMD_SIZE, pcpu_lpage_cpu_distance); + if (IS_ERR(ai)) { + pr_warning("PERCPU: failed to build unit_map (%ld)\n", + PTR_ERR(ai)); + return PTR_ERR(ai); } - ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, - &dyn_size, &unit_size, PMD_SIZE, - unit_map, pcpu_lpage_cpu_distance); - if (ret < 0) { - pr_warning("PERCPU: failed to build unit_map\n"); - goto out_free; - } - nr_units = ret; - /* do the parameters look okay? */ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; size_t tot_size = 0; int group; + + for (group = 0; group < ai->nr_groups; group++) tot_size += ai->unit_size * ai->groups[group].nr_units; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { @@ -207,12 +201,10 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } } - ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - unit_size, PMD_SIZE, unit_map, nr_units, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, + pcpul_map); out_free: - if (ret < 0) - free_bootmem(__pa(unit_map), unit_map_size); + pcpu_free_alloc_info(ai); return ret; } -- cgit v1.1 From fb435d5233f8b6f9b93c11d6304d8e98fed03234 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: add pcpu_unit_offsets[] Currently units are mapped sequentially into address space. This patch adds pcpu_unit_offsets[] which allows units to be mapped to arbitrary offsets from the chunk base address. This is necessary to allow sparse embedding, which would need to allocate address ranges and memory areas which aren't aligned to the unit size but to the allocation atom size (page or large page size). This also simplifies things a bit by removing the need to calculate offset from unit number. With this change, there's no need for the arch code to know pcpu_unit_size. Update pcpu_setup_first_chunk() and first chunk allocators to return regular 0 or -errno return code instead of unit size or -errno. Signed-off-by: Tejun Heo Cc: David S.
Miller --- arch/x86/kernel/setup_percpu.c | 51 +++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index db5f9c4..9becc5d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,12 +157,12 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; struct pcpu_alloc_info *ai; - ssize_t ret; + int rc; /* on non-NUMA, embedding is better */ if (!chosen && !pcpu_need_numa()) @@ -196,19 +196,18 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - ret = -EINVAL; + rc = -EINVAL; goto out_free; } } - ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, - pcpul_map); + rc = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: pcpu_free_alloc_info(ai); - return ret; + return rc; } #else -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -222,7 +221,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. */ -static ssize_t __init setup_pcpu_embed(bool chosen) +static int __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -250,7 +249,7 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(void) +static int __init setup_pcpu_page(void) { return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, @@ -274,8 +273,7 @@ void __init setup_per_cpu_areas(void) { unsigned int cpu; unsigned long delta; - size_t pcpu_unit_size; - ssize_t ret; + int rc; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -285,36 +283,33 @@ void __init setup_per_cpu_areas(void) * of large page mappings. Please read comments on top of * each allocator for details. 
*/ - ret = -EINVAL; + rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(true); + rc = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(true); + rc = setup_pcpu_embed(true); - if (ret < 0) - pr_warning("PERCPU: %s allocator failed (%zd), " + if (rc < 0) + pr_warning("PERCPU: %s allocator failed (%d), " "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], ret); + pcpu_fc_names[pcpu_chosen_fc], rc); } } else { - ret = setup_pcpu_lpage(false); - if (ret < 0) - ret = setup_pcpu_embed(false); + rc = setup_pcpu_lpage(false); + if (rc < 0) + rc = setup_pcpu_embed(false); } - if (ret < 0) - ret = setup_pcpu_page(); - if (ret < 0) - panic("cannot initialize percpu area (err=%zd)", ret); - - pcpu_unit_size = ret; + if (rc < 0) + rc = setup_pcpu_page(); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = - delta + pcpu_unit_map[cpu] * pcpu_unit_size; + per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); -- cgit v1.1 From c8826dd538602d730ed2c18c6753f1bbfa6c4933 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: percpu: update embedding first chunk allocator to handle sparse units Now that percpu core can handle very sparse units, given that vmalloc space is large enough, embedding first chunk allocator can use any memory to build the first chunk. This patch teaches pcpu_embed_first_chunk() about distances between cpus and to use alloc/free callbacks to allocate node specific areas for each group and use them for the first chunk. This brings the benefits of embedding allocator to NUMA configurations - no extra TLB pressure with the flexibility of unified dynamic allocator and no need to restructure arch code to build memory layout suitable for percpu. With units put into atom_size aligned groups according to cpu distances, using large pages for dynamic chunks is also easily possible, falling back to regular pages if large allocation fails. Embedding allocator users are converted to specify NULL cpu_distance_fn, so this patch doesn't cause any visible behavior difference. Following patches will convert them. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9becc5d..67f6314 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -234,7 +234,9 @@ static int __init setup_pcpu_embed(bool chosen) return -EINVAL; return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE); + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PAGE_SIZE, NULL, pcpu_fc_alloc, + pcpu_fc_free); } /* -- cgit v1.1 From 4518e6a0c038b98be4c480e6f4481e8676bd15dd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: x86,percpu: use embedding for 64bit NUMA and page for 32bit NUMA Embedding percpu first chunk allocator can now handle very sparse unit mapping. Use embedding allocator instead of lpage for 64bit NUMA. This removes extra TLB pressure and the need to do complex and fragile dancing when changing page attributes.
For 32bit, using very sparse unit mapping isn't a good idea because the vmalloc space is very constrained. 32bit NUMA machines aren't exactly the focus of optimization and it isn't very clear whether lpage performs better than page. Use page first chunk allocator for 32bit NUMAs. As this leaves setup_pcpu_*() functions pretty much empty, fold them into setup_per_cpu_areas(). Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Andi Kleen --- arch/x86/Kconfig | 4 -- arch/x86/kernel/setup_percpu.c | 155 ++++++++--------------------------------- 2 files changed, 28 insertions(+), 131 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f7ac272..869d7d3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -156,10 +156,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK config NEED_PER_CPU_PAGE_FIRST_CHUNK def_bool y -config NEED_PER_CPU_LPAGE_FIRST_CHUNK - def_bool y - depends on NEED_MULTIPLE_NODES - config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 67f6314..d559af9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset); #define PERCPU_FIRST_CHUNK_RESERVE 0 #endif +#ifdef CONFIG_X86_32 /** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * @@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void) #endif return false; } +#endif /** * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu @@ -136,128 +138,23 @@ static void __init pcpu_fc_free(void *ptr, size_t size) free_bootmem(__pa(ptr), size); } -/* - * Large page remapping allocator - */ -#ifdef CONFIG_NEED_MULTIPLE_NODES -static void __init pcpul_map(void *ptr, size_t size, void *addr) -{ - pmd_t *pmd, pmd_v; - - pmd = populate_extra_pmd((unsigned long)addr); - pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE); - set_pmd(pmd, pmd_v); -} - -static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) { +#ifdef CONFIG_NEED_MULTIPLE_NODES if (early_cpu_to_node(from) == early_cpu_to_node(to)) return LOCAL_DISTANCE; else return REMOTE_DISTANCE; -} - -static int __init setup_pcpu_lpage(bool chosen) -{ - size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; - size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; - struct pcpu_alloc_info *ai; - int rc; - - /* on non-NUMA, embedding is better */ - if (!chosen && !pcpu_need_numa()) - return -EINVAL; - - /* need PSE */ - if (!cpu_has_pse) { - pr_warning("PERCPU: lpage allocator requires PSE\n"); - return -EINVAL; - } - - /* allocate and build unit_map */ - ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, pcpu_lpage_cpu_distance); - if (IS_ERR(ai)) { - pr_warning("PERCPU: failed to build unit_map (%ld)\n", - PTR_ERR(ai)); - return PTR_ERR(ai); - } - - /* do the parameters look okay? 
*/ - if (!chosen) { - size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = 0; - int group; - - for (group = 0; group < ai->nr_groups; group++) - tot_size += ai->unit_size * ai->groups[group].nr_units; - - /* don't consume more than 20% of vmalloc area */ - if (tot_size > vm_size / 5) { - pr_info("PERCPU: too large chunk size %zuMB for " - "large page remap\n", tot_size >> 20); - rc = -EINVAL; - goto out_free; - } - } - - rc = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); -out_free: - pcpu_free_alloc_info(ai); - return rc; -} #else -static int __init setup_pcpu_lpage(bool chosen) -{ - return -EINVAL; -} + return LOCAL_DISTANCE; #endif - -/* - * Embedding allocator - * - * The first chunk is sized to just contain the static area plus - * module and dynamic reserves and embedded into linear physical - * mapping so that it can use PMD mapping without additional TLB - * pressure. - */ -static int __init setup_pcpu_embed(bool chosen) -{ - size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; - - /* - * If large page isn't supported, there's no benefit in doing - * this. Also, embedding allocation doesn't play well with - * NUMA. - */ - if (!chosen && (!cpu_has_pse || pcpu_need_numa())) - return -EINVAL; - - return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, - PAGE_SIZE, NULL, pcpu_fc_alloc, - pcpu_fc_free); } -/* - * Page allocator - * - * Boring fallback 4k page allocator. This allocator puts more - * pressure on PTE TLBs but other than that behaves nicely on both UMA - * and NUMA. - */ static void __init pcpup_populate_pte(unsigned long addr) { populate_extra_pte(addr); } -static int __init setup_pcpu_page(void) -{ - return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - pcpu_fc_alloc, pcpu_fc_free, - pcpup_populate_pte); -} - static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -281,30 +178,34 @@ void __init setup_per_cpu_areas(void) NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); /* - * Allocate percpu area. If PSE is supported, try to make use - * of large page mappings. Please read comments on top of - * each allocator for details. + * Allocate percpu area. Embedding allocator is our favorite; + * however, on NUMA configurations, it can result in very + * sparse unit mapping and vmalloc area isn't spacious enough + * on 32bit. Use page in that case. */ +#ifdef CONFIG_X86_32 + if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa()) + pcpu_chosen_fc = PCPU_FC_PAGE; +#endif rc = -EINVAL; - if (pcpu_chosen_fc != PCPU_FC_AUTO) { - if (pcpu_chosen_fc != PCPU_FC_PAGE) { - if (pcpu_chosen_fc == PCPU_FC_LPAGE) - rc = setup_pcpu_lpage(true); - else - rc = setup_pcpu_embed(true); - - if (rc < 0) - pr_warning("PERCPU: %s allocator failed (%d), " - "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], rc); - } - } else { - rc = setup_pcpu_lpage(false); + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + const size_t atom_size = cpu_has_pse ? 
PMD_SIZE : PAGE_SIZE; + const size_t dyn_size = PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; + + rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, + dyn_size, atom_size, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); if (rc < 0) - rc = setup_pcpu_embed(false); + pr_warning("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) - rc = setup_pcpu_page(); + rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpup_populate_pte); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); -- cgit v1.1 From e933a73f48e3b2d40cfa56d81e2646f194b5a66a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:53 +0900 Subject: percpu: kill lpage first chunk allocator With x86 converted to embedding allocator, lpage doesn't have any user left. Kill it along with cpa handling code. Signed-off-by: Tejun Heo Cc: Jan Beulich --- arch/x86/mm/pageattr.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dce282f..f53cfc7 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr, remapped; + unsigned long vaddr; int ret; if (cpa->pfn >= max_pfn_mapped) @@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa) } #endif - /* - * If the PMD page was partially used for per-cpu remapping, - * the recycled area needs to be split and modified. Because - * the area is always proper subset of a PMD page - * cpa->numpages is guaranteed to be 1 for these areas, so - * there's no need to loop over and check for further remaps. - */ - remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); - if (remapped) { - WARN_ON(cpa->numpages > 1); - alias_cpa = *cpa; - alias_cpa.vaddr = &remapped; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - return 0; } -- cgit v1.1 From 58c41d28259c246dbc11358d85d332dc20ccd57b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 14 Aug 2009 12:14:19 -0700 Subject: x86, intel_txt: Factor out the code for S3 setup S3 sleep requires special setup in tboot. However, the data structures needed to do such setup are only available if CONFIG_ACPI_SLEEP is enabled. Abstract them out as much as possible, so we can have a single tboot_setup_sleep() which either is a proper implementation or a stub which simply calls BUG(). Signed-off-by: H. 
Peter Anvin Acked-by: Shane Wang Cc: Joseph Cihula --- arch/x86/kernel/tboot.c | 53 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 1ab8012..a183bef 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -164,25 +165,51 @@ void tboot_create_trampoline(void) map_base = PFN_DOWN(tboot->tboot_base); map_size = PFN_UP(tboot->tboot_size); if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size)) - panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n", map_base, map_size); + panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n", + map_base, map_size); } -static void set_mac_regions(void) +#ifdef CONFIG_ACPI_SLEEP + +static void add_mac_region(phys_addr_t start, unsigned long size) { - tboot->num_mac_regions = 3; + struct tboot_mac_region *mr; + phys_addr_t end = start + size; + + if (start && size) { + mr = &tboot->mac_regions[tboot->num_mac_regions++]; + mr->start = round_down(start, PAGE_SIZE); + mr->size = round_up(end, PAGE_SIZE) - mr->start; + } +} + +static int tboot_setup_sleep(void) +{ + tboot->num_mac_regions = 0; + /* S3 resume code */ - tboot->mac_regions[0].start = PFN_PHYS(PFN_DOWN(acpi_wakeup_address)); - tboot->mac_regions[0].size = PFN_UP(WAKEUP_SIZE) << PAGE_SHIFT; + add_mac_region(acpi_wakeup_address, WAKEUP_SIZE); /* AP trampoline code */ - tboot->mac_regions[1].start = - PFN_PHYS(PFN_DOWN(virt_to_phys(trampoline_base))); - tboot->mac_regions[1].size = PFN_UP(TRAMPOLINE_SIZE) << PAGE_SHIFT; + add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE); /* kernel code + data + bss */ - tboot->mac_regions[2].start = PFN_PHYS(PFN_DOWN(virt_to_phys(&_text))); - tboot->mac_regions[2].size = PFN_PHYS(PFN_UP(virt_to_phys(&_end))) - - PFN_PHYS(PFN_DOWN(virt_to_phys(&_text))); + add_mac_region(virt_to_phys(_text), _end - _text); + + tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; + + return 0; } +#else /* no CONFIG_ACPI_SLEEP */ + +static int tboot_setup_sleep(void) +{ + /* S3 shutdown requested, but S3 not supported by the kernel... */ + BUG(); + return -1; +} + +#endif + void tboot_shutdown(u32 shutdown_type) { void (*shutdown)(void); @@ -200,7 +227,8 @@ void tboot_shutdown(u32 shutdown_type) /* if this is S3 then set regions to MAC */ if (shutdown_type == TB_SHUTDOWN_S3) - set_mac_regions(); + if (tboot_setup_sleep()) + return; tboot->shutdown_type = shutdown_type; @@ -253,7 +281,6 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control; /* we always use the 32b wakeup vector */ tboot->acpi_sinfo.vector_width = 32; - tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; if (sleep_state >= ACPI_S_STATE_COUNT || acpi_shutdown_map[sleep_state] == -1) { -- cgit v1.1 From 1be396794897f80bfc8774719ba60309a9e3d374 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:20 +0200 Subject: timekeeping: Move reset of cycle_last for tsc clocksource to tsc change_clocksource resets the cycle_last value to zero then sets it to a value read from the clocksource. The reset to zero is required only for the TSC clocksource to make the read_tsc function work after a resume. The reason is that the TSC read function uses cycle_last to detect backwards going TSCs. 
In the resume case cycle_last contains the TSC value from the last update before the suspend. On resume the TSC starts counting from 0 again and would trip over the cycle_last comparison. This is subtle and surprising. Move the reset to a resume function in the tsc code. Signed-off-by: Martin Schwidefsky Acked-by: Thomas Gleixner Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.142191175@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 71f4368..9684254 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -744,10 +744,16 @@ static cycle_t __vsyscall_fn vread_tsc(void) } #endif +static void resume_tsc(void) +{ + clocksource_tsc.cycle_last = 0; +} + static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, + .resume = resume_tsc, .mask = CLOCKSOURCE_MASK(64), .shift = 22, .flags = CLOCK_SOURCE_IS_CONTINUOUS | -- cgit v1.1 From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) have a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system change the read_persistent_clock function to return a struct timespec. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/rtc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b2..bf67dcb 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime) } /* not static: needed by APM */ -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long retval, flags; @@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void) retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); - return retval; + ts->tv_sec = retval; + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) -- cgit v1.1 From 62a3207b8cf3de35368cdc3822b30b82d59eea95 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 17 Aug 2009 11:16:16 -0700 Subject: x86, intel_txt: Handle ACPI_SLEEP without X86_TRAMPOLINE On 32 bits, we can have CONFIG_ACPI_SLEEP set without implying CONFIG_X86_TRAMPOLINE. In that case, we simply do not need to mark the trampoline as a MAC region. Signed-off-by: H. 
Peter Anvin Cc: Shane Wang Cc: Joseph Cihula --- arch/x86/kernel/tboot.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index a183bef..c2e760c 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -189,8 +189,12 @@ static int tboot_setup_sleep(void) /* S3 resume code */ add_mac_region(acpi_wakeup_address, WAKEUP_SIZE); + +#ifdef CONFIG_X86_TRAMPOLINE /* AP trampoline code */ add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE); +#endif + /* kernel code + data + bss */ add_mac_region(virt_to_phys(_text), _end - _text); -- cgit v1.1 From b7f42ab2e237f08a5bbcefa17473e80eb05e725c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 17 Aug 2009 11:19:40 -0700 Subject: x86, apic: Move dmar_table_init() out of enable_IR() On an x2apic system, we got: [ 1.818072] ------------[ cut here ]------------ [ 1.820376] WARNING: at kernel/lockdep.c:2461 lockdep_trace_alloc+0xa5/0xe9() [ 1.835282] Hardware name: ASSY, [ 1.839006] Modules linked in: [ 1.841253] Pid: 1, comm: swapper Not tainted 2.6.31-rc5-tip-03926-g39aaa80-dirty #510 [ 1.858056] Call Trace: [ 1.859913] [] ? lockdep_trace_alloc+0xa5/0xe9 [ 1.876270] [] warn_slowpath_common+0x8d/0xd0 [ 1.879132] [] warn_slowpath_null+0x27/0x3d [ 1.896823] [] lockdep_trace_alloc+0xa5/0xe9 [ 1.900659] [] ? lock_release_holdtime+0x2f/0x199 [ 1.917188] [] kmem_cache_alloc_notrace+0x42/0x111 [ 1.922320] [] ? reserve_memtype+0x152/0x518 [ 1.938137] [] ? pat_pagerange_is_ram+0x4a/0x91 [ 1.941730] [] reserve_memtype+0x152/0x518 [ 1.958115] [] __ioremap_caller+0x1dd/0x30f [ 1.975507] [] ? acpi_os_map_memory+0x2a/0x47 [ 1.978987] [] ioremap_nocache+0x2a/0x40 [ 2.031400] [] ? trace_hardirqs_off+0x20/0x36 [ 2.036096] [] acpi_os_map_memory+0x2a/0x47 [ 2.046263] [] acpi_tb_verify_table+0x3d/0x85 [ 2.050349] [] ? _spin_unlock_irqrestore+0x50/0x76 [ 2.067327] [] acpi_get_table_with_size+0x64/0xd9 [ 2.070860] [] ? _spin_unlock_irqrestore+0x50/0x76 [ 2.088000] [] dmar_table_detect+0x33/0x70 [ 2.092047] [] dmar_table_init+0x43/0x428 [ 2.106854] [] enable_IR+0x1c/0x8d [ 2.110256] [] enable_IR_x2apic+0x7c/0x19e [ 2.127139] [] native_smp_prepare_cpus+0x139/0x3b8 [ 2.145175] [] kernel_init+0x71/0x1da [ 2.148913] [] child_rip+0xa/0x20 [ 2.152349] [] ? restore_args+0x0/0x30 [ 2.167931] [] ? kernel_init+0x0/0x1da [ 2.171671] [] ? child_rip+0x0/0x20 [ 2.187607] ---[ end trace a7919e7f17c0a725 ]--- Venkatesh Pallipadi said: | Looks like the problem started with this commit | | commit ce69a784504222c3ab6f1b3c357d09ec5772127a | Author: Gleb Natapov | Date: Mon Jul 20 15:24:17 2009 +0300 | | x86/apic: Enable x2APIC without interrupt remapping under KVM | | Before this commit, dmar_table_init() was getting called | with interrupts enabled and after this commit, it is getting | called with interrupts disabled. so try to move out dmar_table_init out of that function. 
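The rule the patch restores is generic: initialization that may allocate memory, and can therefore sleep or trip lockdep's GFP checks, has to run while interrupts are still enabled; the interrupts-off path may only consult the cached result. A minimal sketch of the pattern, with all names hypothetical:

        static int table_init_ret;              /* cached while irqs are still on */

        static int table_init(void)             /* may allocate; must not run irqs-off */
        {
                return 0;
        }

        static void enable_feature(void)        /* must not allocate */
        {
        }

        static void bring_up(void)
        {
                table_init_ret = table_init();  /* irqs still enabled here */

                local_irq_disable();
                if (!table_init_ret)
                        enable_feature();
                local_irq_enable();
        }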
Analyzed-by: Venkatesh Pallipadi Signed-off-by: Yinghai Lu Cc: Peter Zijlstra Cc: Gleb Natapov Cc: Suresh Siddha Cc: "Pallipadi, Venkatesh" LKML-Reference: <4A899F3C.2050104@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index de039fc..3fc3a6c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1365,14 +1365,6 @@ void enable_x2apic(void) int __init enable_IR(void) { #ifdef CONFIG_INTR_REMAP - int ret; - - ret = dmar_table_init(); - if (ret) { - pr_debug("dmar_table_init() failed with %d:\n", ret); - return 0; - } - if (!intr_remapping_supported()) { pr_debug("intr-remapping not supported\n"); return 0; @@ -1400,6 +1392,14 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; int ret, x2apic_enabled = 0; + int dmar_table_init_ret = 0; + +#ifdef CONFIG_INTR_REMAP + dmar_table_init_ret = dmar_table_init(); + if (dmar_table_init_ret) + pr_debug("dmar_table_init() failed with %d:\n", + dmar_table_init_ret); +#endif ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { @@ -1417,7 +1417,11 @@ void __init enable_IR_x2apic(void) mask_8259A(); mask_IO_APIC_setup(ioapic_entries); - ret = enable_IR(); + if (dmar_table_init_ret) + ret = 0; + else + ret = enable_IR(); + if (!ret) { /* IR is required if there is APIC ID > 255 even when running * under KVM -- cgit v1.1 From 8126dec32738421afa362114337331337b4be17f Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 20 Aug 2009 20:23:11 +0800 Subject: x86: Fix system crash when loading with "reservetop" parameter The system will die if the kernel is booted with the "reservetop" parameter: in the present code, the "reservetop" parameter is parsed after early_ioremap_init(), and some functions still use early_ioremap() after it. The problem is that the "reservetop" parameter can modify 'FIXADDR_TOP'. The virtual address returned by early_ioremap() is then based on the old 'FIXADDR_TOP' while the page mapping is based on the new 'FIXADDR_TOP'; a page fault occurs, and since the IDT is not prepared yet, the system is dead. So, put parse_early_param() in front of early_ioremap_init() in this patch.
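For context, "reservetop" is an early_param() hook that lowers FIXADDR_TOP; a sketch of its shape, which may differ in detail from the tree at this point:

        static int __init parse_reservetop(char *arg)
        {
                unsigned long address;

                if (!arg)
                        return -EINVAL;

                address = memparse(arg, &arg);
                reserve_top_address(address);   /* moves FIXADDR_TOP down */
                return 0;
        }
        early_param("reservetop", parse_reservetop);

With parse_early_param() moved ahead of early_ioremap_init(), this handler has already adjusted FIXADDR_TOP before the first fixmap-based virtual address is handed out.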
Signed-off-by: Xiao Guangrong Cc: yinghai@kernel.org Cc: Andrew Morton LKML-Reference: <4A8D402F.4080805@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 63f32d2..02643cc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -711,6 +711,11 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + parse_early_param(); + /* VMI may relocate the fixmap; do this before touching ioremap area */ vmi_init(); @@ -793,11 +798,6 @@ void __init setup_arch(char **cmdline_p) #endif #endif - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; - - parse_early_param(); - #ifdef CONFIG_X86_64 check_efer(); #endif -- cgit v1.1 From 3e0e1e9c5a327d4dba8490d83ef55c0564e6e8a7 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 21 Aug 2009 04:34:45 -0400 Subject: x86: Fix an incorrect argument of reserve_bootmem() This line looks suspicious: if it stays, the 'flags' parameter of reserve_bootmem_generic() is simply unused when !CONFIG_NUMA. I don't think this is what we want. Signed-off-by: WANG Cong Cc: Yinghai Lu Cc: akpm@linux-foundation.org LKML-Reference: <20090821083709.5098.52505.sendpatchset@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 6176fe8..ea56b8c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -796,7 +796,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, return ret; #else - reserve_bootmem(phys, len, BOOTMEM_DEFAULT); + reserve_bootmem(phys, len, flags); #endif if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { -- cgit v1.1 From 8cab02dc3c58a12235c6d463ce684dded9696848 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 18:19:45 +0200 Subject: x86: Do not unregister PIT clocksource on PIT oneshot setup/shutdown This basically reverts commit 1a0c009ac (x86: unregister PIT clocksource when PIT is disabled) because the problem that patch tried to address has been solved by commit 3f68535ada (clocksource: sanity check sysfs clocksource changes). The problem addressed by the original patch is that the PIT could be selected as clocksource after the system switched the PIT off or set the PIT into one shot mode, which would result in complete timekeeping wreckage. Now with the sysfs sanity check in place, PIT cannot be selected again when the system is in oneshot mode. The system will not switch to one shot mode as long as PIT is installed, because PIT is not suitable for one shot. The shutdown case, which happens when the lapic timer is installed, is covered by the fact that init_pit_clocksource() is called after the lapic timer takes over and then does not install the PIT clocksource at all. We should have done the sanity checks back then, but ... This also solves the locking problem which was reported vs. the clocksource rework.
LKML-Reference: Cc: Martin Schwidefsky Cc: john stultz Signed-off-by: Thomas Gleixner --- arch/x86/kernel/i8253.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 5cf36c0..23c1679 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -19,12 +19,6 @@ DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -#ifdef CONFIG_X86_32 -static void pit_disable_clocksource(void); -#else -static inline void pit_disable_clocksource(void) { } -#endif - /* * HPET replaces the PIT, when enabled. So we need to know, which of * the two timers is used @@ -57,12 +51,10 @@ static void init_pit_timer(enum clock_event_mode mode, outb_pit(0, PIT_CH0); outb_pit(0, PIT_CH0); } - pit_disable_clocksource(); break; case CLOCK_EVT_MODE_ONESHOT: /* One shot setup */ - pit_disable_clocksource(); outb_pit(0x38, PIT_MODE); break; @@ -200,17 +192,6 @@ static struct clocksource pit_cs = { .shift = 20, }; -static void pit_disable_clocksource(void) -{ - /* - * Use mult to check whether it is registered or not - */ - if (pit_cs.mult) { - clocksource_unregister(&pit_cs); - pit_cs.mult = 0; - } -} - static int __init init_pit_clocksource(void) { /* -- cgit v1.1 From da15cfdae03351c689736f8d142618592e3cebc3 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 19 Aug 2009 19:13:34 -0700 Subject: time: Introduce CLOCK_REALTIME_COARSE After talking with some application writers who want very fast, but not fine-grained timestamps, I decided to try to implement new clock_ids for clock_gettime(): CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE, which return the time at the last tick. This is very fast as we don't have to access any hardware (which can be very painful if you're using something like the acpi_pm clocksource), and we can even use the vdso clock_gettime() method to avoid the syscall. The only trade-off is that you only get low-res, tick-grained time resolution. This isn't a new idea; I know Ingo has a patch in the -rt tree that made the vsyscall gettimeofday() return coarse-grained time when the vsyscall64 sysctl was set to 2. However, that affects all applications on a system. With this method, applications can choose the proper speed/granularity trade-off for themselves.
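To make the new interface concrete, here is a minimal userspace sketch (an illustration, not part of the patch; it assumes a kernel with this series applied, and defines the clockid constants by hand in case the libc headers predate them; 5 and 6 are their values in the Linux ABI):

/* coarse_clock.c: compare resolution of the fine and coarse clock ids.
 * Build: gcc -Wall -o coarse_clock coarse_clock.c -lrt
 */
#include <stdio.h>
#include <time.h>

#ifndef CLOCK_REALTIME_COARSE
#define CLOCK_REALTIME_COARSE	5	/* Linux ABI value */
#endif
#ifndef CLOCK_MONOTONIC_COARSE
#define CLOCK_MONOTONIC_COARSE	6	/* Linux ABI value */
#endif

int main(void)
{
	struct timespec res, now;

	clock_getres(CLOCK_REALTIME, &res);
	printf("CLOCK_REALTIME         res: %ld ns\n", res.tv_nsec);

	clock_getres(CLOCK_REALTIME_COARSE, &res);
	printf("CLOCK_REALTIME_COARSE  res: %ld ns\n", res.tv_nsec);

	clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
	printf("CLOCK_MONOTONIC_COARSE now: %ld.%09ld\n",
	       (long)now.tv_sec, now.tv_nsec);
	return 0;
}

On a HZ=1000 kernel the coarse clocks typically report a resolution of one tick (1000000 ns), versus the 1 ns reported by the hrtimer-backed clocks.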
Signed-off-by: John Stultz Cc: Andi Kleen Cc: nikolag@ca.ibm.com Cc: Darren Hart Cc: arjan@infradead.org Cc: jonathan@jonmasters.org LKML-Reference: <1250734414.6897.5.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/vgtod.h | 1 + arch/x86/kernel/vsyscall_64.c | 1 + arch/x86/vdso/vclock_gettime.c | 39 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69..3d61e20 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -21,6 +21,7 @@ struct vsyscall_gtod_data { u32 shift; } clock; struct timespec wall_to_monotonic; + struct timespec wall_time_coarse; }; extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a..cf53a78 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78..ee55754 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts) return 0; } +notrace static noinline int do_realtime_coarse(struct timespec *ts) +{ + unsigned long seq; + do { + seq = read_seqbegin(&gtod->lock); + ts->tv_sec = gtod->wall_time_coarse.tv_sec; + ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; + } while (unlikely(read_seqretry(&gtod->lock, seq))); + return 0; +} + +notrace static noinline int do_monotonic_coarse(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(&gtod->lock); + secs = gtod->wall_time_coarse.tv_sec; + ns = gtod->wall_time_coarse.tv_nsec; + secs += gtod->wall_to_monotonic.tv_sec; + ns += gtod->wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(&gtod->lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) + if (likely(gtod->sysctl_enabled)) switch (clock) { case CLOCK_REALTIME: - return do_realtime(ts); + if (likely(gtod->clock.vread)) + return do_realtime(ts); + break; case CLOCK_MONOTONIC: - return do_monotonic(ts); + if (likely(gtod->clock.vread)) + return do_monotonic(ts); + break; + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(ts); } return vdso_fallback_gettime(clock, ts); } -- cgit v1.1 From 8b5a10fc6fd02289ea03480f93382b1a99006142 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 19 Aug 2009 08:40:48 +0100 Subject: x86: properly annotate alternatives.c Some of the NOP tables aren't used on 64-bit, quite some code and data is needed post-init for module loading only, and a couple of functions aren't used outside that file (i.e. they can be static, and don't need to be exported). The change to __INITDATA/__INITRODATA is needed to avoid an assembler warning.
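For orientation, the *_or_module annotations this patch relies on roughly reduce to the following (a paraphrase of the include/linux/init.h definitions of this era; the real header is authoritative):

/* Paraphrased sketch of include/linux/init.h: with modules enabled the
 * code/data must survive boot, so the annotations expand to nothing
 * (or plain sections); without modules they collapse to __init* and
 * get discarded after boot. */
#ifdef CONFIG_MODULES
#define __init_or_module
#define __initdata_or_module
#define __initconst_or_module
#define __INITRODATA_OR_MODULE	.section ".rodata","a",%progbits
#else
#define __init_or_module	__init
#define __initdata_or_module	__initdata
#define __initconst_or_module	__initconst
#define __INITRODATA_OR_MODULE	__INITRODATA
#endif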
Signed-off-by: Jan Beulich LKML-Reference: <4A8BC8A00200007800010823@vpn.id2.novell.com> Acked-by: Sam Ravnborg Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 7 ----- arch/x86/kernel/alternative.c | 56 ++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 1a37bcd..c240efc 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -73,8 +73,6 @@ static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} #endif /* CONFIG_SMP */ -const unsigned char *const *find_nop_table(void); - /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ \ @@ -144,8 +142,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif -extern void add_nops(void *insns, unsigned int len); - /* * Clear and restore the kernel write-protection flag on the local CPU. * Allows the kernel to edit read-only pages. @@ -161,10 +157,7 @@ extern void add_nops(void *insns, unsigned int len); * Intel's errata. * On the local CPU you need to be protected again NMI or MCE handlers seeing an * inconsistent instruction while you patch. - * The _early version expects the memory to already be RW. */ - extern void *text_poke(void *addr, const void *opcode, size_t len); -extern void *text_poke_early(void *addr, const void *opcode, size_t len); #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f576587..4869351 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly); #define smp_alt_once 1 #endif -static int debug_alternative; +static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) { @@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str) __setup("noreplace-smp", setup_noreplace_smp); #ifdef CONFIG_PARAVIRT -static int noreplace_paravirt = 0; +static int __initdata_or_module noreplace_paravirt = 0; static int __init setup_noreplace_paravirt(char *str) { @@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); #define DPRINTK(fmt, args...) if (debug_alternative) \ printk(KERN_DEBUG fmt, args) -#ifdef GENERIC_NOP1 +#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings. 
*/ -asm("\t.section .rodata, \"a\"\nintelnops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8 "\t.previous"); extern const unsigned char intelnops[]; -static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, intelnops + 1, @@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { #endif #ifdef K8_NOP1 -asm("\t.section .rodata, \"a\"\nk8nops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 K8_NOP7 K8_NOP8 "\t.previous"); extern const unsigned char k8nops[]; -static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, k8nops + 1, @@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { }; #endif -#ifdef K7_NOP1 -asm("\t.section .rodata, \"a\"\nk7nops: " +#if defined(K7_NOP1) && !defined(CONFIG_X86_64) +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 K7_NOP7 K7_NOP8 "\t.previous"); extern const unsigned char k7nops[]; -static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, k7nops + 1, @@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { #endif #ifdef P6_NOP1 -asm("\t.section .rodata, \"a\"\np6nops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 P6_NOP7 P6_NOP8 "\t.previous"); extern const unsigned char p6nops[]; -static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +p6_nops[ASM_NOP_MAX+1] = { NULL, p6nops, p6nops + 1, @@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { #ifdef CONFIG_X86_64 extern char __vsyscall_0; -const unsigned char *const *find_nop_table(void) +static const unsigned char *const *__init_or_module find_nop_table(void) { if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_has(X86_FEATURE_NOPL)) @@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void) #else /* CONFIG_X86_64 */ -const unsigned char *const *find_nop_table(void) +static const unsigned char *const *__init_or_module find_nop_table(void) { if (boot_cpu_has(X86_FEATURE_K8)) return k8_nops; @@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void) #endif /* CONFIG_X86_64 */ /* Use this to add nops to a buffer, then text_poke the whole buffer. */ -void add_nops(void *insns, unsigned int len) +static void __init_or_module add_nops(void *insns, unsigned int len) { const unsigned char *const *noptable = find_nop_table(); @@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len) len -= noplen; } } -EXPORT_SYMBOL_GPL(add_nops); extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern u8 *__smp_locks[], *__smp_locks_end[]; +static void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with @@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[]; APs have less capabilities than the boot processor are not handled. Tough. 
Make sure you disable such features by hand. */ -void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) { struct alt_instr *a; char insnbuf[MAX_PATCH_LEN]; @@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); static int smp_mode = 1; /* protected by smp_alt */ -void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) +void __init_or_module alternatives_smp_module_add(struct module *mod, + char *name, + void *locks, void *locks_end, + void *text, void *text_end) { struct smp_alt_module *smp; @@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name, mutex_unlock(&smp_alt); } -void alternatives_smp_module_del(struct module *mod) +void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; @@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp) #endif #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) +void __init_or_module apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) { struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; @@ -485,7 +492,8 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -void *text_poke_early(void *addr, const void *opcode, size_t len) +static void *__init_or_module text_poke_early(void *addr, const void *opcode, + size_t len) { unsigned long flags; local_irq_save(flags); -- cgit v1.1 From d0af9eed5aa91b6b7b5049cae69e5ea956fd85c3 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 19 Aug 2009 18:05:36 -0700 Subject: x86, pat/mtrr: Rendezvous all the cpus for MTRR/PAT init SDM Vol 3a, in the section titled "MTRR considerations in MP systems", specifies the need for synchronizing the logical CPUs while initializing/updating MTRR. Currently the Linux kernel does the synchronization of all CPUs only when a single MTRR register is programmed/updated. During AP online (during boot/cpu-online/resume), where we initialize all the MTRR/PAT registers, we don't follow this synchronization algorithm. This can lead to scenarios where, during a dynamic cpu online, that logical cpu is initializing MTRR/PAT with cache disabled (cr0.cd=1) etc. while the other logical HT sibling continues to run (also with cache disabled because of cr0.cd=1 on its sibling). Starting from Westmere, VMX transitions with cr0.cd=1 don't work properly (because of some VMX performance optimizations) and the above scenario (with one logical cpu doing VMX activity and another logical cpu coming online) can result in a system crash. Fix the MTRR initialization by doing a rendezvous of all the CPUs. During boot and resume, we delay the MTRR/PAT init for APs till all the logical CPUs come online; the rendezvous process at the end of AP bringup will then initialize the MTRR/PAT for all APs. For dynamic single cpu online, we synchronize all the logical CPUs and do the MTRR/PAT init on the AP that is coming online. Signed-off-by: Suresh Siddha Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/mtrr.h | 7 +++++++ arch/x86/kernel/cpu/mtrr/main.c | 46 +++++++++++++++++++++++++++++++++-------- arch/x86/kernel/smpboot.c | 14 +++++++++++++ arch/x86/power/cpu.c | 2 +- 4 files changed, 59 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index a51ada8..d5366ec 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -121,8 +121,12 @@ extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); +extern void set_mtrr_aps_delayed_init(void); +extern void mtrr_aps_init(void); +extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); +extern u32 mtrr_aps_delayed_init; # else static inline u8 mtrr_type_lookup(u64 addr, u64 end) { @@ -161,6 +165,9 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) #define mtrr_ap_init() do {} while (0) #define mtrr_bp_init() do {} while (0) +#define set_mtrr_aps_delayed_init() do {} while (0) +#define mtrr_aps_init() do {} while (0) +#define mtrr_bp_restore() do {} while (0) # endif #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 7af0f88..7339be0 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; +u32 mtrr_aps_delayed_init; static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; @@ -163,7 +164,10 @@ static void ipi_handler(void *info) if (data->smp_reg != ~0U) { mtrr_if->set(data->smp_reg, data->smp_base, data->smp_size, data->smp_type); - } else { + } else if (mtrr_aps_delayed_init) { + /* + * Initialize the MTRRs inaddition to the synchronisation. + */ mtrr_if->set_all(); } @@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ */ if (reg != ~0U) mtrr_if->set(reg, base, size, type); + else if (!mtrr_aps_delayed_init) + mtrr_if->set_all(); /* Wait for the others */ while (atomic_read(&data.count)) @@ -721,9 +727,7 @@ void __init mtrr_bp_init(void) void mtrr_ap_init(void) { - unsigned long flags; - - if (!mtrr_if || !use_intel()) + if (!use_intel() || mtrr_aps_delayed_init) return; /* * Ideally we should hold mtrr_mutex here to avoid mtrr entries @@ -738,11 +742,7 @@ void mtrr_ap_init(void) * 2. cpu hotadd time. 
We let mtrr_add/del_page hold cpuhotplug * lock to prevent mtrr entry changes */ - local_irq_save(flags); - - mtrr_if->set_all(); - - local_irq_restore(flags); + set_mtrr(~0U, 0, 0, 0); } /** @@ -753,6 +753,34 @@ void mtrr_save_state(void) smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); } +void set_mtrr_aps_delayed_init(void) +{ + if (!use_intel()) + return; + + mtrr_aps_delayed_init = 1; +} + +/* + * MTRR initialization for all AP's + */ +void mtrr_aps_init(void) +{ + if (!use_intel()) + return; + + set_mtrr(~0U, 0, 0, 0); + mtrr_aps_delayed_init = 0; +} + +void mtrr_bp_restore(void) +{ + if (!use_intel()) + return; + + mtrr_if->set_all(); +} + static int __init mtrr_init_finialize(void) { if (!mtrr_if) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda6..d720b7e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1116,9 +1116,22 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) if (is_uv_system()) uv_system_init(); + + set_mtrr_aps_delayed_init(); out: preempt_enable(); } + +void arch_enable_nonboot_cpus_begin(void) +{ + set_mtrr_aps_delayed_init(); +} + +void arch_enable_nonboot_cpus_end(void) +{ + mtrr_aps_init(); +} + /* * Early setup to make printk work. */ @@ -1140,6 +1153,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif check_nmi_watchdog(); + mtrr_aps_init(); } static int __initdata setup_possible_cpus = -1; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index b3d20b9..417c9f5 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -242,7 +242,7 @@ static void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); - mtrr_ap_init(); + mtrr_bp_restore(); #ifdef CONFIG_X86_OLD_MCE mcheck_init(&boot_cpu_data); -- cgit v1.1 From 5400743db5a06a4e6e298725a2044c40edcb27b9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Aug 2009 17:00:02 -0700 Subject: x86, mtrr: make mtrr_aps_delayed_init static bool mtrr_aps_delayed_init was declared u32 and made global, but it only ever takes boolean values and is only ever used in arch/x86/kernel/cpu/mtrr/main.c. Declare it "static bool" and remove external references. Signed-off-by: H.
Peter Anvin Cc: Suresh Siddha --- arch/x86/include/asm/mtrr.h | 1 - arch/x86/kernel/cpu/mtrr/main.c | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index d5366ec..4365ffd 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -126,7 +126,6 @@ extern void mtrr_aps_init(void); extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); -extern u32 mtrr_aps_delayed_init; # else static inline u8 mtrr_type_lookup(u64 addr, u64 end) { diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 7339be0..84e83de 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -58,7 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; -u32 mtrr_aps_delayed_init; +static bool mtrr_aps_delayed_init; static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; @@ -758,7 +758,7 @@ void set_mtrr_aps_delayed_init(void) if (!use_intel()) return; - mtrr_aps_delayed_init = 1; + mtrr_aps_delayed_init = true; } /* @@ -770,7 +770,7 @@ void mtrr_aps_init(void) return; set_mtrr(~0U, 0, 0, 0); - mtrr_aps_delayed_init = 0; + mtrr_aps_delayed_init = false; } void mtrr_bp_restore(void) -- cgit v1.1 From 366d19e181be873c70f4aafca3931d77d781ccd7 Mon Sep 17 00:00:00 2001 From: Tobias Doerffel Date: Fri, 21 Aug 2009 23:06:23 +0200 Subject: x86: add specific support for Intel Atom architecture Add another option when selecting CPU family so the kernel can be optimized for Intel Atom CPUs. If GCC supports tuning options for Intel Atom they will be used. Signed-off-by: Tobias Doerffel Signed-off-by: H. Peter Anvin LKML-Reference: <1251018457-19157-1-git-send-email-tobias.doerffel@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 19 ++++++++++++++----- arch/x86/Makefile | 2 ++ arch/x86/Makefile_32.cpu | 2 ++ arch/x86/include/asm/module.h | 2 ++ 4 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8130334..527519b 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -262,6 +262,15 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) +config MATOM + bool "Intel Atom" + ---help--- + + Select this for the Intel Atom platform. Intel Atom CPUs have an + in-order pipelining architecture and thus can benefit from + accordingly optimized code. Use a recent GCC with specific Atom + support in order to fully benefit from selecting this option. 
+ config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 @@ -295,7 +304,7 @@ config X86_CPU config X86_L1_CACHE_BYTES int default "128" if MPSC - default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 + default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32 config X86_INTERNODE_CACHE_BYTES int @@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT default "7" if MPENTIUM4 || MPSC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU config X86_XADD def_bool y @@ -359,7 +368,7 @@ config X86_INTEL_USERCOPY config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM config X86_USE_3DNOW def_bool y @@ -387,7 +396,7 @@ config X86_P6_NOP config X86_TSC def_bool y - depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 + depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 config X86_CMPXCHG64 def_bool y @@ -397,7 +406,7 @@ config X86_CMPXCHG64 # generates cmov. 
config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659..8a4c24c 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -55,6 +55,8 @@ else cflags-$(CONFIG_MCORE2) += \ $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 80177ec..30e9a26 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -33,6 +33,8 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-f cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7) += -march=i686 cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) +cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) # AMD Elan support cflags-$(CONFIG_X86_ELAN) += -march=i486 diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 47d6274..e959c4a 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -28,6 +28,8 @@ struct mod_arch_specific {}; #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MATOM +#define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 #define MODULE_PROC_FAMILY "686 " #elif defined CONFIG_MPENTIUMII -- cgit v1.1 From 10f02d1168585edf66229bb2ec90a42f32667a78 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 23 Aug 2009 23:17:27 +0400 Subject: x86: uv: Clean up uv_ptc_init(), use proc_create() create_proc_entry() is getting deprecated. Signed-off-by: Alexey Dobriyan Cc: cpw@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 77b9689..503c1f2 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -640,13 +640,13 @@ static int __init uv_ptc_init(void) if (!is_uv_system()) return 0; - proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); + proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, + &proc_uv_ptc_operations); if (!proc_uv_ptc) { printk(KERN_ERR "unable to create %s proc entry\n", UV_PTC_BASENAME); return -EINVAL; } - proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; return 0; } -- cgit v1.1 From 005155b1f626d2b2d7932e4afdf4fead168c6888 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 25 Aug 2009 15:35:12 +0200 Subject: x86: Fix x86_model test in es7000_apic_is_cluster() For the x86_model, "greater than 6 or less than 12" is logically always true; the intended test was an AND, as the sketch below demonstrates.
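A tiny standalone sketch (illustrative only, not kernel code) makes the boolean error visible:

#include <stdio.h>

int main(void)
{
	int model;

	for (model = 0; model <= 15; model++)
		printf("model %2d: OR=%d AND=%d\n", model,
		       model >= 7 || model <= 11,  /* 1 for every model: the two ranges cover everything */
		       model >= 7 && model <= 11); /* 1 only for models 7..11, as intended */
	return 0;
}

Every model value prints OR=1, confirming the original condition could never reject a model.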
Signed-off-by: Roel Kluin Cc: Andrew Morton Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 69328ac..420f95d 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -167,7 +167,7 @@ static int es7000_apic_is_cluster(void) { /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && - (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) + (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) return 1; return 0; -- cgit v1.1 From ab94fcf528d127fcb490175512a8910f37e5b346 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 25 Aug 2009 16:47:16 -0700 Subject: x86: allow "=rm" in native_save_fl() This is a partial revert of f1f029c7bfbf4ee1918b90a431ab823bed812504. "=rm" is allowed in this context, because "pop" is explicitly defined to adjust the stack pointer *before* it evaluates its effective address, if it has one. Thus, we do end up writing to the correct address even if we use an on-stack memory argument. The original reporter for f1f029c7bfbf4ee1918b90a431ab823bed812504 was apparently using a broken x86 simulator. [ Impact: performance ] Signed-off-by: H. Peter Anvin Cc: Gabe Black --- arch/x86/include/asm/irqflags.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index c6ccbe7..9e2b952 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,14 +13,13 @@ static inline unsigned long native_save_fl(void) unsigned long flags; /* - * Note: this needs to be "=r" not "=rm", because we have the - * stack offset from what gcc expects at the time the "pop" is - * executed, and so a memory reference with respect to the stack - * would end up using the wrong address. + * "=rm" is safe here, because "pop" adjusts the stack before + * it evaluates its effective address -- this is part of the + * documented behavior of the "pop" instruction. */ asm volatile("# __raw_save_flags\n\t" "pushf ; pop %0" - : "=r" (flags) + : "=rm" (flags) : /* no input */ : "memory"); -- cgit v1.1 From 8cab02dc3c58a12235c6d463ce684dded9696848 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 24 Aug 2009 21:53:36 +0400 Subject: x86, ioapic: Define IO_APIC_DEFAULT_PHYS_BASE constant We already have APIC_DEFAULT_PHYS_BASE, so add an IO_APIC_DEFAULT_PHYS_BASE constant just to be consistent. Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090824175550.927946757@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicdef.h | 3 ++- arch/x86/kernel/mpparse.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 7ddb36a..7386bfa 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -8,7 +8,8 @@ * Ingo Molnar , 1999, 2000 */ -#define APIC_DEFAULT_PHYS_BASE 0xfee00000 +#define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 #define APIC_ID 0x20 diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 651c93b..fcd513b 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -482,11 +482,11 @@ static void __init construct_ioapic_table(int mpc_default_type) MP_bus_info(&bus); } - ioapic.type = MP_IOAPIC; - ioapic.apicid = 2; - ioapic.apicver = mpc_default_type > 4 ?
0x10 : 0x01; - ioapic.flags = MPC_APIC_USABLE; - ioapic.apicaddr = 0xFEC00000; + ioapic.type = MP_IOAPIC; + ioapic.apicid = 2; + ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; + ioapic.flags = MPC_APIC_USABLE; + ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE; MP_ioapic_info(&ioapic); /* -- cgit v1.1 From ffc438366c2660a6a811b94ba33229bf217f8254 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 24 Aug 2009 21:53:39 +0400 Subject: x86, ioapic: Get rid of needless check and simplify ioapic_setup_resources() alloc_bootmem() already panics on allocation failure, so there is no need to check the result. Also, the global variable can be decoupled from the function body and passed in as a parameter, which allows us to simplify ioapic_init_mappings() as well: the "for" loop already uses nr_ioapics as its loop condition, so there is no need to check whether ioapic_setup_resources() returned NULL again. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090824175551.493629148@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2999f3d..d836b4d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4053,7 +4053,7 @@ void __init setup_ioapic_dest(void) static struct resource *ioapic_resources; -static struct resource * __init ioapic_setup_resources(void) +static struct resource * __init ioapic_setup_resources(int nr_ioapics) { unsigned long n; struct resource *res; @@ -4069,15 +4069,13 @@ static struct resource * __init ioapic_setup_resources(void) mem = alloc_bootmem(n); res = (void *)mem; - if (mem != NULL) { - mem += sizeof(struct resource) * nr_ioapics; + mem += sizeof(struct resource) * nr_ioapics; - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; } ioapic_resources = res; @@ -4091,7 +4089,7 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; - ioapic_res = ioapic_setup_resources(); + ioapic_res = ioapic_setup_resources(nr_ioapics); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].apicaddr; @@ -4120,11 +4118,9 @@ fake_ioapic_page: __fix_to_virt(idx), ioapic_phys); idx++; - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; } } -- cgit v1.1 From 5051fd69773d2d044734b78516317a04d3774871 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 24 Aug 2009 21:53:37 +0400 Subject: x86, e820: Guard against array overflowed in __e820_add_region() Better to be paranoid against unpredicted nr_map modifications.
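The defensive pattern, distilled into a freestanding miniature (hypothetical names, for illustration only; the kernel function itself operates on e820x->nr_map and ARRAY_SIZE):

#include <stdio.h>

#define MAP_MAX 4

static int nr_map;

static void add_region(void)
{
	/* '>=' also catches the case where nr_map has somehow already
	 * run past the end, not just the exactly-full case '==' sees. */
	if (nr_map >= MAP_MAX) {
		printf("Ooops! Too many entries in the memory map!\n");
		return;
	}
	nr_map++;
}

int main(void)
{
	nr_map = MAP_MAX + 1;	/* simulate an unpredicted modification */
	add_region();		/* with '==' this would walk off the array */
	return 0;
}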
Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090824175551.146070377@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7271fa3..2e5e0fa 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -115,7 +115,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, { int x = e820x->nr_map; - if (x == ARRAY_SIZE(e820x->map)) { + if (x >= ARRAY_SIZE(e820x->map)) { printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); return; } -- cgit v1.1 From 680b6cfd3cee30a7d997d49430fb73af84523853 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 26 Aug 2009 16:20:36 +0900 Subject: x86, mce: CE in last bank prevents panic by unknown MCE If the MCE handler is called but none of the mces_seen entries has a machine check event that might have signaled the MCE (i.e. an event with severity higher than MCE_KEEP_SEVERITY), a panic with "Machine check from unknown source" is taken, since the MCE is assumed to be signaled by an external agent or similar. Usually mces_seen never points to an MCE_KEEP_SEVERITY event such as a CE. But it can happen, because the initial value of mces_seen is accidentally modified by mce_no_way_out(): if mce_no_way_out() runs through all banks and the last bank has the CE, mces_seen points to the CE and the "panic by unknown" will not be taken. This patch fixes this undesired behavior, and clarifies the logic. Signed-off-by: Hidetoshi Seto Cc: H. Peter Anvin Cc: Andi Kleen Cc: Jin Dongming LKML-Reference: <4A94E244.3020301@jp.fujitsu.com> Signed-off-by: Ingo Molnar Reported-by: Jin Dongming --- arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 54bd1b2..325559d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -612,7 +612,7 @@ out: * This way we prevent any potential data corruption in a unrecoverable case * and also makes sure always all CPU's errors are examined. * - * Also this detects the case of an machine check event coming from outer + * Also this detects the case of a machine check event coming from outer * space (not detected by any CPUs) In this case some external agent wants * us to shut down, so panic too. * @@ -665,7 +665,7 @@ static void mce_reign(void) * No machine check event found. Must be some external * source or one CPU is hung. Panic. */ - if (!m && tolerant < 3) + if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) mce_panic("Machine check from unknown source", NULL, NULL); /* @@ -889,11 +889,11 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_setup(&m); m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - no_way_out = mce_no_way_out(&m, &msg); - final = &__get_cpu_var(mces_seen); *final = m; + no_way_out = mce_no_way_out(&m, &msg); + barrier(); /* -- cgit v1.1 From d3a247bfb2c26f5b67367d58af7ad8c2efbbc6c1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 26 Aug 2009 21:13:24 +0400 Subject: x86, apic: Slim down stack usage in early_init_lapic_mapping() As far as I can see, there is no external poking of mp_lapic_addr in this procedure that could lead to unpredicted changes and require a local storage unit for it. Let's use it directly.
Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090826171324.GC4548@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3fc3a6c..159740d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1562,8 +1562,6 @@ no_apic: #ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) { - unsigned long phys_addr; - /* * If no local APIC can be found then go out * : it means there is no mpatable and MADT @@ -1571,11 +1569,9 @@ void __init early_init_lapic_mapping(void) if (!smp_found_config) return; - phys_addr = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, phys_addr); + set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, phys_addr); + APIC_BASE, mp_lapic_addr); /* * Fetch the APIC ID of the BSP in case we have a -- cgit v1.1 From 5fc517466dd3d0fc6d2a5180ca6792e60344d8be Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:32 -0700 Subject: x86, pat: Keep identity maps consistent with mmaps even when pat_disabled Make reserve_memtype() internally take care of the pat-disabled case and fall back to default return values. Remove the specific pat_disabled checks in the track_* routines. Change kernel_map_sync_memtype() to sync the identity map even when pat is disabled. This change ensures that, even in the pat-disabled case, we keep the identity map in sync. Before this patch, with pat disabled, ioremap() kept the identity maps in sync while other APIs like pci and /dev/mem mmap didn't, which is inconsistent behavior. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/mm/pat.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e6718bb..d5af279 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -339,6 +339,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_type) { if (req_type == -1) *new_type = _PAGE_CACHE_WB; + else if (req_type == _PAGE_CACHE_WC) + *new_type = _PAGE_CACHE_UC_MINUS; else *new_type = req_type & _PAGE_CACHE_MASK; } @@ -577,7 +579,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) { unsigned long id_sz; - if (!pat_enabled || base >= __pa(high_memory)) + if (base >= __pa(high_memory)) return 0; id_sz = (__pa(high_memory) < base + size) ? @@ -677,9 +679,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; - if (!pat_enabled) - return 0; - /* * For now, only handle remap_pfn_range() vmas where * is_linear_pfn_mapping() == TRUE. Handling of @@ -715,9 +714,6 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, resource_size_t paddr; unsigned long vma_size = vma->vm_end - vma->vm_start; - if (!pat_enabled) - return 0; - /* * For now, only handle remap_pfn_range() vmas where * is_linear_pfn_mapping() == TRUE. Handling of @@ -743,9 +739,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, resource_size_t paddr; unsigned long vma_size = vma->vm_end - vma->vm_start; - if (!pat_enabled) - return; - /* * For now, only handle remap_pfn_range() vmas where * is_linear_pfn_mapping() == TRUE.
Handling of -- cgit v1.1 From 279e669b3fc0068cc3509e8e53036999e1e86588 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:33 -0700 Subject: x86, pat: ioremap to follow same PAT restrictions as other PAT users ioremap has this hard-coded check for new type and requested type. That check differs from other PAT users like /dev/mem mmap and remap_pfn_range in only one condition, where the requested type is UC_MINUS and the new type is WC. Under that condition, ioremap fails. But other PAT interfaces succeed with a WC mapping. Change ioremap to be in sync with the other PAT APIs and use the same macro as the others. Also change the error print to KERN_ERR instead of pr_debug. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/mm/ioremap.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8a45093..aeaea8c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -228,24 +228,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, retval = reserve_memtype(phys_addr, (u64)phys_addr + size, prot_val, &new_prot_val); if (retval) { - pr_debug("Warning: reserve_memtype returned %d\n", retval); + printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); return NULL; } if (prot_val != new_prot_val) { - /* - * Do not fallback to certain memory types with certain - * requested type: - * - request is uc-, return cannot be write-back - * - request is uc-, return cannot be write-combine - * - request is write-combine, return cannot be write-back - */ - if ((prot_val == _PAGE_CACHE_UC_MINUS && - (new_prot_val == _PAGE_CACHE_WB || - new_prot_val == _PAGE_CACHE_WC)) || - (prot_val == _PAGE_CACHE_WC && - new_prot_val == _PAGE_CACHE_WB)) { - pr_debug( + if (!is_new_memtype_allowed(prot_val, new_prot_val)) { + printk(KERN_ERR "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", (unsigned long long)phys_addr, (unsigned long long)(phys_addr + size), -- cgit v1.1 From 9fd126bc742f74a95d2ba610247712ff05da02fe Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:34 -0700 Subject: x86, pat: New i/f for driver to request memtype for IO regions Add new routines to request memtype for IO regions. This will currently be a backend for the io_mapping_* routines. But it can also be made available to drivers directly in the future, in case it is needed. The reserve interface reserves the memory, makes sure we have a compatible memory type available, and keeps the identity map in sync when needed. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/pat.h | 5 +++++ arch/x86/mm/pat.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 7af14e5..e2c1668 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -19,4 +19,9 @@ extern int free_memtype(u64 start, u64 end); extern int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flag); +int io_reserve_memtype(resource_size_t start, resource_size_t end, + unsigned long *type); + +void io_free_memtype(resource_size_t start, resource_size_t end); + #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d5af279..82d097c 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -498,6 +498,55 @@ int free_memtype(u64 start, u64 end) } +/** + * io_reserve_memtype - Request a memory type mapping for a region of memory + * @start: start (physical address) of the region + * @end: end (physical address) of the region + * @type: A pointer to memtype, with requested type. On success, requested + * or any other compatible type that was available for the region is returned + * + * On success, returns 0 + * On failure, returns non-zero + */ +int io_reserve_memtype(resource_size_t start, resource_size_t end, + unsigned long *type) +{ + unsigned long req_type = *type; + unsigned long new_type; + int ret; + + WARN_ON_ONCE(iomem_map_sanity_check(start, end - start)); + + ret = reserve_memtype(start, end, req_type, &new_type); + if (ret) + goto out_err; + + if (!is_new_memtype_allowed(req_type, new_type)) + goto out_free; + + if (kernel_map_sync_memtype(start, end - start, new_type) < 0) + goto out_free; + + *type = new_type; + return 0; + +out_free: + free_memtype(start, end); + ret = -EBUSY; +out_err: + return ret; +} + +/** + * io_free_memtype - Release a memory type mapping for a region of memory + * @start: start (physical address) of the region + * @end: end (physical address) of the region + */ +void io_free_memtype(resource_size_t start, resource_size_t end) +{ + free_memtype(start, end); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { -- cgit v1.1 From 9e36fda0b359d2a6ae039c3d7e71a04502a77898 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:35 -0700 Subject: x86, pat: Add PAT reserve free to io_mapping* APIs io_mapping_* interfaces were added, mainly for graphics drivers. Make this interface go through the PAT reserve/free, instead of hardcoding WC mapping. This makes sure that there are no aliases due to unconditional WC setting. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/iomap.h | 9 ++++++--- arch/x86/mm/iomap_32.c | 27 +++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 0e9fe1d..f35eb45 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -26,13 +26,16 @@ #include #include -int -is_io_mapping_possible(resource_size_t base, unsigned long size); - void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void iounmap_atomic(void *kvaddr, enum km_type type); +int +iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); + +void +iomap_free(resource_size_t base, unsigned long size); + #endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84c..84e236c 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,7 +21,7 @@ #include #include -int is_io_mapping_possible(resource_size_t base, unsigned long size) +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) /* There is no way to map greater than 1 << 32 address without PAE */ @@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) #endif return 1; } -EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) +{ + unsigned long flag = _PAGE_CACHE_WC; + int ret; + + if (!is_io_mapping_possible(base, size)) + return -EINVAL; + + ret = io_reserve_memtype(base, base + size, &flag); + if (ret) + return ret; + + *prot = __pgprot(__PAGE_KERNEL | flag); + return 0; +} +EXPORT_SYMBOL_GPL(iomap_create_wc); + +void +iomap_free(resource_size_t base, unsigned long size) +{ + io_free_memtype(base, base + size); +} +EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { -- cgit v1.1 From 335ef896d4c6639849d79367f0fef9abc06d121b Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:36 -0700 Subject: x86, pat: Add rbtree to do quick lookup in memtype tracking PAT memtype tracking uses a linear linked list to keep track of IO (non-RAM) regions and their memtypes. The code used a last_accessed pointer as a cache to speed up the lookup. As per discussions with H. Peter Anvin a while back, having an rbtree here will avoid bad performance in pathological cases where we may end up with a huge linked list. This may not add any noticeable performance speedup in the normal case, as the number of entries in the PAT memtype list tends to be in the ~20-30 range. The patch removes the "cached_entry" logic, as with the rbtree we have a more generic way of speeding up the lookup. With this patch, we use the rbtree to do the quick lookup. We still use a linked list, as the memtype ranges tracked can be of different sizes and can overlap in different ways. We also keep track of usage counts with the linked list. Example: Multiple ioremaps with different sizes uncached-minus @ 0xfffff00000-0xfffff04000 uncached-minus @ 0xfffff02000-0xfffff03000 And one userlevel mmap and the thread forks a new process uncached-minus @ 0xbf453000-0xbf454000 uncached-minus @ 0xbf453000-0xbf454000 Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H.
Peter Anvin --- arch/x86/mm/pat.c | 106 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 82d097c..c90f242 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags) * areas). All the aliases have the same cache attributes of course. * Zero attributes are represented as holes. * - * Currently the data structure is a list because the number of mappings - * are expected to be relatively small. If this should be a problem - * it could be changed to a rbtree or similar. + * The data structure is a list that is also organized as an rbtree + * sorted on the start address of memtype range. * - * memtype_lock protects the whole list. + * memtype_lock protects both the linear list and rbtree. */ struct memtype { @@ -160,11 +160,53 @@ struct memtype { u64 end; unsigned long type; struct list_head nd; + struct rb_node rb; }; +static struct rb_root memtype_rbroot = RB_ROOT; static LIST_HEAD(memtype_list); static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ +static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) +{ + struct rb_node *node = root->rb_node; + struct memtype *last_lower = NULL; + + while (node) { + struct memtype *data = container_of(node, struct memtype, rb); + + if (data->start < start) { + last_lower = data; + node = node->rb_right; + } else if (data->start > start) { + node = node->rb_left; + } else + return data; + } + + /* Will return NULL if there is no entry with its start <= start */ + return last_lower; +} + +static void memtype_rb_insert(struct rb_root *root, struct memtype *data) +{ + struct rb_node **new = &(root->rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct memtype *this = container_of(*new, struct memtype, rb); + + parent = *new; + if (data->start <= this->start) + new = &((*new)->rb_left); + else if (data->start > this->start) + new = &((*new)->rb_right); + } + + rb_link_node(&data->rb, parent, new); + rb_insert_color(&data->rb, root); +} + /* * Does intersection of PAT memory type and MTRR memory type and returns * the resulting memory type as PAT understands it. 
@@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) return -EBUSY; } -static struct memtype *cached_entry; -static u64 cached_start; - static int pat_pagerange_is_ram(unsigned long start, unsigned long end) { int ram_page = 0, not_rampage = 0; @@ -382,17 +421,19 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, spin_lock(&memtype_lock); - if (cached_entry && start >= cached_start) - entry = cached_entry; - else + entry = memtype_rb_search(&memtype_rbroot, new->start); + if (likely(entry != NULL)) { + /* To work correctly with list_for_each_entry_continue */ + entry = list_entry(entry->nd.prev, struct memtype, nd); + } else { entry = list_entry(&memtype_list, struct memtype, nd); + } /* Search for existing mapping that overlaps the current range */ where = NULL; list_for_each_entry_continue(entry, &memtype_list, nd) { if (end <= entry->start) { where = entry->nd.prev; - cached_entry = list_entry(where, struct memtype, nd); break; } else if (start <= entry->start) { /* end > entry->start */ err = chk_conflict(new, entry, new_type); @@ -400,8 +441,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); where = entry->nd.prev; - cached_entry = list_entry(where, - struct memtype, nd); } break; } else if (start < entry->end) { /* start > entry->start */ @@ -409,8 +448,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (!err) { dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); - cached_entry = list_entry(entry->nd.prev, - struct memtype, nd); /* * Move to right position in the linked @@ -438,13 +475,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return err; } - cached_start = start; - if (where) list_add(&new->nd, where); else list_add_tail(&new->nd, &memtype_list); + memtype_rb_insert(&memtype_rbroot, new); + spin_unlock(&memtype_lock); dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", @@ -456,7 +493,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, int free_memtype(u64 start, u64 end) { - struct memtype *entry; + struct memtype *entry, *saved_entry; int err = -EINVAL; int is_range_ram; @@ -474,17 +511,46 @@ int free_memtype(u64 start, u64 end) return -EINVAL; spin_lock(&memtype_lock); + + entry = memtype_rb_search(&memtype_rbroot, start); + if (unlikely(entry == NULL)) + goto unlock_ret; + + /* + * Saved entry points to an entry with start same or less than what + * we searched for. 
Now go through the list in both directions to look + * for the entry that matches with both start and end, with list stored + * in sorted start address + */ + saved_entry = entry; list_for_each_entry(entry, &memtype_list, nd) { if (entry->start == start && entry->end == end) { - if (cached_entry == entry || cached_start == start) - cached_entry = NULL; + rb_erase(&entry->rb, &memtype_rbroot); + list_del(&entry->nd); + kfree(entry); + err = 0; + break; + } else if (entry->start > start) { + break; + } + } + + if (!err) + goto unlock_ret; + entry = saved_entry; + list_for_each_entry_reverse(entry, &memtype_list, nd) { + if (entry->start == start && entry->end == end) { + rb_erase(&entry->rb, &memtype_rbroot); list_del(&entry->nd); kfree(entry); err = 0; break; + } else if (entry->start < start) { + break; } } +unlock_ret: spin_unlock(&memtype_lock); if (err) { -- cgit v1.1 From 46cf98cdaef5471926010b5bddf84c44ec177fdd Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:37 -0700 Subject: x86, pat: Generalize the use of page flag PG_uncached Only IA64 was using PG_uncached as of now. We now intend to use this bit in x86 as well, to keep track of memory type of those addresses that have page struct for them. So, generalize the use of that bit across ia64 and x86. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f722..8e15953 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1414,6 +1414,10 @@ config X86_PAT If unsure, say Y. +config ARCH_USES_PG_UNCACHED + def_bool y + depends on X86_PAT + config EFI bool "EFI runtime service support" depends on ACPI -- cgit v1.1 From f58417409603d62f2eb23db4d2cf6853d84a1698 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:38 -0700 Subject: x86, pat: Use page flags to track memtypes of RAM pages Change reserve_ram_pages_type and free_ram_pages_type to use 2 page flags to track UC_MINUS, WC, WB and default types. Previous RAM tracking just tracked WB or NonWB, which was not complete and did not allow tracking of RAM fully and there was no way to get the actual type reserved by looking at the page flags. We use the memtype_lock spinlock for atomicity in dealing with memtype tracking in struct page. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cacheflush.h | 54 ++++++++++++++++++++++- arch/x86/mm/pat.c | 91 +++++++++++++++++++++------------------ 2 files changed, 102 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index e55dfc1..b54f6af 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -43,8 +43,58 @@ static inline void copy_from_user_page(struct vm_area_struct *vma, memcpy(dst, src, len); } -#define PG_non_WB PG_arch_1 -PAGEFLAG(NonWB, non_WB) +#define PG_WC PG_arch_1 +PAGEFLAG(WC, WC) + +#ifdef CONFIG_X86_PAT +/* + * X86 PAT uses page flags WC and Uncached together to keep track of + * memory type of pages that have backing page struct. X86 PAT supports 3 + * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and + * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not + * been changed from its default (value of -1 used to denote this). + * Note we do not support _PAGE_CACHE_UC here. 
+ * + * Caller must hold memtype_lock for atomicity. + */ +static inline unsigned long get_page_memtype(struct page *pg) +{ + if (!PageUncached(pg) && !PageWC(pg)) + return -1; + else if (!PageUncached(pg) && PageWC(pg)) + return _PAGE_CACHE_WC; + else if (PageUncached(pg) && !PageWC(pg)) + return _PAGE_CACHE_UC_MINUS; + else + return _PAGE_CACHE_WB; +} + +static inline void set_page_memtype(struct page *pg, unsigned long memtype) +{ + switch (memtype) { + case _PAGE_CACHE_WC: + ClearPageUncached(pg); + SetPageWC(pg); + break; + case _PAGE_CACHE_UC_MINUS: + SetPageUncached(pg); + ClearPageWC(pg); + break; + case _PAGE_CACHE_WB: + SetPageUncached(pg); + SetPageWC(pg); + break; + default: + case -1: + ClearPageUncached(pg); + ClearPageWC(pg); + break; + } +} +#else +static inline unsigned long get_page_memtype(struct page *pg) { return -1; } +static inline void set_page_memtype(struct page *pg, unsigned long memtype) { } +#endif /* * The set_memory_* API can be used to change various attributes of a virtual diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index c90f242..1a9d0f0 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -288,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) } /* - * For RAM pages, mark the pages as non WB memory type using - * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or - * set_memory_wc() on a RAM page at a time before marking it as WB again. - * This is ok, because only one driver will be owning the page and - * doing set_memory_*() calls. + * For RAM pages, we use page flags to mark the pages with appropriate type. + * Here we do two pass: + * - Find the memtype of all the pages in the range, look for any conflicts + * - In case of no conflicts, set the new memtype for pages in the range * - * For now, we use PageNonWB to track that the RAM page is being mapped - * as non WB. In future, we will have to use one more flag - * (or some other mechanism in page_struct) to distinguish between - * UC and WC mapping. + * Caller must hold memtype_lock for atomicity. 
*/ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, unsigned long *new_type) { struct page *page; - u64 pfn, end_pfn; + u64 pfn; + + if (req_type == _PAGE_CACHE_UC) { + /* We do not support strong UC */ + WARN_ON_ONCE(1); + req_type = _PAGE_CACHE_UC_MINUS; + } for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - page = pfn_to_page(pfn); - if (page_mapped(page) || PageNonWB(page)) - goto out; + unsigned long type; - SetPageNonWB(page); + page = pfn_to_page(pfn); + type = get_page_memtype(page); + if (type != -1) { + printk(KERN_INFO "reserve_ram_pages_type failed " + "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", + start, end, type, req_type); + if (new_type) + *new_type = type; + + return -EBUSY; + } } - return 0; -out: - end_pfn = pfn; - for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { + if (new_type) + *new_type = req_type; + + for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { page = pfn_to_page(pfn); - ClearPageNonWB(page); + set_page_memtype(page, req_type); } - - return -EINVAL; + return 0; } static int free_ram_pages_type(u64 start, u64 end) { struct page *page; - u64 pfn, end_pfn; + u64 pfn; for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { page = pfn_to_page(pfn); - if (page_mapped(page) || !PageNonWB(page)) - goto out; - - ClearPageNonWB(page); + set_page_memtype(page, -1); } return 0; - -out: - end_pfn = pfn; - for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { - page = pfn_to_page(pfn); - SetPageNonWB(page); - } - return -EINVAL; } /* @@ -405,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, *new_type = actual_type; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, - new_type); - else if (is_range_ram < 0) + if (is_range_ram == 1) { + + spin_lock(&memtype_lock); + err = reserve_ram_pages_type(start, end, req_type, new_type); + spin_unlock(&memtype_lock); + + return err; + } else if (is_range_ram < 0) { return -EINVAL; + } new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -505,10 +508,16 @@ int free_memtype(u64 start, u64 end) return 0; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) + if (is_range_ram == 1) { + + spin_lock(&memtype_lock); + err = free_ram_pages_type(start, end); + spin_unlock(&memtype_lock); + + return err; + } else if (is_range_ram < 0) { return -EINVAL; + } spin_lock(&memtype_lock); -- cgit v1.1 From 637b86e75f4c255a4446bc0b67ce9d914b9d2d42 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:39 -0700 Subject: x86, pat: Add lookup_memtype to get the current memtype of a paddr Add a new routine lookup_memtype() to get the current memtype based on the PAT reserves and frees. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/pat.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 1a9d0f0..71aa6f7 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -574,6 +574,51 @@ unlock_ret: /** + * lookup_memtype - Looksup the memory type for a physical address + * @paddr: physical address of which memory type needs to be looked up + * + * Only to be called when PAT is enabled + * + * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or + * _PAGE_CACHE_UC + */ +static unsigned long lookup_memtype(u64 paddr) +{ + int rettype = _PAGE_CACHE_WB; + struct memtype *entry; + + if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) + return rettype; + + if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { + struct page *page; + spin_lock(&memtype_lock); + page = pfn_to_page(paddr >> PAGE_SHIFT); + rettype = get_page_memtype(page); + spin_unlock(&memtype_lock); + /* + * -1 from get_page_memtype() implies RAM page is in its + * default state and not reserved, and hence of type WB + */ + if (rettype == -1) + rettype = _PAGE_CACHE_WB; + + return rettype; + } + + spin_lock(&memtype_lock); + + entry = memtype_rb_search(&memtype_rbroot, paddr); + if (entry != NULL) + rettype = entry->type; + else + rettype = _PAGE_CACHE_UC_MINUS; + + spin_unlock(&memtype_lock); + return rettype; +} + +/** * io_reserve_memtype - Request a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region -- cgit v1.1 From 1087637616dd5e96d834164ea462aed6159d039b Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:40 -0700 Subject: x86, pat: Lookup the protection from memtype list on vm_insert_pfn() Lookup the reserved memtype during vm_insert_pfn and use that memtype for the new mapping. This takes care of handling of vm_insert_pfn() interface in track_pfn_vma*/untrack_pfn_vma. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/mm/pat.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 71aa6f7..b629f75 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -848,11 +848,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. - */ if (is_linear_pfn_mapping(vma)) { /* * reserve the whole chunk covered by vma. We need the @@ -880,20 +875,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long size) { + unsigned long flags; resource_size_t paddr; unsigned long vma_size = vma->vm_end - vma->vm_start; - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. 
- */ if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; return reserve_pfn_range(paddr, vma_size, prot, 0); } + if (!pat_enabled) + return 0; + + /* for vm_insert_pfn and friends, we set prot based on lookup */ + flags = lookup_memtype(pfn << PAGE_SHIFT); + *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + flags); + return 0; } @@ -908,11 +907,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, resource_size_t paddr; unsigned long vma_size = vma->vm_end - vma->vm_start; - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. - */ if (is_linear_pfn_mapping(vma)) { /* free the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; -- cgit v1.1 From d886c73cd4cf02a71e1650cbcb6176799d78aac1 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:41 -0700 Subject: x86, pat: Sanity check remap_pfn_range for RAM region Add sanity check for remap_pfn_range of RAM regions using lookup_memtype(). Previously, we did not have any way to get the type of RAM memory regions as they were tracked using a single bit in page_struct (WB, nonWB). Now we can get the actual type from page struct (WB, WC, UC_MINUS) and make sure the requester gets that type. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/mm/pat.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index b629f75..a6cace0 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -783,11 +783,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, is_ram = pat_pagerange_is_ram(paddr, paddr + size); /* - * reserve_pfn_range() doesn't support RAM pages. Maintain the current - * behavior with RAM pages by returning success. + * reserve_pfn_range() for RAM pages. We do not refcount to keep + * track of number of mappings of RAM pages. We can assert that + * the type requested matches the type of first page in the range. */ - if (is_ram != 0) + if (is_ram) { + if (!pat_enabled) + return 0; + + flags = lookup_memtype(paddr); + if (want_flags != flags) { + printk(KERN_WARNING + "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", + current->comm, current->pid, + cattr_name(want_flags), + (unsigned long long)paddr, + (unsigned long long)(paddr + size), + cattr_name(flags)); + *vma_prot = __pgprot((pgprot_val(*vma_prot) & + (~_PAGE_CACHE_MASK)) | + flags); + } return 0; + } ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) -- cgit v1.1 From 57844a8f8e29802f37ad9a0f94eb11d6ae358603 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Aug 2009 14:48:38 +0200 Subject: x86: Add x86_init infrastructure The upcoming Moorestown support brings the embedded world to x86. The setup code of x86 already has a couple of hooks which are either x86_quirks or paravirt ops. Some of those setup hooks are pretty convoluted like the timer setup and the tsc calibration code. But there are other places which could do with a cleanup. Instead of having inline functions/macros which are modified at compile time I decided to introduce x86_init ops which are unconditional in the code and make it clear that they can be changed either during compile time or in the early boot process. 
The function pointers are initialized by default functions which can be noops so that the pointer can be called unconditionally in most cases. This also allows us to remove 32bit/64bit, paravirt and other #ifdeffery. paravirt guests are just a hardware platform in the setup code, so we should treat them as such and not hide it all behind multiple layers of indirection and compile time dependencies. It's more obvious that x86_init.timers.timer_init() is a function pointer than the late_time_init = choose_time_init() obscurity. It's also way simpler to grep for x86_init.timers.timer_init and find all the places which modify that function pointer instead of analyzing weak functions, macros and paravirt indirections. Note: This is not a general paravirt_ops replacement. It will just move setup related hooks which are potentially useful for other platform setup purposes as well out of the paravirt domain. Add the base infrastructure without any functionality. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/setup.h | 2 ++ arch/x86/include/asm/x86_init.h | 15 +++++++++++++++ arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/x86_init.c | 17 +++++++++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/x86_init.h create mode 100644 arch/x86/kernel/x86_init.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 4093d1e..741e295 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -7,6 +7,8 @@ #ifndef __ASSEMBLY__ +#include + /* * Any setup quirks to be performed? */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h new file mode 100644 index 0000000..14d1107 --- /dev/null +++ b/arch/x86/include/asm/x86_init.h @@ -0,0 +1,15 @@ +#ifndef _ASM_X86_PLATFORM_H +#define _ASM_X86_PLATFORM_H + +/** + * struct x86_init_ops - functions for platform specific setup + * + */ +struct x86_init_ops { +}; + +extern struct x86_init_ops x86_init; + +extern void x86_init_noop(void); + +#endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 430d5b2..313ed6f 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -32,7 +32,7 @@ GCOV_PROFILE_paravirt.o := n obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o -obj-y += setup.o i8259.o irqinit.o +obj-y += setup.o x86_init.o i8259.o irqinit.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c new file mode 100644 index 0000000..82d510c --- /dev/null +++ b/arch/x86/kernel/x86_init.c @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2009 Thomas Gleixner + * + * For licencing details see kernel-base/COPYING + */ +#include + +#include + +void __cpuinit x86_init_noop(void) { } + +/* + * The platform setup functions are preset with the default functions + * for standard PC hardware. + */ +struct __initdata x86_init_ops x86_init = { +}; -- cgit v1.1 From f7cf5a5b8c0e59eac8d30b62271cb0fa52e53ebc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Aug 2009 14:43:56 +0200 Subject: x86: Add probe_roms to x86_init probe_roms is only used on 32bit. Add it to the x86_init ops and remove the #ifdefs. Default initializer is x86_init_noop() which is overridden in the 32bit boot code. 
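To make the pattern concrete, here is a minimal user-space sketch of the ops-table idiom this series introduces: every pointer is preset to a default (possibly a noop) so call sites never need NULL checks or #ifdefs, and platform code overrides entries at early boot. All names here are illustrative, not the kernel's.

    #include <stdio.h>

    static void demo_noop(void) { }         /* always safe to call */

    static void pc_probe_roms(void)
    {
            printf("probing legacy option ROMs\n");
    }

    struct init_ops {
            void (*probe_roms)(void);       /* never NULL by construction */
            void (*reserve_resources)(void);
    };

    /* defaults for "standard PC hardware" */
    static struct init_ops demo_init = {
            .probe_roms        = demo_noop,
            .reserve_resources = demo_noop,
    };

    int main(void)
    {
            /* a 32bit-style platform overrides one entry at "early boot" */
            demo_init.probe_roms = pc_probe_roms;

            demo_init.probe_roms();         /* unconditional, no #ifdef */
            demo_init.reserve_resources();  /* still the noop default */
            return 0;
    }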
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/x86_init.h | 10 ++++++++++ arch/x86/kernel/head32.c | 3 +++ arch/x86/kernel/setup.c | 4 +--- arch/x86/kernel/x86_init.c | 4 ++++ 4 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 14d1107..75e9e68 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -2,10 +2,20 @@ #define _ASM_X86_PLATFORM_H /** + * struct x86_init_resources - platform specific resource related ops + * @probe_roms: probe BIOS roms + * + */ +struct x86_init_resources { + void (*probe_roms)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ struct x86_init_ops { + struct x86_init_resources resources; }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3f8579f..4049353 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -29,6 +29,9 @@ void __init i386_start_kernel(void) reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif + /* Initilize 32bit specific setup functions */ + x86_init.resources.probe_roms = probe_roms; + reserve_ebda_region(); /* diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 63f32d2..5796eb1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -835,9 +835,7 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor(&boot_cpu_data); -#ifdef CONFIG_X86_32 - probe_roms(); -#endif + x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ insert_resource(&iomem_resource, &code_resource); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 82d510c..88883f8 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -14,4 +14,8 @@ void __cpuinit x86_init_noop(void) { } * for standard PC hardware. */ struct __initdata x86_init_ops x86_init = { + + .resources = { + .probe_roms = x86_init_noop, + }, }; -- cgit v1.1 From 8fee697d990c54976c8dc167270633299e2515d2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Aug 2009 14:55:50 +0200 Subject: x86: Add request_standard_resources to x86_init The 32bit and the 64bit code are slightly different in the reservation of standard resources. Also the upcoming Moorestown support needs its own version of that. Add it to x86_init_ops and initialize it with the 64bit default. 32bit overrides it in early boot. 
Now moorestown can add its own override w/o sprinkling the code with more #ifdefs. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/setup.h | 3 +++ arch/x86/include/asm/x86_init.h | 3 +++ arch/x86/kernel/head32.c | 1 + arch/x86/kernel/setup.c | 28 ++++++++++++++++------------ arch/x86/kernel/x86_init.c | 3 ++- 5 files changed, 25 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 741e295..19769ac 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -88,6 +88,9 @@ extern unsigned long saved_video_mode; #define paravirt_post_allocator_init() do {} while (0) #endif +extern void reserve_standard_io_resources(void); +extern void i386_reserve_resources(void); + #ifndef _SETUP /* diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 75e9e68..d0d9be2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -4,10 +4,13 @@ /** * struct x86_init_resources - platform specific resource related ops * @probe_roms: probe BIOS roms + * @reserve_resources: reserve the standard resources for the + * platform * */ struct x86_init_resources { void (*probe_roms)(void); + void (*reserve_resources)(void); }; /** diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 4049353..d91c37c 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -31,6 +31,7 @@ void __init i386_start_kernel(void) #endif /* Initilize 32bit specific setup functions */ x86_init.resources.probe_roms = probe_roms; + x86_init.resources.reserve_resources = i386_reserve_resources; reserve_ebda_region(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5796eb1..c2a8090 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -171,13 +171,6 @@ static struct resource bss_resource = { #ifdef CONFIG_X86_32 -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; /* common cpu data for all cpus */ @@ -605,7 +598,7 @@ static struct resource standard_io_resources[] = { .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; -static void __init reserve_standard_io_resources(void) +void __init reserve_standard_io_resources(void) { int i; @@ -1013,10 +1006,7 @@ void __init setup_arch(char **cmdline_p) e820_reserve_resources(); e820_mark_nosave_regions(max_low_pfn); -#ifdef CONFIG_X86_32 - request_resource(&iomem_resource, &video_ram_resource); -#endif - reserve_standard_io_resources(); + x86_init.resources.reserve_resources(); e820_setup_gap(); @@ -1102,4 +1092,18 @@ void __init x86_quirk_time_init(void) irq0.mask = cpumask_of_cpu(0); setup_irq(0, &irq0); } + +static struct resource video_ram_resource = { + .name = "Video RAM area", + .start = 0xa0000, + .end = 0xbffff, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +void __init i386_reserve_resources(void) +{ + request_resource(&iomem_resource, &video_ram_resource); + reserve_standard_io_resources(); +} + #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 88883f8..68c093b 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -5,7 +5,7 @@ */ #include -#include +#include void __cpuinit x86_init_noop(void) { } @@ -17,5 +17,6 @@ struct __initdata x86_init_ops x86_init = 
{ .resources = { .probe_roms = x86_init_noop, .reserve_resources = reserve_standard_io_resources, }, }; -- cgit v1.1 From 816c25e7d4fb6fd40022a376e8b7f45b1edf5a89 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Aug 2009 14:36:27 +0200 Subject: x86: Add reserve_ebda_region to x86_init_ops reserve_ebda_region needs to be called before start_kernel. Moorestown needs to override it. Make it a x86_init_ops function and initialize it with the default reserve_ebda_region. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/head32.c | 4 ++-- arch/x86/kernel/head64.c | 3 +-- arch/x86/kernel/x86_init.c | 2 ++ 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index d0d9be2..8a971cb 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -6,11 +6,13 @@ * @probe_roms: probe BIOS roms * @reserve_resources: reserve the standard resources for the * platform + * @reserve_ebda_region: reserve the extended bios data area * */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); + void (*reserve_ebda_region)(void); }; /** diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d91c37c..921a23b 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include void __init i386_start_kernel(void) @@ -33,7 +33,7 @@ void __init i386_start_kernel(void) x86_init.resources.probe_roms = probe_roms; x86_init.resources.reserve_resources = i386_reserve_resources; - reserve_ebda_region(); + x86_init.resources.reserve_ebda_region(); /* * At this point everything still needed from the boot loader diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 70eaa85..cead814 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -23,7 +23,6 @@ #include #include #include -#include #include static void __init zap_identity_mappings(void) @@ -112,7 +111,7 @@ void __init x86_64_start_reservations(char *real_mode_data) } #endif - reserve_ebda_region(); + x86_init.resources.reserve_ebda_region(); /* * At this point everything still needed from the boot loader diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 68c093b..1fff49a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -5,6 +5,7 @@ */ #include +#include #include void __cpuinit x86_init_noop(void) { } @@ -18,5 +19,6 @@ struct __initdata x86_init_ops x86_init = { .resources = { .probe_roms = x86_init_noop, .reserve_resources = reserve_standard_io_resources, + .reserve_ebda_region = reserve_ebda_region, }, }; -- cgit v1.1 From 6b18ae3e2ff62daa9f181401759161dd8de0aadf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 10:19:54 +0200 Subject: x86: Move memory_setup to x86_init_ops memory_setup is overridden by x86_quirks and by paravirts with weak functions and quirks. Unify the whole mess and make it an unconditional x86_init_ops function which defaults to the standard function and can be overridden by the early platform code. 
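As a hedged reduction of what this unification buys, the sketch below contrasts the old layered dispatch (a weak symbol consulting a runtime quirk pointer, with paravirt able to replace the weak symbol itself) against a single preset pointer. Names are hypothetical, not the kernel's.

    #include <stdio.h>

    static char *default_memory_setup(void) { return "BIOS-e820"; }

    /* old shape: a weak default that consults a quirk pointer */
    struct quirks { char *(*memory_setup)(void); };
    static struct quirks no_quirks;
    static struct quirks *quirks = &no_quirks;

    __attribute__((weak)) char *machine_memory_setup(void)
    {
            if (quirks->memory_setup)           /* layer 1: runtime quirk */
                    return quirks->memory_setup();
            return default_memory_setup();      /* layer 2: fallback */
    }
    /* (paravirt stacked a third layer by overriding the weak symbol) */

    /* new shape: one pointer, preset to the default, assigned once at boot */
    struct resource_ops { char *(*memory_setup)(void); };
    static struct resource_ops res = { .memory_setup = default_memory_setup };

    int main(void)
    {
            printf("old path: %s\n", machine_memory_setup());
            printf("new path: %s\n", res.memory_setup());   /* grep-able, one hop */
            return 0;
    }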
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/e820.h | 2 -- arch/x86/include/asm/paravirt_types.h | 1 - arch/x86/include/asm/setup.h | 1 - arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/apic/numaq_32.c | 1 - arch/x86/kernel/e820.c | 19 +------------------ arch/x86/kernel/paravirt.c | 6 ------ arch/x86/kernel/visws_quirks.c | 3 ++- arch/x86/kernel/x86_init.c | 2 ++ arch/x86/lguest/boot.c | 3 ++- arch/x86/xen/enlighten.c | 3 ++- 11 files changed, 11 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 7ecba4d..40b4e61 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -126,8 +126,6 @@ extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); -extern char *machine_specific_memory_setup(void); -extern char *memory_setup(void); #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 2b3371b..6d66896 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -81,7 +81,6 @@ struct pv_init_ops { /* Basic arch-specific setup */ void (*arch_setup)(void); - char *(*memory_setup)(void); void (*post_allocator_init)(void); /* Print a banner to identify the environment */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 19769ac..9cba9d6 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -22,7 +22,6 @@ struct x86_quirks { int (*arch_pre_intr_init)(void); int (*arch_intr_init)(void); int (*arch_trap_init)(void); - char * (*arch_memory_setup)(void); int (*mach_get_smp_config)(unsigned int early); int (*mach_find_smp_config)(unsigned int reserve); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8a971cb..6c084f2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -7,12 +7,14 @@ * @reserve_resources: reserve the standard resources for the * platform * @reserve_ebda_region: reserve the extended bios data area + * @memory_setup: platform specific memory setup * */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); void (*reserve_ebda_region)(void); + char *(*memory_setup)(void); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index ca96e68..403c062 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -260,7 +260,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, .arch_pre_intr_init = NULL, - .arch_memory_setup = NULL, .arch_intr_init = NULL, .arch_trap_init = NULL, .mach_get_smp_config = NULL, diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 5cb5725..0d804b9 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1455,28 +1455,11 @@ char *__init default_machine_specific_memory_setup(void) return who; } -char *__init __attribute__((weak)) machine_specific_memory_setup(void) -{ - if (x86_quirks->arch_memory_setup) { - char *who = x86_quirks->arch_memory_setup(); - - if (who) - return who; - } - return default_machine_specific_memory_setup(); -} - -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ -char * __init __attribute__((weak)) memory_setup(void) -{ - return machine_specific_memory_setup(); -} - 
void __init setup_memory_map(void) { char *who; - who = memory_setup(); + who = x86_init.resources.memory_setup(); memcpy(&e820_saved, &e820, sizeof(struct e820map)); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map(who); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 70ec9b9..532c9a2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -60,11 +60,6 @@ static void __init default_banner(void) pv_info.name); } -char *memory_setup(void) -{ - return pv_init_ops.memory_setup(); -} - /* Simple instruction patching code. */ #define DEF_NATIVE(ops, name, code) \ extern const char start_##ops##_##name[], end_##ops##_##name[]; \ @@ -322,7 +317,6 @@ struct pv_init_ops pv_init_ops = { .patch = native_patch, .banner = default_banner, .arch_setup = paravirt_nop, - .memory_setup = machine_specific_memory_setup, }; struct pv_time_ops pv_time_ops = { diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 31ffc24..97c670d 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -239,7 +239,6 @@ static int visws_trap_init(void); static struct x86_quirks visws_x86_quirks __initdata = { .arch_time_init = visws_time_init, .arch_pre_intr_init = visws_pre_intr_init, - .arch_memory_setup = visws_memory_setup, .arch_intr_init = NULL, .arch_trap_init = visws_trap_init, .mach_get_smp_config = visws_get_smp_config, @@ -263,6 +262,8 @@ void __init visws_early_detect(void) */ x86_quirks = &visws_x86_quirks; + x86_init.resources.memory_setup = visws_memory_setup; + /* * Install reboot quirks: */ diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 1fff49a..1965bff 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -7,6 +7,7 @@ #include #include +#include void __cpuinit x86_init_noop(void) { } @@ -20,5 +21,6 @@ struct __initdata x86_init_ops x86_init = { .probe_roms = x86_init_noop, .reserve_resources = reserve_standard_io_resources, .reserve_ebda_region = reserve_ebda_region, + .memory_setup = default_machine_specific_memory_setup, }, }; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index d677fa9..11445c1 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1270,7 +1270,6 @@ __init void lguest_init(void) pv_irq_ops.safe_halt = lguest_safe_halt; /* Setup operations */ - pv_init_ops.memory_setup = lguest_memory_setup; pv_init_ops.patch = lguest_patch; /* Intercepts of various CPU instructions */ @@ -1325,6 +1324,8 @@ __init void lguest_init(void) pv_time_ops.time_init = lguest_time_init; pv_time_ops.get_tsc_khz = lguest_tsc_khz; + x86_init.resources.memory_setup = lguest_memory_setup; + /* * Now is a good time to look at the implementations of these functions * before returning to the rest of lguest_init(). diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e90540a..50b20c6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -841,7 +841,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, .banner = xen_banner, - .memory_setup = xen_memory_setup, .arch_setup = xen_arch_setup, .post_allocator_init = xen_post_allocator_init, }; @@ -982,6 +981,8 @@ asmlinkage void __init xen_start_kernel(void) pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; + x86_init.resources.memory_setup = xen_memory_setup; + #ifdef CONFIG_X86_64 /* * Setup percpu state. 
We only need to do this for 64-bit -- cgit v1.1 From f4848472cd99487e182b64fb2a5d0e4fedbe86ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 12:05:01 +0200 Subject: x86: Sanitize smp_record and move it to x86_init_ops The x86 quirkification introduced an extra ugly hackery with a variable pointer in the mpparse code. If the pointer is initialized then it is dereferenced and the variable set to 0 or incremented. Create a x86_init_ops function and let the affected numaq code hold the function. Default init is a setup noop. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/setup.h | 1 - arch/x86/include/asm/x86_init.h | 12 +++++++++++- arch/x86/kernel/apic/numaq_32.c | 19 ++++++++++++++++--- arch/x86/kernel/mpparse.c | 6 ++---- arch/x86/kernel/x86_init.c | 5 +++++ 5 files changed, 34 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 9cba9d6..bbf2dfd 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -25,7 +25,6 @@ struct x86_quirks { int (*mach_get_smp_config)(unsigned int early); int (*mach_find_smp_config)(unsigned int reserve); - int *mpc_record; int (*mpc_apic_id)(struct mpc_cpu *m); void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); void (*mpc_oem_pci_bus)(struct mpc_bus *m); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 6c084f2..10b297b 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -2,6 +2,14 @@ #define _ASM_X86_PLATFORM_H /** + * struct x86_init_mpparse - platform specific mpparse ops + * @mpc_record: platform specific mpc record accounting + */ +struct x86_init_mpparse { + void (*mpc_record)(unsigned int mode); +}; + +/** * struct x86_init_resources - platform specific resource related ops * @probe_roms: probe BIOS roms * @reserve_resources: reserve the standard resources for the @@ -22,11 +30,13 @@ struct x86_init_resources { * */ struct x86_init_ops { - struct x86_init_resources resources; + struct x86_init_resources resources; + struct x86_init_mpparse mpparse; }; extern struct x86_init_ops x86_init; extern void x86_init_noop(void); +extern void x86_init_uint_noop(unsigned int unused); #endif diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 403c062..b5f0b1d 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -66,7 +66,6 @@ struct mpc_trans { unsigned short trans_reserved; }; -/* x86_quirks member */ static int mpc_record; static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; @@ -177,6 +176,19 @@ static void mpc_oem_pci_bus(struct mpc_bus *m) quad_local_to_mp_bus_id[quad][local] = m->busid; } +/* + * Called from mpparse code. 
+ * mode = 0: prescan + * mode = 1: one mpc entry scanned + */ +static void numaq_mpc_record(unsigned int mode) +{ + if (!mode) + mpc_record = 0; + else + mpc_record++; +} + static void __init MP_translation_info(struct mpc_trans *m) { printk(KERN_INFO @@ -264,7 +276,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .arch_trap_init = NULL, .mach_get_smp_config = NULL, .mach_find_smp_config = NULL, - .mpc_record = &mpc_record, .mpc_apic_id = mpc_apic_id, .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, @@ -285,8 +296,10 @@ static __init void early_check_numaq(void) if (smp_found_config) early_get_smp_config(); - if (found_numaq) + if (found_numaq) { x86_quirks = &numaq_x86_quirks; + x86_init.mpparse.mpc_record = numaq_mpc_record; + } } int __init get_memcfg_numaq(void) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 651c93b..b2179fd 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -320,8 +320,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) /* * Now process the configuration blocks. */ - if (x86_quirks->mpc_record) - *x86_quirks->mpc_record = 0; + x86_init.mpparse.mpc_record(0); while (count < mpc->length) { switch (*mpt) { @@ -353,8 +352,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) count = mpc->length; break; } - if (x86_quirks->mpc_record) - (*x86_quirks->mpc_record)++; + x86_init.mpparse.mpc_record(1); } #ifdef CONFIG_X86_BIGSMP diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 1965bff..83bd5db 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -10,6 +10,7 @@ #include void __cpuinit x86_init_noop(void) { } +void __init x86_init_uint_noop(unsigned int unused) { } /* * The platform setup functions are preset with the default functions @@ -23,4 +24,8 @@ struct __initdata x86_init_ops x86_init = { .reserve_ebda_region = reserve_ebda_region, .memory_setup = default_machine_specific_memory_setup, }, + + .mpparse = { + .mpc_record = x86_init_uint_noop, + }, }; -- cgit v1.1 From de93410310952fb7b705f784ef22493c8362dbe8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 09:27:29 +0200 Subject: x86: Move ioapic_ids_setup to x86_init_ops 32bit and also the numaq code have special requirements on the ioapic_id setup. 
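For context, the quirk being removed signals "skip the ioapic id setup" through its return value, so the call site needs both a NULL check and a result check. A self-contained sketch of that old shape, with illustrative names:

    #include <stdio.h>

    struct quirks { int (*setup_ioapic_ids)(void); };

    static int numaq_skip_ioapic_ids(void) { return 1; }   /* 1 meant: skip */

    static struct quirks numaq_quirks = {
            .setup_ioapic_ids = numaq_skip_ioapic_ids,
    };
    static struct quirks *quirks = &numaq_quirks;

    static void demo_setup_ioapic_ids_from_mpc(void)
    {
            printf("rewriting IO-APIC ids from the MP table\n");
    }

    int main(void)
    {
            /* old call site: NULL check plus return-value check */
            if (quirks->setup_ioapic_ids && quirks->setup_ioapic_ids())
                    return 0;               /* numaq opted out this way */
            demo_setup_ioapic_ids_from_mpc();
            return 0;
    }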
Convert it to a x86_init_ops function and get rid of the quirks and #ifdefs Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 3 ++- arch/x86/include/asm/setup.h | 1 - arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/apic/io_apic.c | 11 ++++------- arch/x86/kernel/apic/numaq_32.c | 8 +------- arch/x86/kernel/head32.c | 3 +++ arch/x86/kernel/x86_init.c | 1 + 7 files changed, 13 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 330ee80..2b8aeb8 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -177,12 +177,13 @@ extern int setup_ioapic_entry(int apic, int irq, int polarity, int vector, int pin); extern void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e); +extern void setup_ioapic_ids_from_mpc(void); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 +#define setup_ioapic_ids_from_mpc x86_init_noop static const int timer_through_8259 = 0; static inline void ioapic_init_mappings(void) { } static inline void ioapic_insert_resources(void) { } - static inline void probe_nr_irqs_gsi(void) { } #endif diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index bbf2dfd..cc8b4b0 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -30,7 +30,6 @@ struct x86_quirks { void (*mpc_oem_pci_bus)(struct mpc_bus *m); void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, unsigned short oemsize); - int (*setup_ioapic_ids)(void); }; extern void x86_quirk_intr_init(void); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 10b297b..6598573 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -4,9 +4,11 @@ /** * struct x86_init_mpparse - platform specific mpparse ops * @mpc_record: platform specific mpc record accounting + * @setup_ioapic_ids: platform specific ioapic id override */ struct x86_init_mpparse { void (*mpc_record)(unsigned int mode); + void (*setup_ioapic_ids)(void); }; /** diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d2ed6c5..5f46871 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2014,7 +2014,7 @@ void disable_IO_APIC(void) * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 */ -static void __init setup_ioapic_ids_from_mpc(void) +void __init setup_ioapic_ids_from_mpc(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; @@ -2023,9 +2023,8 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; - if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) + if (acpi_ioapic) return; - /* * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus. @@ -3061,10 +3060,8 @@ void __init setup_IO_APIC(void) /* * Set up IO-APIC IRQ routing. 
*/ -#ifdef CONFIG_X86_32 - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); -#endif + x86_init.mpparse.setup_ioapic_ids(); + sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index b5f0b1d..f371765 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -262,12 +262,6 @@ static void __init } } -static int __init numaq_setup_ioapic_ids(void) -{ - /* so can skip it */ - return 1; -} - static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -280,7 +274,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, - .setup_ioapic_ids = numaq_setup_ioapic_ids, }; static __init void early_check_numaq(void) @@ -299,6 +292,7 @@ static __init void early_check_numaq(void) if (found_numaq) { x86_quirks = &numaq_x86_quirks; x86_init.mpparse.mpc_record = numaq_mpc_record; + x86_init.mpparse.setup_ioapic_ids = x86_init_noop; } } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 921a23b..a21398f 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include void __init i386_start_kernel(void) { @@ -32,6 +34,7 @@ void __init i386_start_kernel(void) /* Initilize 32bit specific setup functions */ x86_init.resources.probe_roms = probe_roms; x86_init.resources.reserve_resources = i386_reserve_resources; + x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; x86_init.resources.reserve_ebda_region(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 83bd5db..f4a32b3 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -27,5 +27,6 @@ struct __initdata x86_init_ops x86_init = { .mpparse = { .mpc_record = x86_init_uint_noop, + .setup_ioapic_ids = x86_init_noop, }, }; -- cgit v1.1 From fd6c6661492226bb82f422157c535ac573cbecbd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 10:41:58 +0200 Subject: x86: Move mpc_apic_id to x86_init_ops The mpc_apic_id setup is handled by a x86_quirk. Make it a x86_init_ops function with a default implementation. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mpspec.h | 2 ++ arch/x86/include/asm/setup.h | 2 -- arch/x86/include/asm/x86_init.h | 4 ++++ arch/x86/kernel/apic/numaq_32.c | 2 +- arch/x86/kernel/mpparse.c | 10 ++++++---- arch/x86/kernel/x86_init.c | 2 ++ 6 files changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index e2a1bb6..03c6a92 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -62,10 +62,12 @@ extern void get_smp_config(void); extern void find_smp_config(void); extern void early_reserve_e820_mpc_new(void); extern int enable_update_mptable; +extern int default_mpc_apic_id(struct mpc_cpu *m); #else static inline void find_smp_config(void) { } static inline void early_reserve_e820_mpc_new(void) { } #define enable_update_mptable 0 +#define default_mpc_apic_id NULL #endif void __cpuinit generic_processor_info(int apicid, int version); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index cc8b4b0..7c7f44f 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -12,7 +12,6 @@ /* * Any setup quirks to be performed? 
*/ -struct mpc_cpu; struct mpc_bus; struct mpc_oemtable; @@ -25,7 +24,6 @@ struct x86_quirks { int (*mach_get_smp_config)(unsigned int early); int (*mach_find_smp_config)(unsigned int reserve); - int (*mpc_apic_id)(struct mpc_cpu *m); void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); void (*mpc_oem_pci_bus)(struct mpc_bus *m); void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 6598573..f2be2a7 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -1,14 +1,18 @@ #ifndef _ASM_X86_PLATFORM_H #define _ASM_X86_PLATFORM_H +struct mpc_cpu; + /** * struct x86_init_mpparse - platform specific mpparse ops * @mpc_record: platform specific mpc record accounting * @setup_ioapic_ids: platform specific ioapic id override + * @mpc_apic_id: platform specific mpc apic id assignment */ struct x86_init_mpparse { void (*mpc_record)(unsigned int mode); void (*setup_ioapic_ids)(void); + int (*mpc_apic_id)(struct mpc_cpu *m); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index f371765..222413f 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -270,7 +270,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .arch_trap_init = NULL, .mach_get_smp_config = NULL, .mach_find_smp_config = NULL, - .mpc_apic_id = mpc_apic_id, .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, @@ -293,6 +292,7 @@ static __init void early_check_numaq(void) x86_quirks = &numaq_x86_quirks; x86_init.mpparse.mpc_record = numaq_mpc_record; x86_init.mpparse.setup_ioapic_ids = x86_init_noop; + x86_init.mpparse.mpc_apic_id = mpc_apic_id; } } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b2179fd..0456086 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -45,6 +45,11 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } +int __init default_mpc_apic_id(struct mpc_cpu *m) +{ + return m->apicid; +} + static void __init MP_processor_info(struct mpc_cpu *m) { int apicid; @@ -55,10 +60,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - if (x86_quirks->mpc_apic_id) - apicid = x86_quirks->mpc_apic_id(m); - else - apicid = m->apicid; + apicid = x86_init.mpparse.mpc_apic_id(m); if (m->cpuflag & CPU_BOOTPROCESSOR) { bootup_cpu = " (Bootup-CPU)"; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index f4a32b3..08749f2 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -28,5 +29,6 @@ struct __initdata x86_init_ops x86_init = { .mpparse = { .mpc_record = x86_init_uint_noop, .setup_ioapic_ids = x86_init_noop, + .mpc_apic_id = default_mpc_apic_id, }, }; -- cgit v1.1 From 72302142e165313ee58af81bd76708c12b58d7ab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 12:18:32 +0200 Subject: x86: Move smp_read_mpc_oem to x86_init_ops. Move smp_read_mpc_oem from quirks to x86_init. 
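Two callback conventions appear in this part of the series: pointers preset to a default or noop and called unconditionally, like mpc_record and smp_read_mpc_oem, and pointers left NULL and checked at the call site, like mpc_oem_pci_bus in a following patch. A side-by-side sketch with illustrative names:

    #include <stdio.h>

    static void default_read_oem(void) { }          /* empty default */

    struct mpparse_ops {
            void (*smp_read_mpc_oem)(void);         /* preset, never NULL */
            void (*mpc_oem_pci_bus)(void);          /* NULL unless a platform sets it */
    };

    static struct mpparse_ops mp = {
            .smp_read_mpc_oem = default_read_oem,
            /* .mpc_oem_pci_bus left NULL on purpose */
    };

    int main(void)
    {
            mp.smp_read_mpc_oem();                  /* call blindly: default is a noop */

            if (mp.mpc_oem_pci_bus)                 /* rare hook: check, then call */
                    mp.mpc_oem_pci_bus();
            return 0;
    }

The noop default keeps common call sites branch-free; a NULL default reads naturally for hooks that almost no platform provides.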
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mpspec.h | 2 ++ arch/x86/include/asm/setup.h | 3 --- arch/x86/include/asm/x86_init.h | 3 +++ arch/x86/kernel/apic/numaq_32.c | 6 +++--- arch/x86/kernel/mpparse.c | 8 ++++---- arch/x86/kernel/x86_init.c | 1 + 6 files changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 03c6a92..5de8e92 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -63,11 +63,13 @@ extern void find_smp_config(void); extern void early_reserve_e820_mpc_new(void); extern int enable_update_mptable; extern int default_mpc_apic_id(struct mpc_cpu *m); +extern void default_smp_read_mpc_oem(struct mpc_table *mpc); #else static inline void find_smp_config(void) { } static inline void early_reserve_e820_mpc_new(void) { } #define enable_update_mptable 0 #define default_mpc_apic_id NULL +#define default_smp_read_mpc_oem NULL #endif void __cpuinit generic_processor_info(int apicid, int version); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 7c7f44f..adb5d44 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -13,7 +13,6 @@ * Any setup quirks to be performed? */ struct mpc_bus; -struct mpc_oemtable; struct x86_quirks { int (*arch_pre_time_init)(void); @@ -26,8 +25,6 @@ struct x86_quirks { void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); void (*mpc_oem_pci_bus)(struct mpc_bus *m); - void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, - unsigned short oemsize); }; extern void x86_quirk_intr_init(void); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index f2be2a7..fc0eef2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -2,17 +2,20 @@ #define _ASM_X86_PLATFORM_H struct mpc_cpu; +struct mpc_table; /** * struct x86_init_mpparse - platform specific mpparse ops * @mpc_record: platform specific mpc record accounting * @setup_ioapic_ids: platform specific ioapic id override * @mpc_apic_id: platform specific mpc apic id assignment + * @smp_read_mpc_oem: platform specific oem mpc table setup */ struct x86_init_mpparse { void (*mpc_record)(unsigned int mode); void (*setup_ioapic_ids)(void); int (*mpc_apic_id)(struct mpc_cpu *m); + void (*smp_read_mpc_oem)(struct mpc_table *mpc); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 222413f..1bd3b0e 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -218,9 +218,9 @@ static int __init mpf_checksum(unsigned char *mp, int len) /* * Read/parse the MPC oem tables */ -static void __init - smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize) +static void __init smp_read_mpc_oem(struct mpc_table *mpc) { + struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr; int count = sizeof(*oemtable); /* the header size */ unsigned char *oemptr = ((unsigned char *)oemtable) + count; @@ -272,7 +272,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mach_find_smp_config = NULL, .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, - .smp_read_mpc_oem = smp_read_mpc_oem, }; static __init void early_check_numaq(void) @@ -293,6 +292,7 @@ static __init void early_check_numaq(void) x86_init.mpparse.mpc_record = numaq_mpc_record; x86_init.mpparse.setup_ioapic_ids = x86_init_noop; x86_init.mpparse.mpc_apic_id = mpc_apic_id; + x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; } } 
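A note on the (void *)(long)mpc->oemptr cast in the hunk above: oemptr is a 32-bit address field stored inside the MP table, not a C pointer, so it is widened to long first and only then converted to a pointer; the kernel relies on that address being usable directly at this stage of boot. The same pattern in a standalone sketch (field and struct names are hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    struct mp_table_demo {
            uint32_t oemptr;        /* 32-bit address stored inside the table */
    };

    int main(void)
    {
            struct mp_table_demo t = { .oemptr = 0x9fc00 };

            /*
             * Widen the 32-bit field to long first, then convert to a
             * pointer; casting a u32 straight to a 64-bit pointer would
             * draw a compiler warning. (Only printed here, never dereferenced.)
             */
            void *oemtable = (void *)(long)t.oemptr;

            printf("oem table at %p\n", oemtable);
            return 0;
    }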
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0456086..45abdf6 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -293,6 +293,8 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) 1, mpc, mpc->length, 1); } +void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } + static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) { char str[16]; @@ -314,10 +316,8 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) if (early) return 1; - if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) { - struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr; - x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize); - } + if (mpc->oemptr) + x86_init.mpparse.smp_read_mpc_oem(mpc); /* * Now process the configuration blocks. diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 08749f2..fb5d93c 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -30,5 +30,6 @@ struct __initdata x86_init_ops x86_init = { .mpc_record = x86_init_uint_noop, .setup_ioapic_ids = x86_init_noop, .mpc_apic_id = default_mpc_apic_id, + .smp_read_mpc_oem = default_smp_read_mpc_oem, }, }; -- cgit v1.1 From 52fdb5684660f9fd7129f7bbbe279a02893bacb8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 12:45:33 +0200 Subject: x86: Move mpc_oem_pci_bus to x86_init_ops Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/setup.h | 1 - arch/x86/include/asm/x86_init.h | 3 +++ arch/x86/kernel/apic/numaq_32.c | 2 +- arch/x86/kernel/mpparse.c | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index adb5d44..fd2267b 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -24,7 +24,6 @@ struct x86_quirks { int (*mach_find_smp_config)(unsigned int reserve); void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); - void (*mpc_oem_pci_bus)(struct mpc_bus *m); }; extern void x86_quirk_intr_init(void); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index fc0eef2..404e2d2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_PLATFORM_H #define _ASM_X86_PLATFORM_H +struct mpc_bus; struct mpc_cpu; struct mpc_table; @@ -10,12 +11,14 @@ struct mpc_table; * @setup_ioapic_ids: platform specific ioapic id override * @mpc_apic_id: platform specific mpc apic id assignment * @smp_read_mpc_oem: platform specific oem mpc table setup + * @mpc_oem_pci_bus: platform specific pci bus setup (default NULL) */ struct x86_init_mpparse { void (*mpc_record)(unsigned int mode); void (*setup_ioapic_ids)(void); int (*mpc_apic_id)(struct mpc_cpu *m); void (*smp_read_mpc_oem)(struct mpc_table *mpc); + void (*mpc_oem_pci_bus)(struct mpc_bus *m); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 1bd3b0e..feebe8e 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -271,7 +271,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mach_get_smp_config = NULL, .mach_find_smp_config = NULL, .mpc_oem_bus_info = mpc_oem_bus_info, - .mpc_oem_pci_bus = mpc_oem_pci_bus, }; static __init void early_check_numaq(void) @@ -293,6 +292,7 @@ static __init void early_check_numaq(void) x86_init.mpparse.setup_ioapic_ids = x86_init_noop; x86_init.mpparse.mpc_apic_id = mpc_apic_id; x86_init.mpparse.smp_read_mpc_oem = 
smp_read_mpc_oem; + x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; } } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 45abdf6..72e1140 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -98,8 +98,8 @@ static void __init MP_bus_info(struct mpc_bus *m) mp_bus_id_to_type[m->busid] = MP_BUS_ISA; #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { - if (x86_quirks->mpc_oem_pci_bus) - x86_quirks->mpc_oem_pci_bus(m); + if (x86_init.mpparse.mpc_oem_pci_bus) + x86_init.mpparse.mpc_oem_pci_bus(m); clear_bit(m->busid, mp_bus_not_pci); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) -- cgit v1.1 From 90e1c6969d8711edb888a00ec54c74370f125c8f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 12:34:47 +0200 Subject: x86: Move oem_bus_info to x86_init_ops Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mpspec.h | 6 ++++++ arch/x86/include/asm/setup.h | 3 --- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/apic/numaq_32.c | 2 +- arch/x86/kernel/mpparse.c | 14 ++++++++------ arch/x86/kernel/x86_init.c | 1 + 6 files changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 5de8e92..e3c579e 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -64,12 +64,18 @@ extern void early_reserve_e820_mpc_new(void); extern int enable_update_mptable; extern int default_mpc_apic_id(struct mpc_cpu *m); extern void default_smp_read_mpc_oem(struct mpc_table *mpc); +# ifdef CONFIG_X86_IO_APIC +extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); +# else +# define default_mpc_oem_bus_info NULL +# endif #else static inline void find_smp_config(void) { } static inline void early_reserve_e820_mpc_new(void) { } #define enable_update_mptable 0 #define default_mpc_apic_id NULL #define default_smp_read_mpc_oem NULL +#define default_mpc_oem_bus_info NULL #endif void __cpuinit generic_processor_info(int apicid, int version); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index fd2267b..6121a8a 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -12,7 +12,6 @@ /* * Any setup quirks to be performed? 
*/ -struct mpc_bus; struct x86_quirks { int (*arch_pre_time_init)(void); @@ -22,8 +21,6 @@ struct x86_quirks { int (*arch_trap_init)(void); int (*mach_get_smp_config)(unsigned int early); int (*mach_find_smp_config)(unsigned int reserve); - - void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); }; extern void x86_quirk_intr_init(void); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 404e2d2..2833a87 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -12,6 +12,7 @@ struct mpc_table; * @mpc_apic_id: platform specific mpc apic id assignment * @smp_read_mpc_oem: platform specific oem mpc table setup * @mpc_oem_pci_bus: platform specific pci bus setup (default NULL) + * @mpc_oem_bus_info: platform specific mpc bus info */ struct x86_init_mpparse { void (*mpc_record)(unsigned int mode); @@ -19,6 +20,7 @@ struct x86_init_mpparse { int (*mpc_apic_id)(struct mpc_cpu *m); void (*smp_read_mpc_oem)(struct mpc_table *mpc); void (*mpc_oem_pci_bus)(struct mpc_bus *m); + void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index feebe8e..700273d 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -270,7 +270,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .arch_trap_init = NULL, .mach_get_smp_config = NULL, .mach_find_smp_config = NULL, - .mpc_oem_bus_info = mpc_oem_bus_info, }; static __init void early_check_numaq(void) @@ -293,6 +292,7 @@ static __init void early_check_numaq(void) x86_init.mpparse.mpc_apic_id = mpc_apic_id; x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; + x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; } } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 72e1140..a42f23f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -72,16 +72,18 @@ static void __init MP_processor_info(struct mpc_cpu *m) } #ifdef CONFIG_X86_IO_APIC -static void __init MP_bus_info(struct mpc_bus *m) +void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str) { - char str[7]; memcpy(str, m->bustype, 6); str[6] = 0; + apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); +} + +static void __init MP_bus_info(struct mpc_bus *m) +{ + char str[7]; - if (x86_quirks->mpc_oem_bus_info) - x86_quirks->mpc_oem_bus_info(m, str); - else - apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); + x86_init.mpparse.mpc_oem_bus_info(m, str); #if MAX_MP_BUSSES < 256 if (m->busid >= MAX_MP_BUSSES) { diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index fb5d93c..27685ed 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -31,5 +31,6 @@ struct __initdata x86_init_ops x86_init = { .setup_ioapic_ids = x86_init_noop, .mpc_apic_id = default_mpc_apic_id, .smp_read_mpc_oem = default_smp_read_mpc_oem, + .mpc_oem_bus_info = default_mpc_oem_bus_info, }, }; -- cgit v1.1 From 7285dd7fd375763bfb8ab1ac9cf3f1206f503c16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Aug 2009 20:25:24 +0200 Subject: clocksource: Resolve cpu hotplug dead lock with TSC unstable Martin Schwidefsky analyzed it: To register a clocksource the clocksource_mutex is acquired and if necessary timekeeping_notify is called to install the clocksource as the timekeeper clock. timekeeping_notify uses stop_machine which needs to take cpu_add_remove_lock mutex. 
Starting a new cpu is done with the cpu_add_remove_lock mutex held. native_cpu_up checks the tsc of the new cpu and, if the tsc is no good, clocksource_change_rating is called, which needs the clocksource_mutex, and the deadlock is complete. The solution is to replace the TSC via the clocksource watchdog mechanism. Mark the TSC as unstable and schedule the watchdog work so it gets removed in the watchdog thread context. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- arch/x86/kernel/tsc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9684254..fc3672a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -767,12 +767,14 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); + printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else + clocksource_mark_unstable(&clocksource_tsc); + else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; clocksource_tsc.rating = 0; + } } } -- cgit v1.1 From a192a9580bcc41692be1f36b77c3b681827f566a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Jul 2009 16:45:54 -0400 Subject: ACPI: Move definition of PREFIX from acpi_bus.h to internal.h Linux/ACPI core files using internal.h all use PREFIX "ACPI: "; however, not all ACPI drivers use/want it -- and they should not have to #undef PREFIX to define their own. Add a GPL comment to internal.h while we are there. This does not change any actual console output, aside from a whitespace fix. Signed-off-by: Len Brown --- arch/x86/pci/mmconfig-shared.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 712443e..81d3466 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -18,6 +18,8 @@ #include #include +#define PREFIX "ACPI: " + /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) #define MMCONFIG_APER_MAX (256 * 1024*1024) -- cgit v1.1 From 2a4ab640d3c28c2952967e5f63ea495555bf2a5f Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 7 Jul 2009 23:01:15 -0400 Subject: ACPI, x86: expose some IO-APIC routines when CONFIG_ACPI=n Some IO-APIC routines are ACPI specific now, but need to be exposed when CONFIG_ACPI=n for the benefit of SFI. Remove #ifdef ACPI around these routines: io_apic_get_unique_id(int ioapic, int apic_id); io_apic_get_version(int ioapic); io_apic_get_redir_entries(int ioapic); Move these routines from ACPI-specific boot.c to io_apic.c: uniq_ioapic_id(u8 id) mp_find_ioapic() mp_find_ioapic_pin() mp_register_ioapic() Also, since uniq_ioapic_id() is now no longer static, rename it to io_apic_unique_id() for consistency with the other public io_apic routines. For simplicity, do not #ifdef the resulting code ACPI || SFI, though that could be done in the future if it is important to optimize the !ACPI !SFI IO-APIC x86 kernel for size.
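[ A minimal usage sketch, not part of the patch: it shows how the two relocated lookup helpers cooperate once they live in io_apic.c. The signatures and return conventions come from the diff below; the calling function is hypothetical. ]

	/* Resolve a GSI to an (ioapic, pin) pair with the moved helpers. */
	static int sketch_route_gsi(int gsi)
	{
		int ioapic = mp_find_ioapic(gsi);	/* which IOAPIC owns this GSI? */
		int pin;

		if (ioapic < 0)
			return -1;			/* no registered IOAPIC covers it */

		pin = mp_find_ioapic_pin(ioapic, gsi);	/* offset into its GSI range */
		/* ... program the redirection table entry for (ioapic, pin) ... */
		return pin;
	}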
Signed-off-by: Feng Tang Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/include/asm/io_apic.h | 13 +++++- arch/x86/kernel/acpi/boot.c | 102 +--------------------------------------- arch/x86/kernel/apic/io_apic.c | 103 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 109 insertions(+), 109 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 330ee80..85232d3 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -150,11 +150,10 @@ extern int timer_through_8259; #define io_apic_assign_pci_irqs \ (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) -#ifdef CONFIG_ACPI +extern u8 io_apic_unique_id(u8 id); extern int io_apic_get_unique_id(int ioapic, int apic_id); extern int io_apic_get_version(int ioapic); extern int io_apic_get_redir_entries(int ioapic); -#endif /* CONFIG_ACPI */ struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, @@ -177,6 +176,16 @@ extern int setup_ioapic_entry(int apic, int irq, int polarity, int vector, int pin); extern void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e); + +struct mp_ioapic_gsi { + int gsi_base; + int gsi_end; +}; +extern struct mp_ioapic_gsi mp_gsi_routing[]; +int mp_find_ioapic(int gsi); +int mp_find_ioapic_pin(int ioapic, int gsi); +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6b8ca3a..7e62d1e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -833,106 +833,6 @@ static int __init acpi_parse_madt_lapic_entries(void) extern int es7000_plat; #endif -static struct { - int gsi_base; - int gsi_end; -} mp_ioapic_routing[MAX_IO_APICS]; - -int mp_find_ioapic(int gsi) -{ - int i = 0; - - /* Find the IOAPIC that manages this GSI.
*/ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - return -1; -} - -int mp_find_ioapic_pin(int ioapic, int gsi) -{ - if (WARN_ON(ioapic == -1)) - return -1; - if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end)) - return -1; - - return gsi - mp_ioapic_routing[ioapic].gsi_base; -} - -static u8 __init uniq_ioapic_id(u8 id) -{ -#ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); -#endif -} - -static int bad_ioapic(unsigned long address) -{ - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in table, skipping!\n"); - return 1; - } - return 0; -} - -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) -{ - int idx = 0; - - if (bad_ioapic(address)) - return; - - idx = nr_ioapics; - - mp_ioapics[idx].type = MP_IOAPIC; - mp_ioapics[idx].flags = MPC_APIC_USABLE; - mp_ioapics[idx].apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].apicid = uniq_ioapic_id(id); - mp_ioapics[idx].apicver = io_apic_get_version(idx); - - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
- */ - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, - mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, - mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - nr_ioapics++; -} - int __init acpi_probe_gsi(void) { int idx; @@ -947,7 +847,7 @@ int __init acpi_probe_gsi(void) max_gsi = 0; for (idx = 0; idx < nr_ioapics; idx++) { - gsi = mp_ioapic_routing[idx].gsi_end; + gsi = mp_gsi_routing[idx].gsi_end; if (gsi > max_gsi) max_gsi = gsi; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d2ed6c5..a8c0232 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -85,6 +85,9 @@ int nr_ioapic_registers[MAX_IO_APICS]; struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; +/* IO APIC gsi routing info */ +struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; + /* MP IRQ source entries */ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -3941,11 +3944,28 @@ int io_apic_set_pci_routing(struct device *dev, int irq, return __io_apic_set_pci_routing(dev, irq, irq_attr); } -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ +u8 __init io_apic_unique_id(u8 id) +{ +#ifdef CONFIG_X86_32 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +#else + int i; + DECLARE_BITMAP(used, 256); -#ifdef CONFIG_ACPI + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +#endif +} #ifdef CONFIG_X86_32 int __init io_apic_get_unique_id(int ioapic, int apic_id) @@ -4054,8 +4074,6 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return 0; } -#endif /* CONFIG_ACPI */ - /* * This function currently is only a helper for the i386 smp boot process where * we need to reprogram the ioredtbls to cater for the cpus which have come online @@ -4201,3 +4219,76 @@ void __init ioapic_insert_resources(void) r++; } } + +int mp_find_ioapic(int gsi) +{ + int i = 0; + + /* Find the IOAPIC that manages this GSI. 
*/ + for (i = 0; i < nr_ioapics; i++) { + if ((gsi >= mp_gsi_routing[i].gsi_base) + && (gsi <= mp_gsi_routing[i].gsi_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + return -1; +} + +int mp_find_ioapic_pin(int ioapic, int gsi) +{ + if (WARN_ON(ioapic == -1)) + return -1; + if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) + return -1; + + return gsi - mp_gsi_routing[ioapic].gsi_base; +} + +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded " + "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); + return 1; + } + if (!address) { + printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +{ + int idx = 0; + + if (bad_ioapic(address)) + return; + + idx = nr_ioapics; + + mp_ioapics[idx].type = MP_IOAPIC; + mp_ioapics[idx].flags = MPC_APIC_USABLE; + mp_ioapics[idx].apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + mp_ioapics[idx].apicid = io_apic_unique_id(id); + mp_ioapics[idx].apicver = io_apic_get_version(idx); + + /* + * Build basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). + */ + mp_gsi_routing[idx].gsi_base = gsi_base; + mp_gsi_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, + mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, + mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); + + nr_ioapics++; +} -- cgit v1.1 From f4a2d5840e9f0e48d1a787b66e7346087a756029 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Jul 2009 16:48:02 -0400 Subject: ACPI, PCI: Change PREFIX to "PCI" from "ACPI" in mmconfig-shared.c Signed-off-by: Len Brown Acked-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 81d3466..b707a01 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -18,7 +18,7 @@ #include #include -#define PREFIX "ACPI: " +#define PREFIX "PCI: " /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) -- cgit v1.1 From e55a5999ffcf72dc4d43d73618957964cb87065a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 28 Jul 2009 17:41:53 +0800 Subject: ACPI: Handle CONFIG_ACPI=n better from linux/acpi.h linux/acpi.h is the top level header for interfacing with the ACPI sub-system, so acpi_disabled should be up there instead of down in asm/acpi.h -- particularly since asm/acpi.h doesn't exist for all architectures. Same story for acpi_table_parse(), which is a top-level API to Linux/ACPI. This is necessary for building some code that used to always depend on CONFIG_ACPI=y, but will soon also need to build with CONFIG_ACPI=n.
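[ A hedged sketch, not part of the patch, of the CONFIG_ACPI=n stubs this change implies in linux/acpi.h; the exact stub bodies and return values in the tree may differ. ]

	#ifndef CONFIG_ACPI

	#define acpi_disabled 1		/* moved up from asm/acpi.h, see below */

	/* top-level API stub so callers build without the ACPI core */
	static inline int acpi_table_parse(char *id,
				int (*handler)(struct acpi_table_header *))
	{
		return -1;		/* no ACPI: no table to hand out */
	}

	#endif /* !CONFIG_ACPI */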
Signed-off-by: Feng Tang Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 20d1465..4518dc5 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) #else /* !CONFIG_ACPI */ -#define acpi_disabled 1 #define acpi_lapic 0 #define acpi_ioapic 0 static inline void acpi_noirq_set(void) { } -- cgit v1.1 From efafc8b213e67ed148a5b53ade29ee7b48af907d Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:23:29 -0400 Subject: x86: add arch-specific SFI support arch/x86/kernel/sfi.c serves the dual purpose of supporting the SFI core with arch specific code, as well as a home for the arch-specific code that uses SFI. Analogous to ACPI, drivers/sfi/Kconfig is pulled in by arch/x86/Kconfig. Signed-off-by: Feng Tang Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/Kconfig | 2 + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/setup.c | 3 ++ arch/x86/kernel/sfi.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 arch/x86/kernel/sfi.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 13ffa5d..d8ba424 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1683,6 +1683,8 @@ source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" +source "drivers/sfi/Kconfig" + config X86_APM_BOOT bool default y diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 430d5b2..6321afa 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -55,6 +55,7 @@ obj-y += step.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ +obj-$(CONFIG_SFI) += sfi.o obj-y += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 63f32d2..d784ea2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -990,6 +991,8 @@ void __init setup_arch(char **cmdline_p) */ acpi_boot_init(); + sfi_init(); + #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) /* * get boot-time SMP configuration: diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c new file mode 100644 index 0000000..761df3f --- /dev/null +++ b/arch/x86/kernel/sfi.c @@ -0,0 +1,133 @@ +/* + * sfi.c - x86 architecture SFI support. + * + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * + */ + +#define KMSG_COMPONENT "SFI" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC +static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; + +void __init mp_sfi_register_lapic_address(unsigned long address) +{ + mp_lapic_addr = address; + + set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = read_apic_id(); + + pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid); +} + +/* All CPUs enumerated by SFI must be present and enabled */ +void __cpuinit mp_sfi_register_lapic(u8 id) +{ + int boot_cpu = 0; + + if (MAX_APICS - id <= 0) { + pr_warning("Processor #%d invalid (max %d)\n", + id, MAX_APICS); + return; + } + + if (id == boot_cpu_physical_apicid) + boot_cpu = 1; + pr_info("registering lapic[%d]\n", id); + + generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR))); +} + +static int __init sfi_parse_cpus(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_cpu_table_entry *pentry; + int i; + int cpu_num; + + sb = (struct sfi_table_simple *)table; + cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry); + pentry = (struct sfi_cpu_table_entry *)sb->pentry; + + for (i = 0; i < cpu_num; i++) { + mp_sfi_register_lapic(pentry->apic_id); + pentry++; + } + + smp_found_config = 1; + return 0; +} +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC +static u32 gsi_base; + +static int __init sfi_parse_ioapic(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_apic_table_entry *pentry; + int i, num; + + sb = (struct sfi_table_simple *)table; + num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry); + pentry = (struct sfi_apic_table_entry *)sb->pentry; + + for (i = 0; i < num; i++) { + mp_register_ioapic(i, pentry->phys_addr, gsi_base); + gsi_base += io_apic_get_redir_entries(i); + pentry++; + } + + WARN(pic_mode, KERN_WARNING + "SFI: pic_mode shouldn't be 1 when IOAPIC table is present\n"); + pic_mode = 0; + return 0; +} +#endif /* CONFIG_X86_IO_APIC */ + +/* + * sfi_platform_init(): register lapics & io-apics + */ +int __init sfi_platform_init(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + mp_sfi_register_lapic_address(sfi_lapic_addr); + sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus); +#endif +#ifdef CONFIG_X86_IO_APIC + sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic); +#endif + return 0; +} -- cgit v1.1 From 5f0db7a2fb78895a197f64e548333b3bbd433996 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:37:50 -0400 Subject: SFI: Hook PCI MMCONFIG First check ACPI, and if that fails, ask SFI to find the MCFG.
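[ The acpi_sfi_table_parse() helper used in the diff below is added outside arch/x86 and so is not shown; this is a hedged sketch of the "ACPI first, then SFI" fallback it implements, which may differ in detail from the tree. ]

	static inline int acpi_sfi_table_parse(char *signature,
				int (*handler)(struct acpi_table_header *))
	{
		if (!acpi_table_parse(signature, handler))
			return 0;	/* ACPI provided the table; done */

		/* ACPI absent or failed: ask SFI for the same table */
		return sfi_acpi_table_parse(signature, NULL, NULL, handler);
	}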
Signed-off-by: Feng Tang Signed-off-by: Len Brown Cc: Jesse Barnes --- arch/x86/Kconfig | 2 +- arch/x86/pci/mmconfig-shared.c | 6 ++++-- arch/x86/pci/mmconfig_32.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d8ba424..4c92c91 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1880,7 +1880,7 @@ config PCI_DIRECT config PCI_MMCONFIG def_bool y - depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) + depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY) config PCI_OLPC def_bool y diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index b707a01..602c172d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -13,10 +13,12 @@ #include #include #include +#include #include #include #include #include +#include #define PREFIX "PCI: " @@ -493,7 +495,7 @@ static void __init pci_mmcfg_reject_broken(int early) (unsigned int)cfg->start_bus_number, (unsigned int)cfg->end_bus_number); - if (!early) + if (!early && !acpi_disabled) valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); if (valid) @@ -608,7 +610,7 @@ static void __init __pci_mmcfg_init(int early) } if (!known_bridge) - acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); + acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); pci_mmcfg_reject_broken(early); diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 8b2d561..f10a7e9 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -11,9 +11,9 @@ #include #include -#include #include #include +#include /* Assume systems with more busses have correct MCFG */ #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) -- cgit v1.1 From 47d25003cbd9e9030a95f7ccc4e70fec6aa7b844 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 28 Aug 2009 14:11:57 +0100 Subject: x86: Fix earlyprintk=dbgp for machines without NX Since parse_early_param() may (e.g. for earlyprintk=dbgp) involve calls to page table manipulation functions (here set_fixmap_nocache()), NX hardware support must be determined before calling that function (so that __supported_pte_mask gets properly set up). But the call after parse_early_param() also cannot go away, as it must honor any command-line-specified disabling of the NX functionality. (This will then just result in whatever mappings got established during parse_early_param() having the NX bit set despite NX being disabled on the command line, but I think that's tolerable.) Signed-off-by: Jan Beulich Cc: Yinghai Lu LKML-Reference: <4A97F3BD02000078000121B9@vpn.id2.novell.com> [ merged to x86/pat to resolve a conflict. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 02643cc..eb1f1e6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -714,6 +714,16 @@ void __init setup_arch(char **cmdline_p) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; +#ifdef CONFIG_X86_64 + /* + * Must call this twice: once just to detect whether hardware doesn't + * support NX (so that the early EHCI debug console setup can safely + * call set_fixmap()), and then again after parsing early parameters to + * honor the respective command line option.
+ */ + check_efer(); +#endif + parse_early_param(); /* VMI may relocate the fixmap; do this before touching ioremap area */ -- cgit v1.1 From 23386d63bbb3199cf247313ec088878d72debcfd Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Sat, 29 Aug 2009 18:27:18 +0200 Subject: x86: Detect stack protector for i386 builds on x86_64 Stack protector support was not detected when building with ARCH=i386 on x86_64 systems: arch/x86/Makefile:80: stack protector enabled but no compiler support The "-m32" argument needs to be passed to the detection script. Signed-off-by: Michal Schmidt Cc: Tejun Heo Cc: Jeremy Fitzhardinge Cc: Arjan van de Ven LKML-Reference: <20090829182718.10f566b1@leela> Signed-off-by: Ingo Molnar -- --- arch/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659..5e7db44 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -72,7 +72,7 @@ endif ifdef CONFIG_CC_STACKPROTECTOR cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh - ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y) + ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) stackp-y := -fstack-protector stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all KBUILD_CFLAGS += $(stackp-y) -- cgit v1.1 From b3f1b617f49447df6c3f5fac9c225aaea8b724ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 11:11:52 +0200 Subject: x86: Move get/find_smp_config to x86_init_ops Replace the quirk machinery by an x86_init_ops function which defaults to the standard implementation. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mpspec.h | 37 ++++++++++++++++++++++++++++++------- arch/x86/include/asm/setup.h | 2 -- arch/x86/include/asm/x86_init.h | 4 ++++ arch/x86/kernel/apic/numaq_32.c | 2 -- arch/x86/kernel/mpparse.c | 33 ++------------------------------- arch/x86/kernel/setup.c | 2 -- arch/x86/kernel/visws_quirks.c | 14 ++++---------- arch/x86/kernel/x86_init.c | 2 ++ 8 files changed, 42 insertions(+), 54 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index e3c579e..79c9450 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -4,6 +4,7 @@ #include #include +#include extern int apic_version[MAX_APICS]; extern int pic_mode; @@ -41,9 +42,6 @@ extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; #endif /* CONFIG_X86_64 */ -extern void early_find_smp_config(void); -extern void early_get_smp_config(void); - #if defined(CONFIG_MCA) || defined(CONFIG_EISA) extern int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif @@ -52,14 +50,36 @@ extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); extern unsigned int boot_cpu_physical_apicid; extern unsigned int max_physical_apicid; -extern int smp_found_config; extern int mpc_default_type; extern unsigned long mp_lapic_addr; -extern void get_smp_config(void); +#ifdef CONFIG_X86_LOCAL_APIC +extern int smp_found_config; +#else +# define smp_found_config 0 +#endif + +static inline void get_smp_config(void) +{ + x86_init.mpparse.get_smp_config(0); +} + +static inline void early_get_smp_config(void) +{ + x86_init.mpparse.get_smp_config(1); +} + +static inline void find_smp_config(void) +{ + x86_init.mpparse.find_smp_config(1); +} + +static inline void early_find_smp_config(void) +{ + x86_init.mpparse.find_smp_config(0); +} #ifdef CONFIG_X86_MPPARSE -extern void find_smp_config(void); extern void early_reserve_e820_mpc_new(void); extern int
enable_update_mptable; extern int default_mpc_apic_id(struct mpc_cpu *m); @@ -69,13 +89,16 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); # else # define default_mpc_oem_bus_info NULL # endif +extern void default_find_smp_config(unsigned int reserve); +extern void default_get_smp_config(unsigned int early); #else -static inline void find_smp_config(void) { } static inline void early_reserve_e820_mpc_new(void) { } #define enable_update_mptable 0 #define default_mpc_apic_id NULL #define default_smp_read_mpc_oem NULL #define default_mpc_oem_bus_info NULL +#define default_find_smp_config x86_init_uint_noop +#define default_get_smp_config x86_init_uint_noop #endif void __cpuinit generic_processor_info(int apicid, int version); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 6121a8a..345a255 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -19,8 +19,6 @@ struct x86_quirks { int (*arch_pre_intr_init)(void); int (*arch_intr_init)(void); int (*arch_trap_init)(void); - int (*mach_get_smp_config)(unsigned int early); - int (*mach_find_smp_config)(unsigned int reserve); }; extern void x86_quirk_intr_init(void); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2833a87..e0d4729 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -13,6 +13,8 @@ struct mpc_table; * @smp_read_mpc_oem: platform specific oem mpc table setup * @mpc_oem_pci_bus: platform specific pci bus setup (default NULL) * @mpc_oem_bus_info: platform specific mpc bus info + * @find_smp_config: find the smp configuration + * @get_smp_config: get the smp configuration */ struct x86_init_mpparse { void (*mpc_record)(unsigned int mode); @@ -21,6 +23,8 @@ struct x86_init_mpparse { void (*smp_read_mpc_oem)(struct mpc_table *mpc); void (*mpc_oem_pci_bus)(struct mpc_bus *m); void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); + void (*find_smp_config)(unsigned int reserve); + void (*get_smp_config)(unsigned int early); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 700273d..3dd5fd7 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -268,8 +268,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_intr_init = NULL, .arch_intr_init = NULL, .arch_trap_init = NULL, - .mach_get_smp_config = NULL, - .mach_find_smp_config = NULL, }; static __init void early_check_numaq(void) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a42f23f..7535764 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -610,7 +610,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) /* * Scan the memory blocks for an SMP configuration block. 
*/ -static void __init __get_smp_config(unsigned int early) +void __init default_get_smp_config(unsigned int early) { struct mpf_intel *mpf = mpf_found; @@ -627,11 +627,6 @@ static void __init __get_smp_config(unsigned int early) if (acpi_lapic && acpi_ioapic) return; - if (x86_quirks->mach_get_smp_config) { - if (x86_quirks->mach_get_smp_config(early)) - return; - } - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) @@ -672,16 +667,6 @@ static void __init __get_smp_config(unsigned int early) */ } -void __init early_get_smp_config(void) -{ - __get_smp_config(1); -} - -void __init get_smp_config(void) -{ - __get_smp_config(0); -} - static void __init smp_reserve_bootmem(struct mpf_intel *mpf) { unsigned long size = get_mpc_size(mpf->physptr); @@ -747,14 +732,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 0; } -static void __init __find_smp_config(unsigned int reserve) +void __init default_find_smp_config(unsigned int reserve) { unsigned int address; - if (x86_quirks->mach_find_smp_config) { - if (x86_quirks->mach_find_smp_config(reserve)) - return; - } /* * FIXME: Linux assumes you have 640K of base ram.. * this continues the error... @@ -789,16 +770,6 @@ static void __init __find_smp_config(unsigned int reserve) smp_scan_config(address, 0x400, reserve); } -void __init early_find_smp_config(void) -{ - __find_smp_config(0); -} - -void __init find_smp_config(void) -{ - __find_smp_config(1); -} - #ifdef CONFIG_X86_IO_APIC static u8 __initdata irq_used[MAX_IRQ_SOURCES]; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c2a8090..54043cb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -981,13 +981,11 @@ void __init setup_arch(char **cmdline_p) */ acpi_boot_init(); -#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) /* * get boot-time SMP configuration: */ if (smp_found_config) get_smp_config(); -#endif prefill_possible_map(); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 97c670d..31e8281 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -156,12 +156,8 @@ static void visws_machine_power_off(void) outl(PIIX_SPECIAL_STOP, 0xCFC); } -static int __init visws_get_smp_config(unsigned int early) +static void __init visws_get_smp_config(unsigned int early) { - /* - * Prevent MP-table parsing by the generic code: - */ - return 1; } /* @@ -208,7 +204,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) apic_version[m->apicid] = ver; } -static int __init visws_find_smp_config(unsigned int reserve) +static void __init visws_find_smp_config(unsigned int reserve) { struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); @@ -230,8 +226,6 @@ static int __init visws_find_smp_config(unsigned int reserve) MP_processor_info(mp++); mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - return 1; } static int visws_trap_init(void); @@ -241,8 +235,6 @@ static struct x86_quirks visws_x86_quirks __initdata = { .arch_pre_intr_init = visws_pre_intr_init, .arch_intr_init = NULL, .arch_trap_init = visws_trap_init, - .mach_get_smp_config = visws_get_smp_config, - .mach_find_smp_config = visws_find_smp_config, }; void __init visws_early_detect(void) @@ -263,6 +255,8 @@ void __init visws_early_detect(void) x86_quirks = &visws_x86_quirks; x86_init.resources.memory_setup = visws_memory_setup; + 
x86_init.mpparse.get_smp_config = visws_get_smp_config; + x86_init.mpparse.find_smp_config = visws_find_smp_config; /* * Install reboot quirks: diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 27685ed..3488fb6 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -32,5 +32,7 @@ struct __initdata x86_init_ops x86_init = { .mpc_apic_id = default_mpc_apic_id, .smp_read_mpc_oem = default_smp_read_mpc_oem, .mpc_oem_bus_info = default_mpc_oem_bus_info, + .find_smp_config = default_find_smp_config, + .get_smp_config = default_get_smp_config, }, }; -- cgit v1.1 From d9112f43021554ded2ef2b9bea5f88ba4b52abe0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 09:41:38 +0200 Subject: x86: Move pre_intr_init to x86_init_ops Replace the quirk machinery by an x86_init_ops function which defaults to the standard implementation. This is also a preparatory patch for Moorestown support which needs to replace the default init_ISA_irqs as well. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq.h | 2 ++ arch/x86/include/asm/setup.h | 1 - arch/x86/include/asm/x86_init.h | 10 ++++++++++ arch/x86/kernel/apic/numaq_32.c | 1 - arch/x86/kernel/irqinit.c | 24 ++---------------------- arch/x86/kernel/visws_quirks.c | 10 +++------- arch/x86/kernel/x86_init.c | 5 +++++ 7 files changed, 22 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index f38481b..8fe2782 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -47,4 +47,6 @@ extern unsigned int do_IRQ(struct pt_regs *regs); extern DECLARE_BITMAP(used_vectors, NR_VECTORS); extern int vector_used_by_percpu_irq(unsigned int vector); +extern void init_ISA_irqs(void); + #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 345a255..66a3197 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -16,7 +16,6 @@ struct x86_quirks { int (*arch_pre_time_init)(void); int (*arch_time_init)(void); - int (*arch_pre_intr_init)(void); int (*arch_intr_init)(void); int (*arch_trap_init)(void); }; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2833a87..e0d4729 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -44,12 +44,22 @@ }; /** + * struct x86_init_irqs - platform specific interrupt setup + * @pre_vector_init: init code to run before interrupt vectors + * are set up.
+ */ +struct x86_init_irqs { + void (*pre_vector_init)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ struct x86_init_ops { struct x86_init_resources resources; struct x86_init_mpparse mpparse; + struct x86_init_irqs irqs; }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 3dd5fd7..ec8b311 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -265,7 +265,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, - .arch_pre_intr_init = NULL, .arch_intr_init = NULL, .arch_trap_init = NULL, }; diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 92b7703..acdf088 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -116,7 +116,7 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } -static void __init init_ISA_irqs(void) +void __init init_ISA_irqs(void) { int i; @@ -213,32 +213,12 @@ static void __init apic_intr_init(void) #endif } -/** - * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -static void __init x86_quirk_pre_intr_init(void) -{ -#ifdef CONFIG_X86_32 - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } -#endif - init_ISA_irqs(); -} - void __init native_init_IRQ(void) { int i; /* Execute any quirks before the call gates are initialised: */ - x86_quirk_pre_intr_init(); + x86_init.irqs.pre_vector_init(); apic_intr_init(); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 31e8281..1d6309d 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -73,14 +73,10 @@ static int __init visws_time_init(void) return 0; } -static int __init visws_pre_intr_init(void) +/* Replaces the default init_ISA_irqs in the generic setup */ +static void __init visws_pre_intr_init(void) { init_VISWS_APIC_irqs(); - - /* - * We dont want ISA irqs to be set up by the generic code: - */ - return 1; } /* Quirk for machine specific memory setup. 
*/ @@ -232,7 +228,6 @@ static int visws_trap_init(void); static struct x86_quirks visws_x86_quirks __initdata = { .arch_time_init = visws_time_init, - .arch_pre_intr_init = visws_pre_intr_init, .arch_intr_init = NULL, .arch_trap_init = visws_trap_init, }; @@ -257,6 +252,7 @@ void __init visws_early_detect(void) x86_init.resources.memory_setup = visws_memory_setup; x86_init.mpparse.get_smp_config = visws_get_smp_config; x86_init.mpparse.find_smp_config = visws_find_smp_config; + x86_init.irqs.pre_vector_init = visws_pre_intr_init; /* * Install reboot quirks: diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 3488fb6..f2abe21 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -9,6 +9,7 @@ #include #include #include +#include void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } @@ -35,4 +36,8 @@ struct __initdata x86_init_ops x86_init = { .find_smp_config = default_find_smp_config, .get_smp_config = default_get_smp_config, }, + + .irqs = { + .pre_vector_init = init_ISA_irqs, + }, }; -- cgit v1.1 From 66bcaf0bde100a4b54b82fc6fea6ceee2212ffb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 09:59:09 +0200 Subject: x86: Move irq_init to x86_init_ops irq_init is overridden by x86_quirks and by paravirts. Unify the whole mess and make it an unconditional x86_init_ops function which defaults to the standard function and can be overridden by the early platform code. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq.h | 1 - arch/x86/include/asm/paravirt_types.h | 2 -- arch/x86/include/asm/setup.h | 3 --- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/apic/numaq_32.c | 1 - arch/x86/kernel/irqinit.c | 12 ++++-------- arch/x86/kernel/paravirt.c | 6 ------ arch/x86/kernel/setup.c | 17 ----------------- arch/x86/kernel/visws_quirks.c | 1 - arch/x86/kernel/x86_init.c | 1 + arch/x86/lguest/boot.c | 2 +- arch/x86/xen/irq.c | 5 +++-- 12 files changed, 11 insertions(+), 42 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 8fe2782..ddda6cb 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -37,7 +37,6 @@ extern void fixup_irqs(void); #endif extern void (*generic_interrupt_extension)(void); -extern void init_IRQ(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 6d66896..25922afb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -201,8 +201,6 @@ struct pv_cpu_ops { }; struct pv_irq_ops { - void (*init_IRQ)(void); - /* * Get/set interrupt state. 
save_fl and restore_fl are only * expected to use X86_EFLAGS_IF; all other bits diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 66a3197..404086f 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -16,12 +16,9 @@ struct x86_quirks { int (*arch_pre_time_init)(void); int (*arch_time_init)(void); - int (*arch_intr_init)(void); int (*arch_trap_init)(void); }; -extern void x86_quirk_intr_init(void); - extern void x86_quirk_trap_init(void); extern void x86_quirk_pre_time_init(void); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 65e3394..8d7be65 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -47,9 +47,11 @@ struct x86_init_resources { * struct x86_init_irqs - platform specific interrupt setup * @pre_vector_init: init code to run before interrupt vectors * are set up. + * @intr_init: interrupt init code */ struct x86_init_irqs { void (*pre_vector_init)(void); + void (*intr_init)(void); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index ec8b311..eafd341 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -265,7 +265,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, - .arch_intr_init = NULL, .arch_trap_init = NULL, }; diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index acdf088..e0142cd 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -140,8 +140,10 @@ void __init init_ISA_irqs(void) } } -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); +void init_IRQ(void) +{ + x86_init.irqs.intr_init(); +} static void __init smp_intr_init(void) { @@ -238,12 +240,6 @@ void __init native_init_IRQ(void) #ifdef CONFIG_X86_32 /* - * Call quirks after call gates are initialised (usually add in - * the architecture specific gates): - */ - x86_quirk_intr_init(); - - /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 532c9a2..d76bfbe 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -183,11 +183,6 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, return insn_len; } -void init_IRQ(void) -{ - pv_irq_ops.init_IRQ(); -} - static void native_flush_tlb(void) { __native_flush_tlb(); @@ -328,7 +323,6 @@ struct pv_time_ops pv_time_ops = { }; struct pv_irq_ops pv_irq_ops = { - .init_IRQ = native_init_IRQ, .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 54043cb..d3da0f7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1021,23 +1021,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 /** - * x86_quirk_intr_init - post gate setup interrupt initialisation - * - * Description: - * Fill in any interrupts that may have been left out by the general - * init_IRQ() routine. interrupts having to do with the machine rather - * than the devices on the I/O bus (like APIC interrupts in intel MP - * systems) are started here. 
- **/ -void __init x86_quirk_intr_init(void) -{ - if (x86_quirks->arch_intr_init) { - if (x86_quirks->arch_intr_init()) - return; - } -} - -/** * x86_quirk_trap_init - initialise system specific traps * * Description: diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 1d6309d..a490137 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -228,7 +228,6 @@ static int visws_trap_init(void); static struct x86_quirks visws_x86_quirks __initdata = { .arch_time_init = visws_time_init, - .arch_intr_init = NULL, .arch_trap_init = visws_trap_init, }; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index f2abe21..8cb5933 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -39,5 +39,6 @@ struct __initdata x86_init_ops x86_init = { .irqs = { .pre_vector_init = init_ISA_irqs, + .intr_init = native_init_IRQ, }, }; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 11445c1..1ff98651 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1262,7 +1262,6 @@ __init void lguest_init(void) */ /* Interrupt-related operations */ - pv_irq_ops.init_IRQ = lguest_init_IRQ; pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); @@ -1325,6 +1324,7 @@ __init void lguest_init(void) pv_time_ops.get_tsc_khz = lguest_tsc_khz; x86_init.resources.memory_setup = lguest_memory_setup; + x86_init.irqs.intr_init = lguest_init_IRQ; /* * Now is a good time to look at the implementations of these functions diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index cfd1779..9d30105 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -112,8 +114,6 @@ static void xen_halt(void) } static const struct pv_irq_ops xen_irq_ops __initdata = { - .init_IRQ = xen_init_IRQ, - .save_fl = PV_CALLEE_SAVE(xen_save_fl), .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), @@ -129,4 +129,5 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { void __init xen_init_irq_ops() { pv_irq_ops = xen_irq_ops; + x86_init.irqs.intr_init = xen_init_IRQ; } -- cgit v1.1 From 428cf9025b15573e16e658032f2b963283e34ae0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 10:35:46 +0200 Subject: x86: Move traps_init to x86_init_ops Replace the quirks by a simple x86_init_ops function. 
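[ A condensed illustration, not part of the patch, of the override pattern this series establishes; all three fragments are taken from the diffs below. ]

	/* 1. default in x86_init.c: a no-op for everyone */
	.irqs = {
		...
		.trap_init	= x86_init_noop,
	},

	/* 2. platform override during early detection (VISWS) */
	x86_init.irqs.trap_init = visws_trap_init;

	/* 3. generic call site in trap_init(): no quirk test needed */
	x86_init.irqs.trap_init();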
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/setup.h | 3 --- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/apic/numaq_32.c | 1 - arch/x86/kernel/setup.c | 15 --------------- arch/x86/kernel/traps.c | 5 ++--- arch/x86/kernel/visws_quirks.c | 8 +++----- arch/x86/kernel/x86_init.c | 1 + 7 files changed, 8 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 404086f..7751d1f 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -16,11 +16,8 @@ struct x86_quirks { int (*arch_pre_time_init)(void); int (*arch_time_init)(void); - int (*arch_trap_init)(void); }; -extern void x86_quirk_trap_init(void); - extern void x86_quirk_pre_time_init(void); extern void x86_quirk_time_init(void); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8d7be65..07c37bd 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -48,10 +48,12 @@ struct x86_init_resources { * @pre_vector_init: init code to run before interrupt vectors * are set up. * @intr_init: interrupt init code + * @trap_init: platform specific trap setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); + void (*trap_init)(void); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index eafd341..71c5ea6 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -265,7 +265,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, - .arch_trap_init = NULL, }; static __init void early_check_numaq(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d3da0f7..bf3b87f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1020,21 +1020,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 -/** - * x86_quirk_trap_init - initialise system specific traps - * - * Description: - * Called as the final act of trap_init(). Used in VISWS to initialise - * the various board specific APIC traps. 
- **/ -void __init x86_quirk_trap_init(void) -{ - if (x86_quirks->arch_trap_init) { - if (x86_quirks->arch_trap_init()) - return; - } -} - static struct irqaction irq0 = { .handler = timer_interrupt, .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e4b1f5..ed96ed5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -59,6 +59,7 @@ #include #ifdef CONFIG_X86_64 +#include #include #include #else @@ -980,7 +981,5 @@ void __init trap_init(void) */ cpu_init(); -#ifdef CONFIG_X86_32 - x86_quirk_trap_init(); -#endif + x86_init.irqs.trap_init(); } diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index a490137..2719091 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -224,11 +224,10 @@ static void __init visws_find_smp_config(unsigned int reserve) mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; } -static int visws_trap_init(void); +static void visws_trap_init(void); static struct x86_quirks visws_x86_quirks __initdata = { .arch_time_init = visws_time_init, - .arch_trap_init = visws_trap_init, }; void __init visws_early_detect(void) @@ -252,6 +251,7 @@ void __init visws_early_detect(void) x86_init.mpparse.get_smp_config = visws_get_smp_config; x86_init.mpparse.find_smp_config = visws_find_smp_config; x86_init.irqs.pre_vector_init = visws_pre_intr_init; + x86_init.irqs.trap_init = visws_trap_init; /* * Install reboot quirks: @@ -390,12 +390,10 @@ static __init void cobalt_init(void) co_apic_read(CO_APIC_ID)); } -static int __init visws_trap_init(void) +static void __init visws_trap_init(void) { lithium_init(); cobalt_init(); - - return 1; } /* diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 8cb5933..9f2b775 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -40,5 +40,6 @@ struct __initdata x86_init_ops x86_init = { .irqs = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, + .trap_init = x86_init_noop, }, }; -- cgit v1.1 From 42bbdb43b16d233b2dacb4cd76e28f61c2a86dc6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 13:04:10 +0200 Subject: x86: Replace ARCH_SETUP by a proper x86_init_ops ARCH_SETUP is a horrible leftover from the old arch/i386 mach support code. It still has a lonely user in xen. Move it to x86_init_ops. 
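[ An illustrative before/after, not part of the patch; both fragments are taken from the diffs below. ]

	/* before: preprocessor indirection, empty unless paravirt redefines it */
	#ifndef ARCH_SETUP
	#define ARCH_SETUP
	#endif
		...
		ARCH_SETUP

	/* after: a plain function pointer, x86_init_noop by default;
	 * Xen installs xen_arch_setup at boot */
		x86_init.oem.arch_setup();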
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 1 - arch/x86/include/asm/paravirt_types.h | 1 - arch/x86/include/asm/x86_init.h | 9 +++++++++ arch/x86/kernel/paravirt.c | 1 - arch/x86/kernel/setup.c | 6 +----- arch/x86/kernel/x86_init.c | 4 ++++ arch/x86/xen/enlighten.c | 2 +- 7 files changed, 15 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6a07af4..22cb387 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -24,7 +24,6 @@ static inline void load_sp0(struct tss_struct *tss, PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); } -#define ARCH_SETUP pv_init_ops.arch_setup(); static inline unsigned long get_wallclock(void) { return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock); } diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 25922afb..a05085e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -80,7 +80,6 @@ struct pv_init_ops { unsigned long addr, unsigned len); /* Basic arch-specific setup */ - void (*arch_setup)(void); void (*post_allocator_init)(void); /* Print a banner to identify the environment */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 07c37bd..ceffbf3 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -57,6 +57,14 @@ struct x86_init_irqs { }; /** + * struct x86_init_oem - oem platform specific customizing functions + * @arch_setup: platform specific architecture setup + */ +struct x86_init_oem { + void (*arch_setup)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -64,6 +72,7 @@ struct x86_init_ops { struct x86_init_resources resources; struct x86_init_mpparse mpparse; struct x86_init_irqs irqs; + struct x86_init_oem oem; }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index d76bfbe..80275ef 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -311,7 +311,6 @@ struct pv_info pv_info = { struct pv_init_ops pv_init_ops = { .patch = native_patch, .banner = default_banner, - .arch_setup = paravirt_nop, }; struct pv_time_ops pv_time_ops = { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bf3b87f..d12aa82 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -108,10 +108,6 @@ #include #endif -#ifndef ARCH_SETUP -#define ARCH_SETUP -#endif - /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
* The direct mapping extends to max_pfn_mapped, so that we can directly access @@ -750,7 +746,7 @@ void __init setup_arch(char **cmdline_p) } #endif - ARCH_SETUP + x86_init.oem.arch_setup(); setup_memory_map(); parse_setup_data(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 9f2b775..fa2d849 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -42,4 +42,8 @@ struct __initdata x86_init_ops x86_init = { .intr_init = native_init_IRQ, .trap_init = x86_init_noop, }, + + .oem = { + .arch_setup = x86_init_noop, + }, }; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 50b20c6..73c7b1d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -841,7 +841,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, .banner = xen_banner, - .arch_setup = xen_arch_setup, .post_allocator_init = xen_post_allocator_init, }; @@ -982,6 +981,7 @@ asmlinkage void __init xen_start_kernel(void) pv_mmu_ops = xen_mmu_ops; x86_init.resources.memory_setup = xen_memory_setup; + x86_init.oem.arch_setup = xen_arch_setup; #ifdef CONFIG_X86_64 /* -- cgit v1.1 From 6f30c1ac3fcf11e08f00670f293546a112cdf4e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 13:19:57 +0200 Subject: x86: Move paravirt banner printout to x86_init_ops Replace another obscure paravirt magic and move it to x86_init_ops. Such a hook is also useful for embedded and special hardware. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 6 +++++- arch/x86/include/asm/paravirt_types.h | 3 --- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/paravirt.c | 10 +--------- arch/x86/kernel/setup.c | 1 + arch/x86/kernel/x86_init.c | 2 ++ arch/x86/xen/enlighten.c | 2 +- 7 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 22cb387..3de6435 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -918,6 +918,8 @@ static inline unsigned long __raw_local_irq_save(void) #undef PVOP_VCALL4 #undef PVOP_CALL4 +extern void default_banner(void); + #else /* __ASSEMBLY__ */ #define _PVSITE(ptype, clobbers, ops, word, algn) \ @@ -1058,5 +1060,7 @@ static inline unsigned long __raw_local_irq_save(void) #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_PARAVIRT */ +#else /* CONFIG_PARAVIRT */ +# define default_banner x86_init_noop +#endif /* !CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a05085e..ce7723c 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -81,9 +81,6 @@ struct pv_init_ops { /* Basic arch-specific setup */ void (*post_allocator_init)(void); - - /* Print a banner to identify the environment */ - void (*banner)(void); }; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index ceffbf3..ee7c59d 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -59,9 +59,11 @@ struct x86_init_irqs { /** * struct x86_init_oem - oem platform specific customizing functions * @arch_setup: platform specific architecture setup + * @banner: print a platform specific banner */ struct x86_init_oem { void (*arch_setup)(void); + void (*banner)(void); }; /** diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 80275ef..f7a5fb7 100644 ---
a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -54,7 +54,7 @@ u64 _paravirt_ident_64(u64 x) return x; } -static void __init default_banner(void) +void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", pv_info.name); @@ -208,13 +208,6 @@ extern void native_irq_enable_sysexit(void); extern void native_usergs_sysret32(void); extern void native_usergs_sysret64(void); -static int __init print_banner(void) -{ - pv_init_ops.banner(); - return 0; -} -core_initcall(print_banner); - static struct resource reserve_ioports = { .start = 0, .end = IO_SPACE_LIMIT, @@ -310,7 +303,6 @@ struct pv_info pv_info = { struct pv_init_ops pv_init_ops = { .patch = native_patch, - .banner = default_banner, }; struct pv_time_ops pv_time_ops = { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d12aa82..bc5f0e5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1012,6 +1012,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + x86_init.oem.banner(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index fa2d849..08fea49 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -45,5 +46,6 @@ struct __initdata x86_init_ops x86_init = { .oem = { .arch_setup = x86_init_noop, + .banner = default_banner, }, }; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 73c7b1d..46e23cd 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -840,7 +840,6 @@ static const struct pv_info xen_info __initdata = { static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, - .banner = xen_banner, .post_allocator_init = xen_post_allocator_init, }; @@ -982,6 +981,7 @@ asmlinkage void __init xen_start_kernel(void) x86_init.resources.memory_setup = xen_memory_setup; x86_init.oem.arch_setup = xen_arch_setup; + x86_init.oem.banner = xen_banner; #ifdef CONFIG_X86_64 /* -- cgit v1.1 From 030cb6c00d242c20e92a3327d0cac17ce02d0cc3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 14:30:02 +0200 Subject: x86: Move paravirt pagetable_setup to x86_init_ops Replace more paravirt hackery by proper x86_init_ops. 
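As an illustrative sketch of the resulting interface (hypothetical "myplat" platform code, not part of this series), a platform that needs its own pagetable hooks now assigns the x86_init.paging pointers instead of patching pv_mmu_ops:

	static void __init myplat_pagetable_setup_start(pgd_t *base)
	{
		/* may pre-install mappings; must preserve existing ones */
	}

	static void __init myplat_pagetable_setup_done(pgd_t *base)
	{
		/* fixups after paging_init() */
	}

	static void __init myplat_arch_setup(void)
	{
		x86_init.paging.pagetable_setup_start = myplat_pagetable_setup_start;
		x86_init.paging.pagetable_setup_done = myplat_pagetable_setup_done;
	}

This is exactly the pattern the Xen hunks below follow in the new xen_init_mmu_ops().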
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 10 ---------- arch/x86/include/asm/paravirt_types.h | 9 --------- arch/x86/include/asm/pgtable.h | 10 ---------- arch/x86/include/asm/pgtable_types.h | 4 ++-- arch/x86/include/asm/x86_init.h | 13 +++++++++++++ arch/x86/kernel/paravirt.c | 7 ------- arch/x86/kernel/setup.c | 4 ++-- arch/x86/kernel/x86_init.c | 6 ++++++ arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/mmu.c | 11 +++++++---- arch/x86/xen/mmu.h | 2 +- 11 files changed, 32 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 3de6435..1caf25b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -351,16 +351,6 @@ static inline void paravirt_post_allocator_init(void) (*pv_init_ops.post_allocator_init)(); } -static inline void paravirt_pagetable_setup_start(pgd_t *base) -{ - (*pv_mmu_ops.pagetable_setup_start)(base); -} - -static inline void paravirt_pagetable_setup_done(pgd_t *base) -{ - (*pv_mmu_ops.pagetable_setup_done)(base); -} - #ifdef CONFIG_SMP static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ce7723c..4039eef 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -231,15 +231,6 @@ struct pv_apic_ops { }; struct pv_mmu_ops { - /* - * Called before/after init_mm pagetable setup. setup_start - * may reset %cr3, and may pre-install parts of the pagetable; - * pagetable setup is expected to preserve any existing - * mapping. - */ - void (*pagetable_setup_start)(pgd_t *pgd_base); - void (*pagetable_setup_done)(pgd_t *pgd_base); - unsigned long (*read_cr2)(void); void (*write_cr2)(unsigned long); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1674807..60d422a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -56,16 +56,6 @@ extern struct list_head pgd_list; #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) -static inline void __init paravirt_pagetable_setup_start(pgd_t *base) -{ - native_pagetable_setup_start(base); -} - -static inline void __init paravirt_pagetable_setup_done(pgd_t *base) -{ - native_pagetable_setup_done(base); -} - #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 54cb697..7b467bf 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -299,8 +299,8 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); extern void native_pagetable_setup_start(pgd_t *base); extern void native_pagetable_setup_done(pgd_t *base); #else -static inline void native_pagetable_setup_start(pgd_t *base) {} -static inline void native_pagetable_setup_done(pgd_t *base) {} +#define native_pagetable_setup_start x86_init_pgd_noop +#define native_pagetable_setup_done x86_init_pgd_noop #endif struct seq_file; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index ee7c59d..b9bb4fa 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_PLATFORM_H #define _ASM_X86_PLATFORM_H +#include + struct mpc_bus; struct mpc_cpu; struct mpc_table; @@ -67,6 +69,16 @@ struct x86_init_oem { }; /** + * struct 
x86_init_paging - platform specific paging functions + * @pagetable_setup_start: platform specific pre paging_init() call + * @pagetable_setup_done: platform specific post paging_init() call + */ +struct x86_init_paging { + void (*pagetable_setup_start)(pgd_t *base); + void (*pagetable_setup_done)(pgd_t *base); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -75,6 +87,7 @@ struct x86_init_ops { struct x86_init_mpparse mpparse; struct x86_init_irqs irqs; struct x86_init_oem oem; + struct x86_init_paging paging; }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f7a5fb7..8167be0 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -402,13 +402,6 @@ struct pv_apic_ops pv_apic_ops = { #endif struct pv_mmu_ops pv_mmu_ops = { -#ifndef CONFIG_X86_64 - .pagetable_setup_start = native_pagetable_setup_start, - .pagetable_setup_done = native_pagetable_setup_done, -#else - .pagetable_setup_start = paravirt_nop, - .pagetable_setup_done = paravirt_nop, -#endif .read_cr2 = native_read_cr2, .write_cr2 = native_write_cr2, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bc5f0e5..4952d63 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -959,9 +959,9 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif - paravirt_pagetable_setup_start(swapper_pg_dir); + x86_init.paging.pagetable_setup_start(swapper_pg_dir); paging_init(); - paravirt_pagetable_setup_done(swapper_pg_dir); + x86_init.paging.pagetable_setup_done(swapper_pg_dir); paravirt_post_allocator_init(); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 08fea49..7df020e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -14,6 +14,7 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } +void __init x86_init_pgd_noop(pgd_t *unused) { } /* * The platform setup functions are preset with the default functions @@ -48,4 +49,9 @@ struct __initdata x86_init_ops x86_init = { .arch_setup = x86_init_noop, .banner = default_banner, }, + + .paging = { + .pagetable_setup_start = native_pagetable_setup_start, + .pagetable_setup_done = native_pagetable_setup_done, + }, }; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46e23cd..12ea09e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -977,7 +977,6 @@ asmlinkage void __init xen_start_kernel(void) pv_time_ops = xen_time_ops; pv_cpu_ops = xen_cpu_ops; pv_apic_ops = xen_apic_ops; - pv_mmu_ops = xen_mmu_ops; x86_init.resources.memory_setup = xen_memory_setup; x86_init.oem.arch_setup = xen_arch_setup; @@ -991,6 +990,7 @@ asmlinkage void __init xen_start_kernel(void) load_percpu_segment(0); #endif + xen_init_mmu_ops(); xen_init_irq_ops(); xen_init_cpuid_mask(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4ceb285..dbec51d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1875,10 +1875,7 @@ static void xen_leave_lazy_mmu(void) preempt_enable(); } -const struct pv_mmu_ops xen_mmu_ops __initdata = { - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - +static const struct pv_mmu_ops xen_mmu_ops __initdata = { .read_cr2 = xen_read_cr2, .write_cr2 = xen_write_cr2, @@ -1954,6 +1951,12 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_fixmap = xen_set_fixmap, }; +void __init xen_init_mmu_ops(void) +{ + 
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; + x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; + pv_mmu_ops = xen_mmu_ops; +} #ifdef CONFIG_XEN_DEBUG_FS diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index da73026..5fe6bc7 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -59,5 +59,5 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, unsigned long xen_read_cr2_direct(void); -extern const struct pv_mmu_ops xen_mmu_ops; +extern void xen_init_mmu_ops(void); #endif /* _XEN_MMU_H */ -- cgit v1.1 From f1d7062a235d057e5d85ed2860bef609e0160cde Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 13:13:52 +0200 Subject: x86: Move xen_post_allocator_init into xen_pagetable_setup_done We really do not need two paravirt/x86_init_ops functions which are called in two consecutive source lines. Move the only user of post_allocator_init into the already existing pagetable_setup_done function. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 6 ------ arch/x86/include/asm/paravirt_types.h | 3 --- arch/x86/include/asm/setup.h | 4 ---- arch/x86/kernel/setup.c | 1 - arch/x86/xen/enlighten.c | 2 -- arch/x86/xen/mmu.c | 5 ++++- arch/x86/xen/xen-ops.h | 2 -- 7 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 1caf25b..7ce415e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -345,12 +345,6 @@ static inline void setup_secondary_clock(void) } #endif -static inline void paravirt_post_allocator_init(void) -{ - if (pv_init_ops.post_allocator_init) - (*pv_init_ops.post_allocator_init)(); -} - #ifdef CONFIG_SMP static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 4039eef..ecc74e5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -78,9 +78,6 @@ struct pv_init_ops { */ unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, unsigned long addr, unsigned len); - - /* Basic arch-specific setup */ - void (*post_allocator_init)(void); }; diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 7751d1f..58b5895 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -63,10 +63,6 @@ static inline int is_visws_box(void) { return 0; } extern struct x86_quirks *x86_quirks; extern unsigned long saved_video_mode; -#ifndef CONFIG_PARAVIRT -#define paravirt_post_allocator_init() do {} while (0) -#endif - extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4952d63..43ec6aa 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -962,7 +962,6 @@ void __init setup_arch(char **cmdline_p) x86_init.paging.pagetable_setup_start(swapper_pg_dir); paging_init(); x86_init.paging.pagetable_setup_done(swapper_pg_dir); - paravirt_post_allocator_init(); #ifdef CONFIG_X86_64 map_vsyscall(); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 12ea09e..a924caa 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -839,8 +839,6 @@ static const struct pv_info xen_info __initdata = { static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, - - .post_allocator_init = 
xen_post_allocator_init, }; static const struct pv_time_ops xen_time_ops __initdata = { diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index dbec51d..093dd59 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1229,9 +1229,12 @@ static __init void xen_pagetable_setup_start(pgd_t *base) { } +static void xen_post_allocator_init(void); + static __init void xen_pagetable_setup_done(pgd_t *base) { xen_setup_shared_info(); + xen_post_allocator_init(); } static void xen_write_cr2(unsigned long cr2) @@ -1841,7 +1844,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } -__init void xen_post_allocator_init(void) +static __init void xen_post_allocator_init(void) { pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 22494fd..355fa6b 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -30,8 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); void xen_reserve_top(void); -void xen_post_allocator_init(void); - char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); -- cgit v1.1 From 736decac643e8982655e22ac7f0e5e61c5b7f9bd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Aug 2009 12:35:53 +0200 Subject: x86: Move percpu clockevents setup to x86_init_ops paravirt overrides the setup of the default APIC timers as per-CPU timers. Moorestown needs to override that as well. Move it to x86_init_ops setup and create a separate x86_cpuinit struct which holds the function for the secondary, possibly hotpluggable, CPUs. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/apic.h | 5 ++--- arch/x86/include/asm/paravirt.h | 12 ------------ arch/x86/include/asm/paravirt_types.h | 3 --- arch/x86/include/asm/x86_init.h | 19 +++++++++++++++++++ arch/x86/kernel/apic/apic.c | 3 ++- arch/x86/kernel/kvmclock.c | 5 ++++- arch/x86/kernel/paravirt.c | 2 -- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/kernel/vmi_32.c | 4 ++-- arch/x86/kernel/x86_init.c | 9 +++++++++ arch/x86/xen/enlighten.c | 4 ++-- 11 files changed, 42 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bb7d479..6f15b29 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -70,9 +70,6 @@ static inline void default_inquire_remote_apic(int apicid) */ #ifdef CONFIG_PARAVIRT #include -#else -#define setup_boot_clock setup_boot_APIC_clock -#define setup_secondary_clock setup_secondary_APIC_clock #endif #ifdef CONFIG_X86_64 @@ -245,6 +242,8 @@ static inline void lapic_shutdown(void) { } static inline void init_apic_mappings(void) { } static inline void disable_local_APIC(void) { } static inline void apic_disable(void) { } +# define setup_boot_APIC_clock x86_init_noop +# define setup_secondary_APIC_clock x86_init_noop #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 7ce415e..825674a 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -333,18 +333,6 @@ static inline void slow_down_io(void) #endif } -#ifdef CONFIG_X86_LOCAL_APIC -static inline void setup_boot_clock(void) -{ - PVOP_VCALL0(pv_apic_ops.setup_boot_clock); -} - -static inline void setup_secondary_clock(void) -{ - PVOP_VCALL0(pv_apic_ops.setup_secondary_clock); -} -#endif - #ifdef CONFIG_SMP static inline void
startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ecc74e5..1da8927 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -218,9 +218,6 @@ struct pv_irq_ops { struct pv_apic_ops { #ifdef CONFIG_X86_LOCAL_APIC - void (*setup_boot_clock)(void); - void (*setup_secondary_clock)(void); - void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b9bb4fa..b7d258f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -79,6 +79,15 @@ struct x86_init_paging { }; /** + * struct x86_init_timers - platform specific timer setup + * @setup_perpcu_clockev: set up the per cpu clock event device for the + * boot cpu + */ +struct x86_init_timers { + void (*setup_percpu_clockev)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -88,9 +97,19 @@ struct x86_init_ops { struct x86_init_irqs irqs; struct x86_init_oem oem; struct x86_init_paging paging; + struct x86_init_timers timers; +}; + +/** + * struct x86_cpuinit_ops - platform specific cpu hotplug setups + * @setup_percpu_clockev: set up the per cpu clock event device + */ +struct x86_cpuinit_ops { + void (*setup_percpu_clockev)(void); }; extern struct x86_init_ops x86_init; +extern struct x86_cpuinit_ops x86_cpuinit; extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0a1c283..ce00980 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -1701,7 +1702,7 @@ int __init APIC_init_uniprocessor(void) localise_nmi_watchdog(); #endif - setup_boot_clock(); + x86_init.timers.setup_percpu_clockev(); #ifdef CONFIG_X86_64 check_nmi_watchdog(); #endif diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 223af43..64e9b5f 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -22,6 +22,8 @@ #include #include #include + +#include #include #define KVM_SCALE 22 @@ -187,7 +189,8 @@ void __init kvmclock_init(void) pv_time_ops.sched_clock = kvm_clock_read; pv_time_ops.get_tsc_khz = kvm_get_tsc_khz; #ifdef CONFIG_X86_LOCAL_APIC - pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; + x86_cpuinit.setup_percpu_clockev = + kvm_setup_secondary_clock; #endif #ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8167be0..1ed32c7 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -387,8 +387,6 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC - .setup_boot_clock = setup_boot_APIC_clock, - .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, #endif }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda6..6eb81a8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -323,7 +323,7 @@ notrace static void __cpuinit start_secondary(void *unused) /* enable local interrupts */ local_irq_enable(); - setup_secondary_clock(); + x86_cpuinit.setup_percpu_clockev(); wmb(); cpu_idle(); @@ -1112,7 +1112,7 @@ void __init 
native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); - setup_boot_clock(); + x86_init.timers.setup_percpu_clockev(); if (is_uv_system()) uv_system_init(); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 95a7289..b43b668 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -821,8 +821,8 @@ static inline int __init activate_vmi(void) pv_time_ops.get_wallclock = vmi_get_wallclock; pv_time_ops.set_wallclock = vmi_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC - pv_apic_ops.setup_boot_clock = vmi_time_bsp_init; - pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; + x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init; + x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init; #endif pv_time_ops.sched_clock = vmi_sched_clock; pv_time_ops.get_tsc_khz = vmi_tsc_khz; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7df020e..e666a98 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -54,4 +55,12 @@ struct __initdata x86_init_ops x86_init = { .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, }, + + .timers = { + .setup_percpu_clockev = setup_boot_APIC_clock, + }, +}; + +__cpuinitdata struct x86_cpuinit_ops x86_cpuinit = { + .setup_percpu_clockev = setup_secondary_APIC_clock, }; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a924caa..14e597e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -912,8 +912,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC - .setup_boot_clock = paravirt_nop, - .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, #endif }; @@ -979,6 +977,8 @@ asmlinkage void __init xen_start_kernel(void) x86_init.resources.memory_setup = xen_memory_setup; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; + x86_init.timers.setup_percpu_clockev = x86_init_noop; + x86_cpuinit.setup_percpu_clockev = x86_init_noop; #ifdef CONFIG_X86_64 /* -- cgit v1.1 From 845b3944bbdf9e9247849bf037f27ff3a3f26d87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Aug 2009 15:37:03 +0200 Subject: x86: Add timer_init to x86_init_ops The timer init code is convoluted with several quirks and the paravirt timer chooser. Figuring out which code path is actually taken is not for the faint-hearted. Move the numaq TSC quirk to the tsc_pre_init x86_init_ops function and replace the paravirt timer chooser and the remaining x86 quirk with a simple x86_init_ops function.
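The resulting call order, sketched from the hunks below (simplified, not literal kernel code):

	void __init time_init(void)
	{
		tsc_init();		/* calls x86_init.timers.tsc_pre_init() first */
		late_time_init = x86_late_time_init;
	}

	static void x86_late_time_init(void)
	{
		x86_init.timers.timer_init();	/* hpet_time_init() unless overridden */
	}

NUMAQ hooks tsc_pre_init to disable the TSC; VisWS, VMI, lguest and Xen simply replace timer_init.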
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 5 ---- arch/x86/include/asm/paravirt_types.h | 2 -- arch/x86/include/asm/setup.h | 21 ++--------------- arch/x86/include/asm/time.h | 1 - arch/x86/include/asm/timer.h | 3 +-- arch/x86/include/asm/x86_init.h | 4 ++++ arch/x86/kernel/apic/numaq_32.c | 10 ++------ arch/x86/kernel/paravirt.c | 1 - arch/x86/kernel/setup.c | 43 ----------------------------------- arch/x86/kernel/time_32.c | 34 ++++++++++++++++++--------- arch/x86/kernel/time_64.c | 9 ++++++-- arch/x86/kernel/tsc.c | 2 ++ arch/x86/kernel/visws_quirks.c | 20 ++++------------ arch/x86/kernel/vmi_32.c | 2 +- arch/x86/kernel/x86_init.c | 3 +++ arch/x86/lguest/boot.c | 2 +- arch/x86/xen/enlighten.c | 4 ++-- 17 files changed, 53 insertions(+), 113 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 825674a..11a4ba7 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -34,11 +34,6 @@ static inline int set_wallclock(unsigned long nowtime) return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime); } -static inline void (*choose_time_init(void))(void) -{ - return pv_time_ops.time_init; -} - /* The paravirtualized CPUID instruction. */ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 1da8927..0d812e5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -88,8 +88,6 @@ struct pv_lazy_ops { }; struct pv_time_ops { - void (*time_init)(void); - /* Set and set time of day */ unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 58b5895..861e1fe 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -5,24 +5,6 @@ #define COMMAND_LINE_SIZE 2048 -#ifndef __ASSEMBLY__ - -#include - -/* - * Any setup quirks to be performed? 
- */ - -struct x86_quirks { - int (*arch_pre_time_init)(void); - int (*arch_time_init)(void); -}; - -extern void x86_quirk_pre_time_init(void); -extern void x86_quirk_time_init(void); - -#endif /* __ASSEMBLY__ */ - #ifdef __i386__ #include @@ -42,6 +24,7 @@ extern void x86_quirk_time_init(void); #ifndef __ASSEMBLY__ #include +#include /* Interrupt control for vSMPowered x86_64 systems */ #ifdef CONFIG_X86_64 @@ -60,11 +43,11 @@ static inline void visws_early_detect(void) { } static inline int is_visws_box(void) { return 0; } #endif -extern struct x86_quirks *x86_quirks; extern unsigned long saved_video_mode; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); +extern void setup_default_timer_irq(void); #ifndef _SETUP diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index 50c733a..91bb162 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -54,7 +54,6 @@ extern void time_init(void); #define get_wallclock() native_get_wallclock() #define set_wallclock(x) native_set_wallclock(x) -#define choose_time_init() hpet_time_init #endif /* CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 20ca9c4..e854c7a 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -12,8 +12,7 @@ unsigned long native_calibrate_tsc(void); #ifdef CONFIG_X86_32 extern int timer_ack; -extern irqreturn_t timer_interrupt(int irq, void *dev_id); -#endif /* CONFIG_X86_32 */ +#endif extern int recalibrate_cpu_khz(void); extern int no_timer_check; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b7d258f..f8bdd22 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -82,9 +82,13 @@ struct x86_init_paging { * struct x86_init_timers - platform specific timer setup * @setup_perpcu_clockev: set up the per cpu clock event device for the * boot cpu + * @tsc_pre_init: platform function called before TSC init + * @timer_init: initialize the platform timer (default PIT/HPET) */ struct x86_init_timers { void (*setup_percpu_clockev)(void); + void (*tsc_pre_init)(void); + void (*timer_init)(void); }; /** diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 71c5ea6..f1ebed6b 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -129,10 +129,9 @@ void __cpuinit numaq_tsc_disable(void) } } -static int __init numaq_pre_time_init(void) +static void __init numaq_tsc_init(void) { numaq_tsc_disable(); - return 0; } static inline int generate_logical_apicid(int quad, int phys_apicid) @@ -262,11 +261,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) } } -static struct x86_quirks numaq_x86_quirks __initdata = { - .arch_pre_time_init = numaq_pre_time_init, - .arch_time_init = NULL, -}; - static __init void early_check_numaq(void) { /* @@ -281,13 +275,13 @@ static __init void early_check_numaq(void) early_get_smp_config(); if (found_numaq) { - x86_quirks = &numaq_x86_quirks; x86_init.mpparse.mpc_record = numaq_mpc_record; x86_init.mpparse.setup_ioapic_ids = x86_init_noop; x86_init.mpparse.mpc_apic_id = mpc_apic_id; x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; + x86_init.timers.tsc_pre_init = numaq_tsc_init; } } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1ed32c7..9c0e644 100644 --- a/arch/x86/kernel/paravirt.c +++ 
b/arch/x86/kernel/paravirt.c @@ -306,7 +306,6 @@ struct pv_init_ops pv_init_ops = { }; struct pv_time_ops pv_time_ops = { - .time_init = hpet_time_init, .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, .sched_clock = native_sched_clock, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 43ec6aa..bb207a4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -626,10 +626,6 @@ static int __init setup_elfcorehdr(char *arg) early_param("elfcorehdr", setup_elfcorehdr); #endif -static struct x86_quirks default_x86_quirks __initdata; - -struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; - #ifdef CONFIG_X86_RESERVE_LOW_64K static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { @@ -1016,45 +1012,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, - .name = "timer" -}; - -/** - * x86_quirk_pre_time_init - do any specific initialisations before. - * - **/ -void __init x86_quirk_pre_time_init(void) -{ - if (x86_quirks->arch_pre_time_init) - x86_quirks->arch_pre_time_init(); -} - -/** - * x86_quirk_time_init - do any specific initialisations for the system timer. - * - * Description: - * Must plug the system timer interrupt source at HZ into the IRQ listed - * in irq_vectors.h:TIMER_IRQ - **/ -void __init x86_quirk_time_init(void) -{ - if (x86_quirks->arch_time_init) { - /* - * A nonzero return code does not mean failure, it means - * that the architecture quirk does not want any - * generic (timer) setup to be performed after this: - */ - if (x86_quirks->arch_time_init()) - return; - } - - irq0.mask = cpumask_of_cpu(0); - setup_irq(0, &irq0); -} - static struct resource video_ram_resource = { .name = "Video RAM area", .start = 0xa0000, diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 5c5d87f..89bbb52 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -72,7 +72,7 @@ EXPORT_SYMBOL(profile_pc); * Time Stamp Counter value at the time of the timer interrupt, so that * we later on can estimate the time of day more exactly. */ -irqreturn_t timer_interrupt(int irq, void *dev_id) +static irqreturn_t timer_interrupt(int irq, void *dev_id) { /* Keep nmi watchdog up to date */ inc_irq_stat(irq0_irqs); @@ -113,25 +113,37 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -/* Duplicate of time_init() below, with hpet_enable part added */ +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, + .name = "timer" +}; + +void __init setup_default_timer_irq(void) +{ + irq0.mask = cpumask_of_cpu(0); + setup_irq(0, &irq0); +} + +/* Default timer init function */ void __init hpet_time_init(void) { if (!hpet_enable()) setup_pit_timer(); - x86_quirk_time_init(); + setup_default_timer_irq(); +} + +static void x86_late_time_init(void) +{ + x86_init.timers.timer_init(); } /* - * This is called directly from init code; we must delay timer setup in the - * HPET case as we can't make the decision to turn on HPET this early in the - * boot process. - * - * The chosen time_init function will usually be hpet_time_init, above, but - * in the case of virtual hardware, an alternative function may be substituted. + * Initialize TSC and delay the periodic timer init to + * late x86_late_time_init() so ioremap works. 
*/ void __init time_init(void) { - x86_quirk_pre_time_init(); tsc_init(); - late_time_init = choose_time_init(); + late_time_init = x86_late_time_init; } diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 5ba343e..38a7df9 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -127,9 +128,13 @@ void __init hpet_time_init(void) setup_irq(0, &irq0); } +static void x86_late_time_init(void) +{ + x86_init.timers.timer_init(); +} + void __init time_init(void) { tsc_init(); - - late_time_init = choose_time_init(); + late_time_init = x86_late_time_init; } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 71f4368..652bc21 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -857,6 +857,8 @@ void __init tsc_init(void) u64 lpj; int cpu; + x86_init.timers.tsc_pre_init(); + if (!cpu_has_tsc) return; diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 2719091..f068553 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -53,7 +54,7 @@ int is_visws_box(void) return visws_board_type >= 0; } -static int __init visws_time_init(void) +static void __init visws_time_init(void) { printk(KERN_INFO "Starting Cobalt Timer system clock\n"); @@ -66,11 +67,7 @@ static int __init visws_time_init(void) /* Enable (unmask) the timer interrupt */ co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); - /* - * Zero return means the generic timer setup code will set up - * the standard vector: - */ - return 0; + setup_default_timer_irq(); } /* Replaces the default init_ISA_irqs in the generic setup */ @@ -226,10 +223,6 @@ static void __init visws_find_smp_config(unsigned int reserve) static void visws_trap_init(void); -static struct x86_quirks visws_x86_quirks __initdata = { - .arch_time_init = visws_time_init, -}; - void __init visws_early_detect(void) { int raw; @@ -241,17 +234,14 @@ void __init visws_early_detect(void) return; /* - * Install special quirks for timer, interrupt and memory setup: - * Fall back to generic behavior for traps: - * Override generic MP-table parsing: + * Override the default platform setup functions */ - x86_quirks = &visws_x86_quirks; - x86_init.resources.memory_setup = visws_memory_setup; x86_init.mpparse.get_smp_config = visws_get_smp_config; x86_init.mpparse.find_smp_config = visws_find_smp_config; x86_init.irqs.pre_vector_init = visws_pre_intr_init; x86_init.irqs.trap_init = visws_trap_init; + x86_init.timers.timer_init = visws_time_init; /* * Install reboot quirks: diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b43b668..cd7d0fb 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -817,7 +817,7 @@ static inline int __init activate_vmi(void) vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); vmi_timer_ops.cancel_alarm = vmi_get_function(VMI_CALL_CancelAlarm); - pv_time_ops.time_init = vmi_time_init; + x86_init.timers.timer_init = vmi_time_init; pv_time_ops.get_wallclock = vmi_get_wallclock; pv_time_ops.set_wallclock = vmi_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index e666a98..4790b92 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -11,6 +11,7 @@ #include #include #include +#include #include void __cpuinit x86_init_noop(void) { } @@ -58,6 +59,8 @@ 
struct __initdata x86_init_ops x86_init = { .timers = { .setup_percpu_clockev = setup_boot_APIC_clock, + .tsc_pre_init = x86_init_noop, + .timer_init = hpet_time_init, }, }; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 1ff98651..6caa8c0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1320,11 +1320,11 @@ __init void lguest_init(void) /* Time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; - pv_time_ops.time_init = lguest_time_init; pv_time_ops.get_tsc_khz = lguest_tsc_khz; x86_init.resources.memory_setup = lguest_memory_setup; x86_init.irqs.intr_init = lguest_init_IRQ; + x86_init.timers.timer_init = lguest_time_init; /* * Now is a good time to look at the implementations of these functions diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 14e597e..84826b8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -842,8 +842,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { }; static const struct pv_time_ops xen_time_ops __initdata = { - .time_init = xen_time_init, - .set_wallclock = xen_set_wallclock, .get_wallclock = xen_get_wallclock, .get_tsc_khz = xen_tsc_khz, @@ -977,6 +975,8 @@ asmlinkage void __init xen_start_kernel(void) x86_init.resources.memory_setup = xen_memory_setup; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; + + x86_init.timers.timer_init = xen_time_init; x86_init.timers.setup_percpu_clockev = x86_init_noop; x86_cpuinit.setup_percpu_clockev = x86_init_noop; -- cgit v1.1 From ecce85089e6d31eed7535b68f5acdd194265690c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 15:28:50 +0200 Subject: x86: Remove do_timer hook This is a leftover of the old x86 sub-arch support. Remove it and open-code it like we do in time_64.c. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/do_timer.h | 16 ---------------- arch/x86/kernel/time_32.c | 7 ++++--- 2 files changed, 4 insertions(+), 19 deletions(-) delete mode 100644 arch/x86/include/asm/do_timer.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/do_timer.h b/arch/x86/include/asm/do_timer.h deleted file mode 100644 index 23ecda0..0000000 --- a/arch/x86/include/asm/do_timer.h +++ /dev/null @@ -1,16 +0,0 @@ -/* defines for inline arch setup functions */ -#include - -#include -#include - -/** - * do_timer_interrupt_hook - hook into timer tick - * - * Call the pit clock event handler. see asm/i8253.h - **/ - -static inline void do_timer_interrupt_hook(void) -{ - global_clock_event->event_handler(global_clock_event); -} diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 89bbb52..6fef4ea 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -28,6 +28,7 @@ * serialize accesses to xtime/lost_ticks). */ +#include #include #include #include @@ -37,8 +38,8 @@ #include #include #include - -#include +#include +#include int timer_ack; @@ -92,7 +93,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) } #endif - do_timer_interrupt_hook(); + global_clock_event->event_handler(global_clock_event); #ifdef CONFIG_MCA if (MCA_bus) { -- cgit v1.1 From dd3e6e8c6e7a2294f137c4dbccb3e73e7fa8ba15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 15:35:23 +0200 Subject: x86: Prepare unification of time_32/64.c Unify the top comment and the includes.
Signed-off-by: Thomas Gleixner --- arch/x86/kernel/time_32.c | 44 +++++++++++++------------------------------- arch/x86/kernel/time_64.c | 13 +++++-------- 2 files changed, 18 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 6fef4ea..acbaefd 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -1,45 +1,27 @@ /* - * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * Copyright (c) 1991,1992,1995 Linus Torvalds + * Copyright (c) 1994 Alan Modra + * Copyright (c) 1995 Markus Kuhn + * Copyright (c) 1996 Ingo Molnar + * Copyright (c) 1998 Andrea Arcangeli + * Copyright (c) 2002,2006 Vojtech Pavlik + * Copyright (c) 2003 Andi Kleen * - * This file contains the PC-specific time handling details: - * reading the RTC at bootup, etc.. - * 1994-07-02 Alan Modra - * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime - * 1995-03-26 Markus Kuhn - * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 - * precision CMOS clock update - * 1996-05-03 Ingo Molnar - * fixed time warps in do_[slow|fast]_gettimeoffset() - * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * 1998-09-05 (Various) - * More robust do_fast_gettimeoffset() algorithm implemented - * (works with APM, Cyrix 6x86MX and Centaur C6), - * monotonic gettimeofday() with fast_get_timeoffset(), - * drift-proof precision TSC calibration on boot - * (C. Scott Ananian , Andrew D. - * Balsa , Philip Gladstone ; - * ported from 2.0.35 Jumbo-9 by Michael Krause ). - * 1998-12-16 Andrea Arcangeli - * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy - * because was not accounting lost_ticks. - * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli - * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to - * serialize accesses to xtime/lost_ticks). */ #include -#include #include #include #include -#include -#include -#include -#include +#include +#include #include #include +#include +#include +#include +#include int timer_ack; diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 38a7df9..45914f8 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -1,6 +1,4 @@ /* - * "High Precision Event Timer" based timekeeping. - * * Copyright (c) 1991,1992,1995 Linus Torvalds * Copyright (c) 1994 Alan Modra * Copyright (c) 1995 Markus Kuhn @@ -8,23 +6,22 @@ * Copyright (c) 1998 Andrea Arcangeli * Copyright (c) 2002,2006 Vojtech Pavlik * Copyright (c) 2003 Andi Kleen - * RTC support code taken from arch/i386/kernel/timers/time_hpet.c + * */ #include -#include #include -#include #include #include -#include #include +#include #include +#include #include -#include #include -#include +#include volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -- cgit v1.1 From 64fcbac1f38882d8ae82c44a1c2a676cfa5e79e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 15:54:21 +0200 Subject: x86: Simplify timer_ack magic in time_32.c Let the compiler optimize the timer_ack magic away in the 32bit timer interrupt and put the same code into time_64.c. It's optimized out for !CONFIG_X86_IO_APIC on 32bit and for 64bit because timer_ack is const 0 in both cases.
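A stand-alone illustration of the trick (plain user-space C, not kernel code; HAVE_IO_APIC is a made-up macro standing in for the config option):

	#include <stdio.h>

	#ifdef HAVE_IO_APIC
	int timer_ack;			/* real variable, tested at runtime */
	#else
	# define timer_ack (0)		/* compile-time constant zero */
	#endif

	int main(void)
	{
		if (timer_ack)		/* provably dead when timer_ack is (0) */
			puts("ack timer irq manually");
		puts("handle tick");
		return 0;
	}

Because the #else branch defines the flag as the constant 0, the compiler drops the whole if() body, so a single copy of timer_interrupt() needs no #ifdef at the use site.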
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/timer.h | 6 ++++-- arch/x86/kernel/time_32.c | 5 +++-- arch/x86/kernel/time_64.c | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index e854c7a..65228cc 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -9,11 +9,13 @@ unsigned long long native_sched_clock(void); unsigned long native_calibrate_tsc(void); +extern int recalibrate_cpu_khz(void); -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) extern int timer_ack; +#else +# define timer_ack (0) #endif -extern int recalibrate_cpu_khz(void); extern int no_timer_check; diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index acbaefd..7a26bcf 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -23,7 +23,9 @@ #include #include +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) int timer_ack; +#endif unsigned long profile_pc(struct pt_regs *regs) { @@ -60,7 +62,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) /* Keep nmi watchdog up to date */ inc_irq_stat(irq0_irqs); -#ifdef CONFIG_X86_IO_APIC + /* Optimized out for !IO_APIC and x86_64 */ if (timer_ack) { /* * Subtle, when I/O APICs are used we have to ack timer IRQ @@ -73,7 +75,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) inb(PIC_MASTER_POLL); spin_unlock(&i8259A_lock); } -#endif global_clock_event->event_handler(global_clock_event); diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 45914f8..35e0a92 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -51,6 +51,20 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) { inc_irq_stat(irq0_irqs); + /* Optimized out for !IO_APIC and x86_64 */ + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + spin_lock(&i8259A_lock); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(PIC_MASTER_POLL); + spin_unlock(&i8259A_lock); + } + global_clock_event->event_handler(global_clock_event); #ifdef CONFIG_MCA -- cgit v1.1 From 0be6939422eb2f54df4b3d8763c569c6759c1a42 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 15:59:35 +0200 Subject: x86: Remove mca bus ifdef from timer interrupt MCA_bus is constant 0 when CONFIG_MCA=n. So the compiler removes that code w/o needing an extra #ifdef Signed-off-by: Thomas Gleixner --- arch/x86/kernel/time_32.c | 18 +++--------------- arch/x86/kernel/time_64.c | 9 +++------ 2 files changed, 6 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 7a26bcf..ec729cd 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -78,21 +78,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) global_clock_event->event_handler(global_clock_event); -#ifdef CONFIG_MCA - if (MCA_bus) { - /* The PS/2 uses level-triggered interrupts. You can't - turn them off, nor would you want to (any attempt to - enable edge-triggered interrupts usually gets intercepted by a - special hardware circuit). Hence we have to acknowledge - the timer interrupt. Through some incredibly stupid - design idea, the reset for IRQ 0 is done by setting the - high bit of the PPI port B (0x61). 
Note that some PS/2s, - notably the 55SX, work fine if this is removed. */ - - u8 irq_v = inb_p(0x61); /* read the current state */ - outb_p(irq_v | 0x80, 0x61); /* reset the IRQ */ - } -#endif + /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ + if (MCA_bus) + outb_p(inb_p(0x61)| 0x80, 0x61); return IRQ_HANDLED; } diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 35e0a92..7db3912 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -67,12 +67,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) global_clock_event->event_handler(global_clock_event); -#ifdef CONFIG_MCA - if (MCA_bus) { - u8 irq_v = inb_p(0x61); /* read the current state */ - outb_p(irq_v|0x80, 0x61); /* reset the IRQ */ - } -#endif + /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ + if (MCA_bus) + outb_p(inb_p(0x61)| 0x80, 0x61); return IRQ_HANDLED; } -- cgit v1.1 From 454ede7eebf91b92ab1eafe10c6b6ed04de29bf8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 16:07:40 +0200 Subject: x86: Make timer setup and global variables the same in time_32/64.c The timer and timer irq setup code is identical in 32 and 64 bit. Make the formatting the same as well. Also add the global variables under the necessary ifdefs to both files. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/time_32.c | 8 +++++--- arch/x86/kernel/time_64.c | 38 ++++++++++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index ec729cd..186abc5 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -27,6 +27,10 @@ int timer_ack; #endif +#ifdef CONFIG_X86_64 +volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +#endif + unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -53,9 +57,7 @@ unsigned long profile_pc(struct pt_regs *regs) EXPORT_SYMBOL(profile_pc); /* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly.
+ Default timer interrupt handler for PIT/HPET */ static irqreturn_t timer_interrupt(int irq, void *dev_id) { diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 7db3912..78cbdf5 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -23,7 +23,13 @@ #include #include +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) +int timer_ack; +#endif + +#ifdef CONFIG_X86_64 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +#endif unsigned long profile_pc(struct pt_regs *regs) { @@ -47,8 +53,12 @@ unsigned long profile_pc(struct pt_regs *regs) } EXPORT_SYMBOL(profile_pc); +/* + * Default timer interrupt handler for PIT/HPET + */ static irqreturn_t timer_interrupt(int irq, void *dev_id) { + /* Keep nmi watchdog up to date */ inc_irq_stat(irq0_irqs); /* Optimized out for !IO_APIC and x86_64 */ @@ -74,8 +84,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -/* calibrate_cpu is used on systems with fixed rate TSCs to determine - * processor frequency */ +/* + * calibrate_cpu is used on systems with fixed rate TSCs to determine + * processor frequency + */ #define TICK_COUNT 100000000 unsigned long __init calibrate_cpu(void) { @@ -122,18 +134,24 @@ unsigned long __init calibrate_cpu(void) return pmc_now * tsc_khz / (tsc_now - tsc_start); } -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER, - .name = "timer" +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, + .name = "timer" }; +void __init setup_default_timer_irq(void) +{ + irq0.mask = cpumask_of_cpu(0); + setup_irq(0, &irq0); +} + +/* Default timer init function */ void __init hpet_time_init(void) { if (!hpet_enable()) setup_pit_timer(); - - setup_irq(0, &irq0); + setup_default_timer_irq(); } static void x86_late_time_init(void) @@ -141,6 +159,10 @@ static void x86_late_time_init(void) x86_init.timers.timer_init(); } +/* + * Initialize TSC and delay the periodic timer init to + * late x86_late_time_init() so ioremap works. + */ void __init time_init(void) { tsc_init(); -- cgit v1.1 From 08047c4f1740c7cee75d58e2919d48c09f951649 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 16:27:41 +0200 Subject: x86: Move calibrate_cpu to tsc.c Move the code to where its only user is. Also we need to look into whether this hardwired hackery might interfere with perfcounters.
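For reference, the final line of calibrate_cpu() scales the TSC frequency by the ratio of unhalted core cycles (AMD perfctr event 0x76) to TSC ticks measured over the same interval; a minimal sketch with invented numbers:

	/* illustration of the scaling in calibrate_cpu(), 64-bit math */
	static unsigned long long pmc_to_cpu_khz(unsigned long long pmc_delta,
						 unsigned long long tsc_delta,
						 unsigned long long tsc_khz)
	{
		return pmc_delta * tsc_khz / tsc_delta;
	}

	/*
	 * pmc_to_cpu_khz(2000000000, 2200000000, 2200000) == 2000000,
	 * i.e. a 2.0 GHz core clock behind a 2.2 GHz fixed-rate TSC.
	 */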
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/time.h | 2 -- arch/x86/kernel/time_32.c | 1 - arch/x86/kernel/time_64.c | 51 ---------------------------------------- arch/x86/kernel/tsc.c | 57 +++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 55 insertions(+), 56 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index 91bb162..9c5608b 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -57,6 +57,4 @@ extern void time_init(void); #endif /* CONFIG_PARAVIRT */ -extern unsigned long __init calibrate_cpu(void); - #endif /* _ASM_X86_TIME_H */ diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 186abc5..fd876cc 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -21,7 +21,6 @@ #include #include #include -#include #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) int timer_ack; diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 78cbdf5..e59a40e 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -21,7 +21,6 @@ #include #include #include -#include #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) int timer_ack; @@ -84,56 +83,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -/* - * calibrate_cpu is used on systems with fixed rate TSCs to determine - * processor frequency - */ -#define TICK_COUNT 100000000 -unsigned long __init calibrate_cpu(void) -{ - int tsc_start, tsc_now; - int i, no_ctr_free; - unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; - unsigned long flags; - - for (i = 0; i < 4; i++) - if (avail_to_resrv_perfctr_nmi_bit(i)) - break; - no_ctr_free = (i == 4); - if (no_ctr_free) { - WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... 
" - "cpu_khz value may be incorrect.\n"); - i = 3; - rdmsrl(MSR_K7_EVNTSEL3, evntsel3); - wrmsrl(MSR_K7_EVNTSEL3, 0); - rdmsrl(MSR_K7_PERFCTR3, pmc3); - } else { - reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); - reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); - } - local_irq_save(flags); - /* start measuring cycles, incrementing from 0 */ - wrmsrl(MSR_K7_PERFCTR0 + i, 0); - wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); - rdtscl(tsc_start); - do { - rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); - tsc_now = get_cycles(); - } while ((tsc_now - tsc_start) < TICK_COUNT); - - local_irq_restore(flags); - if (no_ctr_free) { - wrmsrl(MSR_K7_EVNTSEL3, 0); - wrmsrl(MSR_K7_PERFCTR3, pmc3); - wrmsrl(MSR_K7_EVNTSEL3, evntsel3); - } else { - release_perfctr_nmi(MSR_K7_PERFCTR0 + i); - release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); - } - - return pmc_now * tsc_khz / (tsc_now - tsc_start); -} - static struct irqaction irq0 = { .handler = timer_interrupt, .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 652bc21..97a0bcb 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -17,6 +17,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -852,6 +853,60 @@ static void __init init_tsc_clocksource(void) clocksource_register(&clocksource_tsc); } +#ifdef CONFIG_X86_64 +/* + * calibrate_cpu is used on systems with fixed rate TSCs to determine + * processor frequency + */ +#define TICK_COUNT 100000000 +static unsigned long __init calibrate_cpu(void) +{ + int tsc_start, tsc_now; + int i, no_ctr_free; + unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; + unsigned long flags; + + for (i = 0; i < 4; i++) + if (avail_to_resrv_perfctr_nmi_bit(i)) + break; + no_ctr_free = (i == 4); + if (no_ctr_free) { + WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " + "cpu_khz value may be incorrect.\n"); + i = 3; + rdmsrl(MSR_K7_EVNTSEL3, evntsel3); + wrmsrl(MSR_K7_EVNTSEL3, 0); + rdmsrl(MSR_K7_PERFCTR3, pmc3); + } else { + reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); + reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + } + local_irq_save(flags); + /* start measuring cycles, incrementing from 0 */ + wrmsrl(MSR_K7_PERFCTR0 + i, 0); + wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); + rdtscl(tsc_start); + do { + rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); + tsc_now = get_cycles(); + } while ((tsc_now - tsc_start) < TICK_COUNT); + + local_irq_restore(flags); + if (no_ctr_free) { + wrmsrl(MSR_K7_EVNTSEL3, 0); + wrmsrl(MSR_K7_PERFCTR3, pmc3); + wrmsrl(MSR_K7_EVNTSEL3, evntsel3); + } else { + release_perfctr_nmi(MSR_K7_PERFCTR0 + i); + release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + } + + return pmc_now * tsc_khz / (tsc_now - tsc_start); +} +#else +static inline unsigned long calibrate_cpu(void) { return cpu_khz; } +#endif + void __init tsc_init(void) { u64 lpj; @@ -870,11 +925,9 @@ void __init tsc_init(void) return; } -#ifdef CONFIG_X86_64 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) cpu_khz = calibrate_cpu(); -#endif printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, -- cgit v1.1 From ef4512882dbe9978e7a18ccbcb4cb45705ce5560 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 21 Aug 2009 13:24:08 +0200 Subject: x86: time_32/64.c unify profile_pc The code is identical except for the formatting and a useless #ifdef. Make it the same. 
Signed-off-by: Thomas Gleixner --- arch/x86/kernel/time_32.c | 13 ++++++------- arch/x86/kernel/time_64.c | 8 +++++--- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index fd876cc..fda0c34 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -34,23 +34,22 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); -#ifdef CONFIG_SMP if (!user_mode_vm(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else - unsigned long *sp = (unsigned long *)®s->sp; - - /* Return address is either directly at stack pointer - or above a saved flags. Eflags has bits 22-31 zero, - kernel addresses don't. */ + unsigned long *sp = (unsigned long *)regs->sp; + /* + * Return address is either directly at stack pointer + * or above a saved flags. Eflags has bits 22-31 zero, + * kernel addresses don't. + */ if (sp[0] >> 22) return sp[0]; if (sp[1] >> 22) return sp[1]; #endif } -#endif return pc; } EXPORT_SYMBOL(profile_pc); diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index e59a40e..fda0c34 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -34,14 +34,16 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - /* Assume the lock function has either no stack frame or a copy - of flags from PUSHF - Eflags always has bits 22 and up cleared unlike kernel addresses. */ if (!user_mode_vm(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else unsigned long *sp = (unsigned long *)regs->sp; + /* + * Return address is either directly at stack pointer + * or above a saved flags. Eflags has bits 22-31 zero, + * kernel addresses don't. + */ if (sp[0] >> 22) return sp[0]; if (sp[1] >> 22) -- cgit v1.1 From 47926214d8b2bef13b2be57c500194a804f16198 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 16:47:19 +0200 Subject: x86: Replace the now identical time_32/64.c by time.c Remove the redundant copy. 
Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/time.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/time_32.c | 121 ---------------------------------------------- arch/x86/kernel/time_64.c | 121 ---------------------------------------------- 4 files changed, 122 insertions(+), 243 deletions(-) create mode 100644 arch/x86/kernel/time.c delete mode 100644 arch/x86/kernel/time_32.c delete mode 100644 arch/x86/kernel/time_64.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 313ed6f..ccf3db6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -31,7 +31,7 @@ GCOV_PROFILE_paravirt.o := n obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o +obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += setup.o x86_init.o i8259.o irqinit.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c new file mode 100644 index 0000000..fda0c34 --- /dev/null +++ b/arch/x86/kernel/time.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 1991,1992,1995 Linus Torvalds + * Copyright (c) 1994 Alan Modra + * Copyright (c) 1995 Markus Kuhn + * Copyright (c) 1996 Ingo Molnar + * Copyright (c) 1998 Andrea Arcangeli + * Copyright (c) 2002,2006 Vojtech Pavlik + * Copyright (c) 2003 Andi Kleen + * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) +int timer_ack; +#endif + +#ifdef CONFIG_X86_64 +volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +#endif + +unsigned long profile_pc(struct pt_regs *regs) +{ + unsigned long pc = instruction_pointer(regs); + + if (!user_mode_vm(regs) && in_lock_functions(pc)) { +#ifdef CONFIG_FRAME_POINTER + return *(unsigned long *)(regs->bp + sizeof(long)); +#else + unsigned long *sp = (unsigned long *)regs->sp; + /* + * Return address is either directly at stack pointer + * or above a saved flags. Eflags has bits 22-31 zero, + * kernel addresses don't. + */ + if (sp[0] >> 22) + return sp[0]; + if (sp[1] >> 22) + return sp[1]; +#endif + } + return pc; +} +EXPORT_SYMBOL(profile_pc); + +/* + * Default timer interrupt handler for PIT/HPET + */ +static irqreturn_t timer_interrupt(int irq, void *dev_id) +{ + /* Keep nmi watchdog up to date */ + inc_irq_stat(irq0_irqs); + + /* Optimized out for !IO_APIC and x86_64 */ + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + spin_lock(&i8259A_lock); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. 
*/ + inb(PIC_MASTER_POLL); + spin_unlock(&i8259A_lock); + } + + global_clock_event->event_handler(global_clock_event); + + /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ + if (MCA_bus) + outb_p(inb_p(0x61)| 0x80, 0x61); + + return IRQ_HANDLED; +} + +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, + .name = "timer" +}; + +void __init setup_default_timer_irq(void) +{ + irq0.mask = cpumask_of_cpu(0); + setup_irq(0, &irq0); +} + +/* Default timer init function */ +void __init hpet_time_init(void) +{ + if (!hpet_enable()) + setup_pit_timer(); + setup_default_timer_irq(); +} + +static void x86_late_time_init(void) +{ + x86_init.timers.timer_init(); +} + +/* + * Initialize TSC and delay the periodic timer init to + * late x86_late_time_init() so ioremap works. + */ +void __init time_init(void) +{ + tsc_init(); + late_time_init = x86_late_time_init; +} diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c deleted file mode 100644 index fda0c34..0000000 --- a/arch/x86/kernel/time_32.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 1991,1992,1995 Linus Torvalds - * Copyright (c) 1994 Alan Modra - * Copyright (c) 1995 Markus Kuhn - * Copyright (c) 1996 Ingo Molnar - * Copyright (c) 1998 Andrea Arcangeli - * Copyright (c) 2002,2006 Vojtech Pavlik - * Copyright (c) 2003 Andi Kleen - * - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) -int timer_ack; -#endif - -#ifdef CONFIG_X86_64 -volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -#endif - -unsigned long profile_pc(struct pt_regs *regs) -{ - unsigned long pc = instruction_pointer(regs); - - if (!user_mode_vm(regs) && in_lock_functions(pc)) { -#ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + sizeof(long)); -#else - unsigned long *sp = (unsigned long *)regs->sp; - /* - * Return address is either directly at stack pointer - * or above a saved flags. Eflags has bits 22-31 zero, - * kernel addresses don't. - */ - if (sp[0] >> 22) - return sp[0]; - if (sp[1] >> 22) - return sp[1]; -#endif - } - return pc; -} -EXPORT_SYMBOL(profile_pc); - -/* - * Default timer interrupt handler for PIT/HPET - */ -static irqreturn_t timer_interrupt(int irq, void *dev_id) -{ - /* Keep nmi watchdog up to date */ - inc_irq_stat(irq0_irqs); - - /* Optimized out for !IO_APIC and x86_64 */ - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to deassert NMI lines for the watchdog if run - * on an 82489DX-based system. - */ - spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. 
*/ - inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); - } - - global_clock_event->event_handler(global_clock_event); - - /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ - if (MCA_bus) - outb_p(inb_p(0x61)| 0x80, 0x61); - - return IRQ_HANDLED; -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, - .name = "timer" -}; - -void __init setup_default_timer_irq(void) -{ - irq0.mask = cpumask_of_cpu(0); - setup_irq(0, &irq0); -} - -/* Default timer init function */ -void __init hpet_time_init(void) -{ - if (!hpet_enable()) - setup_pit_timer(); - setup_default_timer_irq(); -} - -static void x86_late_time_init(void) -{ - x86_init.timers.timer_init(); -} - -/* - * Initialize TSC and delay the periodic timer init to - * late x86_late_time_init() so ioremap works. - */ -void __init time_init(void) -{ - tsc_init(); - late_time_init = x86_late_time_init; -} diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c deleted file mode 100644 index fda0c34..0000000 --- a/arch/x86/kernel/time_64.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 1991,1992,1995 Linus Torvalds - * Copyright (c) 1994 Alan Modra - * Copyright (c) 1995 Markus Kuhn - * Copyright (c) 1996 Ingo Molnar - * Copyright (c) 1998 Andrea Arcangeli - * Copyright (c) 2002,2006 Vojtech Pavlik - * Copyright (c) 2003 Andi Kleen - * - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) -int timer_ack; -#endif - -#ifdef CONFIG_X86_64 -volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -#endif - -unsigned long profile_pc(struct pt_regs *regs) -{ - unsigned long pc = instruction_pointer(regs); - - if (!user_mode_vm(regs) && in_lock_functions(pc)) { -#ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + sizeof(long)); -#else - unsigned long *sp = (unsigned long *)regs->sp; - /* - * Return address is either directly at stack pointer - * or above a saved flags. Eflags has bits 22-31 zero, - * kernel addresses don't. - */ - if (sp[0] >> 22) - return sp[0]; - if (sp[1] >> 22) - return sp[1]; -#endif - } - return pc; -} -EXPORT_SYMBOL(profile_pc); - -/* - * Default timer interrupt handler for PIT/HPET - */ -static irqreturn_t timer_interrupt(int irq, void *dev_id) -{ - /* Keep nmi watchdog up to date */ - inc_irq_stat(irq0_irqs); - - /* Optimized out for !IO_APIC and x86_64 */ - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to deassert NMI lines for the watchdog if run - * on an 82489DX-based system. - */ - spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. 
*/ - inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); - } - - global_clock_event->event_handler(global_clock_event); - - /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ - if (MCA_bus) - outb_p(inb_p(0x61)| 0x80, 0x61); - - return IRQ_HANDLED; -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, - .name = "timer" -}; - -void __init setup_default_timer_irq(void) -{ - irq0.mask = cpumask_of_cpu(0); - setup_irq(0, &irq0); -} - -/* Default timer init function */ -void __init hpet_time_init(void) -{ - if (!hpet_enable()) - setup_pit_timer(); - setup_default_timer_irq(); -} - -static void x86_late_time_init(void) -{ - x86_init.timers.timer_init(); -} - -/* - * Initialize TSC and delay the periodic timer init to - * late x86_late_time_init() so ioremap works. - */ -void __init time_init(void) -{ - tsc_init(); - late_time_init = x86_late_time_init; -} -- cgit v1.1 From 2d826404f0bdcac2a4dd7e3c446b70d6a3b63b78 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 17:06:25 +0200 Subject: x86: Move tsc_calibration to x86_init_ops TSC calibration is modified by the vmware hypervisor and paravirt by separate means. Moorestown wants to add its own calibration routine as well. So make calibrate_tsc a proper x86_init_ops function and override it by paravirt or by the early setup of the vmware hypervisor. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hypervisor.h | 2 +- arch/x86/include/asm/paravirt.h | 1 - arch/x86/include/asm/timer.h | 5 ----- arch/x86/include/asm/tsc.h | 3 ++- arch/x86/include/asm/vmware.h | 2 +- arch/x86/include/asm/x86_init.h | 9 +++++++++ arch/x86/kernel/cpu/hypervisor.c | 14 +++++++------- arch/x86/kernel/cpu/vmware.c | 21 ++++++++++++--------- arch/x86/kernel/kvmclock.c | 2 +- arch/x86/kernel/paravirt.c | 1 - arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/tsc.c | 13 ++++--------- arch/x86/kernel/vmi_32.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/kernel/x86_init.c | 5 +++++ arch/x86/lguest/boot.c | 2 +- arch/x86/xen/enlighten.c | 3 ++- 17 files changed, 48 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 369f5c5..b78c094 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -20,7 +20,7 @@ #ifndef ASM_X86__HYPERVISOR_H #define ASM_X86__HYPERVISOR_H -extern unsigned long get_hypervisor_tsc_freq(void); extern void init_hypervisor(struct cpuinfo_x86 *c); +extern void init_hypervisor_platform(void); #endif diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 11a4ba7..1e458a5 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -210,7 +210,6 @@ static inline unsigned long long paravirt_sched_clock(void) { return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -#define calibrate_tsc() (pv_time_ops.get_tsc_khz()) static inline unsigned long long paravirt_read_pmc(int counter) { diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 65228cc..5469630 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -8,7 +8,6 @@ #define TICK_SIZE (tick_nsec / 1000) unsigned long long native_sched_clock(void); -unsigned long native_calibrate_tsc(void); extern int recalibrate_cpu_khz(void); #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) @@ -19,10 +18,6 @@ extern int timer_ack; extern int 
no_timer_check; -#ifndef CONFIG_PARAVIRT -#define calibrate_tsc() native_calibrate_tsc() -#endif - /* Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) * basic equation: diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 38ae163..c042729 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -48,7 +48,8 @@ static __always_inline cycles_t vget_cycles(void) extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); -int check_tsc_unstable(void); +extern int check_tsc_unstable(void); +extern unsigned long native_calibrate_tsc(void); /* * Boot-time check whether the TSCs are synchronized across diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h index c11b7e1..e49ed6d 100644 --- a/arch/x86/include/asm/vmware.h +++ b/arch/x86/include/asm/vmware.h @@ -20,7 +20,7 @@ #ifndef ASM_X86__VMWARE_H #define ASM_X86__VMWARE_H -extern unsigned long vmware_get_tsc_khz(void); +extern void vmware_platform_setup(void); extern int vmware_platform(void); extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index f8bdd22..20df518 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -112,8 +112,17 @@ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); }; +/** + * struct x86_platform_ops - platform specific runtime functions + * @calibrate_tsc: calibrate TSC + */ +struct x86_platform_ops { + unsigned long (*calibrate_tsc)(void); +}; + extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; +extern struct x86_platform_ops x86_platform; extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 93ba8ee..08be922 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -34,13 +34,6 @@ detect_hypervisor_vendor(struct cpuinfo_x86 *c) c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; } -unsigned long get_hypervisor_tsc_freq(void) -{ - if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) - return vmware_get_tsc_khz(); - return 0; -} - static inline void __cpuinit hypervisor_set_feature_bits(struct cpuinfo_x86 *c) { @@ -55,3 +48,10 @@ void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) detect_hypervisor_vendor(c); hypervisor_set_feature_bits(c); } + +void __init init_hypervisor_platform(void) +{ + init_hypervisor(&boot_cpu_data); + if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) + vmware_platform_setup(); +} diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index bc24f51..0a46b4d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -24,6 +24,7 @@ #include #include #include +#include #define CPUID_VMWARE_INFO_LEAF 0x40000000 #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 @@ -47,21 +48,29 @@ static inline int __vmware_platform(void) return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; } -static unsigned long __vmware_get_tsc_khz(void) +static unsigned long vmware_get_tsc_khz(void) { uint64_t tsc_hz; uint32_t eax, ebx, ecx, edx; VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); - if (ebx == UINT_MAX) - return 0; tsc_hz = eax | (((uint64_t)ebx) << 32); do_div(tsc_hz, 1000); BUG_ON(tsc_hz >> 32); return tsc_hz; } +void __init vmware_platform_setup(void) +{ + uint32_t eax, ebx, ecx, edx; + + VMWARE_PORT(GETHZ, eax, ebx, 
ecx, edx); + + if (ebx != UINT_MAX) + x86_platform.calibrate_tsc = vmware_get_tsc_khz; +} + /* * While checking the dmi string infomation, just checking the product * serial key should be enough, as this will always have a VMware @@ -87,12 +96,6 @@ int vmware_platform(void) return 0; } -unsigned long vmware_get_tsc_khz(void) -{ - BUG_ON(!vmware_platform()); - return __vmware_get_tsc_khz(); -} - /* * VMware hypervisor takes care of exporting a reliable TSC to the guest. * Still, due to timing difference when running on virtual cpus, the TSC can diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 64e9b5f..75a21b6 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -187,7 +187,7 @@ void __init kvmclock_init(void) pv_time_ops.get_wallclock = kvm_get_wallclock; pv_time_ops.set_wallclock = kvm_set_wallclock; pv_time_ops.sched_clock = kvm_clock_read; - pv_time_ops.get_tsc_khz = kvm_get_tsc_khz; + x86_platform.calibrate_tsc = kvm_get_tsc_khz; #ifdef CONFIG_X86_LOCAL_APIC x86_cpuinit.setup_percpu_clockev = kvm_setup_secondary_clock; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 9c0e644..7cbf898 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -309,7 +309,6 @@ struct pv_time_ops pv_time_ops = { .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, .sched_clock = native_sched_clock, - .get_tsc_khz = native_calibrate_tsc, }; struct pv_irq_ops pv_irq_ops = { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bb207a4..2d93026 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -818,7 +818,7 @@ void __init setup_arch(char **cmdline_p) * VMware detection requires dmi to be available, so this * needs to be done after dmi_scan_machine, for the BP. 
*/ - init_hypervisor(&boot_cpu_data); + init_hypervisor_platform(); x86_init.resources.probe_roms(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 97a0bcb..9917632 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -18,6 +18,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -401,15 +402,9 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz; + unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; - hv_tsc_khz = get_hypervisor_tsc_freq(); - if (hv_tsc_khz) { - printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); - return hv_tsc_khz; - } - local_irq_save(flags); fast_calibrate = quick_pit_calibrate(); local_irq_restore(flags); @@ -567,7 +562,7 @@ int recalibrate_cpu_khz(void) unsigned long cpu_khz_old = cpu_khz; if (cpu_has_tsc) { - tsc_khz = calibrate_tsc(); + tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, @@ -917,7 +912,7 @@ void __init tsc_init(void) if (!cpu_has_tsc) return; - tsc_khz = calibrate_tsc(); + tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; if (!tsc_khz) { diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index cd7d0fb..052ae81 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -825,7 +825,7 @@ static inline int __init activate_vmi(void) x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init; #endif pv_time_ops.sched_clock = vmi_sched_clock; - pv_time_ops.get_tsc_khz = vmi_tsc_khz; + x86_platform.calibrate_tsc = vmi_tsc_khz; /* We have true wallclock functions; disable CMOS clock sync */ no_sync_cmos_clock = 1; diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 2b3eb82..611b9e2 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -68,7 +68,7 @@ unsigned long long vmi_sched_clock(void) return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); } -/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */ +/* x86_platform.calibrate_tsc = vmi_tsc_khz */ unsigned long vmi_tsc_khz(void) { unsigned long long khz; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4790b92..13081b9 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -13,6 +13,7 @@ #include #include #include +#include void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } @@ -67,3 +68,7 @@ struct __initdata x86_init_ops x86_init = { __cpuinitdata struct x86_cpuinit_ops x86_cpuinit = { .setup_percpu_clockev = setup_secondary_APIC_clock, }; + +struct x86_platform_ops x86_platform = { + .calibrate_tsc = native_calibrate_tsc, +}; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 6caa8c0..fabe745 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1320,11 +1320,11 @@ __init void lguest_init(void) /* Time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; - pv_time_ops.get_tsc_khz = lguest_tsc_khz; x86_init.resources.memory_setup = lguest_memory_setup; x86_init.irqs.intr_init = lguest_init_IRQ; x86_init.timers.timer_init = lguest_time_init; + x86_platform.calibrate_tsc = lguest_tsc_khz; /* * Now is a good time to look at the implementations of these functions diff --git 
a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 84826b8..ee8cac7 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -844,7 +844,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { static const struct pv_time_ops xen_time_ops __initdata = { .set_wallclock = xen_set_wallclock, .get_wallclock = xen_get_wallclock, - .get_tsc_khz = xen_tsc_khz, .sched_clock = xen_sched_clock, }; @@ -980,6 +979,8 @@ asmlinkage void __init xen_start_kernel(void) x86_init.timers.setup_percpu_clockev = x86_init_noop; x86_cpuinit.setup_percpu_clockev = x86_init_noop; + x86_platform.calibrate_tsc = xen_tsc_khz; + #ifdef CONFIG_X86_64 /* * Setup percpu state. We only need to do this for 64-bit -- cgit v1.1 From dd0a70c8f921708eba29ef9f30dde1f14a74af05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Aug 2009 16:51:07 +0200 Subject: x86: Move tsc_init to late_time_init We do not need the TSC before late_time_init. Move the tsc_init to the late time init code so we can also utilize HPET for calibration (which we claimed to do but never did except in some older kernel version). This also helps Moorestown to calibrate the TSC with the AHBT timer which needs to be initialized in late_time_init like HPET. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index fda0c34..fcece00 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -108,6 +108,7 @@ void __init hpet_time_init(void) static void x86_late_time_init(void) { x86_init.timers.timer_init(); + tsc_init(); } /* @@ -116,6 +117,5 @@ static void x86_late_time_init(void) */ void __init time_init(void) { - tsc_init(); late_time_init = x86_late_time_init; } -- cgit v1.1 From 47a3d5da70f411bc044ecd3c0593b158b09d0efa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 Aug 2009 15:03:59 +0200 Subject: x86: Add early platform detection Platforms like Moorestown require early setup and want to avoid the call to reserve_ebda_region. The x86_init override is too late when the MRST detection happens in setup_arch. Move the default i386 x86_init overrides and the call to reserve_ebda_region into a separate function which is called as the default of a switch case depending on the hardware_subarch id in boot params. This allows us to add a case for MRST and let MRST have its own early setup function. 
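A condensed sketch of the resulting dispatch (simplified from the head32.c hunk below; the MRST case itself is only wired up by a later patch in this series):

	/* in i386_start_kernel(), early boot */
	switch (boot_params.hdr.hardware_subarch) {
	default:
		/* probe_roms, reserve_resources, ioapic ids, EBDA */
		i386_default_early_setup();
		break;
	}

A platform that must skip reserve_ebda_region() simply adds its own case in front of the default instead of overriding an x86_init hook that runs too late.
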
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/x86_init.h | 3 +-- arch/x86/kernel/head32.c | 22 +++++++++++++++++----- arch/x86/kernel/head64.c | 3 ++- arch/x86/kernel/x86_init.c | 1 - 4 files changed, 20 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 20df518..b6c8942 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -2,6 +2,7 @@ #define _ASM_X86_PLATFORM_H #include +#include struct mpc_bus; struct mpc_cpu; @@ -34,14 +35,12 @@ struct x86_init_mpparse { * @probe_roms: probe BIOS roms * @reserve_resources: reserve the standard resources for the * platform - * @reserve_ebda_region: reserve the extended bios data area * @memory_setup: platform specific memory setup * */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); - void (*reserve_ebda_region)(void); char *(*memory_setup)(void); }; diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index a21398f..441c075 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -15,6 +15,17 @@ #include #include #include +#include + +static void __init i386_default_early_setup(void) +{ + /* Initilize 32bit specific setup functions */ + x86_init.resources.probe_roms = probe_roms; + x86_init.resources.reserve_resources = i386_reserve_resources; + x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; + + reserve_ebda_region(); +} void __init i386_start_kernel(void) { @@ -31,12 +42,13 @@ void __init i386_start_kernel(void) reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif - /* Initilize 32bit specific setup functions */ - x86_init.resources.probe_roms = probe_roms; - x86_init.resources.reserve_resources = i386_reserve_resources; - x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; - x86_init.resources.reserve_ebda_region(); + /* Call the subarch specific early setup function */ + switch (boot_params.hdr.hardware_subarch) { + default: + i386_default_early_setup(); + break; + } /* * At this point everything still needed from the boot loader diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index cead814..0b06cd7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -24,6 +24,7 @@ #include #include #include +#include static void __init zap_identity_mappings(void) { @@ -111,7 +112,7 @@ void __init x86_64_start_reservations(char *real_mode_data) } #endif - x86_init.resources.reserve_ebda_region(); + reserve_ebda_region(); /* * At this point everything still needed from the boot loader diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 13081b9..24be7f3 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -28,7 +28,6 @@ struct __initdata x86_init_ops x86_init = { .resources = { .probe_roms = x86_init_noop, .reserve_resources = reserve_standard_io_resources, - .reserve_ebda_region = reserve_ebda_region, .memory_setup = default_machine_specific_memory_setup, }, -- cgit v1.1 From 162bc7ab01a00eba1c5d614e64a51e1268ee3f96 Mon Sep 17 00:00:00 2001 From: "Pan, Jacob jun" Date: Fri, 28 Aug 2009 14:52:47 -0700 Subject: x86: Add hardware_subarch ID for Moorestown x86 bootprotocol 2.07 has introduced hardware_subarch ID in the boot parameters provided by FW. We use it to identify Moorestown platforms. 
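For illustration, a hedged sketch of both sides of the contract (field and enum names as defined in the bootparam.h hunk below; the consuming switch lives in head32.c):

	/* bootloader/firmware side: tag the platform in the zero page
	   (boot protocol >= 2.07) */
	boot_params.hdr.hardware_subarch = X86_SUBARCH_MRST;

	/* kernel side, early C setup (wired up by the next patch): */
	if (boot_params.hdr.hardware_subarch == X86_SUBARCH_MRST)
		x86_mrst_early_setup();
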
[ tglx: Cleanup and paravirt fix ] Signed-off-by: Jacob Pan Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/bootparam.h | 10 ++++++++++ arch/x86/kernel/head_32.S | 1 + 2 files changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index 1724e8d..283a9a1 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h @@ -109,4 +109,14 @@ struct boot_params { __u8 _pad9[276]; /* 0xeec */ } __attribute__((packed)); +enum { + X86_SUBARCH_PC = 0, + X86_SUBARCH_LGUEST, + X86_SUBARCH_XEN, + X86_SUBARCH_MRST, + X86_NR_SUBARCHS, +}; + + + #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index cc827ac..304e3f3 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -157,6 +157,7 @@ subarch_entries: .long default_entry /* normal x86/PC */ .long lguest_entry /* lguest hypervisor */ .long xen_entry /* Xen hypervisor */ + .long default_entry /* Moorestown MID */ num_subarch_entries = (. - subarch_entries) / 4 .previous #endif /* CONFIG_PARAVIRT */ -- cgit v1.1 From 3f4110a48a749a1aa1c54fb807afb3f32f49711c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 Aug 2009 14:54:20 +0200 Subject: x86: Add Moorestown early detection Moorestown MID devices need to be detected early in the boot process to setup and do not call x86_default_early_setup as there is no EBDA region to reserve. [ Copied the minimal code from Jacobs latest MRST series ] Signed-off-by: Thomas Gleixner Cc: Jacob Pan --- arch/x86/Kconfig | 13 +++++++++++++ arch/x86/include/asm/setup.h | 6 ++++++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/head32.c | 3 +++ arch/x86/kernel/mrst.c | 24 ++++++++++++++++++++++++ 5 files changed, 47 insertions(+) create mode 100644 arch/x86/kernel/mrst.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 13ffa5d..586d845 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -318,6 +318,7 @@ config X86_EXTENDED_PLATFORM SGI 320/540 (Visual Workstation) Summit/EXA (IBM x440) Unisys ES7000 IA32 series + Moorestown MID devices If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. @@ -377,6 +378,18 @@ config X86_ELAN If unsure, choose "PC-compatible" instead. +config X86_MRST + bool "Moorestown MID platform" + depends on X86_32 + depends on X86_EXTENDED_PLATFORM + ---help--- + Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin + Internet Device(MID) platform. Moorestown consists of two chips: + Lincroft (CPU core, graphics, and memory controller) and Langwell IOH. + Unlike standard x86 PCs, Moorestown does not have many legacy devices + nor standard legacy replacement devices/features. e.g. Moorestown does + not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. 
+ config X86_RDC321X bool "RDC R-321x SoC" depends on X86_32 diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 861e1fe..18e496c 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -49,6 +49,12 @@ extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); extern void setup_default_timer_irq(void); +#ifdef CONFIG_X86_MRST +extern void x86_mrst_early_setup(void); +#else +static inline void x86_mrst_early_setup(void) { } +#endif + #ifndef _SETUP /* diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ccf3db6..5f33316 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -104,6 +104,7 @@ obj-$(CONFIG_SCx200) += scx200.o scx200-y += scx200_32.o obj-$(CONFIG_OLPC) += olpc.o +obj-$(CONFIG_X86_MRST) += mrst.o microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 441c075..4f8e250 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -45,6 +45,9 @@ void __init i386_start_kernel(void) /* Call the subarch specific early setup function */ switch (boot_params.hdr.hardware_subarch) { + case X86_SUBARCH_MRST: + x86_mrst_early_setup(); + break; default: i386_default_early_setup(); break; diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c new file mode 100644 index 0000000..3b7078a --- /dev/null +++ b/arch/x86/kernel/mrst.c @@ -0,0 +1,24 @@ +/* + * mrst.c: Intel Moorestown platform specific setup code + * + * (C) Copyright 2008 Intel Corporation + * Author: Jacob Pan (jacob.jun.pan@intel.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include + +#include + +/* + * Moorestown specific x86_init function overrides and early setup + * calls. + */ +void __init x86_mrst_early_setup(void) +{ + x86_init.resources.probe_roms = x86_init_noop; + x86_init.resources.reserve_resources = x86_init_noop; +} -- cgit v1.1 From bc07844a33734c4b2f32ef26d942d2f3ef9302ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 Aug 2009 18:09:57 +0200 Subject: x86: Distangle ioapic and i8259 The proposed Moorestown support patches use an extra feature flag mechanism to make the ioapic work w/o an i8259. There is a much simpler solution. Most i8259 specific functions are already called dependend on the irq number less than NR_IRQS_LEGACY. Replacing that constant by a read_mostly variable which can be set to 0 by the platform setup code allows us to achieve the same without any special feature flags. That trivial change allows us to proceed with MRST w/o doing a full blown overhaul of the ioapic code which would delay MRST unduly. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 2 ++ arch/x86/kernel/apic/io_apic.c | 41 +++++++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 2b8aeb8..e1f89a1 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -143,6 +143,8 @@ extern int noioapicreroute; /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ extern int timer_through_8259; +extern void io_apic_disable_legacy(void); + /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5f46871..6c96129 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -91,6 +91,11 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +/* Number of legacy interrupts */ +static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; +/* GSI interrupts */ +static int nr_irqs_gsi = NR_IRQS_LEGACY; + #if defined (CONFIG_MCA) || defined (CONFIG_EISA) int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif @@ -172,6 +177,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = { [15] = { .vector = IRQ15_VECTOR, }, }; +void __init io_apic_disable_legacy(void) +{ + nr_legacy_irqs = 0; + nr_irqs_gsi = 0; +} + int __init arch_early_irq_init(void) { struct irq_cfg *cfg; @@ -189,7 +200,7 @@ int __init arch_early_irq_init(void) desc->chip_data = &cfg[i]; zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); - if (i < NR_IRQS_LEGACY) + if (i < nr_legacy_irqs) cpumask_setall(cfg[i].domain); } @@ -883,7 +894,7 @@ static int __init find_isa_irq_apic(int irq, int type) */ static int EISA_ELCR(unsigned int irq) { - if (irq < NR_IRQS_LEGACY) { + if (irq < nr_legacy_irqs) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@ -1480,7 +1491,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq } ioapic_register_intr(irq, desc, trigger); - if (irq < NR_IRQS_LEGACY) + if (irq < nr_legacy_irqs) disable_8259A_irq(irq); ioapic_write_entry(apic_id, pin, entry); @@ -1851,7 +1862,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (apic_verbosity == APIC_QUIET) + if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1914,6 +1925,10 @@ void __init enable_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } + + if (!nr_legacy_irqs) + return; + for(apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ @@ -1968,6 +1983,9 @@ void disable_IO_APIC(void) */ clear_IO_APIC(); + if (!nr_legacy_irqs) + return; + /* * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode @@ -2198,7 +2216,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); - if (irq < NR_IRQS_LEGACY) { + if (irq < nr_legacy_irqs) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; @@ -2709,7 +2727,7 @@ static inline void init_IO_APIC_traps(void) * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < NR_IRQS_LEGACY) + if (irq < nr_legacy_irqs) make_8259A_irq(irq); else /* Strange. Oh, well.. */ @@ -3045,7 +3063,7 @@ out: * the I/O APIC in all cases now. No actual device should request * it anyway. --macro */ -#define PIC_IRQS (1 << PIC_CASCADE_IR) +#define PIC_IRQS (1UL << PIC_CASCADE_IR) void __init setup_IO_APIC(void) { @@ -3053,8 +3071,7 @@ void __init setup_IO_APIC(void) /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ - - io_apic_irqs = ~PIC_IRQS; + io_apic_irqs = nr_legacy_irqs ? 
~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); /* @@ -3065,7 +3082,8 @@ void __init setup_IO_APIC(void) sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); - check_timer(); + if (nr_legacy_irqs) + check_timer(); } /* @@ -3166,7 +3184,6 @@ static int __init ioapic_init_sysfs(void) device_initcall(ioapic_init_sysfs); -static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ @@ -3907,7 +3924,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= NR_IRQS_LEGACY) { + if (irq >= nr_legacy_irqs) { cfg = desc->chip_data; add_pin_to_irq_node(cfg, node, ioapic, pin); } -- cgit v1.1 From e11dadabf443dc3101f28b74d8b9d56870a87db4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 31 Aug 2009 15:18:40 +0200 Subject: x86: apic namespace cleanup boot_cpu_physical_apicid is a global variable and used as function argument as well. Rename the function arguments to avoid confusion. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/apic.h | 14 +++++++------- arch/x86/kernel/apic/bigsmp_32.c | 2 +- arch/x86/kernel/apic/numaq_32.c | 2 +- arch/x86/kernel/apic/summit_32.c | 2 +- arch/x86/kernel/setup.c | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6f15b29..d6a0f26 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -292,7 +292,7 @@ struct apic { int (*cpu_present_to_apicid)(int mps_cpu); physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); void (*setup_portio_remap)(void); - int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); + int (*check_phys_apicid_present)(int phys_apicid); void (*enable_apic_mode)(void); int (*phys_pkg_id)(int cpuid_apic, int index_msb); @@ -426,7 +426,7 @@ extern struct apic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); +extern int default_check_phys_apicid_present(int phys_apicid); #endif static inline void default_wait_for_init_deassert(atomic_t *deassert) @@ -542,9 +542,9 @@ static inline int __default_cpu_present_to_apicid(int mps_cpu) } static inline int -__default_check_phys_apicid_present(int boot_cpu_physical_apicid) +__default_check_phys_apicid_present(int phys_apicid) { - return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); + return physid_isset(phys_apicid, phys_cpu_present_map); } #ifdef CONFIG_X86_32 @@ -554,13 +554,13 @@ static inline int default_cpu_present_to_apicid(int mps_cpu) } static inline int -default_check_phys_apicid_present(int boot_cpu_physical_apicid) +default_check_phys_apicid_present(int phys_apicid) { - return __default_check_phys_apicid_present(boot_cpu_physical_apicid); + return __default_check_phys_apicid_present(phys_apicid); } #else extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); +extern int default_check_phys_apicid_present(int phys_apicid); #endif static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 676cdac..77a0641 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -112,7 +112,7 @@ static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) return 
physids_promote(0xFFL); } -static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) +static int bigsmp_check_phys_apicid_present(int phys_apicid) { return 1; } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index f1ebed6b..efa00e2 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -413,7 +413,7 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) +static inline int numaq_check_phys_apicid_present(int phys_apicid) { return 1; } diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index eafdfbd1..645ecc4 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -272,7 +272,7 @@ static physid_mask_t summit_apicid_to_cpu_present(int apicid) return physid_mask_of_physid(0); } -static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) +static int summit_check_phys_apicid_present(int physical_apicid) { return 1; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2d93026..fda22ec 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -129,9 +129,9 @@ int default_cpu_present_to_apicid(int mps_cpu) return __default_cpu_present_to_apicid(mps_cpu); } -int default_check_phys_apicid_present(int boot_cpu_physical_apicid) +int default_check_phys_apicid_present(int phys_apicid) { - return __default_check_phys_apicid_present(boot_cpu_physical_apicid); + return __default_check_phys_apicid_present(phys_apicid); } #endif -- cgit v1.1 From 132ec92f3f70fe365c1f4b8d46e66cf8a2a16880 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 31 Aug 2009 09:50:09 +0200 Subject: x86, msr: Add rd/wrmsr interfaces with preset registers native_{rdmsr,wrmsr}_safe_regs are two new interfaces which allow presetting of a subset of eight x86 GPRs before executing the rd/wrmsr instructions. This is needed at least on AMD K8 for accessing an erratum workaround MSR. Originally based on an idea by H. Peter Anvin. Signed-off-by: Borislav Petkov LKML-Reference: <1251705011-18636-1-git-send-email-petkovbb@gmail.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/msr.h | 13 ++++++ arch/x86/include/asm/paravirt.h | 16 +++++++ arch/x86/kernel/paravirt.c | 2 + arch/x86/lib/Makefile | 1 + arch/x86/lib/msr-reg.S | 98 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 arch/x86/lib/msr-reg.S (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 48ad9d2..184d4a1 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -113,6 +113,9 @@ notrace static inline int native_write_msr_safe(unsigned int msr, extern unsigned long long native_read_tsc(void); +extern int native_rdmsr_safe_regs(u32 *regs); +extern int native_wrmsr_safe_regs(u32 *regs); + static __always_inline unsigned long long __native_read_tsc(void) { DECLARE_ARGS(val, low, high); @@ -189,6 +192,16 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) return err; } +static inline int rdmsr_safe_regs(u32 *regs) +{ + return native_rdmsr_safe_regs(regs); +} + +static inline int wrmsr_safe_regs(u32 *regs) +{ + return native_wrmsr_safe_regs(regs); +} + #define rdtscl(low) \ ((low) = (u32)__native_read_tsc()) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4fb37c8..1705944 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -168,7 +168,9 @@ struct pv_cpu_ops { err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ u64 (*read_msr_amd)(unsigned int msr, int *err); u64 (*read_msr)(unsigned int msr, int *err); + int (*rdmsr_regs)(u32 *regs); int (*write_msr)(unsigned int msr, unsigned low, unsigned high); + int (*wrmsr_regs)(u32 *regs); u64 (*read_tsc)(void); u64 (*read_pmc)(int counter); @@ -820,6 +822,12 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) { return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); } + +static inline int paravirt_rdmsr_regs(u32 *regs) +{ + return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); +} + static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) { return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); @@ -829,6 +837,11 @@ static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); } +static inline int paravirt_wrmsr_regs(u32 *regs) +{ + return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs); +} + /* These should all do BUG_ON(_err), but our headers are too tangled. 
*/ #define rdmsr(msr, val1, val2) \ do { \ @@ -862,6 +875,9 @@ do { \ _err; \ }) +#define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs) +#define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs) + static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) { int err; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 70ec9b9..67594af 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -362,8 +362,10 @@ struct pv_cpu_ops pv_cpu_ops = { #endif .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, + .rdmsr_regs = native_rdmsr_safe_regs, .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, + .wrmsr_regs = native_wrmsr_safe_regs, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, .read_tscp = native_read_tscp, diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 07c3189..b59c064 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -8,6 +8,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += msr-reg.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S new file mode 100644 index 0000000..51f1bb3 --- /dev/null +++ b/arch/x86/lib/msr-reg.S @@ -0,0 +1,98 @@ +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +/* + * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); + * + * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] + * + */ +.macro op_safe_regs op:req +ENTRY(native_\op\()_safe_regs) + push %rbx + push %rbp + push $0 /* Return value */ + push %rdi + movl (%rdi), %eax + movl 4(%rdi), %ecx + movl 8(%rdi), %edx + movl 12(%rdi), %ebx + movl 20(%rdi), %ebp + movl 24(%rdi), %esi + movl 28(%rdi), %edi +1: \op +2: movl %edi, %r10d + pop %rdi + movl %eax, (%rdi) + movl %ecx, 4(%rdi) + movl %edx, 8(%rdi) + movl %ebx, 12(%rdi) + movl %ebp, 20(%rdi) + movl %esi, 24(%rdi) + movl %r10d, 28(%rdi) + pop %rax + pop %rbp + pop %rbx + ret +3: + movq $-EIO, 8(%rsp) + jmp 2b + .section __ex_table,"ax" + .balign 4 + .quad 1b, 3b + .previous +ENDPROC(native_\op\()_safe_regs) +.endm + +#else /* X86_32 */ + +.macro op_safe_regs op:req +ENTRY(native_\op\()_safe_regs) + push %ebx + push %ebp + push %esi + push %edi + push $0 /* Return value */ + push %eax + movl 4(%eax), %ecx + movl 8(%eax), %edx + movl 12(%eax), %ebx + movl 20(%eax), %ebp + movl 24(%eax), %esi + movl 28(%eax), %edi + movl (%eax), %eax +1: \op +2: push %eax + movl 4(%esp), %eax + pop (%eax) + addl $4, %esp + movl %ecx, 4(%eax) + movl %edx, 8(%eax) + movl %ebx, 12(%eax) + movl %ebp, 20(%eax) + movl %esi, 24(%eax) + movl %edi, 28(%eax) + pop %eax + pop %edi + pop %esi + pop %ebp + pop %ebx + ret +3: + movl $-EIO, 4(%esp) + jmp 2b + .section __ex_table,"ax" + .balign 4 + .long 1b, 3b + .previous +ENDPROC(native_\op\()_safe_regs) +.endm + +#endif + +op_safe_regs rdmsr +op_safe_regs wrmsr + -- cgit v1.1 From 177fed1ee8d727c39601ce9fc2299b4cb25a718e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 31 Aug 2009 09:50:10 +0200 Subject: x86, msr: Rewrite AMD rd/wrmsr variants Switch them to native_{rd,wr}msr_safe_regs and remove pv_cpu_ops.read_msr_amd. Signed-off-by: Borislav Petkov LKML-Reference: <1251705011-18636-2-git-send-email-petkovbb@gmail.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/msr.h | 38 +++++++++++++++++++++----------------- arch/x86/include/asm/paravirt.h | 26 ++++++++++++++++++++------ arch/x86/kernel/paravirt.c | 1 - 3 files changed, 41 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 184d4a1..09c5ca7 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -71,22 +71,6 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, return EAX_EDX_VAL(val, low, high); } -static inline unsigned long long native_read_msr_amd_safe(unsigned int msr, - int *err) -{ - DECLARE_ARGS(val, low, high); - - asm volatile("2: rdmsr ; xor %0,%0\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %3,%0 ; jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) - : "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT)); - return EAX_EDX_VAL(val, low, high); -} - static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { @@ -184,14 +168,34 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) *p = native_read_msr_safe(msr, &err); return err; } + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { + u32 gprs[8] = { 0 }; int err; - *p = native_read_msr_amd_safe(msr, &err); + gprs[1] = msr; + gprs[7] = 0x9c5a203a; + + err = native_rdmsr_safe_regs(gprs); + + *p = gprs[0] | ((u64)gprs[2] << 32); + return err; } +static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +{ + u32 gprs[8] = { 0 }; + + gprs[0] = (u32)val; + gprs[1] = msr; + gprs[2] = val >> 32; + gprs[7] = 0x9c5a203a; + + return native_wrmsr_safe_regs(gprs); +} + static inline int rdmsr_safe_regs(u32 *regs) { return native_rdmsr_safe_regs(regs); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 1705944..1157493 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -166,7 +166,6 @@ struct pv_cpu_ops { /* MSR, PMC and TSR operations. err = 0/-EFAULT. wrmsr returns 0/-EFAULT. 
*/ - u64 (*read_msr_amd)(unsigned int msr, int *err); u64 (*read_msr)(unsigned int msr, int *err); int (*rdmsr_regs)(u32 *regs); int (*write_msr)(unsigned int msr, unsigned low, unsigned high); @@ -828,10 +827,6 @@ static inline int paravirt_rdmsr_regs(u32 *regs) return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); } -static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) -{ - return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); -} static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); @@ -887,12 +882,31 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) } static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { + u32 gprs[8] = { 0 }; int err; - *p = paravirt_read_msr_amd(msr, &err); + gprs[1] = msr; + gprs[7] = 0x9c5a203a; + + err = paravirt_rdmsr_regs(gprs); + + *p = gprs[0] | ((u64)gprs[2] << 32); + return err; } +static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +{ + u32 gprs[8] = { 0 }; + + gprs[0] = (u32)val; + gprs[1] = msr; + gprs[2] = val >> 32; + gprs[7] = 0x9c5a203a; + + return paravirt_wrmsr_regs(gprs); +} + static inline u64 paravirt_read_tsc(void) { return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 67594af..f5b0b4a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -363,7 +363,6 @@ struct pv_cpu_ops pv_cpu_ops = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, .rdmsr_regs = native_rdmsr_safe_regs, - .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, .wrmsr_regs = native_wrmsr_safe_regs, .read_tsc = native_read_tsc, -- cgit v1.1 From 6b0f43ddfa358dc71ad2a2d57bce5906c1c5dc1a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 31 Aug 2009 09:50:11 +0200 Subject: x86, AMD: Disable wrongly set X86_FEATURE_LAHF_LM CPUID bit fbd8b1819e80ac5a176d085fdddc3a34d1499318 turns off the bit for /proc/cpuinfo. However, a proper/full fix would be to additionally turn off the bit in the CPUID output so that future callers get correct CPU features info. Do that by basically reversing what the BIOS wrongfully does at boot. Signed-off-by: Borislav Petkov LKML-Reference: <1251705011-18636-3-git-send-email-petkovbb@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 63fddcd0..0a717fc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -404,9 +404,18 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* * Some BIOSes incorrectly force this feature, but only K8 * revision D (model = 0x14) and later actually support it. + * (AMD Erratum #110, docId: 25759). */ - if (c->x86_model < 0x14) + if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { + u64 val; + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); + if (!rdmsrl_amd_safe(0xc001100d, &val)) { + val &= ~(1ULL << 32); + wrmsrl_amd_safe(0xc001100d, val); + } + } + } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); -- cgit v1.1 From fe9b4e4e40ffdabbd385cdf171cb861c2fd517c0 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Mon, 31 Aug 2009 11:53:23 -0700 Subject: x86, asm: Add 32-bit versions of the combined CFI macros Add 32-bit versions of the combined CFI macros, equivalent to the 64-bit ones except, obviously, operating on 32-bit stack words. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/dwarf2.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 3afc5e8..ae6253a 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -87,9 +87,25 @@ CFI_RESTORE \reg .endm #else /*!CONFIG_X86_64*/ + .macro pushl_cfi reg + pushl \reg + CFI_ADJUST_CFA_OFFSET 4 + .endm - /* 32bit defenitions are missed yet */ + .macro popl_cfi reg + popl \reg + CFI_ADJUST_CFA_OFFSET -4 + .endm + .macro movl_cfi reg offset=0 + movl %\reg, \offset(%esp) + CFI_REL_OFFSET \reg, \offset + .endm + + .macro movl_cfi_restore offset reg + movl \offset(%esp), %\reg + CFI_RESTORE \reg + .endm #endif /*!CONFIG_X86_64*/ #endif /*__ASSEMBLY__*/ -- cgit v1.1 From 709972b1f6f70535d1fddbe1243a51b90c408a1c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 31 Aug 2009 11:57:20 -0700 Subject: x86, asm: Make _ASM_EXTABLE() usable from assembly code We have had this convenient macro _ASM_EXTABLE() to generate exception table entry in inline assembly. Make it also usable for pure assembly. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/asm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 56be78f..b3ed1e1 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -3,7 +3,7 @@ #ifdef __ASSEMBLY__ # define __ASM_FORM(x) x -# define __ASM_EX_SEC .section __ex_table +# define __ASM_EX_SEC .section __ex_table, "a" #else # define __ASM_FORM(x) " " #x " " # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" @@ -38,10 +38,18 @@ #define _ASM_DI __ASM_REG(di) /* Exception table entry */ +#ifdef __ASSEMBLY__ +# define _ASM_EXTABLE(from,to) \ + __ASM_EX_SEC ; \ + _ASM_ALIGN ; \ + _ASM_PTR from , to ; \ + .previous +#else # define _ASM_EXTABLE(from,to) \ __ASM_EX_SEC \ _ASM_ALIGN "\n" \ _ASM_PTR #from "," #to "\n" \ " .previous\n" +#endif #endif /* _ASM_X86_ASM_H */ -- cgit v1.1 From 79c5dca3619d6ae15815eec14cd7a43db5f38b47 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 31 Aug 2009 13:59:53 -0700 Subject: x86, msr: CFI annotations, cleanups for msr-reg.S Add CFI annotations for native_{rd,wr}msr_safe_regs(). Simplify the 64-bit implementation: we don't allow the upper half registers to be set, and so we can use them to carry state across the operation. Signed-off-by: H. 
Peter Anvin Cc: Borislav Petkov LKML-Reference: <1251705011-18636-1-git-send-email-petkovbb@gmail.com> --- arch/x86/lib/msr-reg.S | 80 ++++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 51f1bb3..9e8cdcf 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -12,10 +13,11 @@ */ .macro op_safe_regs op:req ENTRY(native_\op\()_safe_regs) - push %rbx - push %rbp - push $0 /* Return value */ - push %rdi + CFI_STARTPROC + pushq_cfi %rbx + pushq_cfi %rbp + movq %rdi, %r10 /* Save pointer */ + xorl %r11d, %r11d /* Return value */ movl (%rdi), %eax movl 4(%rdi), %ecx movl 8(%rdi), %edx @@ -23,27 +25,26 @@ ENTRY(native_\op\()_safe_regs) movl 20(%rdi), %ebp movl 24(%rdi), %esi movl 28(%rdi), %edi + CFI_REMEMBER_STATE 1: \op -2: movl %edi, %r10d - pop %rdi - movl %eax, (%rdi) - movl %ecx, 4(%rdi) - movl %edx, 8(%rdi) - movl %ebx, 12(%rdi) - movl %ebp, 20(%rdi) - movl %esi, 24(%rdi) - movl %r10d, 28(%rdi) - pop %rax - pop %rbp - pop %rbx +2: movl %eax, (%r10) + movl %r11d, %eax /* Return value */ + movl %ecx, 4(%r10) + movl %edx, 8(%r10) + movl %ebx, 12(%r10) + movl %ebp, 20(%r10) + movl %esi, 24(%r10) + movl %edi, 28(%r10) + popq_cfi %rbp + popq_cfi %rbx ret 3: - movq $-EIO, 8(%rsp) + CFI_RESTORE_STATE + movl $-EIO, %r11d jmp 2b - .section __ex_table,"ax" - .balign 4 - .quad 1b, 3b - .previous + + _ASM_EXTABLE(1b, 3b) + CFI_ENDPROC ENDPROC(native_\op\()_safe_regs) .endm @@ -51,12 +52,13 @@ ENDPROC(native_\op\()_safe_regs) .macro op_safe_regs op:req ENTRY(native_\op\()_safe_regs) - push %ebx - push %ebp - push %esi - push %edi - push $0 /* Return value */ - push %eax + CFI_STARTPROC + pushl_cfi %ebx + pushl_cfi %ebp + pushl_cfi %esi + pushl_cfi %edi + pushl_cfi $0 /* Return value */ + pushl_cfi %eax movl 4(%eax), %ecx movl 8(%eax), %edx movl 12(%eax), %ebx @@ -64,30 +66,32 @@ ENTRY(native_\op\()_safe_regs) movl 24(%eax), %esi movl 28(%eax), %edi movl (%eax), %eax + CFI_REMEMBER_STATE 1: \op -2: push %eax +2: pushl_cfi %eax movl 4(%esp), %eax - pop (%eax) + popl_cfi (%eax) addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 movl %ecx, 4(%eax) movl %edx, 8(%eax) movl %ebx, 12(%eax) movl %ebp, 20(%eax) movl %esi, 24(%eax) movl %edi, 28(%eax) - pop %eax - pop %edi - pop %esi - pop %ebp - pop %ebx + popl_cfi %eax + popl_cfi %edi + popl_cfi %esi + popl_cfi %ebp + popl_cfi %ebx ret 3: + CFI_RESTORE_STATE movl $-EIO, 4(%esp) jmp 2b - .section __ex_table,"ax" - .balign 4 - .long 1b, 3b - .previous + + _ASM_EXTABLE(1b, 3b) + CFI_ENDPROC ENDPROC(native_\op\()_safe_regs) .endm -- cgit v1.1 From 0cc0213e73af5963eca259c84876937c20689dbd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 31 Aug 2009 14:23:29 -0700 Subject: x86, msr: Have the _safe MSR functions return -EIO, not -EFAULT For some reason, the _safe MSR functions returned -EFAULT, not -EIO. However, the only user which cares about the return code as anything other than a boolean is the MSR driver, which wants -EIO. Change it to -EIO across the board. Signed-off-by: H. 
Peter Anvin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/include/asm/msr.h | 4 ++-- arch/x86/kernel/msr.c | 10 ++-------- arch/x86/xen/enlighten.c | 2 +- 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 09c5ca7..943fdd5 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -67,7 +67,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, ".previous\n\t" _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), [fault] "i" (-EFAULT)); + : "c" (msr), [fault] "i" (-EIO)); return EAX_EDX_VAL(val, low, high); } @@ -90,7 +90,7 @@ notrace static inline int native_write_msr_safe(unsigned int msr, _ASM_EXTABLE(2b, 3b) : [err] "=a" (err) : "c" (msr), "0" (low), "d" (high), - [fault] "i" (-EFAULT) + [fault] "i" (-EIO) : "memory"); return err; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 98fd6cd..2cfbb4b 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -80,11 +80,8 @@ static ssize_t msr_read(struct file *file, char __user *buf, for (; count; count -= 8) { err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); - if (err) { - if (err == -EFAULT) /* Fix idiotic error code */ - err = -EIO; + if (err) break; - } if (copy_to_user(tmp, &data, 8)) { err = -EFAULT; break; @@ -115,11 +112,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf, break; } err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); - if (err) { - if (err == -EFAULT) /* Fix idiotic error code */ - err = -EIO; + if (err) break; - } tmp += 2; bytes += 8; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0a1700a..a8432d8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -713,7 +713,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) set: base = ((u64)high << 32) | low; if (HYPERVISOR_set_segment_base(which, base) != 0) - ret = -EFAULT; + ret = -EIO; break; #endif -- cgit v1.1 From 8b956bf1f0f2b552ed93cf6cafe823edff298b3b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 31 Aug 2009 14:13:48 -0700 Subject: x86, msr: Create _on_cpu helpers for {rw,wr}msr_safe_regs() Create _on_cpu helpers for {rw,wr}msr_safe_regs() analogously with the other MSR functions. This will be necessary to add support for these to the MSR driver. Signed-off-by: H. 
Peter Anvin Cc: Borislav Petkov --- arch/x86/include/asm/msr.h | 18 +++++++++++++---- arch/x86/lib/msr.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 943fdd5..8e56712 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -97,8 +97,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr, extern unsigned long long native_read_tsc(void); -extern int native_rdmsr_safe_regs(u32 *regs); -extern int native_wrmsr_safe_regs(u32 *regs); +extern int native_rdmsr_safe_regs(u32 regs[8]); +extern int native_wrmsr_safe_regs(u32 regs[8]); static __always_inline unsigned long long __native_read_tsc(void) { @@ -196,12 +196,12 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) return native_wrmsr_safe_regs(gprs); } -static inline int rdmsr_safe_regs(u32 *regs) +static inline int rdmsr_safe_regs(u32 regs[8]) { return native_rdmsr_safe_regs(regs); } -static inline int wrmsr_safe_regs(u32 *regs) +static inline int wrmsr_safe_regs(u32 regs[8]) { return native_wrmsr_safe_regs(regs); } @@ -245,6 +245,8 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); #else /* CONFIG_SMP */ static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { @@ -275,6 +277,14 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { return wrmsr_safe(msr_no, l, h); } +static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) +{ + return rdmsr_safe_regs(regs); +} +static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) +{ + return wrmsr_safe_regs(regs); +} #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index caa24ac..33a1e3c 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -175,3 +175,52 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) return err ? err : rv.err; } EXPORT_SYMBOL(wrmsr_safe_on_cpu); + +/* + * These variants are significantly slower, but allows control over + * the entire 32-bit GPR set. + */ +struct msr_regs_info { + u32 *regs; + int err; +}; + +static void __rdmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = rdmsr_safe_regs(rv->regs); +} + +static void __wrmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = wrmsr_safe_regs(rv->regs); +} + +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); + +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); -- cgit v1.1 From ff55df53dfdd338906c8ba9d1f4a759b86b869d5 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Mon, 31 Aug 2009 14:16:57 -0700 Subject: x86, msr: Export the register-setting MSR functions via /dev/*/msr Make it possible to access the all-register-setting/getting MSR functions via the MSR driver. This is implemented as an ioctl() on the standard MSR device node. Signed-off-by: H. Peter Anvin Cc: Borislav Petkov --- arch/x86/include/asm/msr.h | 10 +++++++-- arch/x86/kernel/msr.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 8e56712..7e2b6ba 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -3,10 +3,16 @@ #include -#ifdef __KERNEL__ #ifndef __ASSEMBLY__ #include +#include + +#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8]) +#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8]) + +#ifdef __KERNEL__ + #include #include #include @@ -286,6 +292,6 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_MSR_H */ diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 2cfbb4b..7dd9500 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -1,6 +1,7 @@ /* ----------------------------------------------------------------------- * * * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved + * Copyright 2009 Intel Corporation; author: H. Peter Anvin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -121,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf, return bytes ? bytes : err; } +static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) +{ + u32 __user *uregs = (u32 __user *)arg; + u32 regs[8]; + int cpu = iminor(file->f_path.dentry->d_inode); + int err; + + switch (ioc) { + case X86_IOC_RDMSR_REGS: + if (!(file->f_mode & FMODE_READ)) { + err = -EBADF; + break; + } + if (copy_from_user(®s, uregs, sizeof regs)) { + err = -EFAULT; + break; + } + err = rdmsr_safe_regs_on_cpu(cpu, regs); + if (err) + break; + if (copy_to_user(uregs, ®s, sizeof regs)) + err = -EFAULT; + break; + + case X86_IOC_WRMSR_REGS: + if (!(file->f_mode & FMODE_WRITE)) { + err = -EBADF; + break; + } + if (copy_from_user(®s, uregs, sizeof regs)) { + err = -EFAULT; + break; + } + err = wrmsr_safe_regs_on_cpu(cpu, regs); + if (err) + break; + if (copy_to_user(uregs, ®s, sizeof regs)) + err = -EFAULT; + break; + + default: + err = -ENOTTY; + break; + } + + return err; +} + static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); @@ -151,6 +200,8 @@ static const struct file_operations msr_fops = { .read = msr_read, .write = msr_write, .open = msr_open, + .unlocked_ioctl = msr_ioctl, + .compat_ioctl = msr_ioctl, }; static int __cpuinit msr_device_create(int cpu) -- cgit v1.1 From acde31dc467797ccae3a55b791a77af446cce018 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 27 Aug 2009 14:29:20 +0100 Subject: kmemleak: Ignore the aperture memory hole on x86_64 This block is allocated with alloc_bootmem() and scanned by kmemleak but the kernel direct mapping may no longer exist. This patch tells kmemleak to ignore this memory hole. The dma32_bootmem_ptr in dma32_reserve_bootmem() is also ignored. 
Signed-off-by: Catalin Marinas Acked-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 6 ++++++ arch/x86/kernel/pci-dma.c | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 676debf..128111d 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void) * code for safe */ p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); + /* + * Kmemleak should not scan this block as it may not be mapped via the + * kernel direct mapping. + */ + kmemleak_ignore(p); if (!p || __pa(p)+aper_size > 0xffffffff) { printk(KERN_ERR "Cannot allocate aperture memory hole (%p,%uK)\n", diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bc..fa80f60 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -88,6 +89,11 @@ void __init dma32_reserve_bootmem(void) size = roundup(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 512ULL<<20); + /* + * Kmemleak should not scan this block as it may not be mapped via the + * kernel direct mapping. + */ + kmemleak_ignore(dma32_bootmem_ptr); if (dma32_bootmem_ptr) dma32_bootmem_size = size; else -- cgit v1.1 From db39d5529d347de5e2eec1a72d67fcfacae6c5a2 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Fri, 21 Aug 2009 19:15:28 -0500 Subject: [CPUFREQ] Powernow-k8: Enable more than 2 low P-states Remove an obsolete check that used to prevent there being more than 2 low P-states. Now that low-to-low P-states changes are enabled, it prevents otherwise workable configurations with multiple low P-states. 
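[Annotation, not part of the patch: with the cntlofreq cap gone, the per-entry validation left in fill_powernow_table_fidvid() reduces, roughly, to the frequency cross-check sketched below; rejected slots are invalidated individually instead of the whole table being refused, which is what lets multiple low P-states coexist.]

	/* Condensed sketch, not verbatim driver code; runs inside
	 * fill_powernow_table_fidvid(), where data, powernow_table,
	 * PFX and invalidate_entry() are in scope. */
	for (i = 0; i < data->acpi_data.state_count; i++) {
		u32 freq = powernow_table[i].frequency;
		u32 acpi_khz = data->acpi_data.states[i].core_frequency * 1000;

		if (freq != acpi_khz) {
			printk(KERN_INFO PFX "invalid freq entries "
			       "%u kHz vs. %u kHz\n", freq, acpi_khz);
			invalidate_entry(data, i);
			continue;
		}
	}
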
Signed-off-by: Mark Langsdorf Tested-by: Krists Krilovs Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 2a50ef8..0cbce04 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -854,6 +854,10 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } + /* fill in data */ + data->numps = data->acpi_data.state_count; + powernow_k8_acpi_pst_values(data, 0); + if (cpu_family == CPU_HW_PSTATE) ret_val = fill_powernow_table_pstate(data, powernow_table); else @@ -866,11 +870,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) powernow_table[data->acpi_data.state_count].index = 0; data->powernow_table = powernow_table; - /* fill in data */ - data->numps = data->acpi_data.state_count; if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) print_basics(data); - powernow_k8_acpi_pst_values(data, 0); /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); @@ -941,7 +942,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) { int i; - int cntlofreq = 0; for (i = 0; i < data->acpi_data.state_count; i++) { u32 fid; @@ -982,27 +982,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, continue; } - /* verify only 1 entry from the lo frequency table */ - if (fid < HI_FID_TABLE_BOTTOM) { - if (cntlofreq) { - /* if both entries are the same, - * ignore this one ... */ - if ((freq != powernow_table[cntlofreq].frequency) || - (index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX - "Too many lo freq table " - "entries\n"); - return 1; - } - - dprintk("double low frequency table entry, " - "ignoring it.\n"); - invalidate_entry(data, i); - continue; - } else - cntlofreq = i; - } - if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { printk(KERN_INFO PFX "invalid freq entries " "%u kHz vs. %u kHz\n", freq, -- cgit v1.1 From 1a8e42fa81e62d47cc471f7764f906bb42b27a54 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 26 Aug 2009 13:19:37 -0400 Subject: [CPUFREQ] Create a blacklist for processors that should not load the acpi-cpufreq module. Create a blacklist for processors that should not load the acpi-cpufreq module. The initial entry in the blacklist function is the Intel 0f68 processor. It's specification update mentions errata AL30 which implies that cpufreq should not run on this processor. 
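[Annotation, not part of the patch: "0f68" is the processor's raw CPUID signature, and the blacklist test in the hunk below is simply that signature decoded. For signatures whose extended family/model bits are zero, as here, the decode is:]

	/* 0x0f68 -> family 0xf (15), model 6, stepping 8, matching the
	 * (c->x86 == 15) && (c->x86_model == 6) && (c->x86_mask == 8)
	 * test in the patch. */
	unsigned int sig      = 0x0f68;
	unsigned int stepping = sig & 0xf;        /* 8 */
	unsigned int model    = (sig >> 4) & 0xf; /* 6 */
	unsigned int family   = (sig >> 8) & 0xf; /* 15 */
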
Signed-off-by: Prarit Bhargava Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ae9b5032..badce50 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -588,6 +588,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { }, { } }; + +static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) +{ + /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf + * AL30: A Machine Check Exception (MCE) Occurring during an + * Enhanced Intel SpeedStep Technology Ratio Change May Cause + * Both Processor Cores to Lock Up when HT is enabled*/ + if (c->x86_vendor == X86_VENDOR_INTEL) { + if ((c->x86 == 15) && + (c->x86_model == 6) && + (c->x86_mask == 8) && smt_capable()) + return -ENODEV; + } + return 0; +} #endif static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) @@ -602,6 +617,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) dprintk("acpi_cpufreq_cpu_init\n"); +#ifdef CONFIG_SMP + result = acpi_cpufreq_blacklist(c); + if (result) + return result; +#endif + data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); if (!data) return -ENOMEM; -- cgit v1.1 From f6909f394c2d4a0a71320797df72d54c49c5927e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 1 Sep 2009 13:31:52 -0700 Subject: x86, msr: fix msr-reg.S compilation with gas 2.16.1 msr-reg.S used the :req option on a macro argument, which wasn't supported by gas 2.16.1 (but apparently by some earlier versions of gas, just to be confusing.) It isn't necessary, so just remove it. Signed-off-by: H. Peter Anvin Cc: Borislav Petkov --- arch/x86/lib/msr-reg.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 9e8cdcf..d5eaf53 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -11,7 +11,7 @@ * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] * */ -.macro op_safe_regs op:req +.macro op_safe_regs op ENTRY(native_\op\()_safe_regs) CFI_STARTPROC pushq_cfi %rbx -- cgit v1.1 From 69575d388603365f2afbf4166df93152df59b165 Mon Sep 17 00:00:00 2001 From: Shane Wang Date: Tue, 1 Sep 2009 18:25:07 -0700 Subject: x86, intel_txt: clean up the impact on generic code, unbreak non-x86 Move tboot.h from asm to linux to fix the build errors of intel_txt patch on non-X86 platforms. Remove the tboot code from generic code init/main.c and kernel/cpu.c. Signed-off-by: Shane Wang Signed-off-by: H. 
Peter Anvin --- arch/x86/Kconfig | 4 + arch/x86/include/asm/tboot.h | 197 ------------------------------------------- arch/x86/kernel/reboot.c | 3 +- arch/x86/kernel/setup.c | 3 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tboot.c | 58 ++++++++++--- 6 files changed, 54 insertions(+), 213 deletions(-) delete mode 100644 arch/x86/include/asm/tboot.h (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6..b66f210 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -178,6 +178,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_INTEL_TXT + def_bool y + depends on EXPERIMENTAL && DMAR && ACPI + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool diff --git a/arch/x86/include/asm/tboot.h b/arch/x86/include/asm/tboot.h deleted file mode 100644 index b13929d..0000000 --- a/arch/x86/include/asm/tboot.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * tboot.h: shared data structure with tboot and kernel and functions - * used by kernel for runtime support of Intel(R) Trusted - * Execution Technology - * - * Copyright (c) 2006-2009, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
- * - */ - -#ifndef _ASM_TBOOT_H -#define _ASM_TBOOT_H - -#include - -/* these must have the values from 0-5 in this order */ -enum { - TB_SHUTDOWN_REBOOT = 0, - TB_SHUTDOWN_S5, - TB_SHUTDOWN_S4, - TB_SHUTDOWN_S3, - TB_SHUTDOWN_HALT, - TB_SHUTDOWN_WFS -}; - -#ifdef CONFIG_INTEL_TXT - -/* used to communicate between tboot and the launched kernel */ - -#define TB_KEY_SIZE 64 /* 512 bits */ - -#define MAX_TB_MAC_REGIONS 32 - -struct tboot_mac_region { - u64 start; /* must be 64 byte -aligned */ - u32 size; /* must be 64 byte -granular */ -} __packed; - -/* GAS - Generic Address Structure (ACPI 2.0+) */ -struct tboot_acpi_generic_address { - u8 space_id; - u8 bit_width; - u8 bit_offset; - u8 access_width; - u64 address; -} __packed; - -/* - * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec - * (http://www.acpi.info/) - */ -struct tboot_acpi_sleep_info { - struct tboot_acpi_generic_address pm1a_cnt_blk; - struct tboot_acpi_generic_address pm1b_cnt_blk; - struct tboot_acpi_generic_address pm1a_evt_blk; - struct tboot_acpi_generic_address pm1b_evt_blk; - u16 pm1a_cnt_val; - u16 pm1b_cnt_val; - u64 wakeup_vector; - u32 vector_width; - u64 kernel_s3_resume_vector; -} __packed; - -/* - * shared memory page used for communication between tboot and kernel - */ -struct tboot { - /* - * version 3+ fields: - */ - - /* TBOOT_UUID */ - u8 uuid[16]; - - /* version number: 5 is current */ - u32 version; - - /* physical addr of tb_log_t log */ - u32 log_addr; - - /* - * physical addr of entry point for tboot shutdown and - * type of shutdown (TB_SHUTDOWN_*) being requested - */ - u32 shutdown_entry; - u32 shutdown_type; - - /* kernel-specified ACPI info for Sx shutdown */ - struct tboot_acpi_sleep_info acpi_sinfo; - - /* tboot location in memory (physical) */ - u32 tboot_base; - u32 tboot_size; - - /* memory regions (phys addrs) for tboot to MAC on S3 */ - u8 num_mac_regions; - struct tboot_mac_region mac_regions[MAX_TB_MAC_REGIONS]; - - - /* - * version 4+ fields: - */ - - /* symmetric key for use by kernel; will be encrypted on S3 */ - u8 s3_key[TB_KEY_SIZE]; - - - /* - * version 5+ fields: - */ - - /* used to 4byte-align num_in_wfs */ - u8 reserved_align[3]; - - /* number of processors in wait-for-SIPI */ - u32 num_in_wfs; -} __packed; - -/* - * UUID for tboot data struct to facilitate matching - * defined as {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} by tboot, which is - * represented as {} in the char array used here - */ -#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\ - 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8} - -extern struct tboot *tboot; - -static inline int tboot_enabled(void) -{ - return tboot != NULL; -} - -extern void tboot_probe(void); -extern void tboot_create_trampoline(void); -extern void tboot_shutdown(u32 shutdown_type); -extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); -extern int tboot_wait_for_aps(int num_aps); -extern struct acpi_table_header *tboot_get_dmar_table( - struct acpi_table_header *dmar_tbl); -extern int tboot_force_iommu(void); - -#else /* CONFIG_INTEL_TXT */ - -static inline int tboot_enabled(void) -{ - return 0; -} - -static inline void tboot_probe(void) -{ -} - -static inline void tboot_create_trampoline(void) -{ -} - -static inline void tboot_shutdown(u32 shutdown_type) -{ -} - -static inline void tboot_sleep(u8 sleep_state, u32 pm1a_control, - u32 pm1b_control) -{ -} - -static inline int tboot_wait_for_aps(int num_aps) -{ - return 0; -} - -static inline struct acpi_table_header *tboot_get_dmar_table( - 
struct acpi_table_header *dmar_tbl) -{ - return dmar_tbl; -} - -static inline int tboot_force_iommu(void) -{ - return 0; -} - -#endif /* !CONFIG_INTEL_TXT */ - -#endif /* _ASM_TBOOT_H */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9de01c5..18ce5c0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -24,8 +25,6 @@ # include #endif -#include - /* * Power off function, if any */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 80d6e9e..6ce0d6f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -66,6 +66,7 @@ #include #include +#include #include
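[Annotation, not part of the series: a hypothetical user-space exercise of the X86_IOC_RDMSR_REGS ioctl added earlier. Per the msr-reg.S comment, the register layout is gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi], so index 1 (ecx) carries the MSR number on input and indices 2 and 0 (edx:eax) carry the value on return; MSR 0x10, the TSC, is used purely as an example, and error handling is trimmed.]

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/types.h>

	#define X86_IOC_RDMSR_REGS	_IOWR('c', 0xA0, __u32[8])

	int main(void)
	{
		__u32 regs[8] = { 0 };
		int fd = open("/dev/cpu/0/msr", O_RDONLY);

		if (fd < 0)
			return 1;
		regs[1] = 0x10;		/* ecx: MSR index (IA32_TIME_STAMP_COUNTER) */
		if (ioctl(fd, X86_IOC_RDMSR_REGS, regs) == 0)
			printf("TSC: %#llx\n",
			       ((unsigned long long)regs[2] << 32) | regs[0]);
		close(fd);
		return 0;
	}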