From c6e5ca35c4685cd920b1d5279dbc9f4483d7dfd4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:43:55 +0200 Subject: x86/asm/tsc: Inline native_read_tsc() and remove __native_read_tsc() In the following commit: cdc7957d1954 ("x86: move native_read_tsc() offline") ... native_read_tsc() was moved out of line, presumably for some now-obsolete vDSO-related reason. Undo it. The entire rdtsc, shl, or sequence is only 11 bytes, and calls via rdtscl() and similar helpers were already inlined. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/d05ffe2aaf8468ca475ebc00efad7b2fa174af19.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 8 +++----- arch/x86/include/asm/pvclock.h | 2 +- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/include/asm/tsc.h | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index e6a707e..8871147 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -106,12 +106,10 @@ notrace static inline int native_write_msr_safe(unsigned int msr, return err; } -extern unsigned long long native_read_tsc(void); - extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); -static __always_inline unsigned long long __native_read_tsc(void) +static __always_inline unsigned long long native_read_tsc(void) { DECLARE_ARGS(val, low, high); @@ -181,10 +179,10 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) } #define rdtscl(low) \ - ((low) = (u32)__native_read_tsc()) + ((low) = (u32)native_read_tsc()) #define rdtscll(val) \ - ((val) = __native_read_tsc()) + ((val) = native_read_tsc()) #define rdpmc(counter, low, high) \ do { \ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 628954c..2bd69d6 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) static __always_inline u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) { - u64 delta = __native_read_tsc() - src->tsc_timestamp; + u64 delta = native_read_tsc() - src->tsc_timestamp; return pvclock_scale_delta(delta, src->tsc_to_system_mul, src->tsc_shift); } diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c2e00bb..bc5fa2a 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void) * on during the bootup the random pool has true entropy too. 
*/ get_random_bytes(&canary, sizeof(canary)); - tsc = __native_read_tsc(); + tsc = native_read_tsc(); canary += tsc + (tsc << 32UL); current->stack_canary = canary; diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 94605c0..fd11128 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -42,7 +42,7 @@ static __always_inline cycles_t vget_cycles(void) if (!cpu_has_tsc) return 0; #endif - return (cycles_t)__native_read_tsc(); + return (cycles_t)native_read_tsc(); } extern void tsc_init(void); -- cgit v1.1 From 881d7bf843d7139c6dfbffdec4903b3354423c49 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:43:56 +0200 Subject: x86/asm/tsc, kvm: Remove vget_cycles() The only caller was KVM's read_tsc(). The only difference between vget_cycles() and native_read_tsc() was that vget_cycles() returned zero instead of crashing on TSC-less systems. KVM already checks vclock_mode() before calling that function, so the extra check is unnecessary. Also, KVM (host-side) requires the TSC to exist. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Acked-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/20615df14ae2eb713ea7a5f5123c1dc4c7ca993d.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tsc.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index fd11128..3da1cc1 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -32,19 +32,6 @@ static inline cycles_t get_cycles(void) return ret; } -static __always_inline cycles_t vget_cycles(void) -{ - /* - * We only do VDSOs on TSC capable CPUs, so this shouldn't - * access boot_cpu_data (which is not VDSO-safe): - */ -#ifndef CONFIG_X86_TSC - if (!cpu_has_tsc) - return 0; -#endif - return (cycles_t)native_read_tsc(); -} - extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); -- cgit v1.1 From 9261e050b686c9fe229cd9918d997b3caaf20e34 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:43:57 +0200 Subject: x86/asm/tsc, x86/paravirt: Remove read_tsc() and read_tscp() paravirt hooks We've had ->read_tsc() and ->read_tscp() paravirt hooks since the very beginning of paravirt, i.e., d3561b7fa0fb ("[PATCH] paravirt: header and stubs for paravirtualisation"). AFAICT, the only paravirt guest implementation that ever replaced these calls was vmware, and it's gone. Arguably even vmware shouldn't have hooked RDTSC -- we fully support systems that don't have a TSC at all, so there's no point for a paravirt implementation to pretend that we have a TSC but to replace it. I also doubt that these hooks actually worked. Calls to rdtscl() and rdtscll(), which respected the hooks, were used seemingly interchangeably with native_read_tsc(), which did not. Just remove them. If anyone ever needs them again, they can try to make a case for why they need them. 
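For illustration only, a condensed sketch of that inconsistency, pieced together from the pre-patch headers quoted in the diffs below (not a verbatim excerpt):

    /* Under CONFIG_PARAVIRT, rdtscll() went through the hook... */
    #ifdef CONFIG_PARAVIRT
    # define rdtscll(val)  (val = paravirt_read_tsc())  /* indirect PVOP call */
    #else
    # define rdtscll(val)  ((val) = native_read_tsc())
    #endif

    u64 t1, t2;
    rdtscll(t1);             /* a paravirt guest could intercept this... */
    t2 = native_read_tsc();  /* ...but never this raw RDTSC */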
Before, on a paravirt config: text data bss dec hex filename 12618257 1816384 1093632 15528273 ecf151 vmlinux After: text data bss dec hex filename 12617207 1816384 1093632 15527223 eced37 vmlinux Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/d08a2600fb298af163681e5efd8e599d889a5b97.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 16 ++++++++-------- arch/x86/include/asm/paravirt.h | 34 ---------------------------------- arch/x86/include/asm/paravirt_types.h | 2 -- 3 files changed, 8 insertions(+), 44 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 8871147..d1afac7 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -178,12 +178,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) return err; } -#define rdtscl(low) \ - ((low) = (u32)native_read_tsc()) - -#define rdtscll(val) \ - ((val) = native_read_tsc()) - #define rdpmc(counter, low, high) \ do { \ u64 _l = native_read_pmc((counter)); \ @@ -193,6 +187,14 @@ do { \ #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) +#endif /* !CONFIG_PARAVIRT */ + +#define rdtscl(low) \ + ((low) = (u32)native_read_tsc()) + +#define rdtscll(val) \ + ((val) = native_read_tsc()) + #define rdtscp(low, high, aux) \ do { \ unsigned long long _val = native_read_tscp(&(aux)); \ @@ -202,8 +204,6 @@ do { \ #define rdtscpll(val, aux) (val) = native_read_tscp(&(aux)) -#endif /* !CONFIG_PARAVIRT */ - /* * 64-bit version of wrmsr_safe(): */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d143bfa..c2be037 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -174,19 +174,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) return err; } -static inline u64 paravirt_read_tsc(void) -{ - return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); -} - -#define rdtscl(low) \ -do { \ - u64 _l = paravirt_read_tsc(); \ - low = (int)_l; \ -} while (0) - -#define rdtscll(val) (val = paravirt_read_tsc()) - static inline unsigned long long paravirt_sched_clock(void) { return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); @@ -215,27 +202,6 @@ do { \ #define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) -static inline unsigned long long paravirt_rdtscp(unsigned int *aux) -{ - return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); -} - -#define rdtscp(low, high, aux) \ -do { \ - int __aux; \ - unsigned long __val = paravirt_rdtscp(&__aux); \ - (low) = (u32)__val; \ - (high) = (u32)(__val >> 32); \ - (aux) = __aux; \ -} while (0) - -#define rdtscpll(val, aux) \ -do { \ - unsigned long __aux; \ - val = paravirt_rdtscp(&__aux); \ - (aux) = __aux; \ -} while (0) - static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a6b8f9f..ce029e4 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -156,9 +156,7 @@ struct pv_cpu_ops { u64 (*read_msr)(unsigned int msr, int *err); int (*write_msr)(unsigned int msr, 
unsigned low, unsigned high); - u64 (*read_tsc)(void); u64 (*read_pmc)(int counter); - unsigned long long (*read_tscp)(unsigned int *aux); #ifdef CONFIG_X86_32 /* -- cgit v1.1 From 87be28aaf1458445d5f648688c2eec0f13b8f3b9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:43:58 +0200 Subject: x86/asm/tsc: Replace rdtscll() with native_read_tsc() Now that the ->read_tsc() paravirt hook is gone, rdtscll() is just a wrapper around native_read_tsc(). Unwrap it. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/d2449ae62c1b1fb90195bcfb19ef4a35883a04dc.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 3 --- arch/x86/include/asm/tsc.h | 5 +---- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index d1afac7..7273b74 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -192,9 +192,6 @@ do { \ #define rdtscl(low) \ ((low) = (u32)native_read_tsc()) -#define rdtscll(val) \ - ((val) = native_read_tsc()) - #define rdtscp(low, high, aux) \ do { \ unsigned long long _val = native_read_tscp(&(aux)); \ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 3da1cc1..b488390 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -21,15 +21,12 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { - unsigned long long ret = 0; - #ifndef CONFIG_X86_TSC if (!cpu_has_tsc) return 0; #endif - rdtscll(ret); - return ret; + return native_read_tsc(); } extern void tsc_init(void); -- cgit v1.1 From ec69de52c648b1d9416a810943e68dbe9fe519f4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:43:59 +0200 Subject: x86/asm/tsc: Remove the rdtscp() and rdtscpll() macros They have no users. Leave native_read_tscp() which seems potentially useful despite also having no callers. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/6abfa3ef80534b5d73898a48c4d25e069303cbe5.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7273b74..626f781 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -192,15 +192,6 @@ do { \ #define rdtscl(low) \ ((low) = (u32)native_read_tsc()) -#define rdtscp(low, high, aux) \ -do { \ - unsigned long long _val = native_read_tscp(&(aux)); \ - (low) = (u32)_val; \ - (high) = (u32)(_val >> 32); \ -} while (0) - -#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux)) - /* * 64-bit version of wrmsr_safe(): */ -- cgit v1.1 From fe47ae6e1a5005b2e82f7eab57b5c3820453293a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:06 +0200 Subject: x86/asm/tsc: Remove rdtscl() It has no more callers, and it was never a very sensible interface to begin with. 
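For illustration, the idiom used to convert the last call sites (a sketch matching the macro removed below, not any specific caller; the variable name is invented):

    /* Before: the truncation to 32 bits is hidden inside the macro. */
    unsigned int cycles_lo;
    rdtscl(cycles_lo);

    /* After: read the full counter, truncate visibly at the call site. */
    cycles_lo = (u32)native_read_tsc();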
Users of the TSC should either read all 64 bits or explicitly throw out the high bits. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/250105f7cee519be9d7fc4464b5784caafc8f4fe.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 626f781..c89ed6c 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -189,9 +189,6 @@ do { \ #endif /* !CONFIG_PARAVIRT */ -#define rdtscl(low) \ - ((low) = (u32)native_read_tsc()) - /* * 64-bit version of wrmsr_safe(): */ -- cgit v1.1 From 4ea1636b04dbd66536fa387bae2eea463efc705b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:07 +0200 Subject: x86/asm/tsc: Rename native_read_tsc() to rdtsc() Now that there is no paravirt TSC, the "native" is inappropriate. The function does RDTSC, so give it the obvious name: rdtsc(). Suggested-by: Borislav Petkov Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/fd43e16281991f096c1e4d21574d9e1402c62d39.1434501121.git.luto@kernel.org [ Ported it to v4.2-rc1. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 11 ++++++++++- arch/x86/include/asm/pvclock.h | 2 +- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/include/asm/tsc.h | 2 +- 4 files changed, 13 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c89ed6c..ff0c120 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -109,7 +109,16 @@ notrace static inline int native_write_msr_safe(unsigned int msr, extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); -static __always_inline unsigned long long native_read_tsc(void) +/** + * rdtsc() - returns the current TSC without ordering constraints + * + * rdtsc() returns the result of RDTSC as a 64-bit integer. The + * only ordering constraint it supplies is the ordering implied by + * "asm volatile": it will put the RDTSC in the place you expect. The + * CPU can and will speculatively execute that RDTSC, though, so the + * results can be non-monotonic if compared on different CPUs. 
+ */ +static __always_inline unsigned long long rdtsc(void) { DECLARE_ARGS(val, low, high); diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 2bd69d6..5c490db 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) static __always_inline u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) { - u64 delta = native_read_tsc() - src->tsc_timestamp; + u64 delta = rdtsc() - src->tsc_timestamp; return pvclock_scale_delta(delta, src->tsc_to_system_mul, src->tsc_shift); } diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index bc5fa2a..58505f0 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void) * on during the bootup the random pool has true entropy too. */ get_random_bytes(&canary, sizeof(canary)); - tsc = native_read_tsc(); + tsc = rdtsc(); canary += tsc + (tsc << 32UL); current->stack_canary = canary; diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index b488390..3df7675 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -26,7 +26,7 @@ static inline cycles_t get_cycles(void) return 0; #endif - return native_read_tsc(); + return rdtsc(); } extern void tsc_init(void); -- cgit v1.1 From 03b9730b769fc4d87e40f6104f4c5b2e43889f19 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:08 +0200 Subject: x86/asm/tsc: Add rdtsc_ordered() and use it in trivial call sites rdtsc_barrier(); rdtsc() is an unnecessary mouthful and requires more thought than should be necessary. Add an rdtsc_ordered() helper and replace the trivial call sites with it. This should not change generated code. The duplication of the fence asm is temporary. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/dddbf98a2af53312e9aa73a5a2b1622fe5d6f52b.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index ff0c120..02bdd6c 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -127,6 +127,32 @@ static __always_inline unsigned long long rdtsc(void) return EAX_EDX_VAL(val, low, high); } +/** + * rdtsc_ordered() - read the current TSC in program order + * + * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. + * It is ordered like a load to a global in-memory counter. It should + * be impossible to observe non-monotonic rdtsc_unordered() behavior + * across multiple CPUs as long as the TSC is synced. + */ +static __always_inline unsigned long long rdtsc_ordered(void) +{ + /* + * The RDTSC instruction is not ordered relative to memory + * access. The Intel SDM and the AMD APM are both vague on this + * point, but empirically an RDTSC instruction can be + * speculatively executed before prior loads. 
An RDTSC + * immediately after an appropriate barrier appears to be + * ordered as a normal load, that is, it provides the same + * ordering guarantees as reading from a global memory location + * that some other imaginary CPU is updating continuously with a + * time stamp. + */ + alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, + "lfence", X86_FEATURE_LFENCE_RDTSC); + return rdtsc(); +} + static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); -- cgit v1.1 From 502dfeff239e8313bfbe906ca0a1a6827ac8481b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:11 +0200 Subject: x86/asm/tsc, x86/kvm: Drop open-coded barrier and use rdtsc_ordered() in kvmclock __pvclock_read_cycles() used to have two barriers, one of which was unnecessary, which got removed after an initial version of this patch was sent. But the barrier is still open-coded unnecessarily - get rid of that barrier and clean up the code by just using rdtsc_ordered(). Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Marcelo Tosatti Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krcmar Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/678981cc4761fb38a793c217c9cac42503cf3719.1434501121.git.luto@kernel.org [ Ported it to v4.2-rc1. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pvclock.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 5c490db..7a6bed5 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) static __always_inline u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) { - u64 delta = rdtsc() - src->tsc_timestamp; + u64 delta = rdtsc_ordered() - src->tsc_timestamp; return pvclock_scale_delta(delta, src->tsc_to_system_mul, src->tsc_shift); } @@ -76,13 +76,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u8 ret_flags; version = src->version; - /* Note: emulated platforms which do not advertise SSE2 support - * result in kvmclock not using the necessary RDTSC barriers. - * Without barriers, it is possible that RDTSC instruction reads from - * the time stamp counter outside rdtsc_barrier protected section - * below, resulting in violation of monotonicity. - */ - rdtsc_barrier(); + offset = pvclock_get_nsec_offset(src); ret = src->system_time + offset; ret_flags = src->flags; -- cgit v1.1 From bb8dd96032fc63babfc8b378a37dd7681eeec326 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:12 +0200 Subject: x86/asm/tsc: Remove rdtsc_barrier() All callers have been converted to rdtsc_ordered(). Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Richard Weinberger Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/9baa4ae9a1e7c7c282f9cb2f15bb6bf5c2004032.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/barrier.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e51a8f8..818cb87 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -91,15 +91,4 @@ do { \ #define smp_mb__before_atomic() barrier() #define smp_mb__after_atomic() barrier() -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - */ -static __always_inline void rdtsc_barrier(void) -{ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); -} - #endif /* _ASM_X86_BARRIER_H */ -- cgit v1.1 From 5a33fcb8d991209bac0a266ab499e4b53d116cdd Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Thu, 25 Jun 2015 18:44:13 +0200 Subject: x86/asm/tsc: Save an instruction in DECLARE_ARGS users Before, the code to do RDTSC looked like: rdtsc shl $0x20, %rdx mov %eax, %eax or %rdx, %rax The "mov %eax, %eax" is required to clear the high 32 bits of RAX. By declaring low and high as 64-bit variables, the code is simplified to: rdtsc shl $0x20,%rdx or %rdx,%rax Yes, it's a 2-byte instruction that's not on a critical path, but there are principles to be upheld. Every user of EAX_EDX_RET has been checked. I tried to check users of EAX_EDX_ARGS, but there weren't any, so I deleted it to be safe. ( There's no benefit to making "high" 64 bits, but it was the simplest way to proceed. ) Signed-off-by: George Spelvin Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: jacob.jun.pan@linux.intel.com Link: http://lkml.kernel.org/r/20150618075906.4615.qmail@ns.horizon.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 02bdd6c..131eec2 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -47,14 +47,13 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) * it means rax *or* rdx. 
*/ #ifdef CONFIG_X86_64 -#define DECLARE_ARGS(val, low, high) unsigned low, high -#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32)) -#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high) +/* Using 64-bit values saves one instruction clearing the high half of low */ +#define DECLARE_ARGS(val, low, high) unsigned long low, high +#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) #else #define DECLARE_ARGS(val, low, high) unsigned long long val #define EAX_EDX_VAL(val, low, high) (val) -#define EAX_EDX_ARGS(val, low, high) "A" (val) #define EAX_EDX_RET(val, low, high) "=A" (val) #endif -- cgit v1.1 From b2e02b820d5b42479195b89d3d73f31bcedb264e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 22 Jun 2015 07:55:11 -0400 Subject: x86/compat: Make mmap_is_ia32() common compat TIF_ADDR32 is set for both ia32 and x32 tasks, so change from CONFIG_IA32_EMULATION to CONFIG_COMPAT. Use config_enabled() to make the function more readable. Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1434974121-32575-3-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/elf.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index f161c18..180b6fe 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -344,14 +344,9 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, */ static inline int mmap_is_ia32(void) { -#ifdef CONFIG_X86_32 - return 1; -#endif -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_ADDR32)) - return 1; -#endif - return 0; + return config_enabled(CONFIG_X86_32) || + (config_enabled(CONFIG_COMPAT) && + test_thread_flag(TIF_ADDR32)); } /* Do not change the values. See get_align_mask() */ -- cgit v1.1 From b829d1be20ab51a3b76ec003118c9260d1fa424e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 22 Jun 2015 07:55:12 -0400 Subject: x86/compat: Move ucontext_x32 to sigframe.h ia32.h should only contain the code for 32-bit compatibility. Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1434974121-32575-4-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ia32.h | 9 --------- arch/x86/include/asm/sigframe.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index d0e8e01..2801976 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -22,15 +22,6 @@ struct ucontext_ia32 { compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; -struct ucontext_x32 { - unsigned int uc_flags; - unsigned int uc_link; - compat_stack_t uc_stack; - unsigned int uc__pad0; /* needed for alignment */ - struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ - compat_sigset_t uc_sigmask; /* mask last for extensibility */ -}; - /* This matches struct stat64 in glibc2.2, hence the absolutely * insane amounts of padding around dev_t's.
*/ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 7c7c27c..1f3175b 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -4,6 +4,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 #define sigframe_ia32 sigframe @@ -69,6 +70,15 @@ struct rt_sigframe { #ifdef CONFIG_X86_X32_ABI +struct ucontext_x32 { + unsigned int uc_flags; + unsigned int uc_link; + compat_stack_t uc_stack; + unsigned int uc__pad0; /* needed for alignment */ + struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + struct rt_sigframe_x32 { u64 pretcode; struct ucontext_x32 uc; -- cgit v1.1 From 7da770785f9740af1cb24b8fd63075543bd00711 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 22 Jun 2015 07:55:13 -0400 Subject: x86/compat: Rename 'start_thread_ia32' to 'compat_start_thread' This function is shared between the 32-bit compat and x32 ABIs. Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1434974121-32575-5-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/elf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 180b6fe..2bf67c0 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -187,8 +187,8 @@ static inline void elf_common_init(struct thread_struct *t, #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ elf_common_init(&current->thread, regs, __USER_DS) -void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); -#define compat_start_thread start_thread_ia32 +void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp); +#define compat_start_thread compat_start_thread void set_personality_ia32(bool); #define COMPAT_SET_PERSONALITY(ex) \ -- cgit v1.1 From ab8b82ee6dad7c9c257f450d14719a0e3f327244 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 22 Jun 2015 07:55:15 -0400 Subject: x86/compat: Don't build the 32-bit VDSO if not needed Build the 32-bit vDSO only when native 32-bit or 32-bit compat support is enabled; x32 alone should not force it to be built. Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1434974121-32575-7-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 2bf67c0..141c561 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -78,7 +78,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #ifdef CONFIG_X86_64 extern unsigned int vdso64_enabled; #endif -#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) extern unsigned int vdso32_enabled; #endif -- cgit v1.1 From 68872eb9b19bbd85883262a4e0927b487653816c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Jul 2015 17:29:00 +0300 Subject: x86/platform/intel/pmc_atom: Export accessors to PMC registers Export the pmc_atom_read() and pmc_atom_write() accessors to the PMC registers.
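For illustration, a consumer might use the accessors like this (a hedged sketch: the prototypes and PMC_FUNC_DIS come from the header below, while the calling function itself is invented):

    #include <asm/pmc_atom.h>

    /* Hypothetical read-modify-write of a PMC register. */
    static int example_touch_func_dis(void)
    {
            u32 val;
            int ret;

            ret = pmc_atom_read(PMC_FUNC_DIS, &val);
            if (ret)        /* -ENODEV until the PMC driver has probed */
                    return ret;

            return pmc_atom_write(PMC_FUNC_DIS, val);
    }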
At early initcall stages the functions will return -ENODEV, and the caller has to wait until they become available. Additionally, make the absence of debugfs a non-fatal error. The patch will be useful for the upcoming fixes regarding the LPSS block found on Intel BayTrail-T and Braswell. Signed-off-by: Andy Shevchenko Cc: Aubrey Li Cc: Kumar P Mahesh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1436192944-56496-2-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pmc_atom.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h index bc0fc08..6ee2200 100644 --- a/arch/x86/include/asm/pmc_atom.h +++ b/arch/x86/include/asm/pmc_atom.h @@ -126,4 +126,8 @@ #define SLEEP_TYPE_MASK 0xFFFFECFF #define SLEEP_TYPE_S5 0x1C00 #define SLEEP_ENABLE 0x2000 + +extern int pmc_atom_read(int offset, u32 *value); +extern int pmc_atom_write(int offset, u32 value); + #endif /* PMC_ATOM_H */ -- cgit v1.1 From 2b8f8eddaf05c02bb4a21db5be1691e36e242c65 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Jul 2015 17:29:03 +0300 Subject: x86/platform/intel/pmc_atom: Add Cherrytrail PMC interface The patch adds the CHT PMC interface. This exposes all the South IP device power states and S0ix states for CHT. The bit maps of the FUNC_DIS and D3_STS_0 registers are consistent across these SoCs. The D3_STS_1 and FUNC_DIS_2 registers, however, are not aligned. This is fixed by splitting a common mapping on a per-register basis. (Originally based on code from Kumar P Mahesh.) Originally-from: Kumar P Mahesh Signed-off-by: Andy Shevchenko Cc: Aubrey Li Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J.
Wysocki Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1436192944-56496-5-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pmc_atom.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h index 6ee2200..aa8744c 100644 --- a/arch/x86/include/asm/pmc_atom.h +++ b/arch/x86/include/asm/pmc_atom.h @@ -18,6 +18,8 @@ /* ValleyView Power Control Unit PCI Device ID */ #define PCI_DEVICE_ID_VLV_PMC 0x0F1C +/* CherryTrail Power Control Unit PCI Device ID */ +#define PCI_DEVICE_ID_CHT_PMC 0x229C /* PMC Memory mapped IO registers */ #define PMC_BASE_ADDR_OFFSET 0x44 @@ -29,6 +31,10 @@ #define PMC_FUNC_DIS 0x34 #define PMC_FUNC_DIS_2 0x38 +/* CHT specific bits in FUNC_DIS2 register */ +#define BIT_FD_GMM BIT(3) +#define BIT_FD_ISH BIT(4) + /* S0ix wake event control */ #define PMC_S0IX_WAKE_EN 0x3C @@ -75,6 +81,21 @@ #define PMC_PSS_BIT_USB BIT(16) #define PMC_PSS_BIT_USB_SUS BIT(17) +/* CHT specific bits in PSS register */ +#define PMC_PSS_BIT_CHT_UFS BIT(7) +#define PMC_PSS_BIT_CHT_UXD BIT(11) +#define PMC_PSS_BIT_CHT_UXD_FD BIT(12) +#define PMC_PSS_BIT_CHT_UX_ENG BIT(15) +#define PMC_PSS_BIT_CHT_USB_SUS BIT(16) +#define PMC_PSS_BIT_CHT_GMM BIT(17) +#define PMC_PSS_BIT_CHT_ISH BIT(18) +#define PMC_PSS_BIT_CHT_DFX_MASTER BIT(26) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER1 BIT(27) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER2 BIT(28) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER3 BIT(29) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER4 BIT(30) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER5 BIT(31) + /* These registers reflect D3 status of functions */ #define PMC_D3_STS_0 0xA0 @@ -117,6 +138,10 @@ #define BIT_USH_SS_PHY BIT(2) #define BIT_DFX BIT(3) +/* CHT specific bits in PMC_D3_STS_1 register */ +#define BIT_STS_GMM BIT(1) +#define BIT_STS_ISH BIT(2) + /* PMC I/O Registers */ #define ACPI_BASE_ADDR_OFFSET 0x40 #define ACPI_BASE_ADDR_MASK 0xFFFFFE00 -- cgit v1.1 From 1f484aa6904697f390027c12fba130fa94b20831 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2015 12:44:23 -0700 Subject: x86/entry: Move C entry and exit code to arch/x86/entry/common.c The entry and exit C helpers were confusingly scattered between ptrace.c and signal.c, even though they aren't specific to ptrace or signal handling. Move them together in a new file. This change just moves code around. It doesn't change anything. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/324d686821266544d8572423cc281f961da445f4.1435952415.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/signal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 31eab86..b42408b 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -30,6 +30,7 @@ typedef sigset_t compat_sigset_t; #endif /* __ASSEMBLY__ */ #include #ifndef __ASSEMBLY__ +extern void do_signal(struct pt_regs *regs); extern void do_notify_resume(struct pt_regs *, void *, __u32); #define __ARCH_HAS_SA_RESTORER -- cgit v1.1 From 8c84014f3bbb112d07e73f30a10ac8a3a72f8649 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2015 12:44:32 -0700 Subject: x86/entry: Remove exception_enter() from most trap handlers On 64-bit kernels, we don't need it any more: we handle context tracking directly on entry from user mode and exit to user mode. On 32-bit kernels, we don't support context tracking at all, so these callbacks had no effect. Note: this doesn't change do_page_fault(). Before we do that, we need to make sure that there is no code that can page fault from kernel mode with CONTEXT_USER. The 32-bit fast system call stack argument code is the only offender I'm aware of right now. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/ae22f4dfebd799c916574089964592be218151f9.1435952415.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/traps.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index c5380be..c3496619 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -112,8 +112,8 @@ asmlinkage void smp_threshold_interrupt(void); asmlinkage void smp_deferred_error_interrupt(void); #endif -extern enum ctx_state ist_enter(struct pt_regs *regs); -extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state); +extern void ist_enter(struct pt_regs *regs); +extern void ist_exit(struct pt_regs *regs); extern void ist_begin_non_atomic(struct pt_regs *regs); extern void ist_end_non_atomic(void); -- cgit v1.1 From 06a7b36c7bd932e60997bedbae32b3d8e6722281 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2015 12:44:33 -0700 Subject: x86/entry: Remove SCHEDULE_USER and asm/context-tracking.h SCHEDULE_USER is no longer used, and asm/context-tracking.h contained nothing else. Remove the header entirely. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/854e9b45f69af20e26c47099eb236321563ebcee.1435952415.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/context_tracking.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 arch/x86/include/asm/context_tracking.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/context_tracking.h b/arch/x86/include/asm/context_tracking.h deleted file mode 100644 index 1fe4970..0000000 --- a/arch/x86/include/asm/context_tracking.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _ASM_X86_CONTEXT_TRACKING_H -#define _ASM_X86_CONTEXT_TRACKING_H - -#ifdef CONFIG_CONTEXT_TRACKING -# define SCHEDULE_USER call schedule_user -#else -# define SCHEDULE_USER call schedule -#endif - -#endif -- cgit v1.1 From 7e1ff15b699bcb2bce1e8086323d227788960044 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Jul 2015 17:45:08 +0300 Subject: x86/platform/iosf_mbi: Source cleanup - Move the static variables to one place - Fix indentations in the header - Correct comments No functional change. [ tglx: Massaged changelog ] Signed-off-by: Andy Shevchenko Cc: David E . Box Link: http://lkml.kernel.org/r/1436366709-17683-5-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/iosf_mbi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index 57995f0..b72ad0f 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -52,20 +52,20 @@ /* Quark available units */ #define QRK_MBI_UNIT_HBA 0x00 -#define QRK_MBI_UNIT_HB 0x03 +#define QRK_MBI_UNIT_HB 0x03 #define QRK_MBI_UNIT_RMU 0x04 -#define QRK_MBI_UNIT_MM 0x05 +#define QRK_MBI_UNIT_MM 0x05 #define QRK_MBI_UNIT_MMESRAM 0x05 #define QRK_MBI_UNIT_SOC 0x31 /* Quark read/write opcodes */ #define QRK_MBI_HBA_READ 0x10 #define QRK_MBI_HBA_WRITE 0x11 -#define QRK_MBI_HB_READ 0x10 +#define QRK_MBI_HB_READ 0x10 #define QRK_MBI_HB_WRITE 0x11 #define QRK_MBI_RMU_READ 0x10 #define QRK_MBI_RMU_WRITE 0x11 -#define QRK_MBI_MM_READ 0x10 +#define QRK_MBI_MM_READ 0x10 #define QRK_MBI_MM_WRITE 0x11 #define QRK_MBI_MMESRAM_READ 0x12 #define QRK_MBI_MMESRAM_WRITE 0x13 -- cgit v1.1 From 0233606ce5cf12c1a0e27cb197066ea5bc2bb488 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 19 Jul 2015 21:09:04 -0400 Subject: x86/entry/vm86: Clean up saved_fs/gs There is no need to save FS and non-lazy GS outside the 32-bit regs. Lazy GS still needs to be saved because it wasn't saved on syscall entry. Save it in the gs slot of regs32, which is present but unused. 
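A sketch of the idea (assuming the usual 32-bit lazy-GS helpers; the regs32 name is illustrative, and the real changes live in arch/x86/kernel/vm86_32.c, outside this include/ view):

    /* Entering vm86 mode: stash the lazy GS selector in the
     * otherwise-unused gs slot of the 32-bit register frame. */
    lazy_save_gs(regs32->gs);

    /* Returning from vm86 mode: restore it from the same slot. */
    lazy_load_gs(regs32->gs);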
Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437354550-25858-2-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 43e6519..f4e4e3f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -410,8 +410,6 @@ struct thread_struct { unsigned long v86flags; unsigned long v86mask; unsigned long saved_sp0; - unsigned int saved_fs; - unsigned int saved_gs; #endif /* IO permissions: */ unsigned long *io_bitmap_ptr; -- cgit v1.1 From ed0b2edb61ba4e557de759093d965654186f28b2 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 19 Jul 2015 21:09:06 -0400 Subject: x86/entry/vm86: Move userspace accesses to do_sys_vm86() Move the userspace accesses down into the common function in preparation for the next set of patches. Also change to copying the fields explicitly instead of assuming a fixed order in pt_regs and the kernel data structures. Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437354550-25858-4-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f4e4e3f..35ad554 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -405,7 +405,7 @@ struct thread_struct { unsigned long error_code; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ - struct vm86_struct __user *vm86_info; + struct vm86plus_struct __user *vm86_info; unsigned long screen_bitmap; unsigned long v86flags; unsigned long v86mask; -- cgit v1.1 From 8c7ea50c010b2f1e006ad37c43f98202a31de2cb Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 9 Jul 2015 17:28:16 -0700 Subject: x86/mm, asm-generic: Add IOMMU ioremap_uc() variant default We currently have no safe way of defining architecture-agnostic IOMMU ioremap_*() variants. The trend is for folks to *assume* that ioremap_nocache() should be the default everywhere and then add this mapping on each architecture -- this is not correct today for a variety of reasons. We have two options: 1) Sit and wait for every architecture in Linux to get an ioremap_*() variant defined before including it upstream. 2) Gather consensus on a safe architecture-agnostic ioremap_*() default. Approach 1) introduces development latencies, and since 2) will take time and work on clarifying semantics, the only remaining sensible thing to do to avoid issues is returning NULL for undefined ioremap_*() variants. In order for this to work we must have all architectures declare their own ioremap_*() variants as defined. This will take some work; do this for ioremap_uc() to set the example, as it's currently implemented only on x86. Document all this. We only provide implementation support for ioremap_uc() as the other ioremap_*() variants are well defined all over the kernel for other architectures already. Signed-off-by: Luis R.
Rodriguez Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: benh@kernel.crashing.org Cc: bp@suse.de Cc: dan.j.williams@intel.com Cc: geert@linux-m68k.org Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: linux-mm@kvack.org Cc: luto@amacapital.net Cc: mpe@ellerman.id.au Cc: mst@redhat.com Cc: ralf@linux-mips.org Cc: ross.zwisler@linux.intel.com Cc: stefan.bader@canonical.com Cc: tj@kernel.org Cc: tomi.valkeinen@ti.com Cc: toshi.kani@hp.com Link: http://lkml.kernel.org/r/1436488096-3165-1-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 83ec9b1..de25aad 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -180,6 +180,8 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); +#define ioremap_uc ioremap_uc + extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); -- cgit v1.1 From 949163015ce6fdb76a5e846a3582d3c40c23c001 Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Mon, 20 Jul 2015 18:23:50 +0200 Subject: x86/boot: Obsolete the MCA sys_desc_table The kernel does not support the MCA bus anymore, so mark sys_desc_table as obsolete: remove any reference from the code together with the remaining MCA logic. bloat-o-meter output: add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-55 (-55) function old new delta i386_start_kernel 128 119 -9 setup_arch 1421 1375 -46 Signed-off-by: Paolo Pisati Cc: Josh Triplett Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437409430-8491-1-git-send-email-p.pisati@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 8 -------- arch/x86/include/uapi/asm/bootparam.h | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 43e6519..3e15e13 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -647,14 +647,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) extern void set_task_blockstep(struct task_struct *task, bool on); -/* - * from system description table in BIOS. Mostly for MCA use, but - * others may find it useful: - */ -extern unsigned int machine_id; -extern unsigned int machine_submodel_id; -extern unsigned int BIOS_revision; - /* Boot loader type from the setup header: */ extern int bootloader_type; extern int bootloader_version; diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index ab456dc..3292543 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -120,7 +120,7 @@ struct boot_params { __u8 _pad3[16]; /* 0x070 */ __u8 hd0_info[16]; /* obsolete! */ /* 0x080 */ __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */ - struct sys_desc_table sys_desc_table; /* 0x0a0 */ + struct sys_desc_table sys_desc_table; /* obsolete!
*/ /* 0x0a0 */ struct olpc_ofw_header olpc_ofw_header; /* 0x0b0 */ __u32 ext_ramdisk_image; /* 0x0c0 */ __u32 ext_ramdisk_size; /* 0x0c4 */ -- cgit v1.1 From e83d58874ba1de74c13d3c6b05f95a023c860d25 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 3 Jul 2015 15:01:34 +0300 Subject: kvm/x86: move Hyper-V MSR's/hypercall code into hyperv.c file This patch introduces a Hyper-V related source code file - hyperv.c - and per-vm and per-vcpu hyperv context structures. All Hyper-V MSR and hypercall code is moved into hyperv.c. All Hyper-V kvm/vcpu fields are moved into the appropriate hyperv context structures. Copyright and author information is copied from x86.c to hyperv.c. Signed-off-by: Andrey Smetanin Signed-off-by: Denis V. Lunev Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 49ec903..2416882 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -358,6 +358,11 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V per vcpu emulation context */ +struct kvm_vcpu_hv { + u64 hv_vapic; +}; + struct kvm_vcpu_arch { /* * rip and regs accesses must go through @@ -514,8 +519,7 @@ struct kvm_vcpu_arch { /* used for guest single stepping over the given code position */ unsigned long singlestep_rip; - /* fields used by HYPER-V emulation */ - u64 hv_vapic; + struct kvm_vcpu_hv hyperv; cpumask_var_t wbinvd_dirty_mask; @@ -586,6 +590,13 @@ struct kvm_apic_map { struct kvm_lapic *logical_map[16][16]; }; +/* Hyper-V emulation context */ +struct kvm_hv { + u64 hv_guest_os_id; + u64 hv_hypercall; + u64 hv_tsc_page; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -645,10 +656,7 @@ struct kvm_arch { /* reads protected by irq_srcu, writes by irq_lock */ struct hlist_head mask_notifier_list; - /* fields used by HYPER-V emulation */ - u64 hv_guest_os_id; - u64 hv_hypercall; - u64 hv_tsc_page; + struct kvm_hv hyperv; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; -- cgit v1.1 From e7d9513b60e87f62e41090fa3a26eca796924346 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 3 Jul 2015 15:01:37 +0300 Subject: kvm/x86: added hyper-v crash msrs into kvm hyperv context Added kvm Hyper-V context hv crash variables as storage for the Hyper-V crash MSRs. Signed-off-by: Andrey Smetanin Signed-off-by: Denis V. Lunev Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2416882..fa32b53 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -595,6 +595,10 @@ struct kvm_hv { u64 hv_guest_os_id; u64 hv_hypercall; u64 hv_tsc_page; + + /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ + u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; + u64 hv_crash_ctl; }; struct kvm_arch { -- cgit v1.1 From 5f3d45e7f2827f48e60eb821efd909713b43ac63 Mon Sep 17 00:00:00 2001 From: Mihai Donțu Date: Sun, 5 Jul 2015 20:08:57 +0300 Subject: kvm/x86: add support for MONITOR_TRAP_FLAG Allow a nested hypervisor to single-step its guests.
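For context, enabling MTF amounts to setting one bit in the primary processor-based VM-execution controls; a sketch using the constant added below (vmcs_read32()/vmcs_write32() are the standard KVM VMX accessors, shown here only as illustration, not the actual nested-VMX plumbing):

    /* Sketch: request a VM exit after the guest executes one instruction. */
    u32 exec_ctls = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
    exec_ctls |= CPU_BASED_MONITOR_TRAP_FLAG;
    vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_ctls);
    /* The exit is then reported as EXIT_REASON_MONITOR_TRAP_FLAG (37). */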
Signed-off-by: Mihai Donțu [Fix overlong line. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 1 + arch/x86/include/uapi/asm/vmx.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index da772ed..9299ae5 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -47,6 +47,7 @@ #define CPU_BASED_MOV_DR_EXITING 0x00800000 #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 #define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 #define CPU_BASED_USE_MSR_BITMAPS 0x10000000 #define CPU_BASED_MONITOR_EXITING 0x20000000 #define CPU_BASED_PAUSE_EXITING 0x40000000 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 1fe9218..37fee27 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -58,6 +58,7 @@ #define EXIT_REASON_INVALID_STATE 33 #define EXIT_REASON_MSR_LOAD_FAIL 34 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_TRAP_FLAG 37 #define EXIT_REASON_MONITOR_INSTRUCTION 39 #define EXIT_REASON_PAUSE_INSTRUCTION 40 #define EXIT_REASON_MCE_DURING_VMENTRY 41 @@ -106,6 +107,7 @@ { EXIT_REASON_MSR_READ, "MSR_READ" }, \ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ -- cgit v1.1 From d71ba788345c2b5646101766e0c52714a9b5ed7f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 Jul 2015 11:56:48 +0200 Subject: KVM: move code related to KVM_SET_BOOT_CPU_ID to x86 This is another remnant of ia64 support. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fa32b53..2f9e504 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -667,6 +667,7 @@ struct kvm_arch { #endif bool boot_vcpu_runs_old_kvmclock; + u32 bsp_vcpu_id; u64 disabled_quirks; }; @@ -1215,5 +1216,7 @@ int __x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); int x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); +bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); +bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.1 From 991de2e59090e55c65a7f59a049142e3c480f7bd Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 10 Jun 2015 16:54:59 +0800 Subject: PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq() To support IOAPIC hotplug, we need to allocate PCI IRQ resources on demand and free them when not used anymore. Implement pcibios_alloc_irq() and pcibios_free_irq() to dynamically allocate and free PCI IRQs. Remove mp_should_keep_irq(), which is no longer used. 
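For reference, the hooks follow the usual weak-default pattern (a sketch of the generic side only; the x86 implementations live outside this include/ view):

    /* Sketch: no-op defaults that an architecture may override. */
    int __weak pcibios_alloc_irq(struct pci_dev *dev)
    {
            return 0;       /* nothing to allocate by default */
    }

    void __weak pcibios_free_irq(struct pci_dev *dev)
    {
    }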
[bhelgaas: changelog] Signed-off-by: Jiang Liu Signed-off-by: Bjorn Helgaas Acked-by: Thomas Gleixner --- arch/x86/include/asm/pci_x86.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 164e3f8..fa1195d 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -93,8 +93,6 @@ extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); -extern bool mp_should_keep_irq(struct device *dev); - struct pci_raw_ops { int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val); -- cgit v1.1 From b1c599b8ff80ea79b9f8277a3f9f36a7b0cfedce Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 24 Jul 2015 09:15:11 +0200 Subject: x86/cpufeature: Add feature bit for Intel's Silicon Debug CPUID bit Add a CPUID feature bit for the SDBG (Silicon Debug) CPU feature found on recent Intel systems starting with Haswell. Using the IA32_DEBUG_INTERFACE MSR (index C80H) one can at least detect if SDBG has been enabled by the firmware and if it has been used or not. Signed-off-by: Mathias Krause Signed-off-by: Borislav Petkov Cc: Aaron Lu Cc: Dave Hansen Cc: Dirk Brandewie Cc: H. Peter Anvin Cc: Josh Triplett Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ross Zwisler Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437330403-12102-1-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3d6606f..4b11974 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -119,6 +119,7 @@ #define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ #define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ #define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ #define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ #define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -- cgit v1.1 From a5b9e5a2f14f25a8dae987494d50ad3aac7366b6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 30 Jul 2015 14:31:34 -0700 Subject: x86/ldt: Make modify_ldt() optional The modify_ldt syscall exposes a large attack surface and is unnecessary for modern userspace. Make it optional. Signed-off-by: Andy Lutomirski Reviewed-by: Kees Cook Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jan Beulich Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt Cc: Thomas Gleixner Cc: security@kernel.org Cc: xen-devel Link: http://lkml.kernel.org/r/a605166a771c343fd64802dece77a903507333bd.1438291540.git.luto@kernel.org [ Made MATH_EMULATION dependent on MODIFY_LDT_SYSCALL. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu.h | 2 ++ arch/x86/include/asm/mmu_context.h | 28 +++++++++++++++++++++------- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 364d274..55234d5 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,7 +9,9 @@ * we put the segment information here. 
*/ typedef struct { +#ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; +#endif #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 984abfe..379cd36 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -33,6 +33,7 @@ static inline void load_mm_cr4(struct mm_struct *mm) static inline void load_mm_cr4(struct mm_struct *mm) {} #endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL /* * ldt_structs can be allocated, used, and freed, but they are never * modified while live. @@ -48,8 +49,23 @@ struct ldt_struct { int size; }; +/* + * Used for LDT copy/destruction. + */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +void destroy_context(struct mm_struct *mm); +#else /* CONFIG_MODIFY_LDT_SYSCALL */ +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + return 0; +} +static inline void destroy_context(struct mm_struct *mm) {} +#endif + static inline void load_mm_ldt(struct mm_struct *mm) { +#ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; /* lockless_dereference synchronizes with smp_store_release */ @@ -73,17 +89,13 @@ static inline void load_mm_ldt(struct mm_struct *mm) set_ldt(ldt->entries, ldt->size); else clear_LDT(); +#else + clear_LDT(); +#endif DEBUG_LOCKS_WARN_ON(preemptible()); } -/* - * Used for LDT copy/destruction. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void destroy_context(struct mm_struct *mm); - - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP @@ -114,6 +126,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Load per-mm CR4 state */ load_mm_cr4(next); +#ifdef CONFIG_MODIFY_LDT_SYSCALL /* * Load the LDT, if the LDT is different. * @@ -128,6 +141,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ if (unlikely(prev->context.ldt != next->context.ldt)) load_mm_ldt(next); +#endif } #ifdef CONFIG_SMP else { -- cgit v1.1 From 9fda6a0681e070b496235b132bc70ceb80300211 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 29 Jul 2015 01:41:16 -0400 Subject: x86/vm86: Move vm86 fields out of 'thread_struct' Allocate a separate structure for the vm86 fields. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438148483-11932-2-git-send-email-brgerst@gmail.com [ Build fixes. 
] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 11 +++-------- arch/x86/include/asm/vm86.h | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index befc134..9615a4e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -6,8 +6,8 @@ /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; +struct vm86; -#include #include #include #include @@ -400,13 +400,9 @@ struct thread_struct { unsigned long cr2; unsigned long trap_nr; unsigned long error_code; -#ifdef CONFIG_X86_32 +#ifdef CONFIG_VM86 /* Virtual 86 mode info */ - struct vm86plus_struct __user *vm86_info; - unsigned long screen_bitmap; - unsigned long v86flags; - unsigned long v86mask; - unsigned long saved_sp0; + struct vm86 *vm86; #endif /* IO permissions: */ unsigned long *io_bitmap_ptr; @@ -718,7 +714,6 @@ static inline void spin_lock_prefetch(const void *x) #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ - .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ } diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 1d8de3f..20b43b7 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_VM86_H #define _ASM_X86_VM86_H - #include #include @@ -58,6 +57,14 @@ struct kernel_vm86_struct { */ }; +struct vm86 { + struct vm86plus_struct __user *vm86_info; + unsigned long screen_bitmap; + unsigned long v86flags; + unsigned long v86mask; + unsigned long saved_sp0; +}; + #ifdef CONFIG_VM86 void handle_vm86_fault(struct kernel_vm86_regs *, long); @@ -67,6 +74,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *); struct task_struct; void release_vm86_irqs(struct task_struct *); +#define free_vm86(t) do { \ + struct thread_struct *__t = (t); \ + if (__t->vm86 != NULL) { \ + kfree(__t->vm86); \ + __t->vm86 = NULL; \ + } \ +} while (0) + #else #define handle_vm86_fault(a, b) @@ -77,6 +92,8 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) return 0; } +#define free_vm86(t) do { } while(0) + #endif /* CONFIG_VM86 */ #endif /* _ASM_X86_VM86_H */ -- cgit v1.1 From d4ce0f26c790af8e829d3fad0a6787f40f98e24f Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 29 Jul 2015 01:41:17 -0400 Subject: x86/vm86: Move fields from 'struct kernel_vm86_struct' to 'struct vm86' Move the non-regs fields to the off-stack data. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438148483-11932-3-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vm86.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 20b43b7..47c7648 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -37,13 +37,7 @@ struct kernel_vm86_struct { * Therefore, pt_regs in fact points to a complete 'kernel_vm86_struct' * in kernelspace, hence we need not reget the data from userspace. 
*/ -#define VM86_TSS_ESP0 flags - unsigned long flags; - unsigned long screen_bitmap; - unsigned long cpu_type; - struct revectored_struct int_revectored; - struct revectored_struct int21_revectored; - struct vm86plus_info_struct vm86plus; +#define VM86_TSS_ESP0 regs32 struct pt_regs *regs32; /* here we save the pointer to the old regs */ /* * The below is not part of the structure, but the stack layout continues @@ -59,10 +53,16 @@ struct kernel_vm86_struct { struct vm86 { struct vm86plus_struct __user *vm86_info; - unsigned long screen_bitmap; unsigned long v86flags; unsigned long v86mask; unsigned long saved_sp0; + + unsigned long flags; + unsigned long screen_bitmap; + unsigned long cpu_type; + struct revectored_struct int_revectored; + struct revectored_struct int21_revectored; + struct vm86plus_info_struct vm86plus; }; #ifdef CONFIG_VM86 -- cgit v1.1 From 90c6085a248f8f964588617f51329688bcc9f2bc Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 29 Jul 2015 01:41:18 -0400 Subject: x86/vm86: Eliminate 'struct kernel_vm86_struct' Now there is no vm86-specific data left on the kernel stack while in userspace, except for the 32-bit regs. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438148483-11932-4-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vm86.h | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 47c7648..226d6c1 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -27,32 +27,9 @@ struct kernel_vm86_regs { unsigned short gs, __gsh; }; -struct kernel_vm86_struct { - struct kernel_vm86_regs regs; -/* - * the below part remains on the kernel stack while we are in VM86 mode. - * 'tss.esp0' then contains the address of VM86_TSS_ESP0 below, and when we - * get forced back from VM86, the CPU and "SAVE_ALL" will restore the above - * 'struct kernel_vm86_regs' with the then actual values. - * Therefore, pt_regs in fact points to a complete 'kernel_vm86_struct' - * in kernelspace, hence we need not reget the data from userspace. - */ -#define VM86_TSS_ESP0 regs32 - struct pt_regs *regs32; /* here we save the pointer to the old regs */ -/* - * The below is not part of the structure, but the stack layout continues - * this way. In front of 'return-eip' may be some data, depending on - * compilation, so we don't rely on this and save the pointer to 'oldregs' - * in 'regs32' above. - * However, with GCC-2.7.2 and the current CFLAGS you see exactly this: - - long return-eip; from call to vm86() - struct pt_regs oldregs; user space registers as saved by syscall - */ -}; - struct vm86 { struct vm86plus_struct __user *vm86_info; + struct pt_regs *regs32; unsigned long v86flags; unsigned long v86mask; unsigned long saved_sp0; -- cgit v1.1 From 5ed92a8ab71f8865ba07811429c988c72299b315 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 29 Jul 2015 01:41:19 -0400 Subject: x86/vm86: Use the normal pt_regs area for vm86 Change to use the normal pt_regs area to enter and exit vm86 mode. This is done by increasing the padding at the top of the stack to make room for the extra vm86 segment slots in the IRET frame. It then saves the 32-bit regs in the off-stack vm86 data, and copies in the vm86 regs. 
Exiting back to 32-bit mode does the reverse. This allows removing the hacks to jump directly into the exit asm code due to having to change the stack pointer. Returning normally from the vm86 syscall and the exception handlers allows things like ptrace and auditing to work properly. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438148483-11932-5-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/thread_info.h | 11 +++++++---- arch/x86/include/asm/vm86.h | 6 ++++-- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 225ee54..fdad5c2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -27,14 +27,17 @@ * Without this offset, that can result in a page fault. (We are * careful that, in this case, the value we read doesn't matter.) * - * In vm86 mode, the hardware frame is much longer still, but we neither - * access the extra members from NMI context, nor do we write such a - * frame at sp0 at all. + * In vm86 mode, the hardware frame is much longer still, so add 16 + * bytes to make room for the real-mode segments. * * x86_64 has a fixed-length stack frame. */ #ifdef CONFIG_X86_32 -# define TOP_OF_KERNEL_STACK_PADDING 8 +# ifdef CONFIG_VM86 +# define TOP_OF_KERNEL_STACK_PADDING 16 +# else +# define TOP_OF_KERNEL_STACK_PADDING 8 +# endif #else # define TOP_OF_KERNEL_STACK_PADDING 0 #endif diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 226d6c1..e45386e 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -29,7 +29,7 @@ struct kernel_vm86_regs { struct vm86 { struct vm86plus_struct __user *vm86_info; - struct pt_regs *regs32; + struct pt_regs regs32; unsigned long v86flags; unsigned long v86mask; unsigned long saved_sp0; @@ -46,7 +46,7 @@ struct vm86 { void handle_vm86_fault(struct kernel_vm86_regs *, long); int handle_vm86_trap(struct kernel_vm86_regs *, long, int); -struct pt_regs *save_v86_state(struct kernel_vm86_regs *); +void save_v86_state(struct kernel_vm86_regs *, int); struct task_struct; void release_vm86_irqs(struct task_struct *); @@ -69,6 +69,8 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) return 0; } +static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { } + #define free_vm86(t) do { } while(0) #endif /* CONFIG_VM86 */ -- cgit v1.1 From af3e565a8542c4be699a0403b88fd6c691f5914f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 31 Jul 2015 10:59:20 +0200 Subject: x86/vm86: Move the vm86 IRQ definitions to vm86.h Move vm86 specific definitions from irq_vectors.h to vm86.h. Based on patch from Brian Gerst. Originally-from: Brian Gerst Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438148483-11932-6-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 10 ---------- arch/x86/include/asm/vm86.h | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4c2d2eb..6ca9fd6 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -117,16 +117,6 @@ #define FPU_IRQ 13 -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 - -#ifndef __ASSEMBLY__ -static inline int invalid_vm86_irq(int irq) -{ - return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; -} -#endif - /* * Size the maximum number of interrupts. * diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index e45386e..b063196 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -49,7 +49,6 @@ int handle_vm86_trap(struct kernel_vm86_regs *, long, int); void save_v86_state(struct kernel_vm86_regs *, int); struct task_struct; -void release_vm86_irqs(struct task_struct *); #define free_vm86(t) do { \ struct thread_struct *__t = (t); \ @@ -59,6 +58,20 @@ void release_vm86_irqs(struct task_struct *); } \ } while (0) +/* + * Support for VM86 programs to request interrupts for + * real mode hardware drivers: + */ +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 + +static inline int invalid_vm86_irq(int irq) +{ + return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; +} + +void release_vm86_irqs(struct task_struct *); + #else #define handle_vm86_fault(a, b) -- cgit v1.1 From ba3e127ec105e790eeec4034d9769e018e4a1b54 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 29 Jul 2015 01:41:21 -0400 Subject: x86/vm86: Clean up vm86.h includes vm86.h was being implicitly included in a lot of places via processor.h, which in turn got it from math_emu.h. Break that chain and explicitly include vm86.h in all files that need it. Also remove unused vm86 field from math_emu_info. Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438148483-11932-7-git-send-email-brgerst@gmail.com [ Fixed build failure.
] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/math_emu.h | 6 +----- arch/x86/include/asm/syscalls.h | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 031f626..0d9b14f 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -2,7 +2,6 @@ #define _ASM_X86_MATH_EMU_H #include -#include /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved @@ -10,9 +9,6 @@ */ struct math_emu_info { long ___orig_eip; - union { - struct pt_regs *regs; - struct kernel_vm86_regs *vm86; - }; + struct pt_regs *regs; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 592a6a6..91dfcaf 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -37,6 +37,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *); asmlinkage unsigned long sys_sigreturn(void); /* kernel/vm86_32.c */ +struct vm86_struct; asmlinkage long sys_vm86old(struct vm86_struct __user *); asmlinkage long sys_vm86(unsigned long, unsigned long); -- cgit v1.1 From 1342635638cba9b7c8eac776da5e54390d14d313 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 29 Jul 2015 01:41:22 -0400 Subject: x86/vm86: Rename vm86->vm86_info to user_vm86 Make it clearer that this is the pointer to the userspace vm86 state area. Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438148483-11932-8-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vm86.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index b063196..c93ae73 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -28,7 +28,7 @@ struct kernel_vm86_regs { }; struct vm86 { - struct vm86plus_struct __user *vm86_info; + struct vm86plus_struct __user *user_vm86; struct pt_regs regs32; unsigned long v86flags; unsigned long v86mask; -- cgit v1.1 From decd275e62d5eef4b947fab89652fa6afdadf2f2 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 29 Jul 2015 01:41:23 -0400 Subject: x86/vm86: Rename vm86->v86flags and v86mask Rename v86flags to veflags, and v86mask to veflags_mask. Signed-off-by: Brian Gerst Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438148483-11932-9-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vm86.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index c93ae73..1e491f3 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -30,8 +30,8 @@ struct kernel_vm86_regs { struct vm86 { struct vm86plus_struct __user *user_vm86; struct pt_regs regs32; - unsigned long v86flags; - unsigned long v86mask; + unsigned long veflags; + unsigned long veflags_mask; unsigned long saved_sp0; -- cgit v1.1 From fe32d3cd5e8eb0f82e459763374aa80797023403 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 15 Jul 2015 12:52:04 +0300 Subject: sched/preempt: Fix cond_resched_lock() and cond_resched_softirq() These functions check should_resched() before unlocking spinlock/bh-enable: preempt_count is always non-zero there, so should_resched() always returns false. cond_resched_lock() worked iff spin_needbreak is set. This patch adds argument "preempt_offset" to should_resched(). preempt_count offset constants for that: PREEMPT_DISABLE_OFFSET - offset after preempt_disable() PREEMPT_LOCK_OFFSET - offset after spin_lock() SOFTIRQ_DISABLE_OFFSET - offset after local_bh_disable() SOFTIRQ_LOCK_OFFSET - offset after spin_lock_bh() Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Graf Cc: Boris Ostrovsky Cc: David Vrabel Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: bdb438065890 ("sched: Extract the basic add/sub preempt_count modifiers") Link: http://lkml.kernel.org/r/20150715095204.12246.98268.stgit@buzz Signed-off-by: Ingo Molnar --- arch/x86/include/asm/preempt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index dca7171..b12f810 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). */ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!raw_cpu_read_4(__preempt_count)); + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPT -- cgit v1.1 From b1bf72d6691cc33fc7763fc8ec77df42ca1a8702 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 30 Jul 2015 16:15:31 +0300 Subject: perf/x86/intel/pt: Add new timing packet enables The Intel PT chapter in the new Intel Architecture SDM adds several packets, corresponding enable bits and registers that control packet generation. Also, additional bits in the Intel PT CPUID leaf were added to enumerate presence and parameters of these new packets and features. The packets and enables are: * CYC: cycle accurate mode, provides the number of cycles elapsed since previous CYC packet; its presence and available threshold values are enumerated via CPUID; * MTC: mini time counter packets, used for tracking TSC time between full TSC packets; its presence and available resolution options are enumerated via CPUID; * PSB packet period is now configurable, available period values are enumerated via CPUID.
This patch adds the corresponding bit and register definitions, PMU driver capabilities based on CPUID enumeration, and new attribute format bits for the new features; it also extends the event configuration validation function to take these into account. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/1438262131-12725-1-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9ebc3d0..c665d34 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -80,13 +80,21 @@ #define MSR_IA32_RTIT_CTL 0x00000570 #define RTIT_CTL_TRACEEN BIT(0) +#define RTIT_CTL_CYCLEACC BIT(1) #define RTIT_CTL_OS BIT(2) #define RTIT_CTL_USR BIT(3) #define RTIT_CTL_CR3EN BIT(7) #define RTIT_CTL_TOPA BIT(8) +#define RTIT_CTL_MTC_EN BIT(9) #define RTIT_CTL_TSC_EN BIT(10) #define RTIT_CTL_DISRETC BIT(11) #define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_MTC_RANGE_OFFSET 14 +#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) +#define RTIT_CTL_CYC_THRESH_OFFSET 19 +#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET) +#define RTIT_CTL_PSB_FREQ_OFFSET 24 +#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET) #define MSR_IA32_RTIT_STATUS 0x00000571 #define RTIT_STATUS_CONTEXTEN BIT(1) #define RTIT_STATUS_TRIGGEREN BIT(2) -- cgit v1.1 From a94cab2376cb35f236be14e2833cef63a8762a31 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 10 May 2015 12:22:39 -0700 Subject: perf/x86: Add a native_perf_sched_clock_from_tsc() PEBSv3 has a raw TSC time stamp in its memory buffer that later needs to be converted to perf_clock. Add a native_sched_clock_from_tsc() that works the same as native_sched_clock(), but starts with an already given TSC value. Paravirt is ignored; it will just get the native clock. But there isn't a paravirtualized PEBS anyway. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: eranian@google.com Link: http://lkml.kernel.org/r/1431285767-27027-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tsc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 94605c0..aad56eb 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -51,6 +51,7 @@ extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern int check_tsc_disabled(void); extern unsigned long native_calibrate_tsc(void); +extern unsigned long long native_sched_clock_from_tsc(u64 tsc); extern int tsc_clocksource_reliable; -- cgit v1.1 From b83ff1c8617aac03a1cf807aafa848fe0f0908f2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 10 May 2015 12:22:41 -0700 Subject: x86: Add new MSRs and MSR bits used for Intel Skylake PMU support Add new MSRs (LBR_INFO) and some new MSR bits used by the Intel Skylake PMU driver.
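To illustrate the intended use of the new bits, here is a sketch (the loop bound nr_lbr_entries and the surrounding context are hypothetical; the masks are the ones defined in the diff below):

    u64 info;
    bool mispred, in_tx;
    u16 cycles;
    int i;

    for (i = 0; i < nr_lbr_entries; i++) {
            rdmsrl(MSR_LBR_INFO_0 + i, info);

            mispred = info & LBR_INFO_MISPRED;  /* branch was mispredicted */
            in_tx   = info & LBR_INFO_IN_TX;    /* taken inside a TSX transaction */
            cycles  = info & LBR_INFO_CYCLES;   /* core cycles since last branch */
            /* ... fill in the branch stack sample ... */
    }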
Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: eranian@google.com Link: http://lkml.kernel.org/r/1431285767-27027-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 6 ++++++ arch/x86/include/asm/perf_event.h | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c665d34..fcd17c1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -73,6 +73,12 @@ #define MSR_LBR_CORE_FROM 0x00000040 #define MSR_LBR_CORE_TO 0x00000060 +#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ +#define LBR_INFO_MISPRED BIT_ULL(63) +#define LBR_INFO_IN_TX BIT_ULL(62) +#define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYCLES 0xffff + #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index dc0f6ed..7bcb861 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -159,6 +159,13 @@ struct x86_pmu_capability { */ #define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16) +#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) +#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62) +#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) +#define GLOBAL_STATUS_ASIF BIT_ULL(60) +#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) + /* * IBS cpuid feature detection */ -- cgit v1.1 From 2517281d63a2b09d94aedfb522943617048f337e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 1 Aug 2015 16:08:07 -0700 Subject: Drivers: hv: vmbus: add special kexec handler When general-purpose kexec (not kdump) is being performed in a Hyper-V guest, the newly booted kernel fails with an MCE error coming from the host. It is the same error which was fixed in the "Drivers: hv: vmbus: Implement the protocol for tearing down vmbus state" commit - monitor pages remain special and when they're being written to (as the new kernel doesn't know these pages are special) bad things happen. We need to perform some minimalistic cleanup before booting a new kernel on kexec. To do so we need to register a special machine_ops.shutdown handler to be executed before the native_machine_shutdown(). Registering a shutdown notification handler via the register_reboot_notifier() call is not sufficient as it happens too early for our purposes. machine_ops is not being exported to modules (and I don't think we want to export it) so let's do this in mshyperv.c. The minimalistic cleanup consists of cleaning up clockevents, synic MSRs, guest os id MSR, and hypercall MSR. Kdump doesn't require all this stuff as it lives in a separate memory space. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y.
Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index c163215..d3db910 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -20,4 +20,6 @@ void hyperv_vector_handler(struct pt_regs *regs); void hv_setup_vmbus_irq(void (*handler)(void)); void hv_remove_vmbus_irq(void); +void hv_setup_kexec_handler(void (*handler)(void)); +void hv_remove_kexec_handler(void); #endif -- cgit v1.1 From b4370df2b1f5158de028e167974263c5757b34a6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 1 Aug 2015 16:08:09 -0700 Subject: Drivers: hv: vmbus: add special crash handler A full kernel hang is observed when the kdump kernel starts after a crash. This hang happens in the vmbus_negotiate_version() function on wait_for_completion(), as the Hyper-V host (Win2012R2 in my testing) never responds to CHANNELMSG_INITIATE_CONTACT: it thinks the connection is already established. We need to perform some mandatory minimalistic cleanup before we start the new kernel. Reported-by: K. Y. Srinivasan Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index d3db910..d02f9c9 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -22,4 +22,6 @@ void hv_remove_vmbus_irq(void); void hv_setup_kexec_handler(void (*handler)(void)); void hv_remove_kexec_handler(void); +void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); +void hv_remove_crash_handler(void); #endif -- cgit v1.1 From cc2dd4027a43bb36c846f195a764edabc0828602 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Sat, 1 Aug 2015 16:08:20 -0700 Subject: mshyperv: fix recognition of Hyper-V guest crash MSR's The Hypervisor Top Level Functional Specification v3.1/4.0 notes that cpuid (0x40000003) EDX's 10th bit should be used to check that the Hyper-V guest crash MSR functionality is available. This patch fixes the recognition: currently the code checks the EAX register instead of EDX. Signed-off-by: Andrey Smetanin Signed-off-by: Denis V. Lunev Signed-off-by: K. Y.
Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index d02f9c9..aaf59b7 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -7,6 +7,7 @@ struct ms_hyperv_info { u32 features; + u32 misc_features; u32 hints; }; -- cgit v1.1 From d14edb1648221e59fc9fd47127fcc57bf26d759f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 4 Aug 2015 16:15:15 +0200 Subject: x86/hweight: Force inlining of __arch_hweight{32,64}() With this config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os gcc-4.7.2 generates many copies of these tiny functions: __arch_hweight32 (35 copies): 55 push %rbp e8 66 9b 4a 00 callq __sw_hweight32 48 89 e5 mov %rsp,%rbp 5d pop %rbp c3 retq __arch_hweight64 (8 copies): 55 push %rbp e8 5e c2 8a 00 callq __sw_hweight64 48 89 e5 mov %rsp,%rbp 5d pop %rbp c3 retq See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 This patch fixes this via s/inline/__always_inline/ To avoid touching 32-bit case where such change was not tested to be a win, reformat __arch_hweight64() to have completely disjoint 64-bit and 32-bit implementations. IOW: made #ifdef / 32 bits and 64 bits instead of having #ifdef / #else / #endif inside a single function body. Only 64-bit __arch_hweight64() is __always_inline'd. text data bss dec filename 86971120 17195912 36659200 140826232 vmlinux.before 86970954 17195912 36659200 140826066 vmlinux Signed-off-by: Denys Vlasenko Cc: Andrew Morton Cc: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Graf Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1438697716-28121-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/arch_hweight.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 9686c3d..259a7c1 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -21,7 +21,7 @@ * ARCH_HWEIGHT_CFLAGS in for the respective * compiler switches. */ -static inline unsigned int __arch_hweight32(unsigned int w) +static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res = 0; @@ -42,20 +42,23 @@ static inline unsigned int __arch_hweight8(unsigned int w) return __arch_hweight32(w & 0xff); } +#ifdef CONFIG_X86_32 static inline unsigned long __arch_hweight64(__u64 w) { - unsigned long res = 0; - -#ifdef CONFIG_X86_32 return __arch_hweight32((u32)w) + __arch_hweight32((u32)(w >> 32)); +} #else +static __always_inline unsigned long __arch_hweight64(__u64 w) +{ + unsigned long res = 0; + asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) : "="REG_OUT (res) : REG_IN (w)); -#endif /* CONFIG_X86_32 */ return res; } +#endif /* CONFIG_X86_32 */ #endif -- cgit v1.1 From 88cd622f9299c4c9e61e978bb9ef9d7599769ed0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 31 Jul 2015 14:41:10 -0700 Subject: x86/entry: Remove do_notify_resume(), syscall_trace_leave(), and their TIF masks They are no longer used. Good riddance! Deleting the TIF_ macros is really nice. It was never clear why there were so many variants. 
Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Eric Paris Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/22c61682f446628573dde0f1d573ab821677e06da.1438378274.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ptrace.h | 1 - arch/x86/include/asm/signal.h | 1 - arch/x86/include/asm/thread_info.h | 16 ---------------- 3 files changed, 18 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 5fabf13..6271281 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -88,7 +88,6 @@ extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch, unsigned long phase1_result); extern long syscall_trace_enter(struct pt_regs *); -extern void syscall_trace_leave(struct pt_regs *); static inline unsigned long regs_return_value(struct pt_regs *regs) { diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index b42408b..c481be7 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -31,7 +31,6 @@ typedef sigset_t compat_sigset_t; #include #ifndef __ASSEMBLY__ extern void do_signal(struct pt_regs *regs); -extern void do_notify_resume(struct pt_regs *, void *, __u32); #define __ARCH_HAS_SA_RESTORER diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index fdad5c2..8afdc3e 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -143,27 +143,11 @@ struct thread_info { _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) -/* work to do in syscall_trace_leave() */ -#define _TIF_WORK_SYSCALL_EXIT \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ) - -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & \ - ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \ - _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) - /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) -/* Only used for 64 bit */ -#define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ - _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) - /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) -- cgit v1.1 From a0a64f50aac731d42125dd8581b9a31e4fdb0f75 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 5 Aug 2015 12:04:21 +0800 Subject: KVM: MMU: introduce rsvd_bits_validate These two fields, rsvd_bits_mask and bad_mt_xwr, in "struct kvm_mmu" are used to check if reserved bits are set on guest PTEs; move them to a data struct so that the approach can be applied to check host shadow page table entries as well. Signed-off-by: Xiao Guangrong Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2f9e504..847b37c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -252,6 +252,11 @@ struct kvm_pio_request { int size; }; +struct rsvd_bits_validate { + u64 rsvd_bits_mask[2][4]; + u64 bad_mt_xwr; +}; + /* * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level * 32-bit).
The kvm_mmu structure abstracts the details of the current mmu @@ -289,8 +294,7 @@ struct kvm_mmu { u64 *pae_root; u64 *lm_root; - u64 rsvd_bits_mask[2][4]; - u64 bad_mt_xwr; + struct rsvd_bits_validate guest_rsvd_check; /* * Bitmap: bit set = last pte in walk -- cgit v1.1 From c258b62b264fdc469b6d3610a907708068145e3b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 5 Aug 2015 12:04:24 +0800 Subject: KVM: MMU: introduce the framework to check zero bits on sptes We have abstracted the data struct and functions which are used to check reserved bits on guest page tables; now we extend the logic to check zero bits on shadow page tables. The zero bits on SPTEs include not only hardware reserved bits but also the bits that SPTEs will never use. For example, shadow pages will never use GB pages unless the guest uses them too. Signed-off-by: Xiao Guangrong Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 847b37c..c12e845 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -294,6 +294,14 @@ struct kvm_mmu { u64 *pae_root; u64 *lm_root; + + /* + * check zero bits on shadow page table entries, these + * bits include not only hardware reserved bits but also + * the bits spte never used. + */ + struct rsvd_bits_validate shadow_zero_check; + struct rsvd_bits_validate guest_rsvd_check; /* -- cgit v1.1 From ca9357bd26c2f8e7b909321eedd651f52cc30d04 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 5 Aug 2015 00:52:42 -0700 Subject: Drivers: hv: vmbus: Implement a clocksource based on the TSC page The current Hyper-V clock source is based on the per-partition reference counter, and this counter is being accessed via a synthetic MSR - HV_X64_MSR_TIME_REF_COUNT. Hyper-V has a more efficient way of computing the per-partition reference counter value that does not involve reading a synthetic MSR. We implement a time source based on this mechanism. Tested-by: Vivek Yadav Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/hyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index f36d56b..f0412c5 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -27,6 +27,8 @@ #define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) +/* Partition reference TSC MSR is available */ +#define HV_X64_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) /* A partition's reference time stamp counter (TSC) page */ #define HV_X64_MSR_REFERENCE_TSC 0x40000021 -- cgit v1.1 From 7276c6a2cb5f7ae46fd0c9539af02dbcb7c4f3f5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 2 Aug 2015 20:38:25 +0000 Subject: x86/irq: Rename VECTOR_UNDEFINED to VECTOR_UNUSED VECTOR_UNDEFINED is a misnomer. The vector is defined, but unused.
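At call sites the intended reading is then simply (a sketch of the usage pattern):

    /* vector freed or never assigned: mark the per-CPU slot unused */
    per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;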
Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Peter Zijlstra Cc: Rusty Russell Cc: Bjorn Helgaas Link: http://lkml.kernel.org/r/20150802203609.477282494@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 6615032..62bb8d2 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -182,7 +182,7 @@ extern char irq_entries_start[]; #define trace_irq_entries_start irq_entries_start #endif -#define VECTOR_UNDEFINED (-1) +#define VECTOR_UNUSED (-1) #define VECTOR_RETRIGGERED (-2) typedef int vector_irq_t[NR_VECTORS]; -- cgit v1.1 From a782a7e46bb50822fabfeb7271605762a59c86df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 2 Aug 2015 20:38:27 +0000 Subject: x86/irq: Store irq descriptor in vector array We can spare the irq_desc lookup in the interrupt entry code if we store the descriptor pointer in the vector array instead of the interrupt number. Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Peter Zijlstra Cc: Rusty Russell Cc: Bjorn Helgaas Link: http://lkml.kernel.org/r/20150802203609.717724106@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 6 +++--- arch/x86/include/asm/irq.h | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 62bb8d2..1e3408e 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -182,10 +182,10 @@ extern char irq_entries_start[]; #define trace_irq_entries_start irq_entries_start #endif -#define VECTOR_UNUSED (-1) -#define VECTOR_RETRIGGERED (-2) +#define VECTOR_UNUSED NULL +#define VECTOR_RETRIGGERED ((void *)~0UL) -typedef int vector_irq_t[NR_VECTORS]; +typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); #endif /* !ASSEMBLY_ */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 8008d06..881b476 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -36,7 +36,9 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -extern bool handle_irq(unsigned irq, struct pt_regs *regs); + +struct irq_desc; +extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern __visible unsigned int do_IRQ(struct pt_regs *regs); -- cgit v1.1 From 20d51a426fe9a0d0a63cc3a7488f621c8bac37e1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Aug 2015 18:29:33 +0200 Subject: x86/mce: Reuse one of the u16 padding fields in 'struct mce' ... to save the error severity of the MCE and whether the reported address of the error is usable.
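A sketch of how a handler might fill the two new fields (mce_usable_address() stands in for whatever address-validity check the handler applies, and severity comes from the handler's grading step -- both are assumptions here, not code from this patch):

    struct mce m;
    int severity;

    /* ... m.status/m.addr read from the MCA registers, severity graded ... */
    m.severity = severity;
    m.usable_addr = (m.status & MCI_STATUS_ADDRV) && mce_usable_address(&m);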
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1439396985-12812-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/uapi/asm/mce.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index a0eab85..76880ed 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -15,7 +15,8 @@ struct mce { __u64 time; /* wall time_t when error was detected */ __u8 cpuvendor; /* cpu vendor as encoded in system.h */ __u8 inject_flags; /* software inject flags */ - __u16 pad; + __u8 severity; + __u8 usable_addr; __u32 cpuid; /* CPUID 1 EAX */ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ -- cgit v1.1 From fd4cf79fcc4b5130ced8fd8c40378d3cec2e5fa8 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 12 Aug 2015 18:29:36 +0200 Subject: x86/mce: Remove the MCE ring for Action Optional errors Use unified genpool to save Action Optional error events and put Action Optional error handling in the same notification chain as MCE error decoding. Signed-off-by: Chen, Gong [ Fold in subsequent patch from Boris for early boot logging. ] Signed-off-by: Tony Luck [ Correct a lot. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 982dfc3..dfaa4de 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -140,7 +140,7 @@ struct mce_vendor_flags { extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; -extern void mce_register_decode_chain(struct notifier_block *nb); +extern void mce_register_decode_chain(struct notifier_block *nb, bool drain); extern void mce_unregister_decode_chain(struct notifier_block *nb); #include -- cgit v1.1 From eef4dfa0cb83899c782935ac5345532f47073cea Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Aug 2015 18:29:38 +0200 Subject: x86/mce: Kill drain_mcelog_buffer() This used to flush out MCEs logged during early boot and which were in the MCA registers from a previous system run. No need for that now, since we've moved to a genpool. 
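A consumer of the decode chain keeps the familiar shape -- a sketch, where my_mce_decoder/my_mce_nb are made-up names; the series arranges for events buffered in the genpool during early boot to be drained to the chain:

    static int my_mce_decoder(struct notifier_block *nb, unsigned long val,
                              void *data)
    {
            struct mce *m = data;   /* event drained from the genpool */

            /* ... decode and report *m ... */
            return NOTIFY_OK;
    }

    static struct notifier_block my_mce_nb = {
            .notifier_call = my_mce_decoder,
    };

    static int __init my_decoder_init(void)
    {
            mce_register_decode_chain(&my_mce_nb);
            return 0;
    }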
Suggested-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dfaa4de..982dfc3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -140,7 +140,7 @@ struct mce_vendor_flags { extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; -extern void mce_register_decode_chain(struct notifier_block *nb, bool drain); +extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); #include -- cgit v1.1 From 4d1d5cdc345d15e09518a2410f7fcd069465ffac Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 12 Aug 2015 18:29:39 +0200 Subject: x86/mce: Remove unused function declarations Remove unused function declarations. Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1439396985-12812-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 982dfc3..38d3a1a8 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -185,16 +185,12 @@ void cmci_clear(void); void cmci_reenable(void); void cmci_rediscover(void); void cmci_recheck(void); -void lmce_clear(void); -void lmce_enable(void); #else static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } static inline void cmci_clear(void) {} static inline void cmci_reenable(void) {} static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} -static inline void lmce_clear(void) {} -static inline void lmce_enable(void) {} #endif #ifdef CONFIG_X86_MCE_AMD -- cgit v1.1 From 8838eb6c0bf3b6a6494a163947ab3d1700ab45d2 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 12 Aug 2015 18:29:40 +0200 Subject: x86/mce: Clear Local MCE opt-in before kexec kexec could boot a legacy kernel with no knowledge of LMCE. Hence we should make sure we clear the LMCE opt-in before a kexec reboot.
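The clearing itself boils down to dropping the opt-in bit on the shutdown path -- roughly as follows (a sketch assuming an lmce_supported() helper and the MSR_IA32_MCG_EXT_CTL/MCG_EXT_CTL_LMCE_EN definitions):

    static void intel_clear_lmce(void)
    {
            u64 val;

            if (!lmce_supported())
                    return;

            rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
            val &= ~MCG_EXT_CTL_LMCE_EN;    /* next kernel starts with LMCE off */
            wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
    }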
Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1439396985-12812-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 38d3a1a8..2dbc0bf 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -151,10 +151,12 @@ extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); +void mcheck_cpu_clear(struct cpuinfo_x86 *c); void mcheck_vendor_init_severity(void); #else static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} +static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} static inline void mcheck_vendor_init_severity(void) {} #endif @@ -181,12 +183,14 @@ DECLARE_PER_CPU(struct device *, mce_device); #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); +void mce_intel_feature_clear(struct cpuinfo_x86 *c); void cmci_clear(void); void cmci_reenable(void); void cmci_rediscover(void); void cmci_recheck(void); #else static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } +static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { } static inline void cmci_clear(void) {} static inline void cmci_reenable(void) {} static inline void cmci_rediscover(void) {} -- cgit v1.1 From 4d283ec908e617fa28bcb06bce310206f0655d67 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 13 Aug 2015 13:18:48 -0700 Subject: x86/kvm: Rename VMX's segment access rights defines VMX encodes access rights differently from LAR, and the latter is most likely what x86 people think of when they think of "access rights". Rename them to avoid confusion. 
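Typical usage after the rename looks like this (a sketch, using KVM's vmcs_read32() and the GUEST_SS_AR_BYTES VMCS field):

    u32 ar = vmcs_read32(GUEST_SS_AR_BYTES);

    if (!(ar & VMX_AR_UNUSABLE_MASK)) {
            int dpl = VMX_AR_DPL(ar);               /* descriptor privilege level */
            bool code = ar & VMX_AR_TYPE_CODE_MASK; /* code vs. data segment */
            /* ... */
    }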
Cc: kvm@vger.kernel.org Signed-off-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9299ae5..448b7ca 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -368,29 +368,29 @@ enum vmcs_field { #define TYPE_PHYSICAL_APIC_EVENT (10 << 12) #define TYPE_PHYSICAL_APIC_INST (15 << 12) -/* segment AR */ -#define SEGMENT_AR_L_MASK (1 << 13) - -#define AR_TYPE_ACCESSES_MASK 1 -#define AR_TYPE_READABLE_MASK (1 << 1) -#define AR_TYPE_WRITEABLE_MASK (1 << 2) -#define AR_TYPE_CODE_MASK (1 << 3) -#define AR_TYPE_MASK 0x0f -#define AR_TYPE_BUSY_64_TSS 11 -#define AR_TYPE_BUSY_32_TSS 11 -#define AR_TYPE_BUSY_16_TSS 3 -#define AR_TYPE_LDT 2 - -#define AR_UNUSABLE_MASK (1 << 16) -#define AR_S_MASK (1 << 4) -#define AR_P_MASK (1 << 7) -#define AR_L_MASK (1 << 13) -#define AR_DB_MASK (1 << 14) -#define AR_G_MASK (1 << 15) -#define AR_DPL_SHIFT 5 -#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3) - -#define AR_RESERVD_MASK 0xfffe0f00 +/* segment AR in VMCS -- these are different from what LAR reports */ +#define VMX_SEGMENT_AR_L_MASK (1 << 13) + +#define VMX_AR_TYPE_ACCESSES_MASK 1 +#define VMX_AR_TYPE_READABLE_MASK (1 << 1) +#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2) +#define VMX_AR_TYPE_CODE_MASK (1 << 3) +#define VMX_AR_TYPE_MASK 0x0f +#define VMX_AR_TYPE_BUSY_64_TSS 11 +#define VMX_AR_TYPE_BUSY_32_TSS 11 +#define VMX_AR_TYPE_BUSY_16_TSS 3 +#define VMX_AR_TYPE_LDT 2 + +#define VMX_AR_UNUSABLE_MASK (1 << 16) +#define VMX_AR_S_MASK (1 << 4) +#define VMX_AR_P_MASK (1 << 7) +#define VMX_AR_L_MASK (1 << 13) +#define VMX_AR_DB_MASK (1 << 14) +#define VMX_AR_G_MASK (1 << 15) +#define VMX_AR_DPL_SHIFT 5 +#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3) + +#define VMX_AR_RESERVD_MASK 0xfffe0f00 #define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0) #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1) -- cgit v1.1 From 656bba306827a44ed73b3f93f75bb3147de17fae Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 16 Aug 2015 11:45:48 -0400 Subject: x86/smpboot: Remove APIC.wait_for_init_deassert and atomic init_deasserted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both the per-APIC flag ".wait_for_init_deassert", and the global atomic_t "init_deasserted" are dead code -- remove them. For all APIC types, "wait_for_master()" prevents an AP from proceeding until the BSP has set cpu_callout_mask, making "init_deasserted" {unnecessary}: BSP: ... BSP: {set init_deasserted} AP: wait_for_master() set cpu_initialized_mask wait for cpu_callout_mask BSP: test cpu_initialized_mask BSP: set cpu_callout_mask AP: test cpu_callout_mask AP: {wait for init_deasserted} ... AP: Deleting the {dead code} above is necessary to enable some parallelism in a future patch. Signed-off-by: Len Brown Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Dave Hansen Cc: Igor Mammedov Cc: Jan H. Schönherr Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Zhu Guihua Link: http://lkml.kernel.org/r/de4b3a9bab894735e285870b5296da25ee6a8a5a.1439739165.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c839363..ebf6d5e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -313,7 +313,6 @@ struct apic { /* wakeup_secondary_cpu */ int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - bool wait_for_init_deassert; void (*inquire_remote_apic)(int apicid); /* apic ops */ @@ -378,7 +377,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; * APIC functionality to boot other CPUs - only used on SMP: */ #ifdef CONFIG_SMP -extern atomic_t init_deasserted; extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); #endif -- cgit v1.1 From 99770737ca7e3ebc14e66460a69b7032de9421e1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 21 Aug 2015 08:33:53 +0200 Subject: x86/asm/tsc: Add rdtscll() merge helper Some in-flight code makes use of the old rdtscll() (now removed), provide a wrapper for a kernel cycle to smooth the transition to rdtsc(). ( We use the safest variant, rdtsc_ordered(), which has barriers - this adds another incentive to remove the wrapper in the future. ) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/dddbf98a2af53312e9aa73a5a2b1622fe5d6f52b.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 131eec2..54e9f08 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -152,6 +152,9 @@ static __always_inline unsigned long long rdtsc_ordered(void) return rdtsc(); } +/* Deprecated, keep it for a cycle for easier merging: */ +#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0) + static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); -- cgit v1.1 From f96756746c7909de37db3d03ac5fd5cfb2757f38 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 10 Aug 2015 12:19:53 +0200 Subject: x86/asm: Add MONITORX/MWAITX instruction support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AMD Carrizo processors (Family 15h, Models 60h-6fh) added a new feature called MWAITX (MWAIT with extensions) as an extension to MONITOR/MWAIT. This new instruction controls a configurable timer which causes the core to exit wait state on timer expiration, in addition to "normal" MWAIT condition of reading from a monitored VA. Compared to MONITOR/MWAIT, there are minor differences in opcode and input parameters: MWAITX ECX[1]: enable timer if set MWAITX EBX[31:0]: max wait time expressed in SW P0 clocks == TSC. The software P0 frequency is the same as the TSC frequency. 
MWAIT MWAITX opcode 0f 01 c9 | 0f 01 fb ECX[0] value of RFLAGS.IF seen by instruction ECX[1] unused/#GP if set | enable timer if set ECX[31:2] unused/#GP if set EAX unused (reserve for hint) EBX[31:0] unused | max wait time (SW P0 == TSC) MONITOR MONITORX opcode 0f 01 c8 | 0f 01 fa EAX (logical) address to monitor ECX #GP if not zero Max timeout = EBX/(TSC frequency) Signed-off-by: Huang Rui Signed-off-by: Borislav Petkov Cc: Aaron Lu Cc: Alexander Shishkin Cc: Andreas Herrmann Cc: Andy Lutomirski Cc: Dave Hansen Cc: Dirk Brandewie Cc: Fengguang Wu Cc: Frédéric Weisbecker Cc: H. Peter Anvin Cc: John Stultz Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Ross Zwisler Cc: Thomas Gleixner Cc: Tony Li Link: http://lkml.kernel.org/r/1439201994-28067-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/mwait.h | 45 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3d6606f..a39e570 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -176,6 +176,7 @@ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ /* * Auxiliary flags: Linux defined - For features scattered in various diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 653dfa7..c70689b 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -14,6 +14,9 @@ #define CPUID5_ECX_INTERRUPT_BREAK 0x2 #define MWAIT_ECX_INTERRUPT_BREAK 0x1 +#define MWAITX_ECX_TIMER_ENABLE BIT(1) +#define MWAITX_MAX_LOOPS ((u32)-1) +#define MWAITX_DISABLE_CSTATES 0xf static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) @@ -23,6 +26,14 @@ static inline void __monitor(const void *eax, unsigned long ecx, :: "a" (eax), "c" (ecx), "d"(edx)); } +static inline void __monitorx(const void *eax, unsigned long ecx, + unsigned long edx) +{ + /* "monitorx %eax, %ecx, %edx;" */ + asm volatile(".byte 0x0f, 0x01, 0xfa;" + :: "a" (eax), "c" (ecx), "d"(edx)); +} + static inline void __mwait(unsigned long eax, unsigned long ecx) { /* "mwait %eax, %ecx;" */ @@ -30,6 +41,40 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) :: "a" (eax), "c" (ecx)); } +/* + * MWAITX allows for a timer expiration to get the core out a wait state in + * addition to the default MWAIT exit condition of a store appearing at a + * monitored virtual address. + * + * Registers: + * + * MWAITX ECX[1]: enable timer if set + * MWAITX EBX[31:0]: max wait time expressed in SW P0 clocks. The software P0 + * frequency is the same as the TSC frequency. 
From b466bdb614823aaaa7188e85516177d2850f4782 Mon Sep 17 00:00:00 2001
From: Huang Rui
Date: Mon, 10 Aug 2015 12:19:54 +0200
Subject: x86/asm/delay: Introduce an MWAITX-based delay with a configurable timer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MWAITX can enable a timer and a corresponding timer value specified in
SW P0 clocks. The SW P0 frequency is the same as the TSC frequency. The
timer provides an upper bound on how long the instruction waits before
exiting.

This way, a delay function in the kernel can leverage the MWAITX timer.
When a CPU core executes MWAITX, it is quiesced in a waiting phase,
diminishing its power consumption. This way, we can save power in
comparison to our default TSC-based delays.

A simple test:

  $ cat /sys/bus/pci/devices/0000\:00\:18.4/hwmon/hwmon0/power1_acc
  $ sleep 10000s
  $ cat /sys/bus/pci/devices/0000\:00\:18.4/hwmon/hwmon0/power1_acc

Results:

  * TSC-based default delay:	485115 uWatts average power
  * MWAITX-based delay:		252738 uWatts average power

Thus, that's about 230 milliWatts less power consumption. The test
method relies on the AMD CPU accumulated power algorithm support in
fam15h_power, for which patches are forthcoming.

Suggested-by: Andy Lutomirski
Suggested-by: Borislav Petkov
Suggested-by: Peter Zijlstra
Signed-off-by: Huang Rui
[ Fix delay truncation. ]
Signed-off-by: Borislav Petkov
Cc: Aaron Lu
Cc: Andreas Herrmann
Cc: Aravind Gopalakrishnan
Cc: Fengguang Wu
Cc: Frédéric Weisbecker
Cc: H. Peter Anvin
Cc: Hector Marco-Gisbert
Cc: Jacob Shin
Cc: Jiri Olsa
Cc: John Stultz
Cc: Len Brown
Cc: Linus Torvalds
Cc: Paolo Bonzini
Cc: Rafael J. Wysocki
Cc: Thomas Gleixner
Cc: Tony Li
Link: http://lkml.kernel.org/r/1438744732-1459-3-git-send-email-ray.huang@amd.com
Link: http://lkml.kernel.org/r/1439201994-28067-4-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/delay.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index 9b3b4f2..36a760b 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -4,5 +4,6 @@
 #include <asm-generic/delay.h>

 void use_tsc_delay(void);
+void use_mwaitx_delay(void);

 #endif /* _ASM_X86_DELAY_H */
--
cgit v1.1
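The delay routine itself lands outside arch/x86/include; a rough sketch of
the approach (the function name and the per-CPU monitor target are
assumptions for illustration, the real implementation may differ):

	static void delay_mwaitx_sketch(u64 cycles)
	{
		u64 start, end, chunk;

		start = rdtsc_ordered();

		for (;;) {
			/* The MWAITX timer is 32 bits wide, clamp each wait: */
			chunk = min_t(u64, MWAITX_MAX_LOOPS, cycles);

			/* Any rarely-written, cacheline-aligned address works
			   as the monitor target: */
			__monitorx(this_cpu_ptr(&cpu_tss), 0, 0);
			__mwaitx(MWAITX_DISABLE_CSTATES, chunk,
				 MWAITX_ECX_TIMER_ENABLE);

			end = rdtsc_ordered();
			if (end - start >= cycles)
				break;

			cycles -= end - start;
			start = end;
		}
	}

Chunking against MWAITX_MAX_LOOPS is what makes delays longer than the
32-bit timer work, and re-reading the TSC each iteration keeps the delay
accurate even if the wait exits early on a stray store.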
From 920e277e17f12870188f4564887a95ae9ac03e31 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin
Date: Thu, 13 Aug 2015 08:37:23 +0300
Subject: x86/kasan: Define KASAN_SHADOW_OFFSET per architecture

The current definition of KASAN_SHADOW_OFFSET in include/linux/kasan.h
will not work for the upcoming arm64 port, so move it to the arch header.

Signed-off-by: Andrey Ryabinin
Cc: Alexander Potapenko
Cc: Alexey Klimov
Cc: Andrew Morton
Cc: Aneesh Kumar K.V
Cc: Arnd Bergmann
Cc: Catalin Marinas
Cc: David Keitel
Cc: Dmitry Vyukov
Cc: Linus Torvalds
Cc: Linus Walleij
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Cc: Will Deacon
Cc: Yury
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1439444244-26057-2-git-send-email-ryabinin.a.a@gmail.com
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/kasan.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 74a2a8d..1410b56 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -1,6 +1,9 @@
 #ifndef _ASM_X86_KASAN_H
 #define _ASM_X86_KASAN_H

+#include <linux/const.h>
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
 /*
  * Compiler uses shadow offset assuming that addresses start
  * from 0. Kernel addresses don't start from 0, so shadow
--
cgit v1.1

From 47edb65178cb7056c2eea0b6c41a7d8c84547192 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Thu, 23 Jul 2015 12:14:40 -0700
Subject: x86/asm/msr: Make wrmsrl() a function

As of cf991de2f614 ("x86/asm/msr: Make wrmsrl_safe() a function"),
wrmsrl_safe() is a function, but wrmsrl() is still a macro. The wrmsrl()
macro performs invalid shifts if the value argument is 32 bits. This
makes it unnecessarily awkward to write code that puts an unsigned long
into an MSR.

To make this work, syscall_init() needs tweaking to stop passing a
function pointer to wrmsrl().

Signed-off-by: Andy Lutomirski
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Steven Rostedt
Cc: Thomas Gleixner
Cc: Willy Tarreau
Link: http://lkml.kernel.org/r/690f0c629a1085d054e2d1ef3da073cfb3f7db92.1437678821.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/msr.h      | 6 ++++--
 arch/x86/include/asm/paravirt.h | 6 +++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 54e9f08..77d8b28 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -188,8 +188,10 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
 #define rdmsrl(msr, val)			\
 	((val) = native_read_msr((msr)))

-#define wrmsrl(msr, val)						\
-	native_write_msr((msr), (u32)((u64)(val)), (u32)((u64)(val) >> 32))
+static inline void wrmsrl(unsigned msr, u64 val)
+{
+	native_write_msr(msr, (u32)val, (u32)(val >> 32));
+}

 /* wrmsr with exception handling */
 static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c2be037..10d0596 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -153,7 +153,11 @@ do {						\
 	val = paravirt_read_msr(msr, &_err);	\
 } while (0)

-#define wrmsrl(msr, val)	wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
+static inline void wrmsrl(unsigned msr, u64 val)
+{
+	wrmsr(msr, (u32)val, (u32)(val>>32));
+}
+
 #define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)

 /* rdmsr with exception handling */
--
cgit v1.1
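For illustration, what the function form buys callers - a hypothetical
snippet (the value and MSR choice are placeholders), not part of the patch:

	unsigned long val = 0x1234;	/* 32 bits wide on a 32-bit kernel */

	/*
	 * With wrmsrl() a real function taking u64, the argument is
	 * converted at the call boundary, so the internal val >> 32 is
	 * well-defined regardless of the width of the caller's type:
	 */
	wrmsrl(MSR_IA32_SYSENTER_ESP, val);

A function also type-checks its arguments and can have its address taken,
neither of which the old macro allowed.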
From 7e01ebffffedec22cea86ebe94802f909e4579ca Mon Sep 17 00:00:00 2001
From: Huang Rui
Date: Thu, 27 Aug 2015 18:04:04 +0800
Subject: x86/asm: Drop repeated macro of X86_EFLAGS_AC definition

We need only one definition each of X86_EFLAGS_AC_BIT and X86_EFLAGS_AC.

Signed-off-by: Huang Rui
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Fengguang Wu
Cc: Fenghua Yu
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Tony Li
Cc: Tony Luck
Link: http://lkml.kernel.org/r/1440669844-21535-1-git-send-email-ray.huang@amd.com
Signed-off-by: Ingo Molnar
---
 arch/x86/include/uapi/asm/processor-flags.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 180a0c3..79887ab 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -37,8 +37,6 @@
 #define X86_EFLAGS_VM		_BITUL(X86_EFLAGS_VM_BIT)
 #define X86_EFLAGS_AC_BIT	18 /* Alignment Check/Access Control */
 #define X86_EFLAGS_AC		_BITUL(X86_EFLAGS_AC_BIT)
-#define X86_EFLAGS_AC_BIT	18 /* Alignment Check/Access Control */
-#define X86_EFLAGS_AC		_BITUL(X86_EFLAGS_AC_BIT)
 #define X86_EFLAGS_VIF_BIT	19 /* Virtual Interrupt Flag */
 #define X86_EFLAGS_VIF		_BITUL(X86_EFLAGS_VIF_BIT)
 #define X86_EFLAGS_VIP_BIT	20 /* Virtual Interrupt Pending */
--
cgit v1.1