From 62623d5f918fb1c8ed86b03b9a86cc81f1cb1878 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Oct 2016 13:32:55 +1100 Subject: KVM: PPC: Book3S HV: Fix build error when SMP=n Commit 5d375199ea96 ("KVM: PPC: Book3S HV: Set server for passed-through interrupts") broke the SMP=n build: arch/powerpc/kvm/book3s_hv_rm_xics.c:758:2: error: implicit declaration of function 'get_hard_smp_processor_id' That is because we lost the implicit include of asm/smp.h, so include it explicitly to get the definition for get_hard_smp_processor_id(). Fixes: 5d375199ea96 ("KVM: PPC: Book3S HV: Set server for passed-through interrupts") Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_hv_rm_xics.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 82ff5de..a0ea63a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "book3s_xics.h" -- cgit v1.1 From 80f23935cadb1c654e81951f5a8b7ceae0acc1b4 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Thu, 6 Oct 2016 13:42:19 +0000 Subject: powerpc: Convert cmp to cmpd in idle enter sequence PowerPC's "cmp" instruction has four operands. Normally people write "cmpw" or "cmpd" for the second cmp operand 0 or 1. But, frequently people forget, and write "cmp" with just three operands. With older binutils this is silently accepted as if this was "cmpw", while often "cmpd" is wanted. With newer binutils GAS will complain about this for 64-bit code. For 32-bit code it still silently assumes "cmpw" is what is meant. In this instance the code comes directly from ISA v2.07, including the cmp, but cmpd is correct. Backport to stable so that new toolchains can build old kernels. Fixes: 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode") Cc: stable@vger.kernel.org # v3.0 Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Segher Boessenkool Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 01b8a13..3919332 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state; std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ +1: cmpd cr0,r0,r0; \ bne 1b; \ IDLE_INST; \ b . -- cgit v1.1 From 56c46222af0d09149fadec2a3ce9d4889de01cc6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Oct 2016 20:03:05 +1100 Subject: powerpc/64: Re-fix race condition between going idle and entering guest Commit 8117ac6a6c2f ("powerpc/powernv: Switch off MMU before entering nap/sleep/rvwinkle mode", 2014-12-10) fixed a race condition where one thread entering a KVM guest could switch the MMU context to the guest while another thread was still in host kernel context with the MMU on. That commit moved the point where a thread entering a power-saving mode set its kvm_hstate.hwthread_state field in its PACA to KVM_HWTHREAD_IN_IDLE from a point where the MMU was on to after the MMU had been switched off. That commit also added a comment explaining that we have to switch to real mode before setting hwthread_state to avoid this race. Nevertheless, commit 4eae2c9ae54a ("powerpc/powernv: Make pnv_powersave_common more generic", 2016-07-08) subsequently moved the setting of hwthread_state back to a point where the MMU is on, thus reintroducing the race, despite the comment saying that this should not be done being included in full in the context lines of the patch that did it. This fixes the race again and adds a bigger and shoutier comment explaining the potential race condition. Fixes: 4eae2c9ae54a ("powerpc/powernv: Make pnv_powersave_common more generic") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Paul Mackerras Reviewed-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index bd739fe..0d8712a 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -163,12 +163,6 @@ _GLOBAL(pnv_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_IDLE - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - /* * Go to real mode to do the nap, as required by the architecture. * Also, we need to be in real mode before setting hwthread_state, @@ -185,6 +179,26 @@ _GLOBAL(pnv_powersave_common) .globl pnv_enter_arch207_idle_mode pnv_enter_arch207_idle_mode: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + /******************************************************/ + /* N O T E W E L L ! ! ! N O T E W E L L */ + /* The following store to HSTATE_HWTHREAD_STATE(r13) */ + /* MUST occur in real mode, i.e. with the MMU off, */ + /* and the MMU must stay off until we clear this flag */ + /* and test HSTATE_HWTHREAD_REQ(r13) in the system */ + /* reset interrupt vector in exceptions-64s.S. */ + /* The reason is that another thread can switch the */ + /* MMU to a guest context whenever this flag is set */ + /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ + /* that would potentially cause this thread to start */ + /* executing instructions from guest memory in */ + /* hypervisor mode, leading to a host crash or data */ + /* corruption, or worse. */ + /******************************************************/ + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f @@ -250,6 +264,12 @@ enter_winkle: * r3 - requested stop state */ power_enter_stop: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + /* DO THIS IN REAL MODE! See comment above. */ + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif /* * Check if the requested state is a deep idle state. */ -- cgit v1.1 From 09b7e37b18eecc1e347f4b1a3bc863f32801f634 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Oct 2016 20:04:17 +1100 Subject: powerpc/64: Fix race condition in setting lock bit in idle/wakeup code This fixes a race condition where one thread that is entering or leaving a power-saving state can inadvertently ignore the lock bit that was set by another thread, and potentially also clear it. The core_idle_lock_held function is called when the lock bit is seen to be set. It polls the lock bit until it is clear, then does a lwarx to load the word containing the lock bit and thread idle bits so it can be updated. However, it is possible that the value loaded with the lwarx has the lock bit set, even though an immediately preceding lwz loaded a value with the lock bit clear. If this happens then we go ahead and update the word despite the lock bit being set, and when called from pnv_enter_arch207_idle_mode, we will subsequently clear the lock bit. No identifiable misbehaviour has been attributed to this race. This fixes it by checking the lock bit in the value loaded by the lwarx. If it is set then we just go back and keep on polling. Fixes: b32aadc1a8ed ("powerpc/powernv: Fix race in updating core_idle_state") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 0d8712a..72dac0b 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -90,6 +90,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) * Threads will spin in HMT_LOW until the lock bit is cleared. * r14 - pointer to core_idle_state * r15 - used to load contents of core_idle_state + * r9 - used as a temporary variable */ core_idle_lock_held: @@ -99,6 +100,8 @@ core_idle_lock_held: bne 3b HMT_MEDIUM lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bne core_idle_lock_held blr /* -- cgit v1.1 From 39715bf972ed4fee18fe5409609a971fb16b1771 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Wed, 5 Oct 2016 07:57:26 +0200 Subject: powerpc/process: Fix CONFIG_ALIVEC typo in restore_tm_state() It should be ALTIVEC, not ALIVEC. Cyril explains: If a thread performs a transaction with altivec and then gets preempted for whatever reason, this bug may cause the kernel to not re-enable altivec when that thread runs again. This will result in an altivec unavailable fault, when that fault happens inside a user transaction the kernel has no choice but to enable altivec and doom the transaction. The result is that transactions using altivec may get aborted more often than they should. The difficulty in catching this with a selftest is my deliberate use of the word may above. Optimisations to avoid FPU/altivec/VSX faults mean that the kernel will always leave them on for 255 switches. This code prevents the kernel turning it off if it got to the 256th switch (and userspace was transactional). Fixes: dc16b553c949 ("powerpc: Always restore FPU/VEC/VSX if hardware transactional memory in use") Reviewed-by: Cyril Bur Signed-off-by: Valentin Rothberg Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9e7c10f..ce6dc61 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1012,7 +1012,7 @@ void restore_tm_state(struct pt_regs *regs) /* Ensure that restore_math() will restore */ if (msr_diff & MSR_FP) current->thread.load_fp = 1; -#ifdef CONFIG_ALIVEC +#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC) current->thread.load_vec = 1; #endif -- cgit v1.1 From bd77c4498616e27d5725b5959d880ce2272fefa9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 24 Oct 2016 08:50:43 +0530 Subject: powerpc/mm/radix: Use tlbiel only if we ever ran on the current cpu Before this patch, we used tlbiel, if we ever ran only on this core. That was mostly derived from the nohash usage of the same. But is incorrect, the ISA 3.0 clarifies tlbiel such that: "All TLB entries that have all of the following properties are made invalid on the thread executing the tlbiel instruction" ie. tlbiel only invalidates TLB entries on the current thread. So if the mm has been used on any other thread (aka. cpu) then we must broadcast the invalidate. This bug could lead to invalid TLB entries if a program runs on multiple threads of a core. Hence use tlbiel, if we only ever ran on only the current cpu. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/tlb.h | 12 ++++++++++++ arch/powerpc/mm/tlb-radix.c | 8 ++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index f6f68f7..99e1397 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -52,11 +52,23 @@ static inline int mm_is_core_local(struct mm_struct *mm) return cpumask_subset(mm_cpumask(mm), topology_sibling_cpumask(smp_processor_id())); } + +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + return cpumask_equal(mm_cpumask(mm), + cpumask_of(smp_processor_id())); +} + #else static inline int mm_is_core_local(struct mm_struct *mm) { return 1; } + +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + return 1; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 0e49ec5..bda8c43 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -175,7 +175,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -201,7 +201,7 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -226,7 +226,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, pid = mm ? mm->context.id : 0; if (unlikely(pid == MMU_NO_CONTEXT)) goto bail; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -321,7 +321,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, { unsigned long pid; unsigned long addr; - int local = mm_is_core_local(mm); + int local = mm_is_thread_local(mm); unsigned long ap = mmu_get_ap(psize); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; -- cgit v1.1 From fb479e44a9e240a23c2d208c2ace23542a47f41c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 13 Oct 2016 13:17:14 +1100 Subject: powerpc/64s: relocation, register save fixes for system reset interrupt This patch does a couple of things. First of all, powernv immediately explodes when running a relocated kernel, because the system reset exception for handling sleeps does not do correct relocated branches. Secondly, the sleep handling code trashes the condition and cfar registers, which we would like to preserve for debugging purposes (for non-sleep case exception). This patch changes the exception to use the standard format that saves registers before any tests or branches are made. It adds the test for idle-wakeup as an "extra" to break out of the normal exception path. Then it branches to a relocated idle handler that calls the various idle handling functions. After this patch, POWER8 CPU simulator now boots powernv kernel that is running at non-zero. Fixes: 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode") Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Nicholas Piggin Acked-by: Gautham R. Shenoy Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 16 ++++++++++ arch/powerpc/kernel/exceptions-64s.S | 50 ++++++++++++++++++-------------- 2 files changed, 45 insertions(+), 21 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 2e4e7d8..84d49b1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -93,6 +93,10 @@ ld reg,PACAKBASE(r13); /* get high part of &label */ \ ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; +#define __LOAD_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l; + /* Exception register prefixes */ #define EXC_HV H #define EXC_STD @@ -208,6 +212,18 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define kvmppc_interrupt kvmppc_interrupt_pr #endif +#ifdef CONFIG_RELOCATABLE +#define BRANCH_TO_COMMON(reg, label) \ + __LOAD_HANDLER(reg, label); \ + mtctr reg; \ + bctr + +#else +#define BRANCH_TO_COMMON(reg, label) \ + b label + +#endif + #define __KVM_HANDLER_PROLOG(area, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f129408..08ba447 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -95,19 +95,35 @@ __start_interrupts: /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x4100) -EXC_REAL_BEGIN(system_reset, 0x100, 0x200) - SET_SCRATCH0(r13) + #ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are - * waking up from nap/sleep/winkle. + /* + * If running native on arch 2.06 or later, check if we are waking up + * from nap/sleep/winkle, and branch to idle handler. */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - beq 9f +#define IDLETEST(n) \ + BEGIN_FTR_SECTION ; \ + mfspr r10,SPRN_SRR1 ; \ + rlwinm. r10,r10,47-31,30,31 ; \ + beq- 1f ; \ + cmpwi cr3,r10,2 ; \ + BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \ +1: \ + END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +#else +#define IDLETEST NOTEST +#endif - cmpwi cr3,r13,2 - GET_PACA(r13) +EXC_REAL_BEGIN(system_reset, 0x100, 0x200) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + IDLETEST, 0x100) + +EXC_REAL_END(system_reset, 0x100, 0x200) +EXC_VIRT_NONE(0x4100, 0x4200) + +#ifdef CONFIG_PPC_P7_NAP +EXC_COMMON_BEGIN(system_reset_idle_common) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -130,14 +146,8 @@ BEGIN_FTR_SECTION blt cr3,2f b pnv_wakeup_loss 2: b pnv_wakeup_noloss +#endif -9: -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) -EXC_REAL_END(system_reset, 0x100, 0x200) -EXC_VIRT_NONE(0x4100, 0x4200) EXC_COMMON(system_reset_common, 0x100, system_reset_exception) #ifdef CONFIG_PPC_PSERIES @@ -817,10 +827,8 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) TRAMP_KVM(PACA_EXGEN, 0xb00) EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) - -#define LOAD_SYSCALL_HANDLER(reg) \ - ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(system_call_common))@l; +#define LOAD_SYSCALL_HANDLER(reg) \ + __LOAD_HANDLER(reg, system_call_common) /* Syscall routine is used twice, in reloc-off and reloc-on paths */ #define SYSCALL_PSERIES_1 \ -- cgit v1.1