From 31f6a4ada14de04ee6cd7ff03c8b6b5e282a13f0 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 8 Feb 2016 14:39:19 +1100 Subject: powerpc/eeh: fix incorrect function name in comment The comment block above pcibios_set_pcie_reset_state() incorrectly refers to pcibios_set_pcie_slot_reset(). Fix the comment accordingly. Signed-off-by: Andrew Donnellan Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 40e4d4a..8c6005c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -739,7 +739,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata) } /** - * pcibios_set_pcie_slot_reset - Set PCI-E reset state + * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct * @state: reset state to enter * -- cgit v1.1 From 94e3d923592fcfe5585c18a0af9b196de0a13072 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Mon, 1 Feb 2016 17:03:25 +1100 Subject: powerpc: Fix kgdb on little endian ppc64le I spent some time trying to use kgdb and debugged my inability to resume from kgdb_handle_breakpoint(). NIP is not incremented and that leads to a loop in the debugger. I've tested this lightly on a virtual instance with KDB enabled. After the patch, I am able to get the "go" command to work as expected. Signed-off-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/kgdb.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e77c3cc..dbf0981 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -445,7 +445,11 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, * Global data */ struct kgdb_arch arch_kgdb_ops = { +#ifdef __LITTLE_ENDIAN__ + .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d}, +#else .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, +#endif }; static int kgdb_not_implemented(struct pt_regs *regs) -- cgit v1.1 From 15b1624b78075d4f52e170600c81720532ca790d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 19 Feb 2016 11:16:23 +1100 Subject: powerpc: Use defines for __init_tlb_power[78] Use defines for literals __init_tlb_power[78] rather than hand coding them. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_power.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 9c9b741..cb3e272 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -15,6 +15,7 @@ #include #include #include +#include /* Entry: r3 = crap, r4 = ptr to cputable entry * @@ -139,7 +140,7 @@ __init_HFSCR: * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. */ __init_tlb_power7: - li r6,128 + li r6,POWER7_TLB_SETS mtctr r6 li r7,0xc00 /* IS field = 0b11 */ ptesync @@ -150,7 +151,7 @@ __init_tlb_power7: 1: blr __init_tlb_power8: - li r6,512 + li r6,POWER8_TLB_SETS mtctr r6 li r7,0xc00 /* IS field = 0b11 */ ptesync -- cgit v1.1 From c3ab300ea55541014348561e7690c41c79966ac6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 19 Feb 2016 11:16:24 +1100 Subject: powerpc: Add POWER9 cputable entry Add a cputable entry for POWER9. 
More code is required to actually boot and run on a POWER9 but this gets the base piece in which we can start building on. Copies over from POWER8 except for: - Adds a new CPU_FTR_ARCH_300 bit to start hanging new architecture features from (in subsequent patches). - Advertises new user features bits PPC_FEATURE2_ARCH_3_00 & HAS_IEEE128 when on POWER9. - Drops CPU_FTR_SUBCORE. - Drops PMU code and machine check. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_power.S | 44 +++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/cputable.c | 27 +++++++++++++++++++++ arch/powerpc/kernel/mce_power.c | 17 +++++++------- 3 files changed, 79 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index cb3e272..5932219 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -84,6 +84,39 @@ _GLOBAL(__restore_cpu_power8) mtlr r11 blr +_GLOBAL(__setup_cpu_power9) + mflr r11 + bl __init_FSCR + bl __init_hvmode_206 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH + bl __init_LPCR + bl __init_HFSCR + bl __init_tlb_power9 + mtlr r11 + blr + +_GLOBAL(__restore_cpu_power9) + mflr r11 + bl __init_FSCR + mfmsr r3 + rldicl. r0,r3,4,63 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH + bl __init_LPCR + bl __init_HFSCR + bl __init_tlb_power9 + mtlr r11 + blr + __init_hvmode_206: /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ mfmsr r3 @@ -161,6 +194,17 @@ __init_tlb_power8: ptesync 1: blr +__init_tlb_power9: + li r6,POWER9_TLB_SETS_HASH + mtctr r6 + li r7,0xc00 /* IS field = 0b11 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr + __init_PMU_HV: li r5,0 mtspr SPRN_MMCRC,r5 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 7d80bfd..be4d730 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -70,9 +70,12 @@ extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); +extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_power9(void); extern void __restore_cpu_a2(void); extern void __flush_tlb_power7(unsigned int action); extern void __flush_tlb_power8(unsigned int action); +extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ @@ -116,6 +119,11 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) +#define COMMON_USER_POWER9 COMMON_USER_POWER8 +#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ + PPC_FEATURE2_ARCH_3_00 | \ + PPC_FEATURE2_HAS_IEEE128) + #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) #else @@ -499,6 +507,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power9 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9, + 
.cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .flush_tlb = __flush_tlb_power9, + .platform = "power9", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 2c647b1..ee62b19 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -54,8 +54,8 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action) } /* - * Generic routine to flush TLB on power7. This routine is used as - * flush_tlb hook in cpu_spec for Power7 processor. + * Generic routines to flush TLB on POWER processors. These routines + * are used as flush_tlb hook in the cpu_spec. * * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. @@ -65,18 +65,17 @@ void __flush_tlb_power7(unsigned int action) flush_tlb_206(POWER7_TLB_SETS, action); } -/* - * Generic routine to flush TLB on power8. This routine is used as - * flush_tlb hook in cpu_spec for power8 processor. - * - * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. - * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. - */ void __flush_tlb_power8(unsigned int action) { flush_tlb_206(POWER8_TLB_SETS, action); } +void __flush_tlb_power9(unsigned int action) +{ + flush_tlb_206(POWER9_TLB_SETS_HASH, action); +} + + /* flush SLBs and reload */ static void flush_and_reload_slb(void) { -- cgit v1.1 From a4c3f909b4d9bb9430bf42ad583661df6dfa86c6 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Thu, 18 Feb 2016 13:48:01 +1100 Subject: powerpc: Fix BUG_ON() reporting in real mode I ran into this issue while debugging an early boot problem. The system hit a BUG_ON() but report bug failed to print the line number and file name. The reason being that the system was running in real mode and report_bug() searches for addresses in the PAGE_OFFSET+ region. 
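In effect the fix (in the traps.c diff below) rebases the real-mode NIP back
to the linear-map address recorded in the bug table before doing the lookup.
A condensed sketch, assuming the usual powerpc meanings of MSR_IR, PAGE_OFFSET
and is_kernel_addr():

    /* The bug table records PAGE_OFFSET-based (virtual) addresses, but in
     * real mode (MSR_IR clear) regs->nip is a physical address, so rebase
     * it before asking report_bug() to search.
     */
    unsigned long bugaddr = regs->nip;

    if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
            bugaddr += PAGE_OFFSET;

    report_bug(bugaddr, regs);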
Suggested-by: Paul Mackerras Signed-off-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b6becc7..4e5c11d 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1148,6 +1148,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) goto bail; } if (reason & REASON_TRAP) { + unsigned long bugaddr; /* Debugger is first in line to stop recursive faults in * rcu_lock, notify_die, or atomic_notifier_call_chain */ if (debugger_bpt(regs)) @@ -1158,8 +1159,15 @@ void __kprobes program_check_exception(struct pt_regs *regs) == NOTIFY_STOP) goto bail; + bugaddr = regs->nip; + /* + * Fixup bugaddr for BUG_ON() in real mode + */ + if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR)) + bugaddr += PAGE_OFFSET; + if (!(regs->msr & MSR_PR) && /* not user-mode */ - report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { + report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; goto bail; } -- cgit v1.1 From 446957ba5127141ee007fc61509e24a9e60853d9 Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Wed, 24 Feb 2016 10:51:11 -0800 Subject: powerpc: Fix misspellings in comments. Signed-off-by: Adam Buchbinder Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_44x.S | 2 +- arch/powerpc/kernel/signal.c | 4 ++-- arch/powerpc/kernel/signal.h | 2 +- arch/powerpc/kernel/traps.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b5061ab..9cdf5c7 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -806,7 +806,7 @@ _GLOBAL(set_context) _GLOBAL(init_cpu_state) mflr r22 #ifdef CONFIG_PPC_47x - /* We use the PVR to differenciate 44x cores from 476 */ + /* We use the PVR to differentiate 44x cores from 476 */ mfspr r3,SPRN_PVR srwi r3,r3,16 cmplwi cr0,r3,PVR_476FPE@h diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cf8c7e4..cb64d6f 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -1,7 +1,7 @@ /* * Common signal handling code for both 32 and 64 bits * - * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c * * This file is subject to the terms and conditions of the GNU General @@ -178,7 +178,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs) * need to use the stack pointer from the checkpointed state, rather * than the speculated state. This ensures that the signal context * (written tm suspended) will be written below the stack required for - * the rollback. The transaction is aborted becuase of the treclaim, + * the rollback. The transaction is aborted because of the treclaim, * so any memory written between the tbegin and the signal will be * rolled back anyway. 
* diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 51b2741..be305c8 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c * * This file is subject to the terms and conditions of the GNU General diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4e5c11d..88414dd 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1402,7 +1402,7 @@ void facility_unavailable_exception(struct pt_regs *regs) * is a read DSCR attempt through a mfspr instruction, we * just emulate the instruction instead. This code path will * always emulate all the mfspr instructions till the user - * has attempted atleast one mtspr instruction. This way it + * has attempted at least one mtspr instruction. This way it * preserves the same behaviour when the user is accessing * the DSCR through privilege level only SPR number (0x11) * which is emulated through illegal instruction exception. -- cgit v1.1 From d272f6670a0bcc91fa50c234088d21cd6ac30af4 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:46 +1100 Subject: powerpc: Explicitly disable math features when copying thread Currently when threads get scheduled off they always giveup the FPU, Altivec (VMX) and Vector (VSX) units if they were using them. When they are scheduled back on a fault is then taken to enable each facility and load registers. As a result explicitly disabling FPU/VMX/VSX has not been necessary. Future changes and optimisations remove this mandatory giveup and fault which could cause calls such as clone() and fork() to copy threads and run them later with FPU/VMX/VSX enabled but no registers loaded. This patch starts the process of having MSR_{FP,VEC,VSX} mean that a threads registers are hot while not having MSR_{FP,VEC,VSX} means that the registers must be loaded. This allows for a smarter return to userspace. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dccc87e..e0c3d2d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1307,6 +1307,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, f = ret_from_fork; } + childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); sp -= STACK_FRAME_OVERHEAD; /* -- cgit v1.1 From 70fe3d980f5f14d8125869125ba9a0ea95e09c6b Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:47 +1100 Subject: powerpc: Restore FPU/VEC/VSX if previously used Currently the FPU, VEC and VSX facilities are lazily loaded. This is not a problem unless a process is using these facilities. Modern versions of GCC are very good at automatically vectorising code, new and modernised workloads make use of floating point and vector facilities, even the kernel makes use of vectorised memcpy. All this combined greatly increases the cost of a syscall since the kernel uses the facilities sometimes even in syscall fast-path making it increasingly common for a thread to take an *_unavailable exception soon after a syscall, not to mention potentially taking all three. The obvious overcompensation to this problem is to simply always load all the facilities on every exit to userspace. 
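For contrast, that rejected alternative would look roughly like the sketch
below; restore_all_math() is a hypothetical name, while load_fp_state(),
load_vr_state() and the msr_check helpers are the existing powerpc
primitives:

    /* naive version (not what this patch does): reload every facility on
     * each return to userspace, whether or not the thread has ever
     * touched FP/VMX/VSX
     */
    static void restore_all_math(struct pt_regs *regs)
    {
            msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
            load_fp_state(&current->thread.fp_state);
            load_vr_state(&current->thread.vr_state);
            msr_check_and_clear(MSR_FP | MSR_VEC | MSR_VSX);
            regs->msr |= MSR_FP | MSR_VEC | MSR_VSX;
    }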
Loading up all FPU, VEC and VSX registers every time can be expensive and if a workload does avoid using them, it should not be forced to incur this penalty. An 8bit counter is used to detect if the registers have been used in the past and the registers are always loaded until the value wraps to back to zero. Several versions of the assembly in entry_64.S were tested: 1. Always calling C. 2. Performing a common case check and then calling C. 3. A complex check in asm. After some benchmarking it was determined that avoiding C in the common case is a performance benefit (option 2). The full check in asm (option 3) greatly complicated that codepath for a negligible performance gain and the trade-off was deemed not worth it. Signed-off-by: Cyril Bur [mpe: Move load_vec in the struct to fill an existing hole, reword change log] Signed-off-by: Michael Ellerman fixup --- arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kernel/entry_64.S | 21 ++++++++-- arch/powerpc/kernel/fpu.S | 4 ++ arch/powerpc/kernel/process.c | 88 ++++++++++++++++++++++++++++++++++----- arch/powerpc/kernel/vector.S | 4 ++ 5 files changed, 105 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 07cebc3..10d5eab 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -95,12 +95,14 @@ int main(void) DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state)); DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area)); DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr)); + DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp)); #ifdef CONFIG_ALTIVEC DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state)); DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area)); DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr)); + DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec)); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0d525ce..038e0a1 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -210,7 +210,20 @@ system_call: /* label this so stack traces look sane */ li r11,-MAX_ERRNO andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work - cmpld r3,r11 + + andi. r0,r8,MSR_FP + beq 2f +#ifdef CONFIG_ALTIVEC + andis. r0,r8,MSR_VEC@h + bne 3f +#endif +2: addi r3,r1,STACK_FRAME_OVERHEAD + bl restore_math + ld r8,_MSR(r1) + ld r3,RESULT(r1) + li r11,-MAX_ERRNO + +3: cmpld r3,r11 ld r5,_CCR(r1) bge- syscall_error .Lsyscall_error_cont: @@ -602,8 +615,8 @@ _GLOBAL(ret_from_except_lite) /* Check current_thread_info()->flags */ andi. r0,r4,_TIF_USER_WORK_MASK -#ifdef CONFIG_PPC_BOOK3E bne 1f +#ifdef CONFIG_PPC_BOOK3E /* * Check to see if the dbcr0 register is set up to debug. * Use the internal debug mode bit to do this. @@ -618,7 +631,9 @@ _GLOBAL(ret_from_except_lite) mtspr SPRN_DBSR,r10 b restore #else - beq restore + addi r3,r1,STACK_FRAME_OVERHEAD + bl restore_math + b restore #endif 1: andi. 
r0,r4,_TIF_NEED_RESCHED beq 2f diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 2117eac..b063524 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -130,6 +130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif + /* Don't care if r4 overflows, this is desired behaviour */ + lbz r4,THREAD_LOAD_FP(r5) + addi r4,r4,1 + stb r4,THREAD_LOAD_FP(r5) addi r10,r5,THREAD_FPSTATE lfd fr0,FPSTATE_FPSCR(r10) MTFSF_L(fr0) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e0c3d2d..55c1eb0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -187,9 +187,22 @@ void enable_kernel_fp(void) } } EXPORT_SYMBOL(enable_kernel_fp); + +static int restore_fp(struct task_struct *tsk) { + if (tsk->thread.load_fp) { + load_fp_state(¤t->thread.fp_state); + current->thread.load_fp++; + return 1; + } + return 0; +} +#else +static int restore_fp(struct task_struct *tsk) { return 0; } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC +#define loadvec(thr) ((thr).load_vec) + void giveup_altivec(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -229,6 +242,21 @@ void flush_altivec_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_altivec_to_thread); + +static int restore_altivec(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) { + load_vr_state(&tsk->thread.vr_state); + tsk->thread.used_vr = 1; + tsk->thread.load_vec++; + + return 1; + } + return 0; +} +#else +#define loadvec(thr) 0 +static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -275,6 +303,18 @@ void flush_vsx_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_vsx_to_thread); + +static int restore_vsx(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_VSX)) { + tsk->thread.used_vsr = 1; + return 1; + } + + return 0; +} +#else +static inline int restore_vsx(struct task_struct *tsk) { return 0; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -374,6 +414,36 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); +void restore_math(struct pt_regs *regs) +{ + unsigned long msr; + + if (!current->thread.load_fp && !loadvec(current->thread)) + return; + + msr = regs->msr; + msr_check_and_set(msr_all_available); + + /* + * Only reload if the bit is not set in the user MSR, the bit BEING set + * indicates that the registers are hot + */ + if ((!(msr & MSR_FP)) && restore_fp(current)) + msr |= MSR_FP | current->thread.fpexc_mode; + + if ((!(msr & MSR_VEC)) && restore_altivec(current)) + msr |= MSR_VEC; + + if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) && + restore_vsx(current)) { + msr |= MSR_VSX; + } + + msr_check_and_clear(msr_all_available); + + regs->msr = msr; +} + void flush_all_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { @@ -832,17 +902,9 @@ void restore_tm_state(struct pt_regs *regs) msr_diff = current->thread.ckpt_regs.msr & ~regs->msr; msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; - if (msr_diff & MSR_FP) { - msr_check_and_set(MSR_FP); - load_fp_state(¤t->thread.fp_state); - msr_check_and_clear(MSR_FP); - regs->msr |= current->thread.fpexc_mode; - } - if (msr_diff & MSR_VEC) { - msr_check_and_set(MSR_VEC); - load_vr_state(¤t->thread.vr_state); - msr_check_and_clear(MSR_VEC); - } + + restore_math(regs); + regs->msr |= msr_diff; } @@ -1006,6 +1068,10 @@ struct task_struct *__switch_to(struct task_struct *prev, batch = this_cpu_ptr(&ppc64_tlb_batch); 
batch->active = 1; } + + if (current_thread_info()->task->thread.regs) + restore_math(current_thread_info()->task->thread.regs); + #endif /* CONFIG_PPC_BOOK3S_64 */ return last; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 162d0f7..038cff8 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -91,6 +91,10 @@ _GLOBAL(load_up_altivec) oris r12,r12,MSR_VEC@h std r12,_MSR(r1) #endif + /* Don't care if r4 overflows, this is desired behaviour */ + lbz r4,THREAD_LOAD_VEC(r5) + addi r4,r4,1 + stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE li r4,1 li r10,VRSTATE_VSCR -- cgit v1.1 From de2a20aa7237b45d3c14a2505804a8daa95a8f53 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:48 +1100 Subject: powerpc: Prepare for splitting giveup_{fpu, altivec, vsx} in two This prepares for the decoupling of saving {fpu,altivec,vsx} registers and marking {fpu,altivec,vsx} as being unused by a thread. Currently giveup_{fpu,altivec,vsx}() does both however optimisations to task switching can be made if these two operations are decoupled. save_all() will permit the saving of registers to thread structs and leave threads MSR with bits enabled. This patch introduces no functional change. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 55c1eb0..29da07f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -444,12 +444,41 @@ void restore_math(struct pt_regs *regs) regs->msr = msr; } +void save_all(struct task_struct *tsk) +{ + unsigned long usermsr; + + if (!tsk->thread.regs) + return; + + usermsr = tsk->thread.regs->msr; + + if ((usermsr & msr_all_available) == 0) + return; + + msr_check_and_set(msr_all_available); + + if (usermsr & MSR_FP) + __giveup_fpu(tsk); + + if (usermsr & MSR_VEC) + __giveup_altivec(tsk); + + if (usermsr & MSR_VSX) + __giveup_vsx(tsk); + + if (usermsr & MSR_SPE) + __giveup_spe(tsk); + + msr_check_and_clear(msr_all_available); +} + void flush_all_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); BUG_ON(tsk != current); - giveup_all(tsk); + save_all(tsk); #ifdef CONFIG_SPE if (tsk->thread.regs->msr & MSR_SPE) -- cgit v1.1 From 8792468da5e12e77e76e1edf081acf0392abb331 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:49 +1100 Subject: powerpc: Add the ability to save FPU without giving it up This patch adds the ability to be able to save the FPU registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch optimises the thread copy path (as a result of a fork() or clone()) so that the parent thread can return to userspace with hot registers avoiding a possibly pointless reload of FPU register state. 
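In C terms the split is small but important: the reworked __giveup_fpu()
(see the process.c diff below) is just a save plus an explicit disable,
while a bare save_fpu() leaves the registers hot:

    void __giveup_fpu(struct task_struct *tsk)
    {
            save_fpu(tsk);                    /* FP regs -> thread_struct */
            tsk->thread.regs->msr &= ~MSR_FP; /* force a reload on next use */
    #ifdef CONFIG_VSX
            if (cpu_has_feature(CPU_FTR_VSX))
                    tsk->thread.regs->msr &= ~MSR_VSX;
    #endif
    }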
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/fpu.S | 21 ++++----------------- arch/powerpc/kernel/process.c | 12 +++++++++++- 2 files changed, 15 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index b063524..15da2b5 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -143,33 +143,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) blr /* - * __giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. + * save_fpu(tsk) + * Save the floating-point registers in its thread_struct. * Enables the FPU for use in the kernel on return. */ -_GLOBAL(__giveup_fpu) +_GLOBAL(save_fpu) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r6,THREAD_FPSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r6,0 bne 2f addi r6,r3,THREAD_FPSTATE -2: PPC_LCMPI 0,r5,0 - SAVE_32FPVSRS(0, R4, R6) +2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) - beq 1f - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r3,r3,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - andc r4,r4,r3 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: blr /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 29da07f..a7e5061 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -133,6 +133,16 @@ void __msr_check_and_clear(unsigned long bits) EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU +void __giveup_fpu(struct task_struct *tsk) +{ + save_fpu(tsk); + tsk->thread.regs->msr &= ~MSR_FP; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + tsk->thread.regs->msr &= ~MSR_VSX; +#endif +} + void giveup_fpu(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -459,7 +469,7 @@ void save_all(struct task_struct *tsk) msr_check_and_set(msr_all_available); if (usermsr & MSR_FP) - __giveup_fpu(tsk); + save_fpu(tsk); if (usermsr & MSR_VEC) __giveup_altivec(tsk); -- cgit v1.1 From 6f515d842e8e1b205e54f44b9013bf14870b97a7 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:50 +1100 Subject: powerpc: Add the ability to save Altivec without giving it up This patch adds the ability to be able to save the VEC registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch builds on a previous optimisation for the FPU registers in the thread copy path to avoid a possibly pointless reload of VEC state. 
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 12 +++++++++++- arch/powerpc/kernel/vector.S | 24 ++++-------------------- 2 files changed, 15 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a7e5061..14c09d2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -213,6 +213,16 @@ static int restore_fp(struct task_struct *tsk) { return 0; } #ifdef CONFIG_ALTIVEC #define loadvec(thr) ((thr).load_vec) +static void __giveup_altivec(struct task_struct *tsk) +{ + save_altivec(tsk); + tsk->thread.regs->msr &= ~MSR_VEC; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + tsk->thread.regs->msr &= ~MSR_VSX; +#endif +} + void giveup_altivec(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -472,7 +482,7 @@ void save_all(struct task_struct *tsk) save_fpu(tsk); if (usermsr & MSR_VEC) - __giveup_altivec(tsk); + save_altivec(tsk); if (usermsr & MSR_VSX) __giveup_vsx(tsk); diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 038cff8..51b0c17 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -106,36 +106,20 @@ _GLOBAL(load_up_altivec) blr /* - * __giveup_altivec(tsk) - * Disable VMX for the task given as the argument, - * and save the vector registers in its thread_struct. + * save_altivec(tsk) + * Save the vector registers to its thread_struct */ -_GLOBAL(__giveup_altivec) +_GLOBAL(save_altivec) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r7,THREAD_VRSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r7,0 bne 2f addi r7,r3,THREAD_VRSTATE -2: PPC_LCMPI 0,r5,0 - SAVE_32VRS(0,r4,r7) +2: SAVE_32VRS(0,r4,r7) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 - beq 1f - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - lis r3,(MSR_VEC|MSR_VSX)@h -FTR_SECTION_ELSE - lis r3,MSR_VEC@h -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) -#else - lis r3,MSR_VEC@h -#endif - andc r4,r4,r3 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: blr #ifdef CONFIG_VSX -- cgit v1.1 From bf6a4d5b75d1ea87897fe68d0e45d35a2996c678 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:51 +1100 Subject: powerpc: Add the ability to save VSX without giving it up This patch adds the ability to be able to save the VSX registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch builds on a previous optimisation for the FPU and VEC registers in the thread copy path to avoid a possibly pointless reload of VSX state. 
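Because the VSX register file overlays the FP registers (VSR0-31) and the
VMX registers (VSR32-63), saving VSX state is equivalent to saving both
underlying sets, which is exactly how the diff below implements save_vsx():

    static void save_vsx(struct task_struct *tsk)
    {
            /* there is no separate VSX register image: FP plus Altivec
             * together cover all 64 VSRs */
            if (tsk->thread.regs->msr & MSR_FP)
                    save_fpu(tsk);
            if (tsk->thread.regs->msr & MSR_VEC)
                    save_altivec(tsk);
    }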
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ppc_ksyms.c | 4 ---- arch/powerpc/kernel/process.c | 42 +++++++++++++++++++++++++++++------------ arch/powerpc/kernel/vector.S | 17 ----------------- 3 files changed, 30 insertions(+), 33 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 41e1607..ef7024da 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); #endif -#ifdef CONFIG_VSX -EXPORT_SYMBOL_GPL(__giveup_vsx); -#endif - #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 14c09d2..d7a9df5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -280,19 +280,31 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -void giveup_vsx(struct task_struct *tsk) +static void __giveup_vsx(struct task_struct *tsk) { - check_if_tm_restore_required(tsk); - - msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); if (tsk->thread.regs->msr & MSR_FP) __giveup_fpu(tsk); if (tsk->thread.regs->msr & MSR_VEC) __giveup_altivec(tsk); + tsk->thread.regs->msr &= ~MSR_VSX; +} + +static void giveup_vsx(struct task_struct *tsk) +{ + check_if_tm_restore_required(tsk); + + msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); __giveup_vsx(tsk); msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -EXPORT_SYMBOL(giveup_vsx); + +static void save_vsx(struct task_struct *tsk) +{ + if (tsk->thread.regs->msr & MSR_FP) + save_fpu(tsk); + if (tsk->thread.regs->msr & MSR_VEC) + save_altivec(tsk); +} void enable_kernel_vsx(void) { @@ -335,6 +347,7 @@ static int restore_vsx(struct task_struct *tsk) } #else static inline int restore_vsx(struct task_struct *tsk) { return 0; } +static inline void save_vsx(struct task_struct *tsk) { } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -478,14 +491,19 @@ void save_all(struct task_struct *tsk) msr_check_and_set(msr_all_available); - if (usermsr & MSR_FP) - save_fpu(tsk); - - if (usermsr & MSR_VEC) - save_altivec(tsk); + /* + * Saving the way the register space is in hardware, save_vsx boils + * down to a save_fpu() and save_altivec() + */ + if (usermsr & MSR_VSX) { + save_vsx(tsk); + } else { + if (usermsr & MSR_FP) + save_fpu(tsk); - if (usermsr & MSR_VSX) - __giveup_vsx(tsk); + if (usermsr & MSR_VEC) + save_altivec(tsk); + } if (usermsr & MSR_SPE) __giveup_spe(tsk); diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 51b0c17..1c2e7a3 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -151,23 +151,6 @@ _GLOBAL(load_up_vsx) std r12,_MSR(r1) b fast_exception_return -/* - * __giveup_vsx(tsk) - * Disable VSX for the task given as the argument. - * Does NOT save vsx registers. - */ -_GLOBAL(__giveup_vsx) - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VSX@h - andc r4,r4,r3 /* disable VSX for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: - blr - #endif /* CONFIG_VSX */ -- cgit v1.1 From f64e8084c94bb0449177364856d8117e2f14c4c0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Mar 2016 12:59:20 +0530 Subject: powerpc/mm: Move hash related mmu-*.h headers to book3s/ No code changes. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_power.S | 2 +- arch/powerpc/kernel/idle_power7.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 5932219..584e119 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -15,7 +15,7 @@ #include #include #include -#include +#include /* Entry: r3 = crap, r4 = ptr to cputable entry * diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index cf4fb54..470ceeb 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -19,7 +19,7 @@ #include #include #include -#include +#include #undef DEBUG -- cgit v1.1 From e7affb1dba0e9068aeb3978e858f39753e0dc20a Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:13:58 +0800 Subject: powerpc/cache: add cache flush operation for various e500 Various e500 core have different cache architecture, so they need different cache flush operations. Therefore, add a callback function cpu_flush_caches to the struct cpu_spec. The cache flush operation for the specific kind of e500 is selected at init time. The callback function will flush all caches inside the current cpu. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian Signed-off-by: Scott Wood --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/cpu_setup_fsl_booke.S | 112 ++++++++++++++++++++++++++++++ arch/powerpc/kernel/cputable.c | 4 ++ arch/powerpc/kernel/head_fsl_booke.S | 74 -------------------- 4 files changed, 117 insertions(+), 74 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 10d5eab..0d0183d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -376,6 +376,7 @@ int main(void) DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); + DEFINE(CPU_DOWN_FLUSH, offsetof(struct cpu_spec, cpu_down_flush)); DEFINE(pbe_address, offsetof(struct pbe, address)); DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index dddba3e..462aed9 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -13,11 +13,13 @@ * */ +#include #include #include #include #include #include +#include _GLOBAL(__e500_icache_setup) mfspr r0, SPRN_L1CSR1 @@ -233,3 +235,113 @@ _GLOBAL(__setup_cpu_e5500) mtlr r5 blr #endif + +/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */ +_GLOBAL(flush_dcache_L1) + mfmsr r10 + wrteei 0 + + mfspr r3,SPRN_L1CFG0 + rlwinm r5,r3,9,3 /* Extract cache block size */ + twlgti r5,1 /* Only 32 and 64 byte cache blocks + * are currently defined. + */ + li r4,32 + subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) - + * log2(number of ways) + */ + slw r5,r4,r5 /* r5 = cache block size */ + + rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */ + mulli r7,r7,13 /* An 8-way cache will require 13 + * loads per set. 
+ */ + slw r7,r7,r6 + + /* save off HID0 and set DCFA */ + mfspr r8,SPRN_HID0 + ori r9,r8,HID0_DCFA@l + mtspr SPRN_HID0,r9 + isync + + LOAD_REG_IMMEDIATE(r6, KERNELBASE) + mr r4, r6 + mtctr r7 + +1: lwz r3,0(r4) /* Load... */ + add r4,r4,r5 + bdnz 1b + + msync + mr r4, r6 + mtctr r7 + +1: dcbf 0,r4 /* ...and flush. */ + add r4,r4,r5 + bdnz 1b + + /* restore HID0 */ + mtspr SPRN_HID0,r8 + isync + + wrtee r10 + + blr + +has_L2_cache: + /* skip L2 cache on P2040/P2040E as they have no L2 cache */ + mfspr r3, SPRN_SVR + /* shift right by 8 bits and clear E bit of SVR */ + rlwinm r4, r3, 24, ~0x800 + + lis r3, SVR_P2040@h + ori r3, r3, SVR_P2040@l + cmpw r4, r3 + beq 1f + + li r3, 1 + blr +1: + li r3, 0 + blr + +/* flush backside L2 cache */ +flush_backside_L2_cache: + mflr r10 + bl has_L2_cache + mtlr r10 + cmpwi r3, 0 + beq 2f + + /* Flush the L2 cache */ + mfspr r3, SPRN_L2CSR0 + ori r3, r3, L2CSR0_L2FL@l + msync + isync + mtspr SPRN_L2CSR0,r3 + isync + + /* check if it is complete */ +1: mfspr r3,SPRN_L2CSR0 + andi. r3, r3, L2CSR0_L2FL@l + bne 1b +2: + blr + +_GLOBAL(cpu_down_flush_e500v2) + mflr r0 + bl flush_dcache_L1 + mtlr r0 + blr + +_GLOBAL(cpu_down_flush_e500mc) +_GLOBAL(cpu_down_flush_e5500) + mflr r0 + bl flush_dcache_L1 + bl flush_backside_L2_cache + mtlr r0 + blr + +/* L1 Data Cache of e6500 contains no modified data, no flush is required */ +_GLOBAL(cpu_down_flush_e6500) + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index be4d730..6c662b8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2050,6 +2050,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_e500v2, .machine_check = machine_check_e500, .platform = "ppc8548", + .cpu_down_flush = cpu_down_flush_e500v2, }, #else { /* e500mc */ @@ -2069,6 +2070,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500mc, .platform = "ppce500mc", + .cpu_down_flush = cpu_down_flush_e500mc, }, #endif /* CONFIG_PPC_E500MC */ #endif /* CONFIG_PPC32 */ @@ -2093,6 +2095,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif .machine_check = machine_check_e500mc, .platform = "ppce5500", + .cpu_down_flush = cpu_down_flush_e5500, }, { /* e6500 */ .pvr_mask = 0xffff0000, @@ -2115,6 +2118,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif .machine_check = machine_check_e500mc, .platform = "ppce6500", + .cpu_down_flush = cpu_down_flush_e6500, }, #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index f705171..3bfa315 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1037,80 +1037,6 @@ _GLOBAL(set_context) isync /* Force context change */ blr -_GLOBAL(flush_dcache_L1) - mfspr r3,SPRN_L1CFG0 - - rlwinm r5,r3,9,3 /* Extract cache block size */ - twlgti r5,1 /* Only 32 and 64 byte cache blocks - * are currently defined. - */ - li r4,32 - subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) - - * log2(number of ways) - */ - slw r5,r4,r5 /* r5 = cache block size */ - - rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */ - mulli r7,r7,13 /* An 8-way cache will require 13 - * loads per set. - */ - slw r7,r7,r6 - - /* save off HID0 and set DCFA */ - mfspr r8,SPRN_HID0 - ori r9,r8,HID0_DCFA@l - mtspr SPRN_HID0,r9 - isync - - lis r4,KERNELBASE@h - mtctr r7 - -1: lwz r3,0(r4) /* Load... 
*/ - add r4,r4,r5 - bdnz 1b - - msync - lis r4,KERNELBASE@h - mtctr r7 - -1: dcbf 0,r4 /* ...and flush. */ - add r4,r4,r5 - bdnz 1b - - /* restore HID0 */ - mtspr SPRN_HID0,r8 - isync - - blr - -/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */ -_GLOBAL(__flush_disable_L1) - mflr r10 - bl flush_dcache_L1 /* Flush L1 d-cache */ - mtlr r10 - - mfspr r4, SPRN_L1CSR0 /* Invalidate and disable d-cache */ - li r5, 2 - rlwimi r4, r5, 0, 3 - - msync - isync - mtspr SPRN_L1CSR0, r4 - isync - -1: mfspr r4, SPRN_L1CSR0 /* Wait for the invalidate to finish */ - andi. r4, r4, 2 - bne 1b - - mfspr r4, SPRN_L1CSR1 /* Invalidate and disable i-cache */ - li r5, 2 - rlwimi r4, r5, 0, 3 - - mtspr SPRN_L1CSR1, r4 - isync - - blr - #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start -- cgit v1.1 From d17799f9c10e283cccd4d598d3416e6fac336ab9 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:13:59 +0800 Subject: powerpc/rcpm: add RCPM driver There is a RCPM (Run Control/Power Management) in Freescale QorIQ series processors. The device performs tasks associated with device run control and power management. The driver implements some features: mask/unmask irq, enter/exit low power states, freeze time base, etc. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian [scottwood: remove __KERNEL__ ifdef] Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_64.S | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1b77956..6036253 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -181,6 +181,25 @@ exception_marker: #endif #ifdef CONFIG_PPC_BOOK3E +/* + * stop a thread in the same core + * input parameter: + * r3 = the thread physical id + */ +_GLOBAL(book3e_stop_thread) + cmpi 0, r3, 0 + beq 10f + cmpi 0, r3, 1 + beq 10f + /* If the thread id is invalid, just exit. */ + b 13f +10: + li r4, 1 + sld r4, r4, r3 + mtspr SPRN_TENC, r4 +13: + blr + _GLOBAL(fsl_secondary_thread_init) mfspr r4,SPRN_BUCSR -- cgit v1.1 From 2f4f1f815bc6d03ea42d4f67dd1e284525e7524e Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:14:01 +0800 Subject: powerpc/mpc85xx: Add hotplug support on E5500 and E500MC cores Freescale E500MC and E5500 core-based platforms, like P4080, T1040, support disabling/enabling CPU dynamically. This patch adds this feature on those platforms. 
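The generic SMP piece of this (the smp.c hunk below) is a small helper so
the die path can poll whether the dying CPU has marked itself dead;
condensed:

    int is_cpu_dead(unsigned int cpu)
    {
            return per_cpu(cpu_state, cpu) == CPU_DEAD;
    }

    /* in generic_cpu_die(): wait up to ~10 seconds for the dying CPU
     * to reach CPU_DEAD */
    for (i = 0; i < 100; i++) {
            smp_rmb();
            if (is_cpu_dead(cpu))
                    return;
            msleep(100);
    }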
Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian [scottwood: removed unused pr_fmt] Signed-off-by: Scott Wood --- arch/powerpc/kernel/smp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ec9ec20..8575d04 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -427,7 +427,7 @@ void generic_cpu_die(unsigned int cpu) for (i = 0; i < 100; i++) { smp_rmb(); - if (per_cpu(cpu_state, cpu) == CPU_DEAD) + if (is_cpu_dead(cpu)) return; msleep(100); } @@ -454,6 +454,11 @@ int generic_check_cpu_restart(unsigned int cpu) return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; } +int is_cpu_dead(unsigned int cpu) +{ + return per_cpu(cpu_state, cpu) == CPU_DEAD; +} + static bool secondaries_inhibited(void) { return kvm_hv_mode_active(); -- cgit v1.1 From 6becef7ea04a695f64299238fe13d41e41607469 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:14:02 +0800 Subject: powerpc/mpc85xx: Add CPU hotplug support for E6500 Support Freescale E6500 core-based platforms, like t4240. Support disabling/enabling individual CPU thread dynamically. Signed-off-by: Chenhui Zhao --- arch/powerpc/kernel/head_64.S | 78 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 6036253..2916283 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -40,6 +40,7 @@ #include #include #include +#include /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -182,6 +183,45 @@ exception_marker: #ifdef CONFIG_PPC_BOOK3E /* + * The booting_thread_hwid holds the thread id we want to boot in cpu + * hotplug case. It is set by cpu hotplug code, and is invalid by default. + * The thread id is the same as the initial value of SPRN_PIR[THREAD_ID] + * bit field. + */ + .globl booting_thread_hwid +booting_thread_hwid: + .long INVALID_THREAD_HWID + .align 3 +/* + * start a thread in the same core + * input parameters: + * r3 = the thread physical id + * r4 = the entry point where thread starts + */ +_GLOBAL(book3e_start_thread) + LOAD_REG_IMMEDIATE(r5, MSR_KERNEL) + cmpi 0, r3, 0 + beq 10f + cmpi 0, r3, 1 + beq 11f + /* If the thread id is invalid, just exit. */ + b 13f +10: + mttmr TMRN_IMSR0, r5 + mttmr TMRN_INIA0, r4 + b 12f +11: + mttmr TMRN_IMSR1, r5 + mttmr TMRN_INIA1, r4 +12: + isync + li r6, 1 + sld r6, r6, r3 + mtspr SPRN_TENS, r6 +13: + blr + +/* * stop a thread in the same core * input parameter: * r3 = the thread physical id @@ -280,6 +320,44 @@ _GLOBAL(generic_secondary_smp_init) mr r3,r24 mr r4,r25 bl book3e_secondary_core_init + +/* + * After common core init has finished, check if the current thread is the + * one we wanted to boot. If not, start the specified thread and stop the + * current thread. + */ + LOAD_REG_ADDR(r4, booting_thread_hwid) + lwz r3, 0(r4) + li r5, INVALID_THREAD_HWID + cmpw r3, r5 + beq 20f + + /* + * The value of booting_thread_hwid has been stored in r3, + * so make it invalid. + */ + stw r5, 0(r4) + + /* + * Get the current thread id and check if it is the one we wanted. + * If not, start the one specified in booting_thread_hwid and stop + * the current thread. 
+ */ + mfspr r8, SPRN_TIR + cmpw r3, r8 + beq 20f + + /* start the specified thread */ + LOAD_REG_ADDR(r5, fsl_secondary_thread_init) + ld r4, 0(r5) + bl book3e_start_thread + + /* stop the current thread */ + mr r3, r8 + bl book3e_stop_thread +10: + b 10b +20: #endif generic_secondary_common_init: -- cgit v1.1 From a5cab83cd3d2d75d3893276cb5f27e163484ef04 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:53 +1100 Subject: powerpc: Create a helper for getting the kernel toc value Move the logic to work out the kernel toc pointer into a header. This is a good cleanup, and also means we can use it elsewhere in future. Reviewed-by: Kamalesh Babulal Reviewed-by: Torsten Duwe Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman Tested-by: Kamalesh Babulal --- arch/powerpc/kernel/paca.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 01ea0ed..93dae29 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -17,10 +17,6 @@ #include #include -/* This symbol is provided by the linker - let it fill in the paca - * field correctly */ -extern unsigned long __toc_start; - #ifdef CONFIG_PPC_BOOK3S /* @@ -149,11 +145,6 @@ EXPORT_SYMBOL(paca); void __init initialise_paca(struct paca_struct *new_paca, int cpu) { - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ - unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - #ifdef CONFIG_PPC_BOOK3S new_paca->lppaca_ptr = new_lppaca(cpu); #else @@ -161,7 +152,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #endif new_paca->lock_token = 0x8000; new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; + new_paca->kernel_toc = kernel_toc_addr(); new_paca->kernelbase = (unsigned long) _stext; /* Only set MSR:IR/DR when MMU is initialized */ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); -- cgit v1.1 From 136cd3450af8092f30d0e289806f08ac2aeee38f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:54 +1100 Subject: powerpc/module: Only try to generate the ftrace_caller() stub once Currently we generate the module stub for ftrace_caller() at the bottom of apply_relocate_add(). However apply_relocate_add() is potentially called more than once per module, which means we will try to generate the ftrace_caller() stub multiple times. Although the current code deals with that correctly, ie. it only generates a stub the first time, it would be clearer to only try to generate the stub once. Note also on first reading it may appear that we generate a different stub for each section that requires relocation, but that is not the case. The code in stub_for_addr() that searches for an existing stub uses sechdrs[me->arch.stubs_section], ie. the single stub section for this module. A cleaner approach is to only generate the ftrace_caller() stub once, from module_finalize(). Although the original code didn't check to see if the stub was actually generated correctly, it seems prudent to add a check, so do that. And an additional benefit is we can clean the ifdefs up a little. Finally we must propagate the const'ness of some of the pointers passed to module_finalize(), but that is also an improvement. 
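Concretely, the common module_finalize() now does the stub generation up
front and can fail the module load early; condensed from the module.c diff
below:

    int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
                        struct module *me)
    {
            int rc;

            /* generate the ftrace_caller() stub exactly once per module,
             * rejecting the module if no stub could be created */
            rc = module_finalize_ftrace(me, sechdrs);
            if (rc)
                    return rc;

            /* ... apply feature fixups as before ... */
            return 0;
    }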
Reviewed-by: Balbir Singh Reviewed-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/module.c | 5 +++++ arch/powerpc/kernel/module_32.c | 20 ++++++++++++++------ arch/powerpc/kernel/module_64.c | 22 ++++++++++++++-------- 3 files changed, 33 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 9547381..d1f1b35 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -47,6 +47,11 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *sect; + int rc; + + rc = module_finalize_ftrace(me, sechdrs); + if (rc) + return rc; /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 2c01665..5a7a78f 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -181,7 +181,7 @@ static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) /* Set up a trampoline in the PLT to bounce us to the distant function */ static uint32_t do_plt_call(void *location, Elf32_Addr val, - Elf32_Shdr *sechdrs, + const Elf32_Shdr *sechdrs, struct module *mod) { struct ppc_plt_entry *entry; @@ -294,11 +294,19 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } } + + return 0; +} + #ifdef CONFIG_DYNAMIC_FTRACE - module->arch.tramp = - do_plt_call(module->core_layout.base, - (unsigned long)ftrace_caller, - sechdrs, module); -#endif +int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) +{ + module->arch.tramp = do_plt_call(module->core_layout.base, + (unsigned long)ftrace_caller, + sechdrs, module); + if (!module->arch.tramp) + return -ENOENT; + return 0; } +#endif diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ac64ffd..599c753 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -413,7 +413,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, /* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the value maximum span in an instruction which uses a signed offset) */ -static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) +static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { return sechdrs[me->arch.toc_section].sh_addr + 0x8000; } @@ -426,7 +426,7 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) #define PPC_HA(v) PPC_HI ((v) + 0x8000) /* Patch stub to reference function and correct r2 value. */ -static inline int create_stub(Elf64_Shdr *sechdrs, +static inline int create_stub(const Elf64_Shdr *sechdrs, struct ppc64_stub_entry *entry, unsigned long addr, struct module *me) @@ -452,7 +452,7 @@ static inline int create_stub(Elf64_Shdr *sechdrs, /* Create stub to jump to function described in this OPD/ptr: we need the stub to set up the TOC ptr (r2) for the function. 
*/ -static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, +static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) { @@ -693,12 +693,18 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } } + return 0; +} + #ifdef CONFIG_DYNAMIC_FTRACE - me->arch.toc = my_r2(sechdrs, me); - me->arch.tramp = stub_for_addr(sechdrs, - (unsigned long)ftrace_caller, - me); -#endif +int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) +{ + mod->arch.toc = my_r2(sechdrs, mod); + mod->arch.tramp = stub_for_addr(sechdrs, (unsigned long)ftrace_caller, mod); + + if (!mod->arch.tramp) + return -ENOENT; return 0; } +#endif -- cgit v1.1 From f17c4e01e906c568fb03a2e6c2b99e4ee4587fbf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:55 +1100 Subject: powerpc/module: Mark module stubs with a magic value When a module is loaded, calls out to the kernel go via a stub which is generated at runtime. One of these stubs is used to call _mcount(), which is the default target of tracing calls generated by the compiler with -pg. If dynamic ftrace is enabled (which it typically is), another stub is used to call ftrace_caller(), which is the target of tracing calls when ftrace is actually active. ftrace then wants to disable the calls to _mcount() at module startup, and enable/disable the calls to ftrace_caller() when enabling/disabling tracing - all of these it does by patching the code. As part of that code patching, the ftrace code wants to confirm that the branch it is about to modify, is in fact a call to a module stub which calls _mcount() or ftrace_caller(). Currently it does that by inspecting the instructions and confirming they are what it expects. Although that works, the code to do it is pretty intricate because it requires lots of knowledge about the exact format of the stub. We can make that process easier by marking the generated stubs with a magic value, and then looking for that magic value. Altough this is not as rigorous as the current method, I believe it is sufficient in practice. 
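With the magic word in place, identifying a stub reduces to one field read
instead of masked instruction comparisons; the essence of the module_64.c
change below is:

    #define STUB_MAGIC 0x73747562           /* "stub" in ASCII */

    struct ppc64_stub_entry {
            u32 jump[7];                    /* the trampoline code */
            u32 magic;                      /* lets ftrace identify stubs */
            func_desc_t funcdata;           /* target of the trampoline */
    };

    /* in module_trampoline_target(): */
    u32 magic;

    if (probe_kernel_read(&magic, &stub->magic, sizeof(magic)))
            return -EFAULT;
    if (magic != STUB_MAGIC)
            return -EFAULT;                 /* not one of our stubs */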
Reviewed-by: Balbir Singh Reviewed-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ftrace.c | 14 +++----- arch/powerpc/kernel/module_64.c | 78 ++++++++++++++--------------------------- 2 files changed, 30 insertions(+), 62 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 44d4d8e..4505cbf 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -106,10 +106,9 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; - unsigned long entry, ptr; + unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - void *tramp; + unsigned int op; /* read where this goes */ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) @@ -122,14 +121,9 @@ __ftrace_make_nop(struct module *mod, } /* lets find where the pointer goes */ - tramp = (void *)find_bl_target(ip, op); - - pr_devel("ip:%lx jumps to %p", ip, tramp); + tramp = find_bl_target(ip, op); - if (!is_module_trampoline(tramp)) { - pr_err("Not a trampoline\n"); - return -EINVAL; - } + pr_devel("ip:%lx jumps to %lx", ip, tramp); if (module_trampoline_target(mod, tramp, &ptr)) { pr_err("Failed to get trampoline target\n"); diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 599c753..9629966 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -96,6 +96,8 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) } #endif +#define STUB_MAGIC 0x73747562 /* stub */ + /* Like PPC32, we need little trampolines to do > 24-bit jumps (into the kernel itself). But on PPC64, these need to be used for every jump, actually, to reset r2 (TOC+0x8000). */ @@ -105,7 +107,8 @@ struct ppc64_stub_entry * need 6 instructions on ABIv2 but we always allocate 7 so * so we don't have to modify the trampoline load instruction. 
*/ u32 jump[7]; - u32 unused; + /* Used by ftrace to identify stubs */ + u32 magic; /* Data for the above code */ func_desc_t funcdata; }; @@ -139,70 +142,39 @@ static u32 ppc64_stub_insns[] = { }; #ifdef CONFIG_DYNAMIC_FTRACE - -static u32 ppc64_stub_mask[] = { - 0xffff0000, - 0xffff0000, - 0xffffffff, - 0xffffffff, -#if !defined(_CALL_ELF) || _CALL_ELF != 2 - 0xffffffff, -#endif - 0xffffffff, - 0xffffffff -}; - -bool is_module_trampoline(u32 *p) +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) { - unsigned int i; - u32 insns[ARRAY_SIZE(ppc64_stub_insns)]; - - BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask)); + struct ppc64_stub_entry *stub; + func_desc_t funcdata; + u32 magic; - if (probe_kernel_read(insns, p, sizeof(insns))) + if (!within_module_core(addr, mod)) { + pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); return -EFAULT; - - for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { - u32 insna = insns[i]; - u32 insnb = ppc64_stub_insns[i]; - u32 mask = ppc64_stub_mask[i]; - - if ((insna & mask) != (insnb & mask)) - return false; } - return true; -} + stub = (struct ppc64_stub_entry *)addr; -int module_trampoline_target(struct module *mod, u32 *trampoline, - unsigned long *target) -{ - u32 buf[2]; - u16 upper, lower; - long offset; - void *toc_entry; - - if (probe_kernel_read(buf, trampoline, sizeof(buf))) + if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; + } - upper = buf[0] & 0xffff; - lower = buf[1] & 0xffff; - - /* perform the addis/addi, both signed */ - offset = ((short)upper << 16) + (short)lower; + if (magic != STUB_MAGIC) { + pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* - * Now get the address this trampoline jumps to. This - * is always 32 bytes into our trampoline stub. - */ - toc_entry = (void *)mod->arch.toc + offset + 32; + if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - if (probe_kernel_read(target, toc_entry, sizeof(*target))) - return -EFAULT; + *target = stub_func_addr(funcdata); return 0; } - #endif /* Count how many different 24-bit relocations (different symbol, @@ -447,6 +419,8 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, entry->jump[0] |= PPC_HA(reladdr); entry->jump[1] |= PPC_LO(reladdr); entry->funcdata = func_desc(addr); + entry->magic = STUB_MAGIC; + return 1; } -- cgit v1.1 From 336a7b5dd80a2a7b463dc8ede4862425940edeb5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:56 +1100 Subject: powerpc/module: Create a special stub for ftrace_caller() In order to support the new -mprofile-kernel ABI, we need to be able to call from the module back to ftrace_caller() (in the kernel) without using the module's r2. That is because the function in this module which is calling ftrace_caller() may not have setup r2, if it doesn't otherwise need it (ie. it accesses no globals). To make that work we add a new stub which is used for calling ftrace_caller(), which uses the kernel toc instead of the module toc. 
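To illustrate why r2 cannot be trusted, consider a hypothetical module function (not from the patch) built with -mprofile-kernel:

    /* Hypothetical module code: no globals are accessed */
    static int mod_add(int a, int b)
    {
            /* The compiler emits no r2 (TOC) setup here, yet it still
             * inserts the profiling call, so the stub reached from this
             * function runs with whatever r2 the caller left behind. */
            return a + b;
    }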
Reviewed-by: Balbir Singh Reviewed-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/module_64.c | 69 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 9629966..76c0963 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -31,6 +31,7 @@ #include #include #include +#include /* FIXME: We don't do .init separately. To do this, we'd need to have a separate r2 value in the init and core section, and stub between @@ -671,10 +672,76 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE + +#ifdef CC_USING_MPROFILE_KERNEL + +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +/* + * For mprofile-kernel we use a special stub for ftrace_caller() because we + * can't rely on r2 containing this module's TOC when we enter the stub. + * + * That can happen if the function calling us didn't need to use the toc. In + * that case it won't have setup r2, and the r2 value will be either the + * kernel's toc, or possibly another modules toc. + * + * To deal with that this stub uses the kernel toc, which is always accessible + * via the paca (in r13). The target (ftrace_caller()) is responsible for + * saving and restoring the toc before returning. + */ +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +{ + struct ppc64_stub_entry *entry; + unsigned int i, num_stubs; + static u32 stub_insns[] = { + 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */ + 0x3d8c0000, /* addis r12,r12, */ + 0x398c0000, /* addi r12,r12, */ + 0x7d8903a6, /* mtctr r12 */ + 0x4e800420, /* bctr */ + }; + long reladdr; + + num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*entry); + + /* Find the next available stub entry */ + entry = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; i < num_stubs && stub_func_addr(entry->funcdata); i++, entry++); + + if (i >= num_stubs) { + pr_err("%s: Unable to find a free slot for ftrace stub.\n", me->name); + return 0; + } + + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = (unsigned long)ftrace_caller - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of ftrace_caller out of range of kernel_toc.\n", me->name); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + + /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. 
*/ + entry->funcdata = func_desc((unsigned long)ftrace_caller); + entry->magic = STUB_MAGIC; + + return (unsigned long)entry; +} +#else +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +{ + return stub_for_addr(sechdrs, (unsigned long)ftrace_caller, me); +} +#endif + int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.toc = my_r2(sechdrs, mod); - mod->arch.tramp = stub_for_addr(sechdrs, (unsigned long)ftrace_caller, mod); + mod->arch.tramp = create_ftrace_stub(sechdrs, mod); if (!mod->arch.tramp) return -ENOENT; -- cgit v1.1 From c96f83856f56a66a5d7fd730958bec80c9b5a020 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Thu, 3 Mar 2016 15:26:57 +1100 Subject: powerpc/ftrace: Use generic ftrace_modify_all_code() Convert powerpc's arch_ftrace_update_code() from its own version to use the generic default functionality (without stop_machine -- our instructions are properly aligned and the replacements atomic). With this we gain error checking and the much-needed function_trace_op handling. Reviewed-by: Balbir Singh Reviewed-by: Kamalesh Babulal Signed-off-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ftrace.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 4505cbf..62899fb 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -449,20 +449,13 @@ void ftrace_replace_code(int enable) } } +/* + * Use the default ftrace_modify_all_code, but without + * stop_machine(). + */ void arch_ftrace_update_code(int command) { - if (command & FTRACE_UPDATE_CALLS) - ftrace_replace_code(1); - else if (command & FTRACE_DISABLE_CALLS) - ftrace_replace_code(0); - - if (command & FTRACE_UPDATE_TRACE_FUNC) - ftrace_update_ftrace_func(ftrace_trace_function); - - if (command & FTRACE_START_FUNC_RET) - ftrace_enable_ftrace_graph_caller(); - else if (command & FTRACE_STOP_FUNC_RET) - ftrace_disable_ftrace_graph_caller(); + ftrace_modify_all_code(command); } int __init ftrace_dyn_arch_init(void) -- cgit v1.1 From 9a7841ae8d6ce9b7a7cf879c9968fcf4c9545563 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Thu, 3 Mar 2016 15:26:58 +1100 Subject: powerpc/ftrace: Use $(CC_FLAGS_FTRACE) when disabling ftrace Rather than open-coding -pg wherever we want to disable ftrace, use the existing $(CC_FLAGS_FTRACE) variable. This has the advantage that it will work in the future when we use a different set of flags to enable ftrace.
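For background, the flag's effect at the C level looks roughly like this (hypothetical function; the call is inserted by the compiler, never written by hand):

    /* A function built with CC_FLAGS_FTRACE behaves as if written: */
    void foo(void)
    {
            _mcount();      /* profiling call inserted at function entry */
            /* ... original body ... */
    }

Objects listed via CFLAGS_REMOVE_ in the Makefile below simply never get that call compiled in.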
Signed-off-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 794f22a..2da380f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -16,14 +16,14 @@ endif ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) # do not trace tracer code -CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) # timers used by tracing -CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) endif obj-y := cputable.o ptrace.o syscalls.o \ -- cgit v1.1 From 153086644fd1fb07fb3af84d9f11542a19b1e8b6 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Thu, 3 Mar 2016 15:26:59 +1100 Subject: powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI The gcc switch -mprofile-kernel defines a new ABI for calling _mcount() very early in the function with minimal overhead. Although mprofile-kernel has been available since GCC 3.4, there were bugs which were only fixed recently. Currently it is known to work in GCC 4.9, 5 and 6. Additionally there are two possible code sequences generated by the flag, the first uses mflr/std/bl and the second is optimised to omit the std. Currently only gcc 6 has the optimised sequence. This patch supports both sequences. Initial work started by Vojtech Pavlik, used with permission. Key changes: - rework _mcount() to work for both the old and new ABIs. - implement new versions of ftrace_caller() and ftrace_graph_caller() which deal with the new ABI. - updates to __ftrace_make_nop() to recognise the new mcount calling sequence. - updates to __ftrace_make_call() to recognise the nop'ed sequence. - implement ftrace_modify_call(). - updates to the module loader to surpress the toc save in the module stub when calling mcount with the new ABI. Reviewed-by: Balbir Singh Signed-off-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 166 +++++++++++++++++++++++++++++++++++++++- arch/powerpc/kernel/ftrace.c | 103 +++++++++++++++++++++---- arch/powerpc/kernel/module_64.c | 49 +++++++++++- 3 files changed, 298 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0d525ce..ec7f8aa 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1143,8 +1143,12 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - blr + mflr r12 + mtctr r12 + mtlr r0 + bctr +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL_TOC(ftrace_caller) /* Taken from output of objdump from lib64/glibc */ mflr r3 @@ -1166,6 +1170,115 @@ _GLOBAL(ftrace_graph_stub) ld r0, 128(r1) mtlr r0 addi r1, r1, 112 + +#else /* CC_USING_MPROFILE_KERNEL */ +/* + * + * ftrace_caller() is the function that replaces _mcount() when ftrace is + * active. 
+ * + * We arrive here after a function A calls function B, and we are the trace + * function for B. When we enter r1 points to A's stack frame, B has not yet + * had a chance to allocate one yet. + * + * Additionally r2 may point either to the TOC for A, or B, depending on + * whether B did a TOC setup sequence before calling us. + * + * On entry the LR points back to the _mcount() call site, and r0 holds the + * saved LR as it was on entry to B, ie. the original return address at the + * call site in A. + * + * Our job is to save the register state into a struct pt_regs (on the stack) + * and then arrange for the ftrace function to be called. + */ +_GLOBAL(ftrace_caller) + /* Save the original return address in A's stack frame */ + std r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stdu r1,-SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + SAVE_8GPRS(0,r1) + SAVE_8GPRS(8,r1) + SAVE_8GPRS(16,r1) + SAVE_8GPRS(24,r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip & pt_regs->link */ + std r7, _NIP(r1) + std r7, _LINK(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2,PACATOC(r13) /* get kernel TOC in r2 */ + + addis r3,r2,function_trace_op@toc@ha + addi r3,r3,function_trace_op@toc@l + ld r5,0(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + std r8, _MSR(r1) + std r9, _CTR(r1) + std r10, _XER(r1) + std r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1 ,STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + ld r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + REST_8GPRS(0,r1) + REST_8GPRS(8,r1) + REST_8GPRS(16,r1) + REST_8GPRS(24,r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + + /* Restore original LR for return to B */ + ld r0, LRSAVE(r1) + mtlr r0 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + stdu r1, -112(r1) +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) + addi r1, r1, 112 +#endif + + ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */ + mtlr r0 + bctr /* jump after _mcount site */ +#endif /* CC_USING_MPROFILE_KERNEL */ + _GLOBAL(ftrace_stub) blr #else @@ -1198,6 +1311,7 @@ _GLOBAL(ftrace_stub) #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL(ftrace_graph_caller) /* load r4 with local address */ ld r4, 128(r1) @@ -1222,6 +1336,56 @@ _GLOBAL(ftrace_graph_caller) addi r1, r1, 112 blr +#else /* CC_USING_MPROFILE_KERNEL */ +_GLOBAL(ftrace_graph_caller) + /* with -mprofile-kernel, parameter regs are still alive at _mcount */ + std r10, 104(r1) + std r9, 96(r1) + std r8, 88(r1) + std r7, 80(r1) + std r6, 72(r1) + std r5, 64(r1) + std r4, 56(r1) + std r3, 48(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2, PACATOC(r13) /* get kernel TOC in r2 */ + + mfctr r4 /* ftrace_caller has moved local addr here */ + std r4, 40(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ + subi r4, r4, MCOUNT_INSN_SIZE + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. 
+ * Change the LR to this. + */ + mtlr r3 + + ld r0, 40(r1) + mtctr r0 + ld r10, 104(r1) + ld r9, 96(r1) + ld r8, 88(r1) + ld r7, 80(r1) + ld r6, 72(r1) + ld r5, 64(r1) + ld r4, 56(r1) + ld r3, 48(r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + addi r1, r1, 112 + mflr r0 + std r0, LRSAVE(r1) + bctr +#endif /* CC_USING_MPROFILE_KERNEL */ + _GLOBAL(return_to_handler) /* need to save return values */ std r4, -32(r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 62899fb..9dac18da 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) return -EFAULT; /* Make sure it is what we expect it to be */ - if (replaced != old) + if (replaced != old) { + pr_err("%p: replaced (%#x) != old (%#x)", + (void *)ip, replaced, old); return -EINVAL; + } /* replace the text with the new text */ if (patch_instruction((unsigned int *)ip, new)) @@ -108,11 +111,13 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - unsigned int op; + unsigned int op, pop; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) + if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + pr_err("Fetching opcode failed.\n"); return -EFAULT; + } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { @@ -152,10 +157,42 @@ __ftrace_make_nop(struct module *mod, * * Use a b +8 to jump over the load. */ - op = 0x48000008; /* b +8 */ - if (patch_instruction((unsigned int *)ip, op)) + pop = PPC_INST_BRANCH | 8; /* b +8 */ + + /* + * Check what is in the next instruction. We can see ld r2,40(r1), but + * on first pass after boot we will see mflr r0. + */ + if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) { + pr_err("Fetching op failed.\n"); + return -EFAULT; + } + + if (op != PPC_INST_LD_TOC) { + unsigned int inst; + + if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ + if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { + pr_err("Unexpected instructions around bl _mcount\n" + "when enabling dynamic ftrace!\t" + "(%08x,bl,%08x)\n", inst, op); + return -EINVAL; + } + + /* When using -mkernel_profile there is no load to jump over */ + pop = PPC_INST_NOP; + } + + if (patch_instruction((unsigned int *)ip, pop)) { + pr_err("Patching NOP failed.\n"); return -EPERM; + } return 0; } @@ -281,16 +318,15 @@ int ftrace_make_nop(struct module *mod, #ifdef CONFIG_MODULES #ifdef CONFIG_PPC64 +/* + * Examine the existing instructions for __ftrace_make_call. + * They should effectively be a NOP, and follow formal constraints, + * depending on the ABI. Return false if they don't. + */ +#ifndef CC_USING_MPROFILE_KERNEL static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) { - unsigned int op[2]; - void *ip = (void *)rec->ip; - - /* read where this goes */ - if (probe_kernel_read(op, ip, sizeof(op))) - return -EFAULT; - /* * We expect to see: * @@ -300,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. 
*/ - if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { - pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]); + if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000)) + return 0; + return 1; +} +#else +static int +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +{ + /* look for patched "NOP" on ppc64 with -mprofile-kernel */ + if (op0 != PPC_INST_NOP) + return 0; + return 1; +} +#endif + +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op[2]; + void *ip = (void *)rec->ip; + + /* read where this goes */ + if (probe_kernel_read(op, ip, sizeof(op))) + return -EFAULT; + + if (!expected_nop_sequence(ip, op[0], op[1])) { + pr_err("Unexpected call sequence at %p: %x %x\n", + ip, op[0], op[1]); return -EINVAL; } @@ -324,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return 0; } -#else + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return ftrace_make_call(rec, addr); +} +#endif + +#else /* !CONFIG_PPC64: */ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 76c0963..848b474 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -42,7 +42,6 @@ --RR. */ #if defined(_CALL_ELF) && _CALL_ELF == 2 -#define R2_STACK_OFFSET 24 /* An address is simply the address of the function. */ typedef unsigned long func_desc_t; @@ -74,7 +73,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); } #else -#define R2_STACK_OFFSET 40 /* An address is address of the OPD entry, which contains address of fn. */ typedef struct ppc64_opd_entry func_desc_t; @@ -451,17 +449,60 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, return (unsigned long)&stubs[i]; } +#ifdef CC_USING_MPROFILE_KERNEL +static bool is_early_mcount_callsite(u32 *instruction) +{ + /* + * Check if this is one of the -mprofile-kernel sequences. + */ + if (instruction[-1] == PPC_INST_STD_LR && + instruction[-2] == PPC_INST_MFLR) + return true; + + if (instruction[-1] == PPC_INST_MFLR) + return true; + + return false; +} + +/* + * In case of _mcount calls, do not save the current callee's TOC (in r2) into + * the original caller's stack frame. If we did we would clobber the saved TOC + * value of the original caller. + */ +static void squash_toc_save_inst(const char *name, unsigned long addr) +{ + struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr; + + /* Only for calls to _mcount */ + if (strcmp("_mcount", name) != 0) + return; + + stub->jump[2] = PPC_INST_NOP; +} +#else +static void squash_toc_save_inst(const char *name, unsigned long addr) { } + +/* without -mprofile-kernel, mcount calls are never early */ +static bool is_early_mcount_callsite(u32 *instruction) +{ + return false; +} +#endif + /* We expect a noop next: if it is, replace it with instruction to restore r2. 
*/ static int restore_r2(u32 *instruction, struct module *me) { if (*instruction != PPC_INST_NOP) { + if (is_early_mcount_callsite(instruction - 1)) + return 1; pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; } /* ld r2,R2_STACK_OFFSET(r1) */ - *instruction = 0xe8410000 | R2_STACK_OFFSET; + *instruction = PPC_INST_LD_TOC; return 1; } @@ -586,6 +627,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return -ENOENT; if (!restore_r2((u32 *)location + 1, me)) return -ENOEXEC; + + squash_toc_save_inst(strtab + sym->st_name, value); } else value += local_entry_offset(sym); -- cgit v1.1 From 4eb0799ff93b54dd40c0c5bb06f60f1eb015635b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 9 Feb 2016 15:50:23 +1100 Subject: powerpc/eeh: Reworked eeh_pe_bus_get() The original implementation is ugly: unnecessary if statements and "out" tag. This reworks the function to avoid the above weaknesses. No functional changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_pe.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 98f8180..9066afc 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -920,25 +920,21 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe) */ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { - struct pci_bus *bus = NULL; struct eeh_dev *edev; struct pci_dev *pdev; - if (pe->type & EEH_PE_PHB) { - bus = pe->phb->bus; - } else if (pe->type & EEH_PE_BUS || - pe->type & EEH_PE_DEVICE) { - if (pe->state & EEH_PE_PRI_BUS) { - bus = pe->bus; - goto out; - } + if (pe->type & EEH_PE_PHB) + return pe->phb->bus; - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); - pdev = eeh_dev_to_pci_dev(edev); - if (pdev) - bus = pdev->bus; - } + /* The primary bus might be cached during probe time */ + if (pe->state & EEH_PE_PRI_BUS) + return pe->bus; + + /* Retrieve the parent PCI bus of first (top) PCI device */ + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + pdev = eeh_dev_to_pci_dev(edev); + if (pdev) + return pdev->bus; -out: - return bus; + return NULL; } -- cgit v1.1 From 971427f5827d5a013965878e196d6930a977e8a7 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:05 +1100 Subject: powerpc/pci: Remove VFs prior to PF As commit ac205b7bb72f ("PCI: make sriov work with hotplug remove") indicates, VFs, which are on the same PCI bus as their PF, should be removed before the PF. Otherwise, we might run into a kernel crash at PCI unplugging time. This applies the above pattern to the powerpc PCI hotplug path.
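The one-line fix relies on bus ordering; as a sketch of the reasoning (mirroring the hunk below): VFs are created after their PF, so they sit later on bus->devices, and a reverse walk tears them down first:

    /* Sketch: reverse walk => VFs (added last) are removed before the PF */
    list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) {
            pr_debug(" Removing %s...\n", pci_name(dev));
            pci_stop_and_remove_bus_device(dev);
    }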
Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 7f9ed0c..59c4361 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -55,7 +55,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus) pr_debug("PCI: Removing devices on bus %04x:%02x\n", pci_domain_nr(bus), bus->number); - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) { pr_debug(" Removing %s...\n", pci_name(dev)); pci_stop_and_remove_bus_device(dev); } -- cgit v1.1 From 51c0e87e9a48d081d7ccb40d7454a0fa2935a424 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:06 +1100 Subject: powerpc/eeh: Cache normal BARs, not windows or IOV BARs This restricts the EEH address cache to use only the first 7 BARs. This makes __eeh_addr_cache_insert_dev() ignore PCI bridge window and IOV BARs. As the result of this change, eeh_addr_cache_get_dev() will return VFs from VF's resource addresses instead of parent PFs. This also removes PCI bridge check as we limit __eeh_addr_cache_insert_dev() to 7 BARs and this effectively excludes PCI bridges from being cached. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_cache.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index a1e86e1..ddbcfab 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -195,8 +195,11 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) return; } - /* Walk resources on this device, poke them into the tree */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + /* + * Walk resources on this device, poke the first 7 (6 normal BAR and 1 + * ROM BAR) into the tree. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { resource_size_t start = pci_resource_start(dev,i); resource_size_t end = pci_resource_end(dev,i); unsigned long flags = pci_resource_flags(dev,i); @@ -222,10 +225,6 @@ void eeh_addr_cache_insert_dev(struct pci_dev *dev) { unsigned long flags; - /* Ignore PCI bridges */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) - return; - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); __eeh_addr_cache_insert_dev(dev); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -- cgit v1.1 From 39218cd00ebf08b16edf015adc363de42d9ad612 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:07 +1100 Subject: powerpc/eeh: EEH device for VF VFs and their corresponding pdn are created and released dynamically when their PF's SRIOV capability is enabled and disabled. This creates and releases EEH devices for VFs when creating and releasing their pdn instances, which means EEH devices and pdn instances have same life cycle. Also, VF's EEH device is identified by (struct eeh_dev::physfn). 
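A minimal sketch of that identification rule (the helper name is hypothetical, the field is real):

    /* Sketch: a VF's EEH device carries a back-pointer to its PF */
    static bool eeh_dev_is_vf(struct eeh_dev *edev)
    {
            return edev->physfn != NULL;    /* NULL for PFs and normal devices */
    }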
Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci_dn.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index b3b4df9..e23bdf7 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -179,6 +179,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; + struct eeh_dev *edev; int i; /* Only support IOV for now */ @@ -204,6 +205,12 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) __func__, i); return NULL; } + + /* Create the EEH device for the VF */ + eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); + edev = pdn_to_eeh_dev(pdn); + BUG_ON(!edev); + edev->physfn = pdev; } #endif /* CONFIG_PCI_IOV */ @@ -215,6 +222,7 @@ void remove_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV struct pci_dn *parent; struct pci_dn *pdn, *tmp; + struct eeh_dev *edev; int i; /* @@ -256,6 +264,13 @@ void remove_dev_pci_data(struct pci_dev *pdev) pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) continue; + /* Release EEH device for the VF */ + edev = pdn_to_eeh_dev(pdn); + if (edev) { + pdn->edev = NULL; + kfree(edev); + } + if (!list_empty(&pdn->list)) list_del(&pdn->list); -- cgit v1.1 From c29fa27d26e3189009ba42786f0c0dce14a90940 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:08 +1100 Subject: powerpc/eeh: Create PE for VFs This creates PEs for VFs in the weak function pcibios_bus_add_device(). Those PEs for VFs are identified with newly introduced flag EEH_PE_VF so that we treat them differently during EEH recovery. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_pe.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 9066afc..eea48d8 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) * EEH device already having associated PE, but * the direct parent EEH device doesn't have yet. */ - pdn = pdn ? pdn->parent : NULL; + if (edev->physfn) + pdn = pci_get_pdn(edev->physfn); + else + pdn = pdn ? pdn->parent : NULL; while (pdn) { /* We're poking out of PCI territory */ parent = pdn_to_eeh_dev(pdn); @@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) } /* Create a new EEH PE */ - pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + if (edev->physfn) + pe = eeh_pe_alloc(edev->phb, EEH_PE_VF); + else + pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; -- cgit v1.1 From 9312bc5bab5907937db20c9f8c094d0c02dd78db Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:09 +1100 Subject: powerpc/powernv: Support EEH reset for VF PE PEs for VFs don't have primary bus. So they have to have their own reset backend, which is used during EEH recovery. The patch implements the reset backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained in the PE. 
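For reference, issuing an FLR through config space looks roughly like this (a sketch only: the real backend lives in the powernv platform code outside the hunk below, the helper name is invented, and the 100ms settle time is the PCIe-spec minimum):

    static void vf_flr_sketch(struct pci_dev *pdev)
    {
            int pos = pci_pcie_cap(pdev);
            u16 ctrl;

            if (!pos)
                    return;

            /* Set the Initiate Function Level Reset bit */
            pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &ctrl);
            ctrl |= PCI_EXP_DEVCTL_BCR_FLR;
            pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, ctrl);

            msleep(100);    /* give the function time to complete the reset */
    }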
Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 8c6005c..0d72462 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -761,7 +761,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); eeh_unfreeze_pe(pe, false); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); eeh_pe_state_clear(pe, EEH_PE_ISOLATED); break; @@ -769,14 +770,16 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: -- cgit v1.1 From 67086e32b56481531ab1292b284e074b1a8d764c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:11 +1100 Subject: powerpc/eeh: Support error recovery for VF PE PFs are enumerated on PCI bus, while VFs are created by PF's driver. In EEH recovery, there are two cases: 1. Device and driver is EEH aware, error handlers are called. 2. Device and driver is not EEH aware, un-plug the device and plug it again by enumerating it. The special thing happens in the second case. For a PF, we could use the original pci core to enumerate the bus, while for a VF we need to record the VFs which are un-plugged and then plug them again. Also, the patch caches the VF index in pci_dn, which can be used to calculate VF's bus, device and function number. That information helps to locate the VF's PCI device instance when doing hotplug during EEH recovery if necessary. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 8 +++ arch/powerpc/kernel/eeh_dev.c | 1 + arch/powerpc/kernel/eeh_driver.c | 137 ++++++++++++++++++++++++++++++++------- arch/powerpc/kernel/pci_dn.c | 4 +- 4 files changed, 124 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 0d72462..b7338a9 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1246,6 +1246,14 @@ void eeh_remove_device(struct pci_dev *dev) * from the parent PE during the BAR resotre. */ edev->pdev = NULL; + + /* + * The flag "in_error" is used to trace EEH devices for VFs + * in error state or not. It's set in eeh_report_error(). If + * it's not set, eeh_report_{reset,resume}() won't be called + * for the VF EEH device.
+ */ + edev->in_error = false; dev->dev.archdata.edev = NULL; if (!(edev->pe->state & EEH_PE_KEEP)) eeh_rmv_from_parent_pe(edev); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index aabba94..7815095 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) edev->pdn = pdn; edev->phb = phb; INIT_LIST_HEAD(&edev->list); + INIT_LIST_HEAD(&edev->rmv_list); return NULL; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 650cfb3..c0fe7a6 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -34,6 +34,11 @@ #include #include +struct eeh_rmv_data { + struct list_head edev_list; + int removed; +}; + /** * eeh_pcid_name - Retrieve name of PCI device driver * @pdev: PCI device @@ -211,6 +216,7 @@ static void *eeh_report_error(void *data, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + edev->in_error = true; eeh_pcid_put(dev); return NULL; } @@ -282,7 +288,8 @@ static void *eeh_report_reset(void *data, void *userdata) if (!driver->err_handler || !driver->err_handler->slot_reset || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || + (!edev->in_error)) { eeh_pcid_put(dev); return NULL; } @@ -326,6 +333,7 @@ static void *eeh_report_resume(void *data, void *userdata) { struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + bool was_in_error; struct pci_driver *driver; if (!dev || eeh_dev_removed(edev)) @@ -335,11 +343,13 @@ static void *eeh_report_resume(void *data, void *userdata) driver = eeh_pcid_get(dev); if (!driver) return NULL; + was_in_error = edev->in_error; + edev->in_error = false; eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->resume || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { edev->mode &= ~EEH_DEV_NO_HANDLER; eeh_pcid_put(dev); return NULL; @@ -386,12 +396,40 @@ static void *eeh_report_failure(void *data, void *userdata) return NULL; } +static void *eeh_add_virt_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + if (!(edev->physfn)) { + pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", + __func__, edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + return NULL; + } + + driver = eeh_pcid_get(dev); + if (driver) { + eeh_pcid_put(dev); + if (driver->err_handler) + return NULL; + } + +#ifdef CONFIG_PPC_POWERNV + pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0); +#endif + return NULL; +} + static void *eeh_rmv_device(void *data, void *userdata) { struct pci_driver *driver; struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - int *removed = (int *)userdata; + struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; + int *removed = rmv_data ? &rmv_data->removed : NULL; /* * Actually, we should remove the PCI bridges as well. 
@@ -416,7 +454,8 @@ static void *eeh_rmv_device(void *data, void *userdata) driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); - if (driver->err_handler && + if (removed && + driver->err_handler && driver->err_handler->error_detected && driver->err_handler->slot_reset) return NULL; @@ -427,11 +466,29 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_name(dev)); edev->bus = dev->bus; edev->mode |= EEH_DEV_DISCONNECTED; - (*removed)++; + if (removed) + (*removed)++; - pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(dev); - pci_unlock_rescan_remove(); + if (edev->physfn) { +#ifdef CONFIG_PPC_POWERNV + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); + edev->pdev = NULL; + + /* + * We have to set the VF PE number to invalid one, which is + * required to plug the VF successfully. + */ + pdn->pe_number = IODA_INVALID_PE; +#endif + if (rmv_data) + list_add(&edev->rmv_list, &rmv_data->edev_list); + } else { + pci_lock_rescan_remove(); + pci_stop_and_remove_bus_device(dev); + pci_unlock_rescan_remove(); + } return NULL; } @@ -545,11 +602,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ -static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, + struct eeh_rmv_data *rmv_data) { struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); struct timeval tstamp; - int cnt, rc, removed = 0; + int cnt, rc; + struct eeh_dev *edev; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -563,12 +622,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(bus); - pci_unlock_rescan_remove(); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(bus); + pci_unlock_rescan_remove(); + } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); + eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); } /* @@ -610,14 +673,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * PE. We should disconnect it so the binding can be * rebuilt when adding PCI devices. 
*/ + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(bus); - } else if (frozen_bus && removed) { + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(bus); + } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(frozen_bus); + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -636,8 +707,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) static void eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; + struct eeh_dev *edev, *tmp; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; + struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { @@ -692,7 +765,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) */ if (result == PCI_ERS_RESULT_NONE) { pr_info("EEH: Reset with hotplug activity\n"); - rc = eeh_reset_device(pe, frozen_bus); + rc = eeh_reset_device(pe, frozen_bus, NULL); if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); @@ -744,7 +817,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); - rc = eeh_reset_device(pe, NULL); + rc = eeh_reset_device(pe, NULL, &rmv_data); if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); @@ -764,6 +837,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) goto hard_fail; } + /* + * For those hot removed VFs, we should add back them after PF get + * recovered properly. + */ + list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) { + eeh_add_virt_device(edev, NULL); + list_del(&edev->rmv_list); + } + /* Tell all device drivers that they can resume operations */ pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); @@ -803,12 +885,17 @@ perm_error: * the their PCI config any more. 
*/ if (frozen_bus) { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(frozen_bus); - pci_unlock_rescan_remove(); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(frozen_bus); + pci_unlock_rescan_remove(); + } } } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index e23bdf7..38102cb 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, struct pci_dev *pdev, + int vf_index, int busno, int devfn) { struct pci_dn *pdn; @@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, pdn->busno = busno; pdn->devfn = devfn; #ifdef CONFIG_PPC_POWERNV + pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; #endif INIT_LIST_HEAD(&pdn->child_list); @@ -197,7 +199,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { - pdn = add_one_dev_pci_data(parent, NULL, + pdn = add_one_dev_pci_data(parent, NULL, i, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { -- cgit v1.1 From 2311cca55589ae7889071e11e18c9260b68314e2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 4 Mar 2016 10:53:12 +1100 Subject: powerpc/eeh: Don't propagate error to guest When EEH error happened to the parent PE of those PEs that have been passed through to guest, the error is propagated to guest domain and the VFIO driver's error handlers are called. It's not correct as the error in the host domain shouldn't be propagated to guests and affect them. This adds one more limitation when calling EEH error handlers. If the PE has been passed through to guest, the error handlers won't be called. 
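The guard added below is the same three-part test at the top of every report function; as a sketch it could be factored out like this (the helper name is hypothetical, all three predicates are real):

    /* Sketch: should this device's error handler be invoked at all? */
    static bool eeh_edev_actionable(struct eeh_dev *edev)
    {
            struct pci_dev *dev = eeh_dev_to_pci_dev(edev);

            return dev && !eeh_dev_removed(edev) && !eeh_pe_passed(edev->pe);
    }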
Signed-off-by: Gavin Shan Reviewed-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index c0fe7a6..6c59de8 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -195,7 +195,7 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_frozen; @@ -237,7 +237,7 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; driver = eeh_pcid_get(dev); @@ -277,7 +277,7 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_normal; @@ -336,7 +336,7 @@ static void *eeh_report_resume(void *data, void *userdata) bool was_in_error; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_normal; @@ -375,7 +375,7 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_perm_failure; -- cgit v1.1 From 3fa7bf7229993eac65fd6ade2ffc5f75150b40e1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 4 Mar 2016 10:53:13 +1100 Subject: powerpc/eeh: Don't remove passed VFs When we have partial hotplug as part of the error recovery on PF, the VFs that are bound with vfio-pci driver will experience hotplug. That's not allowed. This checks if the VF PE is passed or not. If it does, we leave the VF without removing it. Signed-off-by: Gavin Shan Reviewed-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 6c59de8..fb6207d 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -455,6 +455,9 @@ static void *eeh_rmv_device(void *data, void *userdata) if (driver) { eeh_pcid_put(dev); if (removed && + eeh_pe_passed(edev->pe)) + return NULL; + if (removed && driver->err_handler && driver->err_handler->error_detected && driver->err_handler->slot_reset) -- cgit v1.1 From eca036ee1b4ea0ce4f311f4d8cb73731ef2f0b26 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 4 Mar 2016 10:53:14 +1100 Subject: powerpc/eeh: Synchronize recovery in host/guest When passing through SRIOV VFs to guest, we possibly encounter EEH error on PF. In this case, the VF PEs are put into frozen state. The error could be reported to guest before it's captured by the host. That means the guest could attempt to recover errors on VFs before host gets chance to recover errors on PFs. The VFs won't be recovered successfully. 
This enforces the recovery order for the above case: the recovery on the child PE in the guest is held until the recovery on the parent PE in the host is completed. Signed-off-by: Gavin Shan Reviewed-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index b7338a9..2e39a4d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1548,6 +1548,17 @@ int eeh_pe_get_state(struct eeh_pe *pe) if (!eeh_ops || !eeh_ops->get_state) return -ENOENT; + /* + * If the parent PE is owned by the host kernel and is undergoing + * error recovery, we should return the PE state as temporarily + * unavailable so that the error recovery on the guest is suspended + * until the recovery completes on the host. + */ + if (pe->parent && + !(pe->state & EEH_PE_REMOVED) && + (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return EEH_PE_STATE_UNAVAIL; + result = eeh_ops->get_state(pe, NULL); rst_active = !!(result & EEH_STATE_RESET_ACTIVE); dma_en = !!(result & EEH_STATE_DMA_ENABLED); -- cgit v1.1 From b6c7347f2f6176fa3225903fbfe63b1ccd01ec9d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 26 Feb 2016 11:14:00 +1100 Subject: powerpc/eeh: Remove duplicated check in eeh_dump_pe_log() eeh_dump_pe_log() is only called by eeh_slot_error_detail(), which already checks that the PE isn't in the PCI config blocked state. So we don't need the duplicated check in eeh_dump_pe_log(). This removes the duplicated check in eeh_dump_pe_log(). No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 2e39a4d..5591f05a 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -268,13 +268,6 @@ static void *eeh_dump_pe_log(void *data, void *flag) struct eeh_dev *edev, *tmp; size_t *plen = flag; - /* If the PE's config space is blocked, 0xFF's will be - * returned. It's pointless to collect the log in this - * case. - */ - if (pe->state & EEH_PE_CFG_BLOCKED) - return NULL; - eeh_pe_for_each_dev(pe, edev, tmp) *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, EEH_PCI_REGS_LOG_LEN - *plen); -- cgit v1.1 From 949e9b827eb4736d96df520c67d07a54c64e99b8 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 23 Oct 2015 17:19:46 +1100 Subject: powerpc/eeh: eeh_pci_enable(): fix checking of post-request state In eeh_pci_enable(), after making the request to set the new options, we call eeh_ops->wait_state() to check that the request finished successfully. At the moment, if eeh_ops->wait_state() returns 0, we return 0 without checking that it reflects the expected outcome. This can lead to callers further up the chain incorrectly assuming the slot has been successfully unfrozen and continuing to attempt recovery. On powernv, this will occur if pnv_eeh_get_pe_state() or pnv_eeh_get_phb_state() return 0, which in turn occurs if the relevant OPAL call returns OPAL_EEH_STOPPED_MMIO_DMA_FREEZE or OPAL_EEH_PHB_ERROR respectively. On pseries, this will occur if pseries_eeh_get_state() returns 0, which in turn occurs if RTAS reports that the PE is in the MMIO Stopped and DMA Stopped states.
Obviously, none of these cases represent a successful completion of a request to thaw MMIO or DMA. Fix the check so that a wait_state() return value of 0 won't be considered successful for the EEH_OPT_THAW_MMIO or EEH_OPT_THAW_DMA cases. Signed-off-by: Andrew Donnellan Acked-by: Gavin Shan Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 5591f05a..6544017 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -670,7 +670,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) /* Check if the request is finished successfully */ if (active_flag) { rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc <= 0) + if (rc < 0) return rc; if (rc & active_flag) -- cgit v1.1 From 921fff351c3a7d2895f22d4a7a36b2f667709b9c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 3 Feb 2016 23:34:21 +0100 Subject: powerpc/8xx: CONFIG_DEBUG_PAGEALLOC requires ITLBmiss for kernel addresses When CONFIG_DEBUG_PAGEALLOC is activated, the initial TLB mapping gets flushed to track accesses to wrong areas. Therefore, kernel addresses will also generate ITLB misses. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 78c1eba..e629e28 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -329,7 +329,7 @@ InstructionTLBMiss: /* If we are faulting a kernel address, we have to use the * kernel page tables. */ -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ -- cgit v1.1 From 913a6b3d10d85a032eff2f31254c35e0976f5e32 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:07:48 +0100 Subject: powerpc/8xx: Save r3 all the time in DTLB miss handler We are spending between 40 and 160 cycles with a mean of 65 cycles in the DTLB handling routine (measured with mftbl) so make it simpler although it adds one instruction. With this modification, we get three registers available at all times, which will help with the following patch. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e629e28..a89492e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -385,23 +385,20 @@ InstructionTLBMiss: . = 0x1200 DataStoreTLBMiss: -#ifdef CONFIG_8xx_CPU6 mtspr SPRN_SPRG_SCRATCH2, r3 -#endif EXCEPTION_PROLOG_0 - mfcr r10 + mfcr r3 /* If we are faulting a kernel address, we have to use the * kernel page tables.
*/ - mfspr r11, SPRN_MD_EPN - IS_KERNEL(r11, r11) + mfspr r10, SPRN_MD_EPN + IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mtcr r10 - mfspr r10, SPRN_MD_EPN + mtcr r3 /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -453,9 +450,7 @@ DataStoreTLBMiss: MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ -#ifdef CONFIG_8xx_CPU6 mfspr r3, SPRN_SPRG_SCRATCH2 -#endif mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 rfi -- cgit v1.1 From a372acfac51e0d5858f8f6f84da52defcabf054b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:07:50 +0100 Subject: powerpc/8xx: Map linear kernel RAM with 8M pages On a live running system (VoIP gateway for Air Traffic Control), over a 10 minute period (with 277s idle), we get 87 million DTLB misses and approximately 35 seconds are spent in the DTLB handler. This represents 5.8% of the overall time and even 10.8% of the non-idle time. Among those 87 million DTLB misses, 15% are on user addresses and 85% are on kernel addresses. And within the kernel addresses, 93% are on addresses from the linear address space and only 7% are on addresses from the virtual address space. MPC8xx has no BATs but it has 8Mb page size. This patch implements mapping of kernel RAM using 8Mb pages, on the same model as what is done on the 40x. In 4k pages mode, each PGD entry maps a 4Mb area: we map every two entries to the same 8Mb physical page. In each second entry, we add 4Mb to the page physical address to ease the life of the FixupDAR routine. This is just ignored by HW. In 16k pages mode, each PGD entry maps a 64Mb area: each PGD entry will point to the first page of the area. The DTLB handler adds the 3 bits from EPN to map the correct page. With this patch applied, we now get only 13 million TLB misses during the 10 minute period. The idle time has increased to 313s and the overall time spent in the DTLB miss handler is 6.3s, which represents 1% of the overall time and 2.2% of non-idle time. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a89492e..87d1f5f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -398,11 +398,13 @@ DataStoreTLBMiss: BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mtcr r3 /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + mtcr r11 + bt- 28,DTLBMiss8M /* bit 28 = Large page (8M) */ + mtcr r3 /* We have a pte table, so load fetch the pte from the table. */ @@ -455,6 +457,29 @@ DataStoreTLBMiss: EXCEPTION_EPILOG_0 rfi +DTLBMiss8M: + mtcr r3 + ori r11, r11, MD_SVALID + MTSPR_CPU6(SPRN_MD_TWC, r11, r3) +#ifdef CONFIG_PPC_16K_PAGES + /* + * In 16k pages mode, each PGD entry defines a 64M block. + * Here we select the 8M page within the block.
From 63e9e1c28fa6fe714364c7817e60dbaed3cec95e Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 9 Feb 2016 17:08:16 +0100
Subject: powerpc/8xx: remove special handling of CPU6 errata in set_dec()

The CPU6 errata is now handled directly in mtspr(), so we can use the
standard set_dec() function in all cases.

Signed-off-by: Christophe Leroy
Signed-off-by: Scott Wood
---
 arch/powerpc/kernel/head_8xx.S | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 87d1f5f..4a5e56d 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -926,24 +926,6 @@ _GLOBAL(set_context)
 	SYNC
 	blr

-#ifdef CONFIG_8xx_CPU6
-/* It's here because it is unique to the 8xx.
- * It is important we get called with interrupts disabled.  I used to
- * do that, but it appears that all code that calls this already had
- * interrupt disabled.
- */
-	.globl	set_dec_cpu6
-set_dec_cpu6:
-	lis	r7, cpu6_errata_word@h
-	ori	r7, r7, cpu6_errata_word@l
-	li	r4, 0x2c00
-	stw	r4, 8(r7)
-	lwz	r4, 8(r7)
-	mtspr	22, r3		/* Update Decrementer */
-	SYNC
-	blr
-#endif
-
 /*
  * We put a few things here that have to be page-aligned.
  * This stuff goes at the beginning of the data segment,
-- cgit v1.1

From a7761fe48993f103d6deac6037bf786bd1db0501 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 9 Feb 2016 17:08:18 +0100
Subject: powerpc/8xx: rewrite set_context() in C

There is no real need to have set_context() in assembly. Now that
mtspr() handles the CPU6 errata directly, we can rewrite set_context()
in C for easier maintenance.

Signed-off-by: Christophe Leroy
Signed-off-by: Scott Wood
---
 arch/powerpc/kernel/head_8xx.S | 44 ------------------------------------------
 1 file changed, 44 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 4a5e56d..80c6947 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -883,50 +883,6 @@ initial_mmu:

 /*
- * Set up to use a given MMU context.
- * r3 is context number, r4 is PGD pointer.
- *
- * We place the physical address of the new task page directory loaded
- * into the MMU base register, and set the ASID compare register with
- * the new "context."
- */
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
-	/* Context switch the PTE pointer for the Abatron BDI2000.
-	 * The PGDIR is passed as second argument.
-	 */
-	lis	r5, KERNELBASE@h
-	lwz	r5, 0xf0(r5)
-	stw	r4, 0x4(r5)
-#endif
-
-	/* Register M_TW will contain base address of level 1 table minus the
-	 * lower part of the kernel PGDIR base address, so that all accesses to
-	 * level 1 table are done relative to lower part of kernel PGDIR base
-	 * address.
-	 */
-	li	r5, (swapper_pg_dir-PAGE_OFFSET)@l
-	sub	r4, r4, r5
-	tophys	(r4, r4)
-#ifdef CONFIG_8xx_CPU6
-	lis	r6, cpu6_errata_word@h
-	ori	r6, r6, cpu6_errata_word@l
-	li	r7, 0x3f80
-	stw	r7, 12(r6)
-	lwz	r7, 12(r6)
-#endif
-	mtspr	SPRN_M_TW, r4		/* Update pointeur to level 1 table */
-#ifdef CONFIG_8xx_CPU6
-	li	r7, 0x3380
-	stw	r7, 12(r6)
-	lwz	r7, 12(r6)
-#endif
-	mtspr	SPRN_M_CASID, r3	/* Update context */
-	SYNC
-	blr
-
-/*
  * We put a few things here that have to be page-aligned.
  * This stuff goes at the beginning of the data segment,
  * which is page-aligned.
-- cgit v1.1
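The C replacement itself lands outside arch/powerpc/kernel, so it does
not appear in the diff above. A minimal sketch, reconstructed from the
deleted assembly and assuming mtspr() now handles the CPU6 errata
transparently (the BDI2000 hook is omitted):

void set_context(unsigned long id, pgd_t *pgd)
{
	/* M_TW holds __pa(pgd) minus the sign-extended low 16 bits of
	 * the kernel PGDIR address, mirroring the @l arithmetic in the
	 * removed assembly, so level 1 accesses are relative to it. */
	s16 offset = (s16)(__pa(swapper_pg_dir));

	mtspr(SPRN_M_TW, __pa(pgd) - offset);	/* pointer to level 1 table */
	mtspr(SPRN_M_CASID, id);		/* update context */
	mb();					/* the SYNC from the asm */
}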
From 766d45cbeecc383b8ee230370b316d0b1e30d915 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 9 Feb 2016 17:08:21 +0100
Subject: powerpc/8xx: rewrite flush_instruction_cache() in C

On the PPC8xx, the instruction cache is flushed by writing to the
SPRN_IC_CST register. This register suffers from the CPU6 errata. The
patch rewrites the function in C so that the errata is handled
transparently.

Signed-off-by: Christophe Leroy
Signed-off-by: Scott Wood
---
 arch/powerpc/kernel/misc_32.S | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index be8edd6..7d1284f 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -296,12 +296,9 @@ _GLOBAL(real_writeb)
  * Flush instruction cache.
  * This is a no-op on the 601.
  */
+#ifndef CONFIG_PPC_8xx
 _GLOBAL(flush_instruction_cache)
-#if defined(CONFIG_8xx)
-	isync
-	lis	r5, IDC_INVALL@h
-	mtspr	SPRN_IC_CST, r5
-#elif defined(CONFIG_4xx)
+#if defined(CONFIG_4xx)
 #ifdef CONFIG_403GCX
 	li	r3, 512
 	mtctr	r3
@@ -334,9 +331,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
 	mfspr	r3,SPRN_HID0
 	ori	r3,r3,HID0_ICFI
 	mtspr	SPRN_HID0,r3
-#endif /* CONFIG_8xx/4xx */
+#endif /* CONFIG_4xx */
 	isync
 	blr
+#endif /* CONFIG_PPC_8xx */

 /*
  * Write any modified data cache blocks out to memory
-- cgit v1.1
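Again, the C version is added outside this cgit view. Judging by the
deleted assembly, it presumably boils down to something like the sketch
below, with mtspr() applying the CPU6 workaround behind the scenes:

/* Sketch only, not the committed code. */
void flush_instruction_cache(void)
{
	isync();
	mtspr(SPRN_IC_CST, IDC_INVALL);	/* invalidate the whole I-cache */
	isync();
}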
From 5736f96d12dd4204d3aac43bf7b512ab434b904f Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 9 Feb 2016 17:08:25 +0100
Subject: powerpc32: Remove clear_pages() and define clear_page() inline

clear_pages() is never used except by clear_page(), and PPC32 is the
only architecture (still) having this function; neither PPC64 nor any
other architecture has it.

This patch removes clear_pages() and makes clear_page() an inline
function (as on PPC64), as it is only a few instructions.

Signed-off-by: Christophe Leroy
Signed-off-by: Scott Wood
---
 arch/powerpc/kernel/misc_32.S      | 16 ----------------
 arch/powerpc/kernel/ppc_ksyms_32.c |  1 -
 2 files changed, 17 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7d1284f..181afc1 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -517,22 +517,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
 #endif /* CONFIG_BOOKE */

 /*
- * Clear pages using the dcbz instruction, which doesn't cause any
- * memory traffic (except to write out any cache lines which get
- * displaced).  This only works on cacheable memory.
- *
- * void clear_pages(void *page, int order) ;
- */
-_GLOBAL(clear_pages)
-	li	r0,PAGE_SIZE/L1_CACHE_BYTES
-	slw	r0,r0,r4
-	mtctr	r0
-1:	dcbz	0,r3
-	addi	r3,r3,L1_CACHE_BYTES
-	bdnz	1b
-	blr
-
-/*
  * Copy a whole page.  We use the dcbz instruction on the destination
  * to reduce memory traffic (it eliminates the unnecessary reads of
  * the destination into cache).  This requires that the destination
diff --git a/arch/powerpc/kernel/ppc_ksyms_32.c b/arch/powerpc/kernel/ppc_ksyms_32.c
index 30ddd8a..2bfaafe 100644
--- a/arch/powerpc/kernel/ppc_ksyms_32.c
+++ b/arch/powerpc/kernel/ppc_ksyms_32.c
@@ -10,7 +10,6 @@
 #include 
 #include 

-EXPORT_SYMBOL(clear_pages);
 EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
 EXPORT_SYMBOL(DMA_MODE_READ);
 EXPORT_SYMBOL(DMA_MODE_WRITE);
-- cgit v1.1
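The inline clear_page() goes into the asm/ headers, which this diff
does not cover. Based on the deleted clear_pages() loop it is
presumably close to the sketch below; the dcbz() helper is an
assumption here.

/* Sketch: dcbz zeroes a whole cache line without reading it from
 * memory, so this only works on cacheable memory. */
static inline void clear_page(void *addr)
{
	unsigned int i;

	for (i = 0; i < PAGE_SIZE / L1_CACHE_BYTES;
	     i++, addr += L1_CACHE_BYTES)
		dcbz(addr);
}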
From affe587bacf48e328fb8d4c5ef9007b9c555b128 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 9 Feb 2016 17:08:27 +0100
Subject: powerpc32: move xxxxx_dcache_range() functions inline

The flush/clean/invalidate_dcache_range() functions are all very
similar and quite short. They are mainly used in __dma_sync();
perf_event places them in the top 3 consuming functions during heavy
ethernet activity. They are good candidates for inlining, as
__dma_sync() does almost nothing but call them.

Signed-off-by: Christophe Leroy
Signed-off-by: Scott Wood
---
 arch/powerpc/kernel/misc_32.S   | 65 -----------------------------------------
 arch/powerpc/kernel/ppc_ksyms.c |  2 ++
 2 files changed, 2 insertions(+), 65 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 181afc1..09e1e5d 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -375,71 +375,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
 	isync
 	blr
 /*
- * Write any modified data cache blocks out to memory.
- * Does not invalidate the corresponding cache lines (especially for
- * any corresponding instruction cache).
- *
- * clean_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(clean_dcache_range)
-	li	r5,L1_CACHE_BYTES-1
-	andc	r3,r3,r5
-	subf	r4,r3,r4
-	add	r4,r4,r5
-	srwi.	r4,r4,L1_CACHE_SHIFT
-	beqlr
-	mtctr	r4
-
-1:	dcbst	0,r3
-	addi	r3,r3,L1_CACHE_BYTES
-	bdnz	1b
-	sync				/* wait for dcbst's to get to ram */
-	blr
-
-/*
- * Write any modified data cache blocks out to memory and invalidate them.
- * Does not invalidate the corresponding instruction cache blocks.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(flush_dcache_range)
-	li	r5,L1_CACHE_BYTES-1
-	andc	r3,r3,r5
-	subf	r4,r3,r4
-	add	r4,r4,r5
-	srwi.	r4,r4,L1_CACHE_SHIFT
-	beqlr
-	mtctr	r4
-
-1:	dcbf	0,r3
-	addi	r3,r3,L1_CACHE_BYTES
-	bdnz	1b
-	sync				/* wait for dcbst's to get to ram */
-	blr
-
-/*
- * Like above, but invalidate the D-cache.  This is used by the 8xx
- * to invalidate the cache so the PPC core doesn't get stale data
- * from the CPM (no cache snooping here :-).
- *
- * invalidate_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(invalidate_dcache_range)
-	li	r5,L1_CACHE_BYTES-1
-	andc	r3,r3,r5
-	subf	r4,r3,r4
-	add	r4,r4,r5
-	srwi.	r4,r4,L1_CACHE_SHIFT
-	beqlr
-	mtctr	r4
-
-1:	dcbi	0,r3
-	addi	r3,r3,L1_CACHE_BYTES
-	bdnz	1b
-	sync				/* wait for dcbi's to get to ram */
-	blr
-
-/*
  * Flush a particular page from the data cache to RAM.
  * Note: this is necessary because the instruction cache does *not*
  * snoop from the data cache.
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ef7024da..9f01e28 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -6,7 +6,9 @@
 #include 
 #include 

+#ifdef CONFIG_PPC64
 EXPORT_SYMBOL(flush_dcache_range);
+#endif
 EXPORT_SYMBOL(flush_icache_range);

 EXPORT_SYMBOL(empty_zero_page);
-- cgit v1.1
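The inlined versions go into a header outside this view; from the shape
of the deleted assembly, flush_dcache_range() is presumably along these
lines (a sketch; the dcbf() helper is an assumption):

/* Sketch: write back and invalidate each cache line in [start, stop). */
static inline void flush_dcache_range(unsigned long start, unsigned long stop)
{
	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
	unsigned long i;

	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
		dcbf(addr);
	mb();	/* the sync from the asm: wait for the dcbf's to reach ram */
}

clean_dcache_range() and invalidate_dcache_range() would differ only in
using dcbst and dcbi respectively, without the final invalidation or
write-back semantics noted in the deleted comments.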
From 716fa91d19fb122a1392b362e85929e8385b1fd2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 9 Feb 2016 17:08:31 +0100
Subject: powerpc32: small optimisation in flush_icache_range()

Inlining the _dcache_range() functions has shown that the compiler
does the same thing a bit better, with one instruction less.

Signed-off-by: Christophe Leroy
Signed-off-by: Scott Wood
---
 arch/powerpc/kernel/misc_32.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 09e1e5d..3ec5a22 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -348,10 +348,9 @@ BEGIN_FTR_SECTION
 	PURGE_PREFETCHED_INS
 	blr				/* for 601, do nothing */
 END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-	li	r5,L1_CACHE_BYTES-1
-	andc	r3,r3,r5
+	rlwinm	r3,r3,0,0,31 - L1_CACHE_SHIFT
 	subf	r4,r3,r4
-	add	r4,r4,r5
+	addi	r4,r4,L1_CACHE_BYTES - 1
 	srwi.	r4,r4,L1_CACHE_SHIFT
 	beqlr
 	mtctr	r4
-- cgit v1.1

From 737b01fca3f853a1f3373a79c06e4ee3a574e5b7 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 9 Feb 2016 17:08:33 +0100
Subject: powerpc32: Remove one insn in mulhdu

Remove one instruction in mulhdu.

Signed-off-by: Christophe Leroy
Signed-off-by: Scott Wood
---
 arch/powerpc/kernel/misc_32.S | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 3ec5a22..bf5160f 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -91,17 +91,16 @@ _GLOBAL(mulhdu)
 	addc	r7,r0,r7
 	addze	r4,r4
 1:	beqlr	cr1		/* all done if high part of A is 0 */
-	mr	r10,r3
 	mullw	r9,r3,r5
-	mulhwu	r3,r3,r5
+	mulhwu	r10,r3,r5
 	beq	2f
-	mullw	r0,r10,r6
-	mulhwu	r8,r10,r6
+	mullw	r0,r3,r6
+	mulhwu	r8,r3,r6
 	addc	r7,r0,r7
 	adde	r4,r4,r8
-	addze	r3,r3
+	addze	r10,r10
 2:	addc	r4,r4,r9
-	addze	r3,r3
+	addze	r3,r10
 	blr
-- cgit v1.1

From 7a25d91214cb22e642b9ed6e4434bfaf74adad28 Mon Sep 17 00:00:00 2001
From: Scott Wood
Date: Tue, 15 Mar 2016 01:47:38 -0500
Subject: powerpc/book3e-64: Use hardcoded mttmr opcode

This preserves the ability to build using older binutils
(reportedly <= 2.22).

Fixes: 6becef7ea04a ("powerpc/mpc85xx: Add CPU hotplug support for E6500")
Signed-off-by: Scott Wood
Cc: chenhui.zhao@freescale.com
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kernel/head_64.S | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 2916283..4286775 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 

 /* The physical memory is laid out such that the secondary processor
  * spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -207,12 +208,12 @@ _GLOBAL(book3e_start_thread)
 	/* If the thread id is invalid, just exit. */
 	b	13f
 10:
-	mttmr	TMRN_IMSR0, r5
-	mttmr	TMRN_INIA0, r4
+	MTTMR(TMRN_IMSR0, 5)
+	MTTMR(TMRN_INIA0, 4)
 	b	12f
 11:
-	mttmr	TMRN_IMSR1, r5
-	mttmr	TMRN_INIA1, r4
+	MTTMR(TMRN_IMSR1, 5)
+	MTTMR(TMRN_INIA1, 4)
 12:
 	isync
 	li	r6, 1
-- cgit v1.1
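The MTTMR() helper comes from the new include in the first hunk above.
It presumably hand-encodes the instruction roughly as below; the field
helpers and exact macro body are reconstructed from memory rather than
from this diff, so treat this as a sketch and check the real
asm/ppc-opcode.h.

/* Sketch: emit mttmr as a raw word so binutils <= 2.22 can assemble it.
 * 0x7c0003dc is primary opcode 31 with extended opcode 494 (mttmr);
 * TMR() and ___PPC_RS() are assumed helpers placing the TMR number and
 * source GPR fields. */
#define PPC_INST_MTTMR		0x7c0003dc
#define MTTMR(tmr, r)		stringify_in_c(.long PPC_INST_MTTMR | \
					       TMR(tmr) | ___PPC_RS(r))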
From 6e669f085d595cb6053920832c89f1a13067db44 Mon Sep 17 00:00:00 2001
From: Cyril Bur
Date: Wed, 16 Mar 2016 13:29:30 +1100
Subject: powerpc: Fix unrecoverable SLB miss during restore_math()

Commit 70fe3d9 "powerpc: Restore FPU/VEC/VSX if previously used"
introduces a call to restore_math() late in the syscall return path,
after MSR_RI has been cleared. The MSR_RI flag is used to indicate
whether the kernel can take another exception or not. A cleared MSR_RI
flag indicates that the kernel cannot.

Unfortunately, when a machine is under SLB pressure, an SLB miss can
occur in restore_math(), which (with MSR_RI cleared) leads to an
unrecoverable exception:

  Unrecoverable exception 4100 at c0000000000088d8
  cpu 0x0: Vector: 4100 at [c0000003fa473b20]
      pc: c0000000000088d8: .load_vr_state+0x70/0x110
      lr: c00000000000f710: .restore_math+0x130/0x188
      sp: c0000003fa473da0
     msr: 9000000002003030
    current = 0xc0000007f876f180
    paca    = 0xc00000000fff0000
    softe: 0        irq_happened: 0x01
      pid = 1944, comm = K08umountfs
  [link register   ] c00000000000f710 .restore_math+0x130/0x188
  [c0000003fa473da0] c0000003fa473e30 (unreliable)
  [c0000003fa473e30] c000000000007b6c system_call+0x84/0xfc

The clearing of MSR_RI is actually an optimisation to avoid multiple
MSR writes; what really must be disabled are interrupts. See the
comment in entry_64.S:

  /*
   * For performance reasons we clear RI the same time that we
   * clear EE. We only need to clear RI just before we restore r13
   * below, but batching it with EE saves us one expensive mtmsrd call.
   * We have to be careful to restore RI if we branch anywhere from
   * here (eg syscall_exit_work).
   */

At the point of calling restore_math(), r13 has not been restored; as
such, the quick fix of turning MSR_RI back on for the call to
restore_math() eliminates the occurrence of the unrecoverable
exception. We'd like to do a better fix in the future.

Fixes: 70fe3d980f5f ("powerpc: Restore FPU/VEC/VSX if previously used")
Signed-off-by: Cyril Bur
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kernel/entry_64.S | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index b9b1253..9916d15 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -218,7 +218,16 @@ system_call:			/* label this so stack traces look sane */
 	bne	3f
 #endif
 2:	addi	r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+	mtmsrd	r10,1		/* Restore RI */
+#endif
 	bl	restore_math
+#ifdef CONFIG_PPC_BOOK3S
+	ld	r10,PACAKMSR(r13)
+	li	r9,MSR_RI
+	andc	r11,r10,r9	/* Re-clear RI */
+	mtmsrd	r11,1
+#endif
 	ld	r8,_MSR(r1)
 	ld	r3,RESULT(r1)
 	li	r11,-MAX_ERRNO
-- cgit v1.1
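In pseudo-C, the RI toggling added above amounts to the following.
This is purely illustrative; the real logic is the assembly in the
hunk, and mtmsrd() and local_paca are stand-ins for what the asm does
directly:

/* Illustration only, not kernel code. */
unsigned long kmsr = local_paca->kernel_msr;	/* what PACAKMSR loads */

mtmsrd(kmsr, 1);		/* set RI: an SLB miss is now recoverable */
restore_math(regs);
mtmsrd(kmsr & ~MSR_RI, 1);	/* re-clear RI before r13 is restored */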