diff options
Diffstat (limited to 'arch/powerpc')
222 files changed, 3287 insertions, 1228 deletions
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index ccd2556..c7628e9 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -141,7 +141,9 @@ AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) + CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) +CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) ifeq ($(CONFIG_PPC_BOOK3S_64),y) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ef6549e..26d5d2a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \ libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c libfdtheader := fdt.h libfdt.h libfdt_internal.h -$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ +$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \ + treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \ $(addprefix $(obj)/,$(libfdtheader)) src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts index 8626615..deb52e4 100644 --- a/arch/powerpc/boot/dts/acadia.dts +++ b/arch/powerpc/boot/dts/acadia.dts @@ -219,6 +219,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; }; }; diff --git a/arch/powerpc/boot/dts/adder875-redboot.dts b/arch/powerpc/boot/dts/adder875-redboot.dts index 0839847..7f5ff41 100644 --- a/arch/powerpc/boot/dts/adder875-redboot.dts +++ b/arch/powerpc/boot/dts/adder875-redboot.dts @@ -178,6 +178,6 @@ }; chosen { - linux,stdout-path = &console; + stdout-path = &console; }; }; diff --git a/arch/powerpc/boot/dts/adder875-uboot.dts 
b/arch/powerpc/boot/dts/adder875-uboot.dts index e4554ca..bd9f33c 100644 --- a/arch/powerpc/boot/dts/adder875-uboot.dts +++ b/arch/powerpc/boot/dts/adder875-uboot.dts @@ -177,6 +177,6 @@ }; chosen { - linux,stdout-path = &console; + stdout-path = &console; }; }; diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts index 7467792..8a7a101 100644 --- a/arch/powerpc/boot/dts/akebono.dts +++ b/arch/powerpc/boot/dts/akebono.dts @@ -410,6 +410,6 @@ }; chosen { - linux,stdout-path = &UART0; + stdout-path = &UART0; }; }; diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts index 49ac36b..7124301 100644 --- a/arch/powerpc/boot/dts/amigaone.dts +++ b/arch/powerpc/boot/dts/amigaone.dts @@ -168,6 +168,6 @@ }; chosen { - linux,stdout-path = "/pci@80000000/isa@7/serial@3f8"; + stdout-path = "/pci@80000000/isa@7/serial@3f8"; }; }; diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts index 9198745..e987b5a 100644 --- a/arch/powerpc/boot/dts/asp834x-redboot.dts +++ b/arch/powerpc/boot/dts/asp834x-redboot.dts @@ -304,7 +304,7 @@ chosen { bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2"; - linux,stdout-path = &serial0; + stdout-path = &serial0; }; }; diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts index aa68911..538e42b 100644 --- a/arch/powerpc/boot/dts/bamboo.dts +++ b/arch/powerpc/boot/dts/bamboo.dts @@ -295,6 +295,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; }; }; diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts index 27f169e..c5beb72 100644 --- a/arch/powerpc/boot/dts/c2k.dts +++ b/arch/powerpc/boot/dts/c2k.dts @@ -361,6 +361,6 @@ }; }; chosen { - linux,stdout-path = &MPSC0; + stdout-path = &MPSC0; }; }; diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index f2ad581..a04a4fc 100644 --- 
a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts @@ -237,6 +237,6 @@ }; chosen { - linux,stdout-path = &UART0; + stdout-path = &UART0; }; }; diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index c280e75..c3922fc 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -78,7 +78,7 @@ }; rtc@56 { - compatible = "mc,rv3029c2"; + compatible = "microcrystal,rv3029"; reg = <0x56>; }; diff --git a/arch/powerpc/boot/dts/ebony.dts b/arch/powerpc/boot/dts/ebony.dts index ec2d142..5d11e6e 100644 --- a/arch/powerpc/boot/dts/ebony.dts +++ b/arch/powerpc/boot/dts/ebony.dts @@ -332,6 +332,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@40000200"; + stdout-path = "/plb/opb/serial@40000200"; }; }; diff --git a/arch/powerpc/boot/dts/eiger.dts b/arch/powerpc/boot/dts/eiger.dts index 48bcf71..7a1231d 100644 --- a/arch/powerpc/boot/dts/eiger.dts +++ b/arch/powerpc/boot/dts/eiger.dts @@ -421,7 +421,7 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600200"; + stdout-path = "/plb/opb/serial@ef600200"; }; }; diff --git a/arch/powerpc/boot/dts/ep405.dts b/arch/powerpc/boot/dts/ep405.dts index 53ef06c..4ac9c5a 100644 --- a/arch/powerpc/boot/dts/ep405.dts +++ b/arch/powerpc/boot/dts/ep405.dts @@ -225,6 +225,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; }; }; diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts b/arch/powerpc/boot/dts/fsl/mvme7100.dts index e2d306a..721cb53 100644 --- a/arch/powerpc/boot/dts/fsl/mvme7100.dts +++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts @@ -146,7 +146,7 @@ }; chosen { - linux,stdout-path = &serial0; + stdout-path = &serial0; }; }; diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts index 6560283..9311b86 100644 --- a/arch/powerpc/boot/dts/fsp2.dts +++ b/arch/powerpc/boot/dts/fsp2.dts @@ -607,7 +607,7 @@ }; chosen { - linux,stdout-path = 
"/plb/opb/serial@b0020000"; + stdout-path = "/plb/opb/serial@b0020000"; bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug"; }; }; diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts index 43e6f0c..02bd304 100644 --- a/arch/powerpc/boot/dts/holly.dts +++ b/arch/powerpc/boot/dts/holly.dts @@ -191,6 +191,6 @@ }; chosen { - linux,stdout-path = "/tsi109@c0000000/serial@7808"; + stdout-path = "/tsi109@c0000000/serial@7808"; }; }; diff --git a/arch/powerpc/boot/dts/hotfoot.dts b/arch/powerpc/boot/dts/hotfoot.dts index 71d3bb4..b93bf2d 100644 --- a/arch/powerpc/boot/dts/hotfoot.dts +++ b/arch/powerpc/boot/dts/hotfoot.dts @@ -291,6 +291,6 @@ }; chosen { - linux,stdout-path = &UART0; + stdout-path = &UART0; }; }; diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts index 9c94fd7..2e6e3a7 100644 --- a/arch/powerpc/boot/dts/icon.dts +++ b/arch/powerpc/boot/dts/icon.dts @@ -442,6 +442,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@f0000200"; + stdout-path = "/plb/opb/serial@f0000200"; }; }; diff --git a/arch/powerpc/boot/dts/iss4xx-mpic.dts b/arch/powerpc/boot/dts/iss4xx-mpic.dts index 23e9d9b..f706319 100644 --- a/arch/powerpc/boot/dts/iss4xx-mpic.dts +++ b/arch/powerpc/boot/dts/iss4xx-mpic.dts @@ -150,6 +150,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@40000200"; + stdout-path = "/plb/opb/serial@40000200"; }; }; diff --git a/arch/powerpc/boot/dts/iss4xx.dts b/arch/powerpc/boot/dts/iss4xx.dts index 4ff6555..5533aff 100644 --- a/arch/powerpc/boot/dts/iss4xx.dts +++ b/arch/powerpc/boot/dts/iss4xx.dts @@ -111,6 +111,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@40000200"; + stdout-path = "/plb/opb/serial@40000200"; }; }; diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts index f913dbe..02629e1 100644 --- a/arch/powerpc/boot/dts/katmai.dts +++ b/arch/powerpc/boot/dts/katmai.dts @@ -505,6 +505,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@f0000200"; + 
stdout-path = "/plb/opb/serial@f0000200"; }; }; diff --git a/arch/powerpc/boot/dts/klondike.dts b/arch/powerpc/boot/dts/klondike.dts index 8c94290..d9613b7 100644 --- a/arch/powerpc/boot/dts/klondike.dts +++ b/arch/powerpc/boot/dts/klondike.dts @@ -222,6 +222,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@50001000"; + stdout-path = "/plb/opb/serial@50001000"; }; }; diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts index 5d68236..fe6c17c 100644 --- a/arch/powerpc/boot/dts/ksi8560.dts +++ b/arch/powerpc/boot/dts/ksi8560.dts @@ -339,6 +339,6 @@ chosen { - linux,stdout-path = "/soc/cpm/serial@91a00"; + stdout-path = "/soc/cpm/serial@91a00"; }; }; diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts index b5413cb..843f156 100644 --- a/arch/powerpc/boot/dts/media5200.dts +++ b/arch/powerpc/boot/dts/media5200.dts @@ -25,7 +25,7 @@ }; chosen { - linux,stdout-path = &console; + stdout-path = &console; }; cpus { diff --git a/arch/powerpc/boot/dts/mpc8272ads.dts b/arch/powerpc/boot/dts/mpc8272ads.dts index 6d2cddf..98282c1 100644 --- a/arch/powerpc/boot/dts/mpc8272ads.dts +++ b/arch/powerpc/boot/dts/mpc8272ads.dts @@ -262,6 +262,6 @@ }; chosen { - linux,stdout-path = "/soc/cpm/serial@11a00"; + stdout-path = "/soc/cpm/serial@11a00"; }; }; diff --git a/arch/powerpc/boot/dts/mpc866ads.dts b/arch/powerpc/boot/dts/mpc866ads.dts index 34c1f48..4443fac 100644 --- a/arch/powerpc/boot/dts/mpc866ads.dts +++ b/arch/powerpc/boot/dts/mpc866ads.dts @@ -185,6 +185,6 @@ }; chosen { - linux,stdout-path = "/soc/cpm/serial@a80"; + stdout-path = "/soc/cpm/serial@a80"; }; }; diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts index 4e93bd9..5b037f5 100644 --- a/arch/powerpc/boot/dts/mpc885ads.dts +++ b/arch/powerpc/boot/dts/mpc885ads.dts @@ -227,6 +227,6 @@ }; chosen { - linux,stdout-path = "/soc/cpm/serial@a80"; + stdout-path = "/soc/cpm/serial@a80"; }; }; diff --git 
a/arch/powerpc/boot/dts/mvme5100.dts b/arch/powerpc/boot/dts/mvme5100.dts index 1ecb341..a7eb6d2 100644 --- a/arch/powerpc/boot/dts/mvme5100.dts +++ b/arch/powerpc/boot/dts/mvme5100.dts @@ -179,7 +179,7 @@ }; chosen { - linux,stdout-path = &serial0; + stdout-path = &serial0; }; }; diff --git a/arch/powerpc/boot/dts/obs600.dts b/arch/powerpc/boot/dts/obs600.dts index 18e7d79..d10b041 100644 --- a/arch/powerpc/boot/dts/obs600.dts +++ b/arch/powerpc/boot/dts/obs600.dts @@ -309,6 +309,6 @@ }; }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600200"; + stdout-path = "/plb/opb/serial@ef600200"; }; }; diff --git a/arch/powerpc/boot/dts/pq2fads.dts b/arch/powerpc/boot/dts/pq2fads.dts index 0c525ff..a477615 100644 --- a/arch/powerpc/boot/dts/pq2fads.dts +++ b/arch/powerpc/boot/dts/pq2fads.dts @@ -242,6 +242,6 @@ }; chosen { - linux,stdout-path = "/soc/cpm/serial@11a00"; + stdout-path = "/soc/cpm/serial@11a00"; }; }; diff --git a/arch/powerpc/boot/dts/rainier.dts b/arch/powerpc/boot/dts/rainier.dts index 9684c80..e59829c 100644 --- a/arch/powerpc/boot/dts/rainier.dts +++ b/arch/powerpc/boot/dts/rainier.dts @@ -344,7 +344,7 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; bootargs = "console=ttyS0,115200"; }; }; diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts index d86a3a4..f3e046f 100644 --- a/arch/powerpc/boot/dts/redwood.dts +++ b/arch/powerpc/boot/dts/redwood.dts @@ -381,7 +381,7 @@ chosen { - linux,stdout-path = "/plb/opb/serial@ef600200"; + stdout-path = "/plb/opb/serial@ef600200"; }; }; diff --git a/arch/powerpc/boot/dts/sam440ep.dts b/arch/powerpc/boot/dts/sam440ep.dts index 088361c..7d15f18 100644 --- a/arch/powerpc/boot/dts/sam440ep.dts +++ b/arch/powerpc/boot/dts/sam440ep.dts @@ -288,6 +288,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; }; }; diff --git a/arch/powerpc/boot/dts/sequoia.dts 
b/arch/powerpc/boot/dts/sequoia.dts index e41b88a..60d211d 100644 --- a/arch/powerpc/boot/dts/sequoia.dts +++ b/arch/powerpc/boot/dts/sequoia.dts @@ -406,7 +406,7 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; bootargs = "console=ttyS0,115200"; }; }; diff --git a/arch/powerpc/boot/dts/storcenter.dts b/arch/powerpc/boot/dts/storcenter.dts index 2a55573..99f6f54 100644 --- a/arch/powerpc/boot/dts/storcenter.dts +++ b/arch/powerpc/boot/dts/storcenter.dts @@ -137,6 +137,6 @@ }; chosen { - linux,stdout-path = &serial0; + stdout-path = &serial0; }; }; diff --git a/arch/powerpc/boot/dts/taishan.dts b/arch/powerpc/boot/dts/taishan.dts index 1657ad0..803f1bf 100644 --- a/arch/powerpc/boot/dts/taishan.dts +++ b/arch/powerpc/boot/dts/taishan.dts @@ -422,6 +422,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@40000300"; + stdout-path = "/plb/opb/serial@40000300"; }; }; diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts index 391a4e2..66f1c63 100644 --- a/arch/powerpc/boot/dts/virtex440-ml507.dts +++ b/arch/powerpc/boot/dts/virtex440-ml507.dts @@ -32,7 +32,7 @@ } ; chosen { bootargs = "console=ttyS0 root=/dev/ram"; - linux,stdout-path = &RS232_Uart_1; + stdout-path = &RS232_Uart_1; } ; cpus { #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/virtex440-ml510.dts b/arch/powerpc/boot/dts/virtex440-ml510.dts index 81201d3..3b736ca 100644 --- a/arch/powerpc/boot/dts/virtex440-ml510.dts +++ b/arch/powerpc/boot/dts/virtex440-ml510.dts @@ -26,7 +26,7 @@ } ; chosen { bootargs = "console=ttyS0 root=/dev/ram"; - linux,stdout-path = "/plb@0/serial@83e00000"; + stdout-path = "/plb@0/serial@83e00000"; } ; cpus { #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts index 4a9f726..0872862 100644 --- a/arch/powerpc/boot/dts/walnut.dts +++ b/arch/powerpc/boot/dts/walnut.dts @@ -241,6 +241,6 @@ }; chosen { - linux,stdout-path 
= "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; }; }; diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts index ea9053e..b4f3274 100644 --- a/arch/powerpc/boot/dts/warp.dts +++ b/arch/powerpc/boot/dts/warp.dts @@ -304,6 +304,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; }; }; diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts index 646acfb..d5e1442 100644 --- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts +++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts @@ -503,6 +503,6 @@ /* Needed for dtbImage boot wrapper compatibility */ chosen { - linux,stdout-path = &serial0; + stdout-path = &serial0; }; }; diff --git a/arch/powerpc/boot/dts/yosemite.dts b/arch/powerpc/boot/dts/yosemite.dts index 30bb475..5650878 100644 --- a/arch/powerpc/boot/dts/yosemite.dts +++ b/arch/powerpc/boot/dts/yosemite.dts @@ -327,6 +327,6 @@ }; chosen { - linux,stdout-path = "/plb/opb/serial@ef600300"; + stdout-path = "/plb/opb/serial@ef600300"; }; }; diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 7330150..d9713ad 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -62,6 +62,7 @@ void RunModeException(struct pt_regs *regs); void single_step_exception(struct pt_regs *regs); void program_check_exception(struct pt_regs *regs); void alignment_exception(struct pt_regs *regs); +void slb_miss_bad_addr(struct pt_regs *regs); void StackOverflow(struct pt_regs *regs); void nonrecoverable_exception(struct pt_regs *regs); void kernel_fp_unavailable_exception(struct pt_regs *regs); @@ -88,7 +89,18 @@ int sys_swapcontext(struct ucontext __user *old_ctx, long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, int ctx_size, int r6, int r7, int r8, struct pt_regs *regs); +int sys_debug_setcontext(struct ucontext 
__user *ctx, + int ndbg, struct sig_dbg_op __user *dbg, + int r6, int r7, int r8, + struct pt_regs *regs); +int +ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp); +unsigned long __init early_init(unsigned long dt_ptr); +void __init machine_init(u64 dt_ptr); #endif + +long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, + u32 len_high, u32 len_low); long sys_switch_endian(void); notrace unsigned int __check_irq_replay(void); void notrace restore_interrupts(void); @@ -126,4 +138,7 @@ extern int __ucmpdi2(u64, u64); void _mcount(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); +void pnv_power9_force_smt4_catch(void); +void pnv_power9_force_smt4_release(void); + #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 10daa1d..c7c6395 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -35,7 +35,8 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#ifdef __SUBARCH_HAS_LWSYNC +/* The sub-arch has lwsync */ +#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) # define SMPWMB LWSYNC #else # define SMPWMB eieio diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 30a155c..c615abd 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -16,6 +16,7 @@ #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) #define PMD_CACHE_INDEX PMD_INDEX_SIZE +#define PUD_CACHE_INDEX PUD_INDEX_SIZE #ifndef __ASSEMBLY__ #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 949d691..4b54230 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ 
b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -11,6 +11,12 @@ #define H_PUD_INDEX_SIZE 9 #define H_PGD_INDEX_SIZE 9 +/* + * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB + * Hence also limit max EA bits to 64TB. + */ +#define MAX_EA_BITS_PER_CONTEXT 46 + #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) @@ -34,6 +40,14 @@ #define H_PAGE_COMBO 0x0 #define H_PTE_FRAG_NR 0 #define H_PTE_FRAG_SIZE_SHIFT 0 + +/* memory key bits, only 8 keys supported */ +#define H_PTE_PKEY_BIT0 0 +#define H_PTE_PKEY_BIT1 0 +#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 +#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 +#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 + /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ @@ -63,7 +77,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd) * keeping the prototype consistent across the two formats. */ static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, - unsigned int subpg_index, unsigned long hidx) + unsigned int subpg_index, unsigned long hidx, + int offset) { return (hidx << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX); diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 338b7da..eb39313 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -4,10 +4,16 @@ #define H_PTE_INDEX_SIZE 8 #define H_PMD_INDEX_SIZE 10 -#define H_PUD_INDEX_SIZE 7 +#define H_PUD_INDEX_SIZE 10 #define H_PGD_INDEX_SIZE 8 /* + * Each context is 512TB size. SLB miss for first context/default context + * is handled in the hotpath. + */ +#define MAX_EA_BITS_PER_CONTEXT 49 + +/* * 64k aligned address free up few of the lower bits of RPN for us * We steal that here. 
For more deatils look at pte_pfn/pfn_pte() */ @@ -16,6 +22,13 @@ #define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ +/* memory key bits. */ +#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 +#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 +#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 +#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 +#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 + /* * We need to differentiate between explicit huge page and THP huge * page, since THP huge page also need to track real subpage details @@ -45,7 +58,7 @@ * generic accessors and iterators here */ #define __real_pte __real_pte -static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) { real_pte_t rpte; unsigned long *hidxp; @@ -59,7 +72,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) */ smp_rmb(); - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + hidxp = (unsigned long *)(ptep + offset); rpte.hidx = *hidxp; return rpte; } @@ -86,9 +99,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) * expected to modify the PTE bits accordingly and commit the PTE to memory. 
*/ static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, - unsigned int subpg_index, unsigned long hidx) + unsigned int subpg_index, + unsigned long hidx, int offset) { - unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + unsigned long *hidxp = (unsigned long *)(ptep + offset); rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); @@ -140,13 +154,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a } #define H_PTE_TABLE_SIZE PTE_FRAG_SIZE -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE) #define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ (sizeof(unsigned long) << PMD_INDEX_SIZE)) #else #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) #endif +#ifdef CONFIG_HUGETLB_PAGE +#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \ + (sizeof(unsigned long) << PUD_INDEX_SIZE)) +#else #define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#endif #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index b1ace96..cc8cd65 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -23,7 +23,8 @@ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES) +#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \ + defined(CONFIG_PPC_64K_PAGES) /* * only with hash 64k we need to use the second half of pmd page table * to store pointer to deposited pgtable_t @@ -33,6 +34,16 @@ #define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE #endif /* + * We store the slot details in the second half of page table. 
+ * Increase the pud level table so that hugetlb ptes can be stored + * at pud level. + */ +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) +#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1) +#else +#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE) +#endif +/* * Define the address range of the kernel non-linear virtual area */ #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 0abeb0e..5094696 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -80,20 +80,53 @@ struct spinlock; /* Maximum possible number of NPUs in a system. */ #define NV_MAX_NPUS 8 +/* + * One bit per slice. We have lower slices which cover 256MB segments + * upto 4G range. That gets us 16 low slices. For the rest we track slices + * in 1TB size. + */ +struct slice_mask { + u64 low_slices; + DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); +}; + typedef struct { - mm_context_id_t id; + union { + /* + * We use id as the PIDR content for radix. On hash we can use + * more than one id. The extended ids are used when we start + * having address above 512TB. We allocate one extended id + * for each 512TB. The new id is then used with the 49 bit + * EA to build a new VA. We always use ESID_BITS_1T_MASK bits + * from EA and new context ids to build the new VAs. 
+ */ + mm_context_id_t id; + mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; + }; u16 user_psize; /* page size index */ /* Number of bits in the mm_cpumask */ atomic_t active_cpus; + /* Number of users of the external (Nest) MMU */ + atomic_t copros; + /* NPU NMMU context */ struct npu_context *npu_context; #ifdef CONFIG_PPC_MM_SLICES - u64 low_slices_psize; /* SLB page size encodings */ + /* SLB page size encodings*/ + unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; unsigned long slb_addr_limit; +# ifdef CONFIG_PPC_64K_PAGES + struct slice_mask mask_64k; +# endif + struct slice_mask mask_4k; +# ifdef CONFIG_HUGETLB_PAGE + struct slice_mask mask_16m; + struct slice_mask mask_16g; +# endif #else u16 sllp; /* SLB page size encoding */ #endif @@ -174,5 +207,25 @@ extern void radix_init_pseries(void); static inline void radix_init_pseries(void) { }; #endif +static inline int get_ea_context(mm_context_t *ctx, unsigned long ea) +{ + int index = ea >> MAX_EA_BITS_PER_CONTEXT; + + if (likely(index < ARRAY_SIZE(ctx->extended_id))) + return ctx->extended_id[index]; + + /* should never happen */ + WARN_ON(1); + return 0; +} + +static inline unsigned long get_user_vsid(mm_context_t *ctx, + unsigned long ea, int ssize) +{ + unsigned long context = get_ea_context(ctx, ea); + + return get_vsid(context, ea, ssize); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 1fcfa42..558a159 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -73,10 +73,26 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { + pgd_t *pgd; + if (radix_enabled()) return radix__pgd_alloc(mm); - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - 
pgtable_gfp_flags(mm, GFP_KERNEL)); + + pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), + pgtable_gfp_flags(mm, GFP_KERNEL)); + /* + * With hugetlb, we don't clear the second half of the page table. + * If we share the same slab cache with the pmd or pud level table, + * we need to make sure we zero out the full table on alloc. + * With 4K we don't store slot in the second half. Hence we don't + * need to do this for 4k. + */ +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \ + ((H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX) || \ + (H_PGD_INDEX_SIZE == H_PMD_CACHE_INDEX)) + memset(pgd, 0, PGD_TABLE_SIZE); +#endif + return pgd; } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -93,13 +109,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), + return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX), pgtable_gfp_flags(mm, GFP_KERNEL)); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) @@ -115,7 +131,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, * ahead and flush the page walk cache */ flush_tlb_pgtable(tlb, address); - pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); + pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 5101772..47b5ffc 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -60,25 +60,6 @@ /* Max physical address bit as per radix table */ #define _RPAGE_PA_MAX 57 -#ifdef CONFIG_PPC_MEM_KEYS -#ifdef 
CONFIG_PPC_64K_PAGES -#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 -#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 -#else /* CONFIG_PPC_64K_PAGES */ -#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */ -#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */ -#endif /* CONFIG_PPC_64K_PAGES */ -#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 -#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 -#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 -#else /* CONFIG_PPC_MEM_KEYS */ -#define H_PTE_PKEY_BIT0 0 -#define H_PTE_PKEY_BIT1 0 -#define H_PTE_PKEY_BIT2 0 -#define H_PTE_PKEY_BIT3 0 -#define H_PTE_PKEY_BIT4 0 -#endif /* CONFIG_PPC_MEM_KEYS */ - /* * Max physical address bit we will use for now. * @@ -232,11 +213,13 @@ extern unsigned long __pmd_index_size; extern unsigned long __pud_index_size; extern unsigned long __pgd_index_size; extern unsigned long __pmd_cache_index; +extern unsigned long __pud_cache_index; #define PTE_INDEX_SIZE __pte_index_size #define PMD_INDEX_SIZE __pmd_index_size #define PUD_INDEX_SIZE __pud_index_size #define PGD_INDEX_SIZE __pgd_index_size #define PMD_CACHE_INDEX __pmd_cache_index +#define PUD_CACHE_INDEX __pud_cache_index /* * Because of use of pte fragments and THP, size of page table * are not always derived out of index size above. 
@@ -348,7 +331,7 @@ extern unsigned long pci_io_base; */ #ifndef __real_pte -#define __real_pte(e,p) ((real_pte_t){(e)}) +#define __real_pte(e, p, o) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) #define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h new file mode 100644 index 0000000..db0deda --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/slice.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H +#define _ASM_POWERPC_BOOK3S_64_SLICE_H + +#ifdef CONFIG_PPC_MM_SLICES + +#define SLICE_LOW_SHIFT 28 +#define SLICE_LOW_TOP (0x100000000ul) +#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) +#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) + +#define SLICE_HIGH_SHIFT 40 +#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) +#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) + +#else /* CONFIG_PPC_MM_SLICES */ + +#define get_slice_psize(mm, addr) ((mm)->context.user_psize) +#define slice_set_user_psize(mm, psize) \ +do { \ + (mm)->context.user_psize = (psize); \ + (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ +} while (0) + +#endif /* CONFIG_PPC_MM_SLICES */ + +#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 8eea90f..19b45ba 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -47,9 +47,6 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad #endif extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); -extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long 
page_size); -extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, unsigned long address); diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index b77f036..11843e3 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -99,7 +99,6 @@ static inline void invalidate_dcache_range(unsigned long start, #ifdef CONFIG_PPC64 extern void flush_dcache_range(unsigned long start, unsigned long stop); extern void flush_inval_dcache_range(unsigned long start, unsigned long stop); -extern void flush_dcache_phys_range(unsigned long start, unsigned long stop); #endif #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a2c5c95..66eba1e 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -131,41 +131,48 @@ static inline void cpu_feature_keys_init(void) { } /* CPU kernel features */ -/* Retain the 32b definitions all use bottom half of word */ +/* Definitions for features that we have on both 32-bit and 64-bit chips */ #define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x00000001) -#define CPU_FTR_L2CR ASM_CONST(0x00000002) -#define CPU_FTR_SPEC7450 ASM_CONST(0x00000004) -#define CPU_FTR_ALTIVEC ASM_CONST(0x00000008) -#define CPU_FTR_TAU ASM_CONST(0x00000010) -#define CPU_FTR_CAN_DOZE ASM_CONST(0x00000020) -#define CPU_FTR_USE_TB ASM_CONST(0x00000040) -#define CPU_FTR_L2CSR ASM_CONST(0x00000080) -#define CPU_FTR_601 ASM_CONST(0x00000100) -#define CPU_FTR_DBELL ASM_CONST(0x00000200) -#define CPU_FTR_CAN_NAP ASM_CONST(0x00000400) -#define CPU_FTR_L3CR ASM_CONST(0x00000800) -#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00001000) -#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00002000) -#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00004000) -#define 
CPU_FTR_NO_DPM ASM_CONST(0x00008000) -#define CPU_FTR_476_DD2 ASM_CONST(0x00010000) -#define CPU_FTR_NEED_COHERENT ASM_CONST(0x00020000) -#define CPU_FTR_NO_BTIC ASM_CONST(0x00040000) -#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00080000) -#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00100000) -#define CPU_FTR_PPC_LE ASM_CONST(0x00200000) -#define CPU_FTR_REAL_LE ASM_CONST(0x00400000) -#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00800000) -#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x01000000) -#define CPU_FTR_SPE ASM_CONST(0x02000000) -#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x04000000) -#define CPU_FTR_LWSYNC ASM_CONST(0x08000000) -#define CPU_FTR_NOEXECUTE ASM_CONST(0x10000000) -#define CPU_FTR_INDEXED_DCR ASM_CONST(0x20000000) -#define CPU_FTR_EMB_HV ASM_CONST(0x40000000) +#define CPU_FTR_ALTIVEC ASM_CONST(0x00000002) +#define CPU_FTR_DBELL ASM_CONST(0x00000004) +#define CPU_FTR_CAN_NAP ASM_CONST(0x00000008) +#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00000010) +#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00000020) +#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00000040) +#define CPU_FTR_LWSYNC ASM_CONST(0x00000080) +#define CPU_FTR_NOEXECUTE ASM_CONST(0x00000100) +#define CPU_FTR_EMB_HV ASM_CONST(0x00000200) + +/* Definitions for features that only exist on 32-bit chips */ +#ifdef CONFIG_PPC32 +#define CPU_FTR_601 ASM_CONST(0x00001000) +#define CPU_FTR_L2CR ASM_CONST(0x00002000) +#define CPU_FTR_SPEC7450 ASM_CONST(0x00004000) +#define CPU_FTR_TAU ASM_CONST(0x00008000) +#define CPU_FTR_CAN_DOZE ASM_CONST(0x00010000) +#define CPU_FTR_USE_RTC ASM_CONST(0x00020000) +#define CPU_FTR_L3CR ASM_CONST(0x00040000) +#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00080000) +#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00100000) +#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00200000) +#define CPU_FTR_NO_DPM ASM_CONST(0x00400000) +#define CPU_FTR_476_DD2 ASM_CONST(0x00800000) +#define CPU_FTR_NEED_COHERENT ASM_CONST(0x01000000) +#define CPU_FTR_NO_BTIC ASM_CONST(0x02000000) +#define 
CPU_FTR_PPC_LE ASM_CONST(0x04000000) +#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x08000000) +#define CPU_FTR_SPE ASM_CONST(0x10000000) +#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x20000000) +#define CPU_FTR_INDEXED_DCR ASM_CONST(0x40000000) + +#else /* CONFIG_PPC32 */ +/* Define these to 0 for the sake of tests in common code */ +#define CPU_FTR_601 (0) +#define CPU_FTR_PPC_LE (0) +#endif /* - * Add the 64-bit processor unique features in the top half of the word; + * Definitions for the 64-bit processor unique features; * on 32-bit, make the names available but defined to be 0. */ #ifdef __powerpc64__ @@ -174,37 +181,41 @@ static inline void cpu_feature_keys_init(void) { } #define LONG_ASM_CONST(x) 0 #endif -#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000100000000) -#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000) -#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000) -#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000) -#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000001000000000) -#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000) -#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000) -#define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000) -#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000010000000000) -#define CPU_FTR_PURR LONG_ASM_CONST(0x0000020000000000) -#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000040000000000) -#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000080000000000) -#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000100000000000) -#define CPU_FTR_VSX LONG_ASM_CONST(0x0000200000000000) -#define CPU_FTR_SAO LONG_ASM_CONST(0x0000400000000000) -#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000800000000000) -#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0001000000000000) -#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0002000000000000) -#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000) -#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000) -#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000) 
-#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000) -#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000) -#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000) -#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000) -#define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0200000000000000) -#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) -#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) -#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) -#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) -#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000) +#define CPU_FTR_REAL_LE LONG_ASM_CONST(0x0000000000001000) +#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000000002000) +#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000000004000) +#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000000008000) +#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000000010000) +#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000000000020000) +#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000000000040000) +#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000000000080000) +#define CPU_FTR_SMT LONG_ASM_CONST(0x0000000000100000) +#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000000000200000) +#define CPU_FTR_PURR LONG_ASM_CONST(0x0000000000400000) +#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000000000800000) +#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000) +#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000) +#define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000) +#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000) +#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000) +#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000) +#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000) +#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0000000080000000) +#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0000000100000000) +#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0000000200000000) +#define CPU_FTR_PKEY LONG_ASM_CONST(0x0000000400000000) +#define 
CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0000000800000000) +#define CPU_FTR_TM LONG_ASM_CONST(0x0000001000000000) +#define CPU_FTR_CFAR LONG_ASM_CONST(0x0000002000000000) +#define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0000004000000000) +#define CPU_FTR_DAWR LONG_ASM_CONST(0x0000008000000000) +#define CPU_FTR_DABRX LONG_ASM_CONST(0x0000010000000000) +#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x0000020000000000) +#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x0000040000000000) +#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000) +#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000) +#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000) +#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000) #ifndef __ASSEMBLY__ @@ -285,21 +296,19 @@ static inline void cpu_feature_keys_init(void) { } #endif #define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \ - CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE) -#define CPU_FTRS_603 (CPU_FTR_COMMON | \ - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ + CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC) +#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) -#define CPU_FTRS_604 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | CPU_FTR_PPC_LE) +#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE) #define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \ - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) #define CPU_FTRS_740 (CPU_FTR_COMMON | \ - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_PPC_LE) #define CPU_FTRS_750 (CPU_FTR_COMMON | \ - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_PPC_LE) #define CPU_FTRS_750CL (CPU_FTRS_750) @@ -308,125 +317,118 
@@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_750FX (CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX) #define CPU_FTRS_750GX (CPU_FTRS_750FX) #define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \ - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) #define CPU_FTRS_7400 (CPU_FTR_COMMON | \ - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) #define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \ + CPU_FTR_NEED_PAIRED_STWCX | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) #define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \ + CPU_FTR_NEED_PAIRED_STWCX | \ CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \ CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) #define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \ + CPU_FTR_NEED_PAIRED_STWCX | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) 
#define CPU_FTRS_7455 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \ CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_7447 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_7447A (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_7448 (CPU_FTR_COMMON | \ - CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) -#define CPU_FTRS_82XX (CPU_FTR_COMMON | \ - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB) +#define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE) #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP) + CPU_FTR_MAYBE_CAN_NAP) #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON) #define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) -#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB) -#define CPU_FTRS_8XX (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_40X 
(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ +#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON) +#define CPU_FTRS_8XX (CPU_FTR_NOEXECUTE) +#define CPU_FTRS_40X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) +#define CPU_FTRS_44X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) +#define CPU_FTRS_440x6 (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ CPU_FTR_INDEXED_DCR) #define CPU_FTRS_47X (CPU_FTRS_440x6) -#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \ +#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \ CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \ CPU_FTR_DEBUG_LVL_EXC) -#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ CPU_FTR_NOEXECUTE) -#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ - CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ +#define CPU_FTRS_E500MC (CPU_FTR_NODSISRALIGN | \ + CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) /* * e5500/e6500 erratum A-006958 is a timebase bug that can use the * same workaround as CPU_FTR_CELL_TB_BUG. 
*/ -#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ - CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ +#define CPU_FTRS_E5500 (CPU_FTR_NODSISRALIGN | \ + CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG) -#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ - CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ +#define CPU_FTRS_E6500 (CPU_FTR_NODSISRALIGN | \ + CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ -#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_POWER4 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ CPU_FTR_STCX_CHECKS_ADDRESS) -#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \ CPU_FTR_HVMODE | CPU_FTR_DABRX) -#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_POWER5 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX) -#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ @@ -434,7 +436,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \ 
CPU_FTR_DABRX) -#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ @@ -443,7 +445,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY) -#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ @@ -455,7 +457,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) -#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ @@ -463,22 +465,24 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DSCR | CPU_FTR_SAO | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ - CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ - CPU_FTR_PKEY) + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ + CPU_FTR_P9_TLBIE_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1) -#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | 
CPU_FTR_P9_TM_HV_ASSIST | \ + CPU_FTR_P9_TM_XER_SO_BUG) +#define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \ CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX) -#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +#define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) -#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) +#define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2) #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E @@ -489,7 +493,8 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \ - CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1) + CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ + CPU_FTRS_POWER9_DD2_2) #endif #else enum { diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index fc97404..ce5da21 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -47,6 +47,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } void set_breakpoint(struct arch_hw_breakpoint *brk); void __set_breakpoint(struct arch_hw_breakpoint *brk); +bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int brkpt); diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index fd37cc1..c2266ca 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -256,6 +256,12 @@ static inline void eeh_serialize_unlock(unsigned long flags) raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } +static inline bool eeh_state_active(int state) +{ + 
return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) + == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); +} + typedef void *(*eeh_traverse_func)(void *data, void *flag); void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index 1e551a2..9884e87 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -34,7 +34,8 @@ struct eeh_event { int eeh_event_init(void); int eeh_send_failure_event(struct eeh_pe *pe); void eeh_remove_event(struct eeh_pe *pe, bool force); -void eeh_handle_event(struct eeh_pe *pe); +void eeh_handle_normal_event(struct eeh_pe *pe); +void eeh_handle_special_event(void); #endif /* __KERNEL__ */ #endif /* ASM_POWERPC_EEH_EVENT_H */ diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 9086324..d3a7e36 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -466,17 +466,17 @@ static inline unsigned long epapr_hypercall(unsigned long *in, unsigned long *out, unsigned long nr) { - unsigned long register r0 asm("r0"); - unsigned long register r3 asm("r3") = in[0]; - unsigned long register r4 asm("r4") = in[1]; - unsigned long register r5 asm("r5") = in[2]; - unsigned long register r6 asm("r6") = in[3]; - unsigned long register r7 asm("r7") = in[4]; - unsigned long register r8 asm("r8") = in[5]; - unsigned long register r9 asm("r9") = in[6]; - unsigned long register r10 asm("r10") = in[7]; - unsigned long register r11 asm("r11") = nr; - unsigned long register r12 asm("r12"); + register unsigned long r0 asm("r0"); + register unsigned long r3 asm("r3") = in[0]; + register unsigned long r4 asm("r4") = in[1]; + register unsigned long r5 asm("r5") = in[2]; + register unsigned long r6 asm("r6") = in[3]; + register unsigned long r7 asm("r7") = in[4]; + register unsigned long r8 asm("r8") = in[5]; + 
register unsigned long r9 asm("r9") = in[6]; + register unsigned long r10 asm("r10") = in[7]; + register unsigned long r11 asm("r11") = nr; + register unsigned long r12 asm("r12"); asm volatile("bl epapr_hypercall_start" : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 176dfb7..471b227 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 511acfd..535add3 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,7 +52,7 @@ #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) #define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) -#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000) +#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 1a4847f..48f2ed2 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -89,17 +89,17 @@ pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, void flush_dcache_icache_hugepage(struct page *page); -#if defined(CONFIG_PPC_MM_SLICES) -int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, +int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); -#else + static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 
unsigned long len) { + if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) + return slice_is_hugepage_only_range(mm, addr, len); return 0; } -#endif void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte); diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index eca3f9c..2e2ddda 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -88,6 +88,7 @@ #define H_P8 -61 #define H_P9 -62 #define H_TOO_BIG -64 +#define H_UNSUPPORTED -67 #define H_OVERLAP -68 #define H_INTERRUPT -69 #define H_BAD_DATA -70 @@ -337,6 +338,9 @@ #define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 #define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 #define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 +#define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5 +#define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6 +#define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7 #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index ac6432d..90c708e 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -66,6 +66,7 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); int arch_install_hw_breakpoint(struct perf_event *bp); void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void arch_unregister_hw_breakpoint(struct perf_event *bp); void hw_breakpoint_pmu_read(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); @@ -82,6 +83,7 @@ static inline void hw_breakpoint_disable(void) __set_breakpoint(&brk); } extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); +int hw_breakpoint_handler(struct die_args *args); #else /* 
CONFIG_HAVE_HW_BREAKPOINT */ static inline void hw_breakpoint_disable(void) { } diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 88e5e8f..855e17d 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -30,6 +30,16 @@ #define PACA_IRQ_PMI 0x40 /* + * Some soft-masked interrupts must be hard masked until they are replayed + * (e.g., because the soft-masked handler does not clear the exception). + */ +#ifdef CONFIG_PPC_BOOK3S +#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI) +#else +#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) +#endif + +/* * flags for paca->irq_soft_mask */ #define IRQS_ENABLED 0 @@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void) static inline void may_hard_irq_enable(void) { get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; - if (!(get_paca()->irq_happened & PACA_IRQ_EE)) + if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) __hard_irq_enable(); } diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index e8e3a0a..ee39ce5 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -66,6 +66,7 @@ extern void irq_ctx_init(void); extern void call_do_softirq(struct thread_info *tp); extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp); extern void do_IRQ(struct pt_regs *regs); +extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); int irq_choose_cpu(const struct cpumask *mask); diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h index c6d3078..b8b0be8 100644 --- a/arch/powerpc/include/asm/irq_work.h +++ b/arch/powerpc/include/asm/irq_work.h @@ -6,5 +6,6 @@ static inline bool arch_irq_work_has_interrupt(void) { return true; } +extern void arch_irq_work_raise(void); #endif /* _ASM_POWERPC_IRQ_WORK_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 9dcbfa6..d8b1e8e 100644 --- 
a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void) return false; } +static inline void crash_ipi_callback(struct pt_regs *regs) { } + +static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) +{ +} + #endif /* CONFIG_KEXEC_CORE */ #endif /* ! __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 09a802b..a790d5c 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -108,6 +108,8 @@ /* book3s_hv */ +#define BOOK3S_INTERRUPT_HV_SOFTPATCH 0x1500 + /* * Special trap used to indicate to host that this is a * passthrough interrupt that could not be handled diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 376ae80..4c02a73 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -241,6 +241,10 @@ extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); +extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu); +extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu); +extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu); + extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 998f7b7..c424e44 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -472,6 +472,49 @@ static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i, set_bit_le(i, map); } +static inline u64 sanitize_msr(u64 msr) +{ + msr &= ~MSR_HV; + msr |= MSR_ME; + return msr; +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 
+static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) +{ + vcpu->arch.cr = vcpu->arch.cr_tm; + vcpu->arch.xer = vcpu->arch.xer_tm; + vcpu->arch.lr = vcpu->arch.lr_tm; + vcpu->arch.ctr = vcpu->arch.ctr_tm; + vcpu->arch.amr = vcpu->arch.amr_tm; + vcpu->arch.ppr = vcpu->arch.ppr_tm; + vcpu->arch.dscr = vcpu->arch.dscr_tm; + vcpu->arch.tar = vcpu->arch.tar_tm; + memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm, + sizeof(vcpu->arch.gpr)); + vcpu->arch.fp = vcpu->arch.fp_tm; + vcpu->arch.vr = vcpu->arch.vr_tm; + vcpu->arch.vrsave = vcpu->arch.vrsave_tm; +} + +static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) +{ + vcpu->arch.cr_tm = vcpu->arch.cr; + vcpu->arch.xer_tm = vcpu->arch.xer; + vcpu->arch.lr_tm = vcpu->arch.lr; + vcpu->arch.ctr_tm = vcpu->arch.ctr; + vcpu->arch.amr_tm = vcpu->arch.amr; + vcpu->arch.ppr_tm = vcpu->arch.ppr; + vcpu->arch.dscr_tm = vcpu->arch.dscr; + vcpu->arch.tar_tm = vcpu->arch.tar; + memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr, + sizeof(vcpu->arch.gpr)); + vcpu->arch.fp_tm = vcpu->arch.fp; + vcpu->arch.vr_tm = vcpu->arch.vr; + vcpu->arch.vrsave_tm = vcpu->arch.vrsave; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index ab386af..d978fdf 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -119,6 +119,7 @@ struct kvmppc_host_state { u8 host_ipi; u8 ptid; /* thread number within subcore when split */ u8 tid; /* thread number within whole core */ + u8 fake_suspend; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; void __iomem *xics_phys; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1f53b56..deb5429 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -610,6 +610,7 @@ struct kvm_vcpu_arch { u64 tfhar; u64 texasr; 
u64 tfiar; + u64 orig_texasr; u32 cr_tm; u64 xer_tm; diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 2f806e3..4f54775 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -186,11 +186,32 @@ #define M_APG2 0x00000040 #define M_APG3 0x00000060 +#ifdef CONFIG_PPC_MM_SLICES +#include <asm/nohash/32/slice.h> +#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) +#endif + #ifndef __ASSEMBLY__ +struct slice_mask { + u64 low_slices; + DECLARE_BITMAP(high_slices, 0); +}; + typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; +#ifdef CONFIG_PPC_MM_SLICES + u16 user_psize; /* page size index */ + unsigned char low_slices_psize[SLICE_ARRAY_SIZE]; + unsigned char high_slices_psize[0]; + unsigned long slb_addr_limit; + struct slice_mask mask_base_psize; /* 4k or 16k */ +# ifdef CONFIG_HUGETLB_PAGE + struct slice_mask mask_512k; + struct slice_mask mask_8m; +# endif +#endif } mm_context_t; #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 051b3d6..1835ca1 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -60,12 +60,51 @@ extern int hash__alloc_context_id(void); extern void hash__reserve_context_id(int id); extern void __destroy_context(int context_id); static inline void mmu_context_init(void) { } + +static inline int alloc_extended_context(struct mm_struct *mm, + unsigned long ea) +{ + int context_id; + + int index = ea >> MAX_EA_BITS_PER_CONTEXT; + + context_id = hash__alloc_context_id(); + if (context_id < 0) + return context_id; + + VM_WARN_ON(mm->context.extended_id[index]); + mm->context.extended_id[index] = context_id; + return context_id; +} + +static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) +{ + int context_id; + + context_id = get_ea_context(&mm->context, ea); + if 
(!context_id) + return true; + return false; +} + #else extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); extern unsigned long __init_new_context(void); extern void __destroy_context(unsigned long context_id); extern void mmu_context_init(void); +static inline int alloc_extended_context(struct mm_struct *mm, + unsigned long ea) +{ + /* non book3s_64 should never find this called */ + WARN_ON(1); + return -ENOMEM; +} + +static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) +{ + return false; +} #endif #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU) @@ -92,15 +131,23 @@ static inline void dec_mm_active_cpus(struct mm_struct *mm) static inline void mm_context_add_copro(struct mm_struct *mm) { /* - * On hash, should only be called once over the lifetime of - * the context, as we can't decrement the active cpus count - * and flush properly for the time being. + * If any copro is in use, increment the active CPU count + * in order to force TLB invalidations to be global as to + * propagate to the Nest MMU. */ - inc_mm_active_cpus(mm); + if (atomic_inc_return(&mm->context.copros) == 1) + inc_mm_active_cpus(mm); } static inline void mm_context_remove_copro(struct mm_struct *mm) { + int c; + + c = atomic_dec_if_positive(&mm->context.copros); + + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + /* * Need to broadcast a global flush of the full mm before * decrementing active_cpus count, as the next TLBI may be @@ -111,7 +158,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * for the time being. Invalidations will remain global if * used on hash. 
*/ - if (radix_enabled()) { + if (c == 0 && radix_enabled()) { flush_all_mm(mm); dec_mm_active_cpus(mm); } diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 504a3c36..03bbd11 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -24,6 +24,7 @@ extern int icache_44x_need_flush; #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) #define PMD_CACHE_INDEX PMD_INDEX_SIZE +#define PUD_CACHE_INDEX PUD_INDEX_SIZE #ifndef __ASSEMBLY__ #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h new file mode 100644 index 0000000..777d62e --- /dev/null +++ b/arch/powerpc/include/asm/nohash/32/slice.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H +#define _ASM_POWERPC_NOHASH_32_SLICE_H + +#ifdef CONFIG_PPC_MM_SLICES + +#define SLICE_LOW_SHIFT 26 /* 64 slices */ +#define SLICE_LOW_TOP (0x100000000ull) +#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) +#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) + +#define SLICE_HIGH_SHIFT 0 +#define SLICE_NUM_HIGH 0ul +#define GET_HIGH_SLICE_INDEX(addr) (addr & 0) + +#endif /* CONFIG_PPC_MM_SLICES */ + +#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index abddf58..5c5f75d 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -27,6 +27,7 @@ #else #define PMD_CACHE_INDEX PMD_INDEX_SIZE #endif +#define PUD_CACHE_INDEX PUD_INDEX_SIZE /* * Define the address range of the kernel non-linear virtual area diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h new file mode 100644 index 0000000..ad0d6e3 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/64/slice.h @@ -0,0 
+1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H +#define _ASM_POWERPC_NOHASH_64_SLICE_H + +#ifdef CONFIG_PPC_64K_PAGES +#define get_slice_psize(mm, addr) MMU_PAGE_64K +#else /* CONFIG_PPC_64K_PAGES */ +#define get_slice_psize(mm, addr) MMU_PAGE_4K +#endif /* !CONFIG_PPC_64K_PAGES */ +#define slice_set_user_psize(mm, psize) do { BUG(); } while (0) + +#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 94bd1bf..d886a5b 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -204,7 +204,9 @@ #define OPAL_NPU_SPA_SETUP 159 #define OPAL_NPU_SPA_CLEAR_CACHE 160 #define OPAL_NPU_TL_SET 161 -#define OPAL_LAST 161 +#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 +#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 +#define OPAL_LAST 165 /* Device tree flags */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 12e70fb..dde6008 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -204,6 +204,8 @@ int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag); int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); +int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr); +int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr); int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t msg_len); int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 2d04c55..4185f1c 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -32,6 +32,7 @@ #include <asm/accounting.h> #include <asm/hmi.h> #include <asm/cpuidle.h> +#include <asm/atomic.h> 
register struct paca_struct *local_paca asm("r13"); @@ -145,7 +146,7 @@ struct paca_struct { #ifdef CONFIG_PPC_BOOK3S mm_context_id_t mm_ctx_id; #ifdef CONFIG_PPC_MM_SLICES - u64 mm_ctx_low_slices_psize; + unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; unsigned long mm_ctx_slb_addr_limit; #else @@ -185,6 +186,8 @@ struct paca_struct { u8 thread_mask; /* Mask to denote subcore sibling threads */ u8 subcore_sibling_mask; + /* Flag to request this thread not to stop */ + atomic_t dont_stop; /* * Pointer to an array which contains pointer * to the sibling threads' paca. diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 8da5d4c..dec9ce5 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -126,7 +126,15 @@ extern long long virt_phys_offset; #ifdef CONFIG_FLATMEM #define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT)) -#define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr) +#ifndef __ASSEMBLY__ +extern unsigned long max_mapnr; +static inline bool pfn_valid(unsigned long pfn) +{ + unsigned long min_pfn = ARCH_PFN_OFFSET; + + return pfn >= min_pfn && pfn < max_mapnr; +} +#endif #endif #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) @@ -344,5 +352,6 @@ typedef struct page *pgtable_t; #include <asm-generic/memory_model.h> #endif /* __ASSEMBLY__ */ +#include <asm/slice.h> #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 56234c6..af04acd 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -86,65 +86,6 @@ extern u64 ppc64_pft_size; #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_PPC_MM_SLICES - -#define SLICE_LOW_SHIFT 28 -#define SLICE_HIGH_SHIFT 40 - -#define SLICE_LOW_TOP (0x100000000ul) -#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) -#define SLICE_NUM_HIGH (H_PGTABLE_RANGE 
>> SLICE_HIGH_SHIFT) - -#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) -#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) - -#ifndef __ASSEMBLY__ -struct mm_struct; - -extern unsigned long slice_get_unmapped_area(unsigned long addr, - unsigned long len, - unsigned long flags, - unsigned int psize, - int topdown); - -extern unsigned int get_slice_psize(struct mm_struct *mm, - unsigned long addr); - -extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); -extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, - unsigned long len, unsigned int psize); - -#endif /* __ASSEMBLY__ */ -#else -#define slice_init() -#ifdef CONFIG_PPC_BOOK3S_64 -#define get_slice_psize(mm, addr) ((mm)->context.user_psize) -#define slice_set_user_psize(mm, psize) \ -do { \ - (mm)->context.user_psize = (psize); \ - (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ -} while (0) -#else /* !CONFIG_PPC_BOOK3S_64 */ -#ifdef CONFIG_PPC_64K_PAGES -#define get_slice_psize(mm, addr) MMU_PAGE_64K -#else /* CONFIG_PPC_64K_PAGES */ -#define get_slice_psize(mm, addr) MMU_PAGE_4K -#endif /* !CONFIG_PPC_64K_PAGES */ -#define slice_set_user_psize(mm, psize) do { BUG(); } while(0) -#endif /* CONFIG_PPC_BOOK3S_64 */ - -#define slice_set_range_psize(mm, start, len, psize) \ - slice_set_user_psize((mm), (psize)) -#endif /* CONFIG_PPC_MM_SLICES */ - -#ifdef CONFIG_HUGETLB_PAGE - -#ifdef CONFIG_PPC_MM_SLICES -#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA -#endif - -#endif /* !CONFIG_HUGETLB_PAGE */ - #define VM_DATA_DEFAULT_FLAGS \ (is_32bit_task() ? 
\ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 723bf48..67a8a95 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -53,6 +53,8 @@ struct power_pmu { [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; + int n_blacklist_ev; + int *blacklist_ev; /* BHRB entries in the PMU */ int bhrb_nr; }; diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 540785d..96c1a46 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -11,14 +11,6 @@ #include <asm/paca.h> #include <asm/page.h> -/* Get state of physical CPU from query_cpu_stopped */ -int smp_query_cpu_stopped(unsigned int pcpu); -#define QCSS_STOPPED 0 -#define QCSS_STOPPING 1 -#define QCSS_NOT_STOPPED 2 -#define QCSS_HARDWARE_ERROR -1 -#define QCSS_HARDWARE_BUSY -2 - static inline long poll_pending(void) { return plpar_hcall_norets(H_POLL_PENDING); @@ -313,17 +305,17 @@ static inline long enable_little_endian_exceptions(void) return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0); } -static inline long plapr_set_ciabr(unsigned long ciabr) +static inline long plpar_set_ciabr(unsigned long ciabr) { return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0); } -static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) +static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) { return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } -static inline long plapr_signal_sys_reset(long cpu) +static inline long plpar_signal_sys_reset(long cpu) { return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); } @@ -342,6 +334,12 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) return rc; } +#else /* !CONFIG_PPC_PSERIES */ + +static inline long 
plpar_set_ciabr(unsigned long ciabr) +{ + return 0; +} #endif /* CONFIG_PPC_PSERIES */ #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 3e5cf25..d2d8c28 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -29,6 +29,12 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc); +extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind); +extern int pnv_pci_disable_tunnel(struct pci_dev *dev); +extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr, + int enable); +extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, + u32 *pid, u32 *tid); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, unsigned int virq); diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h index dc5f6a5..d1c2d2e6 100644 --- a/arch/powerpc/include/asm/powernv.h +++ b/arch/powerpc/include/asm/powernv.h @@ -40,6 +40,7 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context, } static inline void pnv_tm_init(void) { } +static inline void pnv_power9_force_smt4(void) { } #endif #endif /* _ASM_POWERNV_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index f1083bc..18883b8 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -232,6 +232,7 @@ #define PPC_INST_MSGSYNC 0x7c0006ec #define PPC_INST_MSGSNDP 0x7c00011c #define PPC_INST_MSGCLRP 0x7c00015c +#define PPC_INST_MTMSRD 0x7c000164 #define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 #define PPC_INST_PASTE 0x7c20070d @@ -239,8 +240,10 @@ #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_POPCNTD 0x7c0003f4 #define PPC_INST_POPCNTW 0x7c0002f4 +#define 
PPC_INST_RFEBB 0x4c000124 #define PPC_INST_RFCI 0x4c000066 #define PPC_INST_RFDI 0x4c00004e +#define PPC_INST_RFID 0x4c000024 #define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 @@ -271,12 +274,14 @@ #define PPC_INST_TLBSRX_DOT 0x7c0006a5 #define PPC_INST_VPMSUMW 0x10000488 #define PPC_INST_VPMSUMD 0x100004c8 +#define PPC_INST_VPERMXOR 0x1000002d #define PPC_INST_XXLOR 0xf0000490 #define PPC_INST_XXSWAPD 0xf0000250 #define PPC_INST_XVCPSGNDP 0xf0000780 #define PPC_INST_TRECHKPT 0x7c0007dd #define PPC_INST_TRECLAIM 0x7c00075d #define PPC_INST_TABORT 0x7c00071d +#define PPC_INST_TSR 0x7c0005dd #define PPC_INST_NAP 0x4c000364 #define PPC_INST_SLEEP 0x4c0003a4 @@ -517,6 +522,11 @@ #define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \ VSX_XX3((t), (a), (b)))) +#define VPERMXOR(vrt, vra, vrb, vrc) \ + stringify_in_c(.long (PPC_INST_VPERMXOR | \ + ___PPC_RT(vrt) | ___PPC_RA(vra) | \ + ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) + #define PPC_NAP stringify_in_c(.long PPC_INST_NAP) #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) #define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 01299cdc..bb9cb25 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -109,6 +109,13 @@ void release_thread(struct task_struct *); #define TASK_SIZE_64TB (0x0000400000000000UL) #define TASK_SIZE_128TB (0x0000800000000000UL) #define TASK_SIZE_512TB (0x0002000000000000UL) +#define TASK_SIZE_1PB (0x0004000000000000UL) +#define TASK_SIZE_2PB (0x0008000000000000UL) +/* + * With 52 bits in the address we can support + * upto 4PB of range. + */ +#define TASK_SIZE_4PB (0x0010000000000000UL) /* * For now 512TB is only supported with book3s and 64K linux page size. 
@@ -117,11 +124,17 @@ void release_thread(struct task_struct *); /* * Max value currently used: */ -#define TASK_SIZE_USER64 TASK_SIZE_512TB +#define TASK_SIZE_USER64 TASK_SIZE_4PB #define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB +#define TASK_CONTEXT_SIZE TASK_SIZE_512TB #else #define TASK_SIZE_USER64 TASK_SIZE_64TB #define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB +/* + * We don't need to allocate extended context ids for 4K page size, because + * we limit the max effective address on this config to 64TB. + */ +#define TASK_CONTEXT_SIZE TASK_SIZE_64TB #endif /* diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e6c7ead..cb0f272 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -156,6 +156,8 @@ #define PSSCR_SD 0x00400000 /* Status Disable */ #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ #define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */ +#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */ +#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */ /* Floating Point Status and Control Register (FPSCR) Fields */ #define FPSCR_FX 0x80000000 /* FPU exception summary */ @@ -237,7 +239,12 @@ #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ +#define TEXASR_ABORT __MASK(63-31) /* terminated by tabort or treclaim */ +#define TEXASR_SUSP __MASK(63-32) /* tx failed in suspended state */ +#define TEXASR_HV __MASK(63-34) /* MSR[HV] when failure occurred */ +#define TEXASR_PR __MASK(63-35) /* MSR[PR] when failure occurred */ #define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ +#define TEXASR_EXACT __MASK(63-37) /* TFIAR value is exact */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_TIDR 144 /* Thread ID register */ #define SPRN_CTRLF 0x088 diff --git 
a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h new file mode 100644 index 0000000..400a905 --- /dev/null +++ b/arch/powerpc/include/asm/security_features.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Security related feature bit definitions. + * + * Copyright 2018, Michael Ellerman, IBM Corporation. + */ + +#ifndef _ASM_POWERPC_SECURITY_FEATURES_H +#define _ASM_POWERPC_SECURITY_FEATURES_H + + +extern unsigned long powerpc_security_features; +extern bool rfi_flush; + +static inline void security_ftr_set(unsigned long feature) +{ + powerpc_security_features |= feature; +} + +static inline void security_ftr_clear(unsigned long feature) +{ + powerpc_security_features &= ~feature; +} + +static inline bool security_ftr_enabled(unsigned long feature) +{ + return !!(powerpc_security_features & feature); +} + + +// Features indicating support for Spectre/Meltdown mitigations + +// The L1-D cache can be flushed with ori r30,r30,0 +#define SEC_FTR_L1D_FLUSH_ORI30 0x0000000000000001ull + +// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2) +#define SEC_FTR_L1D_FLUSH_TRIG2 0x0000000000000002ull + +// ori r31,r31,0 acts as a speculation barrier +#define SEC_FTR_SPEC_BAR_ORI31 0x0000000000000004ull + +// Speculation past bctr is disabled +#define SEC_FTR_BCCTRL_SERIALISED 0x0000000000000008ull + +// Entries in L1-D are private to a SMT thread +#define SEC_FTR_L1D_THREAD_PRIV 0x0000000000000010ull + +// Indirect branch prediction cache disabled +#define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull + + +// Features indicating need for Spectre/Meltdown mitigations + +// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest) +#define SEC_FTR_L1D_FLUSH_HV 0x0000000000000040ull + +// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace) +#define SEC_FTR_L1D_FLUSH_PR 0x0000000000000080ull + +// A speculation barrier should be used for bounds 
checks (Spectre variant 1) +#define SEC_FTR_BNDS_CHK_SPEC_BAR 0x0000000000000100ull + +// Firmware configuration indicates user favours security over performance +#define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull + +#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d2bf233..27fa52e 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -50,7 +50,7 @@ enum l1d_flush_type { L1D_FLUSH_MTTRIG = 0x8, }; -void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h new file mode 100644 index 0000000..e40406cf5 --- /dev/null +++ b/arch/powerpc/include/asm/slice.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SLICE_H +#define _ASM_POWERPC_SLICE_H + +#ifdef CONFIG_PPC_BOOK3S_64 +#include <asm/book3s/64/slice.h> +#elif defined(CONFIG_PPC64) +#include <asm/nohash/64/slice.h> +#elif defined(CONFIG_PPC_MMU_NOHASH) +#include <asm/nohash/32/slice.h> +#endif + +#ifdef CONFIG_PPC_MM_SLICES + +#ifdef CONFIG_HUGETLB_PAGE +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +#ifndef __ASSEMBLY__ + +struct mm_struct; + +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, + unsigned long flags, unsigned int psize, + int topdown); + +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); + +void slice_set_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long len, unsigned int psize); + +void slice_init_new_context_exec(struct mm_struct *mm); + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_PPC_MM_SLICES */ + +#endif /* _ASM_POWERPC_SLICE_H */ diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h index c3ca42c..be8c9fa 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -35,7 +35,6 @@ static inline void disable_kernel_fp(void) msr_check_and_clear(MSR_FP); } #else -static inline void __giveup_fpu(struct task_struct *t) { } static inline void save_fpu(struct task_struct *t) { } static inline void flush_fp_to_thread(struct task_struct *t) { } #endif diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index 63e7f5a..6ec5460 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -6,10 +6,6 @@ #include <linux/stringify.h> #include <asm/feature-fixups.h> -#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) -#define __SUBARCH_HAS_LWSYNC -#endif - #ifndef __ASSEMBLY__ extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; extern void do_lwsync_fixups(unsigned long value, void *fixup_start, diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 4a12c00..5964145db 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -70,6 +70,7 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)val; } +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #endif /* __ASSEMBLY__ */ /* diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index b240666..db546c0 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -31,6 +31,7 @@ extern void to_tm(int tim, struct rtc_time * tm); extern void tick_broadcast_ipi_handler(void); extern void generic_calibrate_decr(void); +extern void hdec_interrupt(struct pt_regs *regs); /* Some sane defaults: 125 MHz timebase, 1GHz processor */ extern unsigned long ppc_proc_freq; @@ -46,7 +47,7 @@ struct div_result { /* Accessor functions for the timebase (RTC on 601) registers. 
*/ /* If one day CONFIG_POWER is added just define __USE_RTC as 1 */ #ifdef CONFIG_6xx -#define __USE_RTC() (!cpu_has_feature(CPU_FTR_USE_TB)) +#define __USE_RTC() (cpu_has_feature(CPU_FTR_USE_RTC)) #else #define __USE_RTC() 0 #endif @@ -204,6 +205,7 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); extern void secondary_cpu_time_init(void); +extern void __init time_init(void); DECLARE_PER_CPU(u64, decrementers_next_tb); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 88187c2..9f42164 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); extern int numa_update_cpu_topology(bool cpus_locked); +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) +{ + numa_cpu_lookup_table[cpu] = node; +} + static inline int early_cpu_to_node(int cpu) { int nid; @@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked) { return 0; } + +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} + #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int start_topology_update(void); extern int stop_topology_update(void); extern int prrn_is_enabled(void); +extern int find_and_online_cpu_nid(int cpu); #else static inline int start_topology_update(void) { @@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void) { return 0; } +static inline int find_and_online_cpu_nid(int cpu) +{ + return 0; +} #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 51bfeb8..a62ee66 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -47,9 +47,13 @@ #else 
-#define __access_ok(addr, size, segment) \ - (((addr) <= (segment).seg) && \ - (((size) == 0) || (((size) - 1) <= ((segment).seg - (addr))))) +static inline int __access_ok(unsigned long addr, unsigned long size, + mm_segment_t seg) +{ + if (addr > seg.seg) + return 0; + return (size == 0 || size - 1 <= seg.seg - addr); +} #endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1b6bc7f..d458c45 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o -obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index bbde55f..6bee65f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -573,6 +573,7 @@ int main(void) OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar); OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar); OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr); + OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr); OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm); OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr); OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr); @@ -655,6 +656,7 @@ int main(void) HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_TID, tid); + HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]); HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]); HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]); @@ -764,6 +766,7 @@ int main(void) OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, 
thread_sibling_pacas); OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); + OFFSET(PACA_DONT_STOP, paca_struct, dont_stop); #define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) STOP_SPR(STOP_PID, pid); STOP_SPR(STOP_LDBAR, ldbar); diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index c5e5a94..a9f3970 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -226,7 +226,7 @@ BEGIN_FTR_SECTION beq 1f END_FTR_SECTION_IFSET(CPU_FTR_L3CR) lwz r6,CPU_SPEC_FEATURES(r4) - andi. r0,r6,CPU_FTR_L3_DISABLE_NAP + andis. r0,r6,CPU_FTR_L3_DISABLE_NAP@h beq 1f li r7,CPU_FTR_CAN_NAP andc r6,r6,r7 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 462aed9..8d142e5 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -162,7 +162,7 @@ _GLOBAL(__setup_cpu_e5500) * the feature on the primary core, avoid doing it on the * secondary core. */ - andis. r6, r3, CPU_FTR_EMB_HV@h + andi. 
r6, r3, CPU_FTR_EMB_HV beq 2f rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV stw r3, CPU_SPEC_FEATURES(r4) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c40a9fc..b3de017 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -553,11 +553,30 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, - { /* Power9 DD 2.1 or later (see DD2.0 above) */ + { /* Power9 DD 2.1 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_1, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD2.2 or later */ .pvr_mask = 0xffff0000, .pvr_value = 0x004e0000, .cpu_name = "POWER9 (raw)", - .cpu_features = CPU_FTRS_POWER9_DD2_1, + .cpu_features = CPU_FTRS_POWER9_DD2_2, .cpu_user_features = COMMON_USER_POWER9, .cpu_user_features2 = COMMON_USER2_POWER9, .mmu_features = MMU_FTRS_POWER9, diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 945e2c2..4313ff0 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -54,8 +54,7 @@ struct dt_cpu_feature { }; #define CPU_FTRS_BASE \ - (CPU_FTR_USE_TB | \ - CPU_FTR_LWSYNC | \ + (CPU_FTR_LWSYNC | \ CPU_FTR_FPU_UNAVAILABLE |\ CPU_FTR_NODSISRALIGN |\ CPU_FTR_NOEXECUTE |\ @@ -590,6 +589,8 @@ static struct dt_cpu_feature_match __initdata {"virtual-page-class-key-protection", feat_enable, 0}, {"transactional-memory", feat_enable_tm, CPU_FTR_TM}, {"transactional-memory-v3", 
feat_enable_tm, 0}, + {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST}, + {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG}, {"idle-nap", feat_enable_idle_nap, 0}, {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, {"idle-stop", feat_enable_idle_stop, 0}, @@ -709,6 +710,14 @@ static __init void cpufeatures_cpu_quirks(void) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; else if ((version & 0xffffefff) == 0x004e0201) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + else if ((version & 0xffffefff) == 0x004e0202) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST | + CPU_FTR_P9_TM_XER_SO_BUG; + + if ((version & 0xffff0000) == 0x004e0000) { + cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + } } static void __init cpufeatures_setup_finished(void) @@ -720,6 +729,9 @@ static void __init cpufeatures_setup_finished(void) cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; } + /* Make sure powerpc_base_platform is non-NULL */ + powerpc_base_platform = cur_cpu_spec->platform; + system_registers.lpcr = mfspr(SPRN_LPCR); system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 2b9df00..bc640e4 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* Check PHB state */ ret = eeh_ops->get_state(phb_pe, NULL); if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { ret = 0; goto out; } @@ -433,7 +431,6 @@ out: int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; struct device_node *dn; struct pci_dev *dev; @@ -525,8 +522,7 @@ int 
eeh_dev_check_failure(struct eeh_dev *edev) * state, PE is in good state. */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - ((ret & active_flags) == active_flags)) { + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; @@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) /* Frozen parent PE ? */ ret = eeh_ops->get_state(parent_pe, NULL); - if (ret > 0 && - (ret & active_flags) != active_flags) + if (ret > 0 && !eeh_state_active(ret)) pe = parent_pe; /* Next parent level */ @@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag) */ int eeh_pe_reset_full(struct eeh_pe *pe) { - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); int type = EEH_RESET_HOT; unsigned int freset = 0; @@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe) /* Wait until the PE is in a functioning state */ state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((state & active_flags) == active_flags) + if (eeh_state_active(state)) break; if (state < 0) { @@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe) struct eeh_dev *edev, *tmp; struct pci_dev *pdev; struct pci_device_id *id; - int flags, ret; + int ret; /* Check PE state */ - flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); ret = eeh_ops->get_state(pe, NULL); if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) return 0; /* Unfrozen PE, nothing to do */ - if ((ret & flags) == flags) + if (eeh_state_active(ret)) return 0; /* Frozen PE, check if it needs PE level reset */ diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index d4cc266..201943d 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -84,8 +84,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) * @addr: mmio (PIO) phys address or i/o port number * * Given an mmio phys 
address, or a port number, find a pci device - * that implements this address. Be sure to pci_dev_put the device - * when finished. I/O port numbers are assumed to be offset + * that implements this address. I/O port numbers are assumed to be offset * from zero (that is, they do *not* have pci_io_addr added in). * It is safe to call this function within an interrupt. */ diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index beea218..43ceb62 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -384,7 +384,8 @@ static void *eeh_report_resume(void *data, void *userdata) eeh_pcid_put(dev); pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV - eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); + if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) + eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); #endif return NULL; } @@ -618,17 +619,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) /** * eeh_reset_device - Perform actual reset of a pci slot + * @driver_eeh_aware: Does the device's driver provide EEH support? * @pe: EEH PE * @bus: PCI bus corresponding to the isolcated slot + * @rmv_data: Optional, list to record removed devices * * This routine must be called to do reset on the indicated PE. * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, - struct eeh_rmv_data *rmv_data) + struct eeh_rmv_data *rmv_data, + bool driver_eeh_aware) { - struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); time64_t tstamp; int cnt, rc; struct eeh_dev *edev; @@ -644,16 +647,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * into pci_hp_add_devices(). 
*/ eeh_pe_state_mark(pe, EEH_PE_KEEP); - if (bus) { - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - } else { - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); - } - } else if (frozen_bus) { + if (driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); + } else { + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); } /* @@ -688,8 +687,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * the device up before the scripts have taken it down, * potentially weird things happen. */ - if (bus) { - pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); + if (!driver_eeh_aware || rmv_data->removed) { + pr_info("EEH: Sleep 5s ahead of %s hotplug\n", + (driver_eeh_aware ? "partial" : "complete")); ssleep(5); /* @@ -702,19 +702,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + if (!driver_eeh_aware) + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); } - } else if (frozen_bus && rmv_data->removed) { - pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); - ssleep(5); - - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); - eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) - eeh_add_virt_device(edev, NULL); - else - pci_hp_add_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -732,28 +723,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /** * eeh_handle_normal_event - Handle EEH events on a specific PE - * @pe: EEH PE + * @pe: EEH PE - which should not be used after we return, as it may + * have been invalidated. * * Attempts to recover the given PE. If recovery fails or the PE has failed * too many times, remove the PE. * - * Returns true if @pe should no longer be used, else false. 
+ * While PHB detects address or data parity errors on particular PCI + * slot, the associated PE will be frozen. Besides, DMA's occurring + * to wild addresses (which usually happen due to bugs in device + * drivers or in PCI adapter firmware) can cause EEH error. #SERR, + * #PERR or other misc PCI-related errors also can trigger EEH errors. + * + * Recovery process consists of unplugging the device driver (which + * generated hotplug events to userspace), then issuing a PCI #RST to + * the device, then reconfiguring the PCI config space for all bridges + * & devices under this slot, and then finally restarting the device + * drivers (which cause a second set of hotplug events to go out to + * userspace). */ -static bool eeh_handle_normal_event(struct eeh_pe *pe) +void eeh_handle_normal_event(struct eeh_pe *pe) { - struct pci_bus *frozen_bus; + struct pci_bus *bus; struct eeh_dev *edev, *tmp; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; - frozen_bus = eeh_pe_bus_get(pe); - if (!frozen_bus) { + bus = eeh_pe_bus_get(pe); + if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); - return false; + return; } + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > eeh_max_freezes) { @@ -805,7 +810,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) */ if (result == PCI_ERS_RESULT_NONE) { pr_info("EEH: Reset with hotplug activity\n"); - rc = eeh_reset_device(pe, frozen_bus, NULL); + rc = eeh_reset_device(pe, bus, NULL, false); if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); @@ -857,7 +862,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); - rc = eeh_reset_device(pe, NULL, 
&rmv_data); + rc = eeh_reset_device(pe, bus, &rmv_data, true); if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); @@ -890,7 +895,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return false; + goto final; hard_fail: /* @@ -915,23 +920,21 @@ hard_fail: * all removed devices correctly to avoid access * the their PCI config any more. */ - if (frozen_bus) { - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - - pci_lock_rescan_remove(); - pci_hp_remove_devices(frozen_bus); - pci_unlock_rescan_remove(); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - /* The passed PE should no longer be used */ - return true; - } + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); + /* The passed PE should no longer be used */ + return; } - return false; +final: + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } /** @@ -941,7 +944,7 @@ hard_fail: * specific PE. Iterates through possible failures and handles them as * necessary. */ -static void eeh_handle_special_event(void) +void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; struct pci_bus *bus; @@ -1004,15 +1007,7 @@ static void eeh_handle_special_event(void) */ if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { - /* - * eeh_handle_normal_event() can make the PE stale if it - * determines that the PE cannot possibly be recovered. - * Don't modify the PE state if that's the case. 
- */ - if (eeh_handle_normal_event(pe)) - continue; - - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_handle_normal_event(pe); } else { pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { @@ -1048,28 +1043,3 @@ static void eeh_handle_special_event(void) break; } while (rc != EEH_NEXT_ERR_NONE); } - -/** - * eeh_handle_event - Reset a PCI device after hard lockup. - * @pe: EEH PE - * - * While PHB detects address or data parity errors on particular PCI - * slot, the associated PE will be frozen. Besides, DMA's occurring - * to wild addresses (which usually happen due to bugs in device - * drivers or in PCI adapter firmware) can cause EEH error. #SERR, - * #PERR or other misc PCI-related errors also can trigger EEH errors. - * - * Recovery process consists of unplugging the device driver (which - * generated hotplug events to userspace), then issuing a PCI #RST to - * the device, then reconfiguring the PCI config space for all bridges - * & devices under this slot, and then finally restarting the device - * drivers (which cause a second set of hotplug events to go out to - * userspace). 
- */ -void eeh_handle_event(struct eeh_pe *pe) -{ - if (pe) - eeh_handle_normal_event(pe); - else - eeh_handle_special_event(); -} diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index accbf8b..61c9356 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy) /* We might have event without binding PE */ pe = event->pe; if (pe) { - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); if (pe->type & EEH_PE_PHB) pr_info("EEH: Detected error on PHB#%x\n", pe->phb->global_number); @@ -81,10 +80,9 @@ static int eeh_event_handler(void * dummy) pr_info("EEH: Detected PCI bus error on " "PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); - eeh_handle_event(pe); - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_handle_normal_event(pe); } else { - eeh_handle_event(NULL); + eeh_handle_special_event(); } kfree(event); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index ee832d34..9b6e653 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -943,6 +943,8 @@ kernel_dbg_exc: /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable + * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so + * keep these in synch. */ .macro masked_interrupt_book3e paca_irq full_mask diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 243d072..1a0aa70 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -621,7 +621,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ mtlr r10 - beq- 8f /* if bad address, make full stack frame */ + /* + * Large address, check whether we have to allocate new contexts. 
+ */ + beq- 8f bne- cr5,2f /* if unrecoverable exception, oops */ @@ -685,7 +688,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mr r3,r12 mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 - LOAD_HANDLER(r10,bad_addr_slb) + LOAD_HANDLER(r10, large_addr_slb) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 @@ -700,17 +703,17 @@ EXC_COMMON_BEGIN(unrecov_slb) bl unrecoverable_exception b 1b -EXC_COMMON_BEGIN(bad_addr_slb) +EXC_COMMON_BEGIN(large_addr_slb) EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) RECONCILE_IRQ_STATE(r10, r11) ld r3, PACA_EXSLB+EX_DAR(r13) std r3, _DAR(r1) beq cr6, 2f - li r10, 0x480 /* fix trap number for I-SLB miss */ + li r10, 0x481 /* fix trap number for I-SLB miss */ std r10, _TRAP(r1) 2: bl save_nvgprs addi r3, r1, STACK_FRAME_OVERHEAD - bl slb_miss_bad_addr + bl slb_miss_large_addr b ret_from_except EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) @@ -1273,7 +1276,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) bne+ denorm_assist #endif - KVMTEST_PR(0x1500) + KVMTEST_HV(0x1500) EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) @@ -1285,7 +1288,7 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100) EXC_VIRT_NONE(0x5500, 0x100) #endif -TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) +TRAMP_KVM_HV(PACA_EXGEN, 0x1500) #ifdef CONFIG_PPC_DENORMALISATION TRAMP_REAL_BEGIN(denorm_assist) @@ -1426,7 +1429,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) * triggered and won't automatically refire. * - If it was a HMI we return immediately since we handled it in realmode * and it won't refire. - * - else we hard disable and return. + * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return. * This is called with r10 containing the value to OR to the paca field. */ #define MASKED_INTERRUPT(_H) \ @@ -1441,8 +1444,8 @@ masked_##_H##interrupt: \ ori r10,r10,0xffff; \ mtspr SPRN_DEC,r10; \ b MASKED_DEC_HANDLER_LABEL; \ -1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \ - bne 2f; \ +1: andi. 
r10,r10,PACA_IRQ_MUST_HARD_MASK; \ + beq 2f; \ mfspr r10,SPRN_##_H##SRR1; \ xori r10,r10,MSR_EE; /* clear MSR_EE */ \ mtspr SPRN_##_H##SRR1,r10; \ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 53b9c1d..4c1012b 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -33,6 +33,7 @@ #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/sstep.h> +#include <asm/debug.h> #include <linux/uaccess.h> /* @@ -171,6 +172,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the * 'symbolsize' should satisfy the check below. */ + if (!ppc_breakpoint_available()) + return -ENODEV; length_max = 8; /* DABR */ if (cpu_has_feature(CPU_FTR_DAWR)) { length_max = 512 ; /* 64 doublewords */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 01e1c19..89157cf 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -339,6 +339,7 @@ power_enter_stop: bne .Lhandle_esl_ec_set PPC_STOP li r3,0 /* Since we didn't lose state, return 0 */ + std r3, PACA_REQ_PSSCR(r13) /* * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so @@ -429,11 +430,29 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ * r3 contains desired PSSCR register value. */ _GLOBAL(power9_idle_stop) +BEGIN_FTR_SECTION + lwz r5, PACA_DONT_STOP(r13) + cmpwi r5, 0 + bne 1f std r3, PACA_REQ_PSSCR(r13) + sync + lwz r5, PACA_DONT_STOP(r13) + cmpwi r5, 0 + bne 1f +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) mtspr SPRN_PSSCR,r3 LOAD_REG_ADDR(r4,power_enter_stop) b pnv_powersave_common /* No return */ +1: + /* + * We get here when TM / thread reconfiguration bug workaround + * code wants to get the CPU into SMT4 mode, and therefore + * we are being asked not to stop. 
+ */ + li r3, 0 + std r3, PACA_REQ_PSSCR(r13) + blr /* return 0 for wakeup cause / SRR1 value */ /* * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1, @@ -584,6 +603,8 @@ FTR_SECTION_ELSE_NESTED(71) mfspr r5, SPRN_PSSCR rldicl r5,r5,4,60 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71) + li r0, 0 /* clear requested_psscr to say we're awake */ + std r0, PACA_REQ_PSSCR(r13) cmpd cr4,r5,r4 bge cr4,pnv_wakeup_tb_loss /* returns to caller */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f880388..061aa0f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -476,6 +476,14 @@ void force_external_irq_replay(void) */ WARN_ON(!arch_irqs_disabled()); + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* Indicate in the PACA that we have an interrupt to replay */ local_paca->irq_happened |= PACA_IRQ_EE; } diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index ca5d5a0..e4c5bf3 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -455,29 +455,33 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) } kretprobe_assert(ri, orig_ret_address, trampoline_address); - regs->nip = orig_ret_address; + /* - * Make LR point to the orig_ret_address. - * When the 'nop' inside the kretprobe_trampoline - * is optimized, we can do a 'blr' after executing the - * detour buffer code. + * We get here through one of two paths: + * 1. by taking a trap -> kprobe_handler() -> here + * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here + * + * When going back through (1), we need regs->nip to be setup properly + * as it is used to determine the return address from the trap. 
+ * For (2), since nip is not honoured with optprobes, we instead setup + * the link register properly so that the subsequent 'blr' in + * kretprobe_trampoline jumps back to the right instruction. + * + * For nip, we should set the address to the previous instruction since + * we end up emulating it in kprobe_handler(), which increments the nip + * again. */ + regs->nip = orig_ret_address - 4; regs->link = orig_ret_address; - reset_current_kprobe(); kretprobe_hash_unlock(current, &flags); - preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; + + return 0; } NOKPROBE_SYMBOL(trampoline_probe_handler); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 3280953..fa267e9 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range) blr EXPORT_SYMBOL(flush_dcache_range) -/* - * Like above, but works on non-mapped physical addresses. - * Use only for non-LPAR setups ! It also assumes real mode - * is cacheable. Used for flushing out the DART before using - * it as uncacheable memory - * - * flush_dcache_phys_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start to stop-1 inclusive - */ -_GLOBAL(flush_dcache_phys_range) - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? 
*/ - mfmsr r5 /* Disable MMU Data Relocation */ - ori r0,r5,MSR_DR - xori r0,r0,MSR_DR - sync - mtmsr r0 - sync - isync - mtctr r8 -0: dcbst 0,r6 - add r6,r6,r7 - bdnz 0b - sync - isync - mtmsr r5 /* Re-enable MMU Data Relocation */ - sync - isync - blr - _GLOBAL(flush_inval_dcache_range) ld r10,PPC64_CACHES@toc(r2) lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 496d639..ba681da 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -207,8 +207,7 @@ int nvram_write_os_partition(struct nvram_os_partition *part, tmp_index = part->index; - rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), - &tmp_index); + rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index); if (rc <= 0) { pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); return rc; @@ -244,9 +243,7 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff, tmp_index = part->index; if (part->os_partition) { - rc = ppc_md.nvram_read((char *)&info, - sizeof(struct err_log_info), - &tmp_index); + rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index); if (rc <= 0) { pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); return rc; @@ -1173,7 +1170,7 @@ int __init nvram_scan_partitions(void) "detected: 0-length partition\n"); goto out; } - tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL); err = -ENOMEM; if (!tmp_part) { printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n"); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0f7e2be..0ee3e6d 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -268,7 +268,8 @@ void copy_mm_to_paca(struct mm_struct *mm) #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm->context.slb_addr_limit); get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; - get_paca()->mm_ctx_low_slices_psize = 
context->low_slices_psize; + memcpy(&get_paca()->mm_ctx_low_slices_psize, + &context->low_slices_psize, sizeof(context->low_slices_psize)); memcpy(&get_paca()->mm_ctx_high_slices_psize, &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); #else /* CONFIG_PPC_MM_SLICES */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1738c41..24a591b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -173,7 +173,7 @@ void __msr_check_and_clear(unsigned long bits) EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU -void __giveup_fpu(struct task_struct *tsk) +static void __giveup_fpu(struct task_struct *tsk) { unsigned long msr; @@ -556,7 +556,7 @@ void restore_math(struct pt_regs *regs) regs->msr = msr; } -void save_all(struct task_struct *tsk) +static void save_all(struct task_struct *tsk) { unsigned long usermsr; @@ -827,6 +827,18 @@ void set_breakpoint(struct arch_hw_breakpoint *brk) preempt_enable(); } +/* Check if we have DAWR or DABR hardware */ +bool ppc_breakpoint_available(void) +{ + if (cpu_has_feature(CPU_FTR_DAWR)) + return true; /* POWER8 DAWR */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return false; /* POWER9 with DAWR disabled */ + /* DABR: Everything but POWER8 and POWER9 */ + return true; +} +EXPORT_SYMBOL_GPL(ppc_breakpoint_available); + #ifdef CONFIG_PPC64 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index e19f5e3..9dbed48 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -291,11 +291,11 @@ static inline void identical_pvr_fixup(unsigned long node) static void __init check_cpu_feature_properties(unsigned long node) { - unsigned long i; + int i; struct feature_property *fp = feature_properties; const __be32 *prop; - for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) { + for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) { prop = of_get_flat_dt_prop(node, 
fp->name, NULL); if (prop && be32_to_cpup(prop) >= fp->min_value) { cur_cpu_spec->cpu_features |= fp->cpu_feature; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index adf044d..0323e07 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -874,7 +874,6 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .mmu = 0, .hash_ext = 0, .radix_ext = 0, - .byte22 = OV5_FEAT(OV5_DRC_INFO), }, /* option vector 6: IBM PAPR hints */ @@ -1111,7 +1110,8 @@ static void __init prom_check_platform_support(void) } } - if (supported.radix_mmu && supported.radix_gtse) { + if (supported.radix_mmu && supported.radix_gtse && + IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { /* Radix preferred - but we require GTSE for now */ prom_debug("Asking for radix with GTSE\n"); ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ca72d73..d23cf63 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -41,6 +41,7 @@ #include <asm/switch_to.h> #include <asm/tm.h> #include <asm/asm-prototypes.h> +#include <asm/debug.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -2378,6 +2379,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, struct perf_event_attr attr; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #ifndef CONFIG_PPC_ADV_DEBUG_REGS + bool set_bp = true; struct arch_hw_breakpoint hw_brk; #endif @@ -2411,9 +2413,10 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, hw_brk.address = data & (~HW_BRK_TYPE_DABR); hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; hw_brk.len = 8; + set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); #ifdef CONFIG_HAVE_HW_BREAKPOINT bp = thread->ptrace_bps[0]; - if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { + if (!set_bp) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; @@ -2450,6 
+2453,9 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, return PTR_ERR(bp); } +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + if (set_bp && (!ppc_breakpoint_available())) + return -ENODEV; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ task->thread.hw_brk = hw_brk; #else /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -2904,6 +2910,9 @@ static long ppc_set_hwdebug(struct task_struct *child, if (child->thread.hw_brk.address) return -ENOSPC; + if (!ppc_breakpoint_available()) + return -ENODEV; + child->thread.hw_brk = brk; return 1; @@ -3052,7 +3061,10 @@ long arch_ptrace(struct task_struct *child, long request, #endif #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ dbginfo.num_instruction_bps = 0; - dbginfo.num_data_bps = 1; + if (ppc_breakpoint_available()) + dbginfo.num_data_bps = 1; + else + dbginfo.num_data_bps = 0; dbginfo.num_condition_regs = 0; #ifdef CONFIG_PPC64 dbginfo.data_bp_alignment = 8; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c new file mode 100644 index 0000000..2cee3dc --- /dev/null +++ b/arch/powerpc/kernel/security.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Security related flags and so on. +// +// Copyright 2018, Michael Ellerman, IBM Corporation. 
+ +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/seq_buf.h> + +#include <asm/security_features.h> + + +unsigned long powerpc_security_features __read_mostly = \ + SEC_FTR_L1D_FLUSH_HV | \ + SEC_FTR_L1D_FLUSH_PR | \ + SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_FAVOUR_SECURITY; + + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool thread_priv; + + thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); + + if (rfi_flush || thread_priv) { + struct seq_buf s; + seq_buf_init(&s, buf, PAGE_SIZE - 1); + + seq_buf_printf(&s, "Mitigation: "); + + if (rfi_flush) + seq_buf_printf(&s, "RFI Flush"); + + if (rfi_flush && thread_priv) + seq_buf_printf(&s, ", "); + + if (thread_priv) + seq_buf_printf(&s, "L1D private per thread"); + + seq_buf_printf(&s, "\n"); + + return s.len; + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool bcs, ccd, ori; + struct seq_buf s; + + seq_buf_init(&s, buf, PAGE_SIZE - 1); + + bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); + ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); + ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31); + + if (bcs || ccd) { + seq_buf_printf(&s, "Mitigation: "); + + if (bcs) + seq_buf_printf(&s, "Indirect branch serialisation (kernel only)"); + + if (bcs && ccd) + seq_buf_printf(&s, ", "); + + if (ccd) + seq_buf_printf(&s, "Indirect branch cache disabled"); + } else + seq_buf_printf(&s, "Vulnerable"); + + if (ori) + seq_buf_printf(&s, ", ori31 speculation barrier 
enabled"); + + seq_buf_printf(&s, "\n"); + + return s.len; +} diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index af7a47c..56f7a2b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -947,6 +947,8 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_PPC64 if (!radix_enabled()) init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; +#elif defined(CONFIG_PPC_8xx) + init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; #else #error "context.addr_limit not initialized." #endif diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 51ebc01..7445748 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -39,6 +39,7 @@ #include <asm/udbg.h> #include <asm/code-patching.h> #include <asm/cpu_has_feature.h> +#include <asm/asm-prototypes.h> #define DBG(fmt...) @@ -121,7 +122,7 @@ notrace void __init machine_init(u64 dt_ptr) } /* Checks "l2cr=xxxx" command-line option */ -int __init ppc_setup_l2cr(char *str) +static int __init ppc_setup_l2cr(char *str) { if (cpu_has_feature(CPU_FTR_L2CR)) { unsigned long val = simple_strtoul(str, NULL, 0); @@ -134,7 +135,7 @@ int __init ppc_setup_l2cr(char *str) __setup("l2cr=", ppc_setup_l2cr); /* Checks "l3cr=xxxx" command-line option */ -int __init ppc_setup_l3cr(char *str) +static int __init ppc_setup_l3cr(char *str) { if (cpu_has_feature(CPU_FTR_L3CR)) { unsigned long val = simple_strtoul(str, NULL, 0); @@ -180,7 +181,7 @@ EXPORT_SYMBOL(nvram_sync); #endif /* CONFIG_NVRAM */ -int __init ppc_init(void) +static int __init ppc_init(void) { /* clear the progress line */ if (ppc_md.progress) @@ -192,7 +193,6 @@ int __init ppc_init(void) } return 0; } - arch_initcall(ppc_init); void __init irqstack_early_init(void) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 16ea71f..66f2b62 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -871,9 
+871,6 @@ static void do_nothing(void *unused) void rfi_flush_enable(bool enable) { - if (rfi_flush == enable) - return; - if (enable) { do_rfi_flush_fixups(enabled_flush_types); on_each_cpu(do_nothing, NULL, 1); @@ -888,6 +885,10 @@ static void init_fallback_flush(void) u64 l1d_size, limit; int cpu; + /* Only allocate the fallback flush area once (at boot time). */ + if (l1d_flush_fallback_area) + return; + l1d_size = ppc64_caches.l1d.size; limit = min(ppc64_bolted_size(), ppc64_rma_size); @@ -906,18 +907,18 @@ static void init_fallback_flush(void) } } -void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +void setup_rfi_flush(enum l1d_flush_type types, bool enable) { if (types & L1D_FLUSH_FALLBACK) { - pr_info("rfi-flush: Using fallback displacement flush\n"); + pr_info("rfi-flush: fallback displacement flush available\n"); init_fallback_flush(); } if (types & L1D_FLUSH_ORI) - pr_info("rfi-flush: Using ori type flush\n"); + pr_info("rfi-flush: ori type flush available\n"); if (types & L1D_FLUSH_MTTRIG) - pr_info("rfi-flush: Using mttrig type flush\n"); + pr_info("rfi-flush: mttrig type flush available\n"); enabled_flush_types = types; @@ -928,13 +929,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { + bool enable; + if (val == 1) - rfi_flush_enable(true); + enable = true; else if (val == 0) - rfi_flush_enable(false); + enable = false; else return -EINVAL; + /* Only do anything if we're changing state */ + if (enable != rfi_flush) + rfi_flush_enable(enable); + return 0; } @@ -953,12 +960,4 @@ static __init int rfi_flush_debugfs_init(void) } device_initcall(rfi_flush_debugfs_init); #endif - -ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) -{ - if (rfi_flush) - return sprintf(buf, "Mitigation: RFI Flush\n"); - - return sprintf(buf, "Vulnerable\n"); -} #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git 
a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 7c59d88..a6467f8 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -49,6 +49,11 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #else /* CONFIG_PPC64 */ +extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs); +extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs); + static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index a46de00..492f034 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1045,7 +1045,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) { - unsigned char tmp; + unsigned char tmp __maybe_unused; int ctx_has_vsx_region = 0; #ifdef CONFIG_PPC64 @@ -1231,7 +1231,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, { struct sig_dbg_op op; int i; - unsigned char tmp; + unsigned char tmp __maybe_unused; unsigned long new_msr = regs->msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS unsigned long new_dbcr0 = current->thread.debug.dbcr0; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 9f32748..755dc98 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -786,7 +786,8 @@ static int register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_create_file(s, &dev_attr_pir); - if (cpu_has_feature(CPU_FTR_ARCH_206)) + if (cpu_has_feature(CPU_FTR_ARCH_206) && + !firmware_has_feature(FW_FEATURE_LPAR)) device_create_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ @@ -871,7 +872,8 @@ static int unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_remove_file(s, &dev_attr_pir); - if 
(cpu_has_feature(CPU_FTR_ARCH_206)) + if (cpu_has_feature(CPU_FTR_ARCH_206) && + !firmware_has_feature(FW_FEATURE_LPAR)) device_remove_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a32823d..f7d96a6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1234,7 +1234,7 @@ void calibrate_delay(void) static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) { ppc_md.get_rtc_time(tm); - return rtc_valid_tm(tm); + return 0; } static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1e48d15..f200bfd 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1495,18 +1495,6 @@ bail: exception_exit(prev_state); } -void slb_miss_bad_addr(struct pt_regs *regs) -{ - enum ctx_state prev_state = exception_enter(); - - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); - else - bad_page_fault(regs, regs->dar, SIGSEGV); - - exception_exit(prev_state); -} - void StackOverflow(struct pt_regs *regs) { printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 22b01a3..b44ec10 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -99,26 +99,28 @@ static struct vdso_patch_def vdso_patches[] = { CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, "__kernel_sync_dicache", "__kernel_sync_dicache_p5" }, +#ifdef CONFIG_PPC32 { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_gettimeofday", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_clock_gettime", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_clock_getres", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_get_tbfreq", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, 
"__kernel_time", NULL }, +#endif }; /* diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 85ba80d..4b19da8 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -74,9 +74,15 @@ kvm-hv-y += \ book3s_64_mmu_hv.o \ book3s_64_mmu_radix.o +kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ + book3s_hv_tm.o + kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ book3s_hv_rm_xics.o book3s_hv_rm_xive.o +kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ + book3s_hv_tm_builtin.o + ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_hmi.o \ @@ -84,6 +90,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ + $(kvm-book3s_64-builtin-tm-objs-y) \ $(kvm-book3s_64-builtin-xics-objs-y) endif diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 0c85481..0837b97 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -157,6 +157,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); asm volatile("ptesync": : :"memory"); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9b48d4a..1e1211c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -742,6 +742,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, case H_SET_MODE_RESOURCE_SET_DAWR: if (!kvmppc_power8_compatible(vcpu)) return H_P2; + if (!ppc_breakpoint_available()) + return H_P2; if (mflags) return H_UNSUPPORTED_FLAG_START; if (value2 & DABRX_HYP) @@ -1207,6 +1209,19 @@ static int 
kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; } break; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case BOOK3S_INTERRUPT_HV_SOFTPATCH: + /* + * This occurs for various TM-related instructions that + * we need to emulate on POWER9 DD2.2. We have already + * handled the cases where the guest was in real-suspend + * mode and was transitioning to transactional state. + */ + r = kvmhv_p9_tm_emulation(vcpu); + break; +#endif + case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; break; @@ -1979,7 +1994,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, * turn off the HFSCR bit, which causes those instructions to trap. */ vcpu->arch.hfscr = mfspr(SPRN_HFSCR); - if (!cpu_has_feature(CPU_FTR_TM)) + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) + vcpu->arch.hfscr |= HFSCR_TM; + else if (!cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr &= ~HFSCR_TM; if (cpu_has_feature(CPU_FTR_ARCH_300)) vcpu->arch.hfscr &= ~HFSCR_MSGP; @@ -2243,6 +2260,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) tpaca = paca_ptrs[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; tpaca->kvm_hstate.ptid = cpu - vc->pcpu; + tpaca->kvm_hstate.fake_suspend = 0; /* Order stores to hstate.kvm_vcpu etc. 
before store to kvm_vcore */ smp_wmb(); tpaca->kvm_hstate.kvm_vcore = vc; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 8888e62..e1c083f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -473,6 +473,17 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, trace_tlbie(kvm->arch.lpid, 0, rbvalues[i], kvm->arch.lpid, 0, 0, 0); } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rbvalues[0]), "r" (kvm->arch.lpid)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; } else { diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index a1c6ea2..95c616f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -786,12 +786,18 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_restore_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* Load guest PMU registers */ @@ -885,8 +891,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_DAWRX(r4) ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) + /* + * Handle broken DAWR case by not writing it. This means we + * can still store the DAWR register for migration. 
+ */ +BEGIN_FTR_SECTION mtspr SPRN_DAWR, r5 mtspr SPRN_DAWRX, r6 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR) mtspr SPRN_CIABR, r7 mtspr SPRN_TAR, r8 ld r5, VCPU_IC(r4) @@ -914,11 +926,14 @@ BEGIN_FTR_SECTION mtspr SPRN_ACOP, r6 mtspr SPRN_CSIGR, r7 mtspr SPRN_TACR, r8 + nop FTR_SECTION_ELSE /* POWER9-only registers */ ld r5, VCPU_TID(r4) ld r6, VCPU_PSSCR(r4) + lbz r8, HSTATE_FAKE_SUSPEND(r13) oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ + rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG ld r7, VCPU_HFSCR(r4) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 @@ -1369,6 +1384,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r3, VCPU_CTR(r9) std r4, VCPU_XER(r9) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* For softpatch interrupt, go off and do TM instruction emulation */ + cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH + beq kvmppc_tm_emul +#endif + /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE beq kvmppc_hdsi @@ -1728,12 +1749,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) bl kvmppc_save_fp #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_save_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* Increment yield count if they have a VPA */ @@ -1833,6 +1860,10 @@ BEGIN_FTR_SECTION ld r6, STACK_SLOT_DAWR(r1) ld r7, STACK_SLOT_DAWRX(r1) mtspr SPRN_CIABR, r5 + /* + * If the DAWR doesn't work, it's ok to write these here as + * this value should always be zero + */ mtspr SPRN_DAWR, r6 mtspr SPRN_DAWRX, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) @@ -2053,6 +2084,42 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) mtlr r0 blr +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Softpatch interrupt for transactional memory emulation cases + * on POWER9 DD2.2. 
This is early in the guest exit path - we + * haven't saved registers or done a treclaim yet. + */ +kvmppc_tm_emul: + /* Save instruction image in HEIR */ + mfspr r3, SPRN_HEIR + stw r3, VCPU_HEIR(r9) + + /* + * The cases we want to handle here are those where the guest + * is in real suspend mode and is trying to transition to + * transactional mode. + */ + lbz r0, HSTATE_FAKE_SUSPEND(r13) + cmpwi r0, 0 /* keep exiting guest if in fake suspend */ + bne guest_exit_cont + rldicl r3, r11, 64 - MSR_TS_S_LG, 62 + cmpwi r3, 1 /* or if not in suspend state */ + bne guest_exit_cont + + /* Call C code to do the emulation */ + mr r3, r9 + bl kvmhv_p9_tm_emulation_early + nop + ld r9, HSTATE_KVM_VCPU(r13) + li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH + cmpwi r3, 0 + beq guest_exit_cont /* continue exiting if not handled */ + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + b fast_interrupt_c_return /* go back to guest if handled */ +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing @@ -2505,8 +2572,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3,0 blr +2: +BEGIN_FTR_SECTION + /* POWER9 with disabled DAWR */ + li r3, H_UNSUPPORTED + blr +END_FTR_SECTION_IFCLR(CPU_FTR_DAWR) /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ -2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW + rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 std r4, VCPU_DAWR(r3) @@ -2586,13 +2659,19 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ bl kvmppc_save_fp #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. 
+ */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ ld r9, HSTATE_KVM_VCPU(r13) bl kvmppc_save_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* @@ -2699,12 +2778,18 @@ kvm_end_cede: #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_restore_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* load up FP state */ @@ -3031,6 +3116,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) kvmppc_save_tm: mflr r0 std r0, PPC_LR_STKOFF(r1) + stdu r1, -PPC_MIN_STKFRM(r1) /* Turn on TM. */ mfmsr r8 @@ -3045,6 +3131,24 @@ kvmppc_save_tm: std r1, HSTATE_HOST_R1(r13) li r3, TM_CAUSE_KVM_RESCHED +BEGIN_FTR_SECTION + lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ + cmpwi r0, 0 + beq 3f + rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */ + beq 4f +BEGIN_FTR_SECTION_NESTED(96) + bl pnv_power9_force_smt4_catch +END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) + nop + b 6f +3: + /* Emulation of the treclaim instruction needs TEXASR before treclaim */ + mfspr r6, SPRN_TEXASR + std r6, VCPU_ORIG_TEXASR(r9) +6: +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + /* Clear the MSR RI since r1, r13 are all going to be foobar. 
*/ li r5, 0 mtmsrd r5, 1 @@ -3056,6 +3160,43 @@ kvmppc_save_tm: SET_SCRATCH0(r13) GET_PACA(r13) std r9, PACATMSCRATCH(r13) + + /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */ +BEGIN_FTR_SECTION + lbz r9, HSTATE_FAKE_SUSPEND(r13) + cmpwi r9, 0 + beq 2f + /* + * We were in fake suspend, so we are not going to save the + * register state as the guest checkpointed state (since + * we already have it), therefore we can now use any volatile GPR. + */ + /* Reload stack pointer and TOC. */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + /* Set MSR RI now we have r1 and r13 back. */ + li r5, MSR_RI + mtmsrd r5, 1 + HMT_MEDIUM + ld r6, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r6 +BEGIN_FTR_SECTION_NESTED(96) + bl pnv_power9_force_smt4_release +END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) + nop + +4: + mfspr r3, SPRN_PSSCR + /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */ + li r0, PSSCR_FAKE_SUSPEND + andc r3, r3, r0 + mtspr SPRN_PSSCR, r3 + ld r9, HSTATE_KVM_VCPU(r13) + /* Don't save TEXASR, use value from last exit in real suspend state */ + b 11f +2: +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + ld r9, HSTATE_KVM_VCPU(r13) /* Get a few more GPRs free. */ @@ -3126,13 +3267,15 @@ kvmppc_save_tm: * change these outside of a transaction, so they must always be * context switched. */ + mfspr r7, SPRN_TEXASR + std r7, VCPU_TEXASR(r9) +11: mfspr r5, SPRN_TFHAR mfspr r6, SPRN_TFIAR - mfspr r7, SPRN_TEXASR std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - std r7, VCPU_TEXASR(r9) + addi r1, r1, PPC_MIN_STKFRM ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr @@ -3167,6 +3310,8 @@ kvmppc_restore_tm: mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 + li r0, 0 + stb r0, HSTATE_FAKE_SUSPEND(r13) ld r5, VCPU_MSR(r4) rldicl. 
r5, r5, 64 - MSR_TS_S_LG, 62 beqlr /* TM not active in guest */ @@ -3181,6 +3326,15 @@ kvmppc_restore_tm: mtspr SPRN_TEXASR, r7 /* + * If we are doing TM emulation for the guest on a POWER9 DD2, + * then we don't actually do a trechkpt -- we either set up + * fake-suspend mode, or emulate a TM rollback. + */ +BEGIN_FTR_SECTION + b .Ldo_tm_fake_load +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + + /* * We need to load up the checkpointed state for the guest. * We need to do this early as it will blow away any GPRs, VSRs and * some SPRs. @@ -3252,10 +3406,24 @@ kvmppc_restore_tm: /* Set the MSR RI since we have our registers back. */ li r5, MSR_RI mtmsrd r5, 1 - +9: ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr + +.Ldo_tm_fake_load: + cmpwi r5, 1 /* check for suspended state */ + bgt 10f + stb r5, HSTATE_FAKE_SUSPEND(r13) + b 9b /* and return */ +10: stdu r1, -PPC_MIN_STKFRM(r1) + /* guest is in transactional state, so simulate rollback */ + mr r3, r4 + bl kvmhv_emulate_tm_rollback + nop + ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ + addi r1, r1, PPC_MIN_STKFRM + b 9b #endif /* diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c new file mode 100644 index 0000000..bf710ad --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -0,0 +1,216 @@ +/* + * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_book3s_64.h> +#include <asm/reg.h> +#include <asm/ppc-opcode.h> + +static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause) +{ + u64 texasr, tfiar; + u64 msr = vcpu->arch.shregs.msr; + + tfiar = vcpu->arch.pc & ~0x3ull; + texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT; + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) + texasr |= TEXASR_SUSP; + if (msr & MSR_PR) { + texasr |= TEXASR_PR; + tfiar |= 1; + } + vcpu->arch.tfiar = tfiar; + /* Preserve ROT and TL fields of existing TEXASR */ + vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | texasr; +} + +/* + * This gets called on a softpatch interrupt on POWER9 DD2.2 processors. + * We expect to find a TM-related instruction to be emulated. The + * instruction image is in vcpu->arch.emul_inst. If the guest was in + * TM suspended or transactional state, the checkpointed state has been + * reclaimed and is in the vcpu struct. The CPU is in virtual mode in + * host context. + */ +int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) +{ + u32 instr = vcpu->arch.emul_inst; + u64 msr = vcpu->arch.shregs.msr; + u64 newmsr, bescr; + int ra, rs; + + switch (instr & 0xfc0007ff) { + case PPC_INST_RFID: + /* XXX do we need to check for PR=0 here? 
*/ + newmsr = vcpu->arch.shregs.srr1; + /* should only get here for Sx -> T1 transition */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + MSR_TM_TRANSACTIONAL(newmsr) && + (newmsr & MSR_TM))); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.shregs.srr0; + return RESUME_GUEST; + + case PPC_INST_RFEBB: + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + /* check EBB facility is available */ + if (!(vcpu->arch.hfscr & HFSCR_EBB)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_EBB_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + bescr = vcpu->arch.bescr; + /* expect to see a S->T transition requested */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + ((bescr >> 30) & 3) == 2)); + bescr &= ~BESCR_GE; + if (instr & (1 << 11)) + bescr |= BESCR_GE; + vcpu->arch.bescr = bescr; + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + vcpu->arch.shregs.msr = msr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.ebbrr; + return RESUME_GUEST; + + case PPC_INST_MTMSRD: + /* XXX do we need to check for PR=0 here? 
*/ + rs = (instr >> 21) & 0x1f; + newmsr = kvmppc_get_gpr(vcpu, rs); + /* check this is a Sx -> T1 transition */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + MSR_TM_TRANSACTIONAL(newmsr) && + (newmsr & MSR_TM))); + /* mtmsrd doesn't change LE */ + newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + return RESUME_GUEST; + + case PPC_INST_TSR: + /* check for PR=1 and arch 2.06 bit set in PCR */ + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + /* L=1 => tresume, L=0 => tsuspend */ + if (instr & (1 << 21)) { + if (MSR_TM_SUSPENDED(msr)) + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + } else { + if (MSR_TM_TRANSACTIONAL(msr)) + msr = (msr & ~MSR_TS_MASK) | MSR_TS_S; + } + vcpu->arch.shregs.msr = msr; + return RESUME_GUEST; + + case PPC_INST_TRECLAIM: + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, + 
BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* If no transaction active, generate TM bad thing */ + if (!MSR_TM_ACTIVE(msr)) { + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + return RESUME_GUEST; + } + /* If failure was not previously recorded, recompute TEXASR */ + if (!(vcpu->arch.orig_texasr & TEXASR_FS)) { + ra = (instr >> 16) & 0x1f; + if (ra) + ra = kvmppc_get_gpr(vcpu, ra) & 0xff; + emulate_tx_failure(vcpu, ra); + } + + copy_from_checkpoint(vcpu); + + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + vcpu->arch.shregs.msr &= ~MSR_TS_MASK; + return RESUME_GUEST; + + case PPC_INST_TRECHKPT: + /* XXX do we need to check for PR=0 here? */ + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* If transaction active or TEXASR[FS] = 0, bad thing */ + if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) { + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + return RESUME_GUEST; + } + + copy_to_checkpoint(vcpu); + + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + vcpu->arch.shregs.msr = msr | MSR_TS_S; + return RESUME_GUEST; + } + + /* What should we do here? 
We didn't recognize the instruction */ + WARN_ON_ONCE(1); + return RESUME_GUEST; +} diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c new file mode 100644 index 0000000..d98ccfd --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_book3s_64.h> +#include <asm/reg.h> +#include <asm/ppc-opcode.h> + +/* + * This handles the cases where the guest is in real suspend mode + * and we want to get back to the guest without dooming the transaction. + * The caller has checked that the guest is in real-suspend mode + * (MSR[TS] = S and the fake-suspend flag is not set). + */ +int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) +{ + u32 instr = vcpu->arch.emul_inst; + u64 newmsr, msr, bescr; + int rs; + + switch (instr & 0xfc0007ff) { + case PPC_INST_RFID: + /* XXX do we need to check for PR=0 here? 
*/ + newmsr = vcpu->arch.shregs.srr1; + /* should only get here for Sx -> T1 transition */ + if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM))) + return 0; + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.shregs.srr0; + return 1; + + case PPC_INST_RFEBB: + /* check for PR=1 and arch 2.06 bit set in PCR */ + msr = vcpu->arch.shregs.msr; + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) + return 0; + /* check EBB facility is available */ + if (!(vcpu->arch.hfscr & HFSCR_EBB) || + ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB))) + return 0; + bescr = mfspr(SPRN_BESCR); + /* expect to see a S->T transition requested */ + if (((bescr >> 30) & 3) != 2) + return 0; + bescr &= ~BESCR_GE; + if (instr & (1 << 11)) + bescr |= BESCR_GE; + mtspr(SPRN_BESCR, bescr); + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + vcpu->arch.shregs.msr = msr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = mfspr(SPRN_EBBRR); + return 1; + + case PPC_INST_MTMSRD: + /* XXX do we need to check for PR=0 here? 
*/ + rs = (instr >> 21) & 0x1f; + newmsr = kvmppc_get_gpr(vcpu, rs); + msr = vcpu->arch.shregs.msr; + /* check this is a Sx -> T1 transition */ + if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM))) + return 0; + /* mtmsrd doesn't change LE */ + newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + return 1; + + case PPC_INST_TSR: + /* we know the MSR has the TS field = S (0b01) here */ + msr = vcpu->arch.shregs.msr; + /* check for PR=1 and arch 2.06 bit set in PCR */ + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) + return 0; + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM)) + return 0; + /* L=1 => tresume => set TS to T (0b10) */ + if (instr & (1 << 21)) + vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + /* Set CR0 to 0b0010 */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000; + return 1; + } + + return 0; +} + +/* + * This is called when we are returning to a guest in TM transactional + * state. We roll the guest state back to the checkpointed state. 
+ */ +void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu) +{ + vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ + vcpu->arch.pc = vcpu->arch.tfhar; + copy_from_checkpoint(vcpu); + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; +} diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f0f5cd4..f9818d7 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio) if (!qpage) { pr_err("Failed to allocate queue %d for VCPU %d\n", prio, xc->server_num); - return -ENOMEM;; + return -ENOMEM; } memset(qpage, 0, 1 << xive->q_order); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 403e642..4e38764 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -646,10 +646,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = hv_enabled; break; #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_CAP_PPC_HTM: r = hv_enabled && - (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP); + (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); break; +#endif default: r = 0; break; @@ -1345,7 +1348,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian) { - enum emulation_result emulated; + enum emulation_result emulated = EMULATE_DONE; while (vcpu->arch.mmio_vmx_copy_nums) { emulated = __kvmppc_handle_load(run, vcpu, rt, 8, @@ -1608,7 +1611,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); +#ifdef CONFIG_ALTIVEC out: +#endif vcpu_put(vcpu); return r; } diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 73697c4..35f80ab 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ 
b/arch/powerpc/lib/feature-fixups.c @@ -153,7 +153,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) patch_instruction(dest + 2, instrs[2]); } - printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); + printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 70274b7..34d68f1 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -280,7 +280,7 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest, * Copy from userspace to a buffer, using the largest possible * aligned accesses, up to sizeof(long). */ -static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb, +static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { int err = 0; @@ -385,7 +385,7 @@ static nokprobe_inline int write_mem_aligned(unsigned long val, * Copy from a buffer to userspace, using the largest possible * aligned accesses, up to sizeof(long). 
*/ -static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb, +static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { int err = 0; diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 849f50c..cf77d75 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd) mtspr(SPRN_M_TW, __pa(pgd) - offset); /* Update context */ - mtspr(SPRN_M_CASID, id); + mtspr(SPRN_M_CASID, id - 1); /* sync */ mb(); } diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 697b70a..7d0945b 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) return 1; psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); - vsid = get_vsid(mm->context.id, ea, ssize); + vsid = get_user_vsid(&mm->context, ea, ssize); vsidkey = SLB_VSID_USER; break; case VMALLOC_REGION_ID: diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 1604110..3f18036 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, dr_cell->base_addr = cpu_to_be64(lmb->base_addr); dr_cell->drc_index = cpu_to_be32(lmb->drc_index); dr_cell->aa_index = cpu_to_be32(lmb->aa_index); - dr_cell->flags = cpu_to_be32(lmb->flags); + dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb)); } static int drmem_update_dt_v2(struct device_node *memory, @@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory, } if (prev_lmb->aa_index != lmb->aa_index || - prev_lmb->flags != lmb->flags) + drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) lmb_sets++; prev_lmb = lmb; @@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory, } if (prev_lmb->aa_index != lmb->aa_index || - prev_lmb->flags != lmb->flags) { + 
drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) { /* end of one set, start of another */ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs); dr_cell++; @@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, u32 i, n_lmbs; n_lmbs = of_read_number(prop++, 1); + if (n_lmbs == 0) + return; for (i = 0; i < n_lmbs; i++) { read_drconf_v1_cell(&lmb, &prop); @@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, u32 i, j, lmb_sets; lmb_sets = of_read_number(prop++, 1); + if (lmb_sets == 0) + return; for (i = 0; i < lmb_sets; i++) { read_drconf_v2_cell(&dr_cell, &prop); @@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) struct drmem_lmb *lmb; drmem_info->n_lmbs = of_read_number(prop++, 1); + if (drmem_info->n_lmbs == 0) + return; drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb), GFP_KERNEL); @@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) int lmb_index; lmb_sets = of_read_number(prop++, 1); + if (lmb_sets == 0) + return; /* first pass, calculate the number of LMBs */ p = prop; diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 5a69b51..d573d7d 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * need to add in 0x1 if it's a read-only user page */ rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -117,7 +117,7 @@ repeat: return -1; } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash64_64k.c 
b/arch/powerpc/mm/hash64_64k.c index 2253bbc..e601d95 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, subpg_index = (ea & (PAGE_SIZE - 1)) >> shift; vpn = hpt_vpn(ea, vsid, ssize); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); /* *None of the sub 4k page is hashed */ @@ -214,7 +214,7 @@ repeat: return -1; } - new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot); + new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE); new_pte |= H_PAGE_HASHPTE; *ptep = __pte(new_pte & ~H_PAGE_BUSY); @@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -327,7 +327,7 @@ repeat: } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index a0675e9..1d049c7 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -201,6 +201,15 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, return va; } +static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); + } +} + static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long rb; @@ -278,6 +287,7 @@ 
static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); } else { __tlbie(vpn, psize, apsize, ssize); + fixup_tlbie(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -771,7 +781,7 @@ static void native_hpte_clear(void) */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long vpn; + unsigned long vpn = 0; unsigned long hash, index, hidx, shift, slot; struct hash_pte *hptep; unsigned long hpte_v; @@ -843,6 +853,10 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbie(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } + /* + * Just do one more with the last used values. + */ + fixup_tlbie(vpn, psize, psize, ssize); asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) @@ -852,18 +866,6 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_restore(flags); } -static int native_register_proc_table(unsigned long base, unsigned long page_size, - unsigned long table_size) -{ - unsigned long patb1 = base << 25; /* VSID */ - - patb1 |= (page_size << 5); /* sllp */ - patb1 |= table_size; - - partition_tb->patb1 = cpu_to_be64(patb1); - return 0; -} - void __init hpte_init_native(void) { mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; @@ -875,7 +877,4 @@ void __init hpte_init_native(void) mmu_hash_ops.hpte_clear_all = native_hpte_clear; mmu_hash_ops.flush_hash_range = native_flush_hash_range; mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; - - if (cpu_has_feature(CPU_FTR_ARCH_300)) - register_process_table = native_register_proc_table; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ceb5494..7587a2e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -875,6 +875,12 @@ static void __init htab_initialize(void) /* Using a hypervisor which owns the htab */ htab_address = 
NULL; _SDR1 = 0; + /* + * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall + * to inform the hypervisor that we wish to use the HPT. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + register_process_table(0, 0, 0); #ifdef CONFIG_FA_DUMP /* * If firmware assisted dump is active firmware preserves @@ -1008,6 +1014,7 @@ void __init hash__early_init_mmu(void) __pmd_index_size = H_PMD_INDEX_SIZE; __pud_index_size = H_PUD_INDEX_SIZE; __pgd_index_size = H_PGD_INDEX_SIZE; + __pud_cache_index = H_PUD_CACHE_INDEX; __pmd_cache_index = H_PMD_CACHE_INDEX; __pte_table_size = H_PTE_TABLE_SIZE; __pmd_table_size = H_PMD_TABLE_SIZE; @@ -1109,19 +1116,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) #ifdef CONFIG_PPC_MM_SLICES static unsigned int get_paca_psize(unsigned long addr) { - u64 lpsizes; - unsigned char *hpsizes; + unsigned char *psizes; unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - lpsizes = get_paca()->mm_ctx_low_slices_psize; + psizes = get_paca()->mm_ctx_low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - return (lpsizes >> (index * 4)) & 0xF; + } else { + psizes = get_paca()->mm_ctx_high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); } - hpsizes = get_paca()->mm_ctx_high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 0x1; - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; + return (psizes[index >> 1] >> (mask_index * 4)) & 0xF; } #else @@ -1261,7 +1267,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, } psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); - vsid = get_vsid(mm->context.id, ea, ssize); + vsid = get_user_vsid(&mm->context, ea, ssize); break; case VMALLOC_REGION_ID: vsid = get_kernel_vsid(ea, mmu_kernel_ssize); @@ -1526,7 +1532,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Get VSID */ ssize = user_segment_size(ea); - vsid = get_vsid(mm->context.id, ea, ssize); + vsid = get_user_vsid(&mm->context, ea, ssize); if (!vsid) 
return; /* diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 12511f5..b320f50 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long vpn; unsigned long old_pte, new_pte; unsigned long rflags, pa, sz; - long slot; + long slot, offset; BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); @@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + if (unlikely(mmu_psize == MMU_PAGE_16G)) + offset = PTRS_PER_PUD; + else + offset = PTRS_PER_PMD; + rpte = __real_pte(__pte(old_pte), ptep, offset); sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset); } /* diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 876da2b..f4153f2 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); +#ifdef CONFIG_PPC_RADIX_MMU if (radix_enabled()) return radix__hugetlb_get_unmapped_area(file, addr, len, pgoff, flags); +#endif return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); } #endif @@ -563,10 +565,12 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { #ifdef CONFIG_PPC_MM_SLICES - unsigned int psize = get_slice_psize(vma->vm_mm, 
vma->vm_start); /* With radix we don't use slice, so derive it from vma*/ - if (!radix_enabled()) + if (!radix_enabled()) { + unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); + return 1UL << mmu_psize_to_shift(psize); + } #endif if (!is_vm_hugetlb_page(vma)) return PAGE_SIZE; diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index eb8c6c8..2b656e6 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -100,6 +100,6 @@ void pgtable_cache_init(void) * same size as either the pgd or pmd index except with THP enabled * on book3s 64 */ - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); + if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX)) + pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor); } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6419b33..a2bf696 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -99,7 +99,7 @@ unsigned long __max_low_memory = MAX_LOW_MEM; /* * Check for command-line options that affect what MMU_init will do. */ -void __init MMU_setup(void) +static void __init MMU_setup(void) { /* Check for nobats option (used in mapin_ram). 
*/ if (strstr(boot_command_line, "nobats")) { diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index fdb424a..63470b0 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -68,12 +68,6 @@ #include "mmu_decl.h" -#ifdef CONFIG_PPC_BOOK3S_64 -#if H_PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif -#endif /* CONFIG_PPC_BOOK3S_64 */ - phys_addr_t memstart_addr = ~0; EXPORT_SYMBOL_GPL(memstart_addr); phys_addr_t kernstart_addr; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f50ce66..e2f5025 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,7 +127,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, +int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; @@ -148,7 +148,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 929d9ef..b75194d 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -94,13 +94,6 @@ static int hash__init_new_context(struct mm_struct *mm) return index; /* - * In the case of exec, use the default limit, - * otherwise inherit it from the mm we are duplicating. 
- */ - if (!mm->context.slb_addr_limit) - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; - - /* * The old code would re-promote on fork, we don't do that when using * slices as it could cause problem promoting slices that have been * forced down to 4K. @@ -115,7 +108,7 @@ static int hash__init_new_context(struct mm_struct *mm) * check against 0 is OK. */ if (mm->context.id == 0) - slice_set_user_psize(mm, mmu_virtual_psize); + slice_init_new_context_exec(mm); subpage_prot_init_new_context(mm); @@ -173,6 +166,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm_iommu_init(mm); #endif atomic_set(&mm->context.active_cpus, 0); + atomic_set(&mm->context.copros, 0); return 0; } @@ -185,6 +179,19 @@ void __destroy_context(int context_id) } EXPORT_SYMBOL_GPL(__destroy_context); +static void destroy_contexts(mm_context_t *ctx) +{ + int index, context_id; + + spin_lock(&mmu_context_lock); + for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { + context_id = ctx->extended_id[index]; + if (context_id) + ida_remove(&mmu_context_ida, context_id); + } + spin_unlock(&mmu_context_lock); +} + #ifdef CONFIG_PPC_64K_PAGES static void destroy_pagetable_page(struct mm_struct *mm) { @@ -223,7 +230,7 @@ void destroy_context(struct mm_struct *mm) else subpage_prot_free(mm); destroy_pagetable_page(mm); - __destroy_context(mm->context.id); + destroy_contexts(&mm->context); mm->context.id = MMU_NO_CONTEXT; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 4554d65..be8f5c9 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -331,6 +331,17 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) { pr_hard("initing context for mm @%p\n", mm); +#ifdef CONFIG_PPC_MM_SLICES + /* + * We have MMU_NO_CONTEXT set to be ~0. Hence check + * explicitly against context.id == 0. 
This ensures that we properly + * initialize context slice details for newly allocated mm's (which will + * have id == 0) and don't alter context slice inherited via fork (which + * will have id != 0). + */ + if (mm->context.id == 0) + slice_init_new_context_exec(mm); +#endif mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; return 0; @@ -428,8 +439,8 @@ void __init mmu_context_init(void) * -- BenH */ if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { - first_context = 0; - last_context = 15; + first_context = 1; + last_context = 16; no_selective_tlbil = true; } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 1eec1bc..57a5029 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void) numa_cpu_lookup_table[cpu] = -1; } -static void update_numa_cpu_lookup_table(unsigned int cpu, int node) -{ - numa_cpu_lookup_table[cpu] = node; -} - static void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index c736280..518518f 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -155,7 +155,7 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid) { if (radix_enabled()) return radix__create_section_mapping(start, end, nid); @@ -163,7 +163,7 @@ int create_section_mapping(unsigned long start, unsigned long end, int nid) return hash__create_section_mapping(start, end, nid); } -int remove_section_mapping(unsigned long start, unsigned long end) +int __meminit remove_section_mapping(unsigned long start, unsigned long end) { if (radix_enabled()) return radix__remove_section_mapping(start, end); diff --git 
a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index 469808e..199bfda 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -24,6 +24,10 @@ #define CREATE_TRACE_POINTS #include <trace/events/thp.h> +#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE)) +#warning Limited user VSID range means pagetable space is wasted +#endif + #ifdef CONFIG_SPARSEMEM_VMEMMAP /* * vmemmap is the starting address of the virtual address space where @@ -320,7 +324,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, if (!is_kernel_addr(addr)) { ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); + vsid = get_user_vsid(&mm->context, addr, ssize); WARN_ON(vsid == 0); } else { vsid = get_kernel_vsid(addr, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index a425636..7095384 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -17,9 +17,11 @@ #include <linux/of_fdt.h> #include <linux/mm.h> #include <linux/string_helpers.h> +#include <linux/stop_machine.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/mmu_context.h> #include <asm/dma.h> #include <asm/machdep.h> #include <asm/mmu.h> @@ -396,6 +398,22 @@ void __init radix_init_pgtable(void) "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); + + /* + * The init_mm context is given the first available (non-zero) PID, + * which is the "guard PID" and contains no page table. PIDR should + * never be set to zero because that duplicates the kernel address + * space at the 0x0... offset (quadrant 0)! 
+ * + * An arbitrary PID that may later be allocated by the PID allocator + * for userspace processes must not be used either, because that + * would cause stale user mappings for that PID on CPUs outside of + * the TLB invalidation scheme (because it won't be in mm_cpumask). + * + * So permanently carve out one PID for the purpose of a guard PID. + */ + init_mm.context.id = mmu_base_pid; + mmu_base_pid++; } static void __init radix_init_partition_table(void) @@ -598,6 +616,7 @@ void __init radix__early_init_mmu(void) __pmd_index_size = RADIX_PMD_INDEX_SIZE; __pud_index_size = RADIX_PUD_INDEX_SIZE; __pgd_index_size = RADIX_PGD_INDEX_SIZE; + __pud_cache_index = RADIX_PUD_INDEX_SIZE; __pmd_cache_index = RADIX_PMD_INDEX_SIZE; __pte_table_size = RADIX_PTE_TABLE_SIZE; __pmd_table_size = RADIX_PMD_TABLE_SIZE; @@ -642,7 +661,8 @@ void __init radix__early_init_mmu(void) radix_init_iamr(); radix_init_pgtable(); - + /* Switch to the guard PID before turning on MMU */ + radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); } @@ -667,6 +687,7 @@ void radix__early_init_mmu_secondary(void) } radix_init_iamr(); + radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); } @@ -729,6 +750,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) pud_clear(pud); } +struct change_mapping_params { + pte_t *pte; + unsigned long start; + unsigned long end; + unsigned long aligned_start; + unsigned long aligned_end; +}; + +static int __meminit stop_machine_change_mapping(void *data) +{ + struct change_mapping_params *params = + (struct change_mapping_params *)data; + + if (!data) + return -1; + + spin_unlock(&init_mm.page_table_lock); + pte_clear(&init_mm, params->aligned_start, params->pte); + create_physical_mapping(params->aligned_start, params->start, -1); + create_physical_mapping(params->end, params->aligned_end, -1); + spin_lock(&init_mm.page_table_lock); + return 0; +} + static void 
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end) { @@ -757,6 +802,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, } } +/* + * clear the pte and potentially split the mapping helper + */ +static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end, + unsigned long size, pte_t *pte) +{ + unsigned long mask = ~(size - 1); + unsigned long aligned_start = addr & mask; + unsigned long aligned_end = addr + size; + struct change_mapping_params params; + bool split_region = false; + + if ((end - addr) < size) { + /* + * We're going to clear the PTE, but not flushed + * the mapping, time to remap and flush. The + * effects if visible outside the processor or + * if we are running in code close to the + * mapping we cleared, we are in trouble. + */ + if (overlaps_kernel_text(aligned_start, addr) || + overlaps_kernel_text(end, aligned_end)) { + /* + * Hack, just return, don't pte_clear + */ + WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " + "text, not splitting\n", addr, end); + return; + } + split_region = true; + } + + if (split_region) { + params.pte = pte; + params.start = addr; + params.end = end; + params.aligned_start = addr & ~(size - 1); + params.aligned_end = min_t(unsigned long, aligned_end, + (unsigned long)__va(memblock_end_of_DRAM())); + stop_machine(stop_machine_change_mapping, &params, NULL); + return; + } + + pte_clear(&init_mm, addr, pte); +} + static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end) { @@ -772,13 +863,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, continue; if (pmd_huge(*pmd)) { - if (!IS_ALIGNED(addr, PMD_SIZE) || - !IS_ALIGNED(next, PMD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pmd); + split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); continue; } @@ -803,13 +888,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, 
continue; if (pud_huge(*pud)) { - if (!IS_ALIGNED(addr, PUD_SIZE) || - !IS_ALIGNED(next, PUD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pud); + split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); continue; } @@ -819,7 +898,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, } } -static void remove_pagetable(unsigned long start, unsigned long end) +static void __meminit remove_pagetable(unsigned long start, unsigned long end) { unsigned long addr, next; pud_t *pud_base; @@ -835,13 +914,7 @@ static void remove_pagetable(unsigned long start, unsigned long end) continue; if (pgd_huge(*pgd)) { - if (!IS_ALIGNED(addr, PGDIR_SIZE) || - !IS_ALIGNED(next, PGDIR_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pgd); + split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); continue; } @@ -853,12 +926,12 @@ static void remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __ref radix__create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) { return create_physical_mapping(start, end, nid); } -int radix__remove_section_mapping(unsigned long start, unsigned long end) +int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) { remove_pagetable(start, end); return 0; @@ -889,7 +962,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, } #ifdef CONFIG_MEMORY_HOTPLUG -void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) +void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { remove_pagetable(start, start + page_size); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c9a623c..9bf659d 100644 --- 
a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -57,11 +57,6 @@ #include "mmu_decl.h" -#ifdef CONFIG_PPC_BOOK3S_64 -#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) -#error TASK_SIZE_USER64 exceeds user VSID range -#endif -#endif #ifdef CONFIG_PPC_BOOK3S_64 /* @@ -82,6 +77,8 @@ unsigned long __pgd_index_size; EXPORT_SYMBOL(__pgd_index_size); unsigned long __pmd_cache_index; EXPORT_SYMBOL(__pmd_cache_index); +unsigned long __pud_cache_index; +EXPORT_SYMBOL(__pud_cache_index); unsigned long __pte_table_size; EXPORT_SYMBOL(__pte_table_size); unsigned long __pmd_table_size; @@ -471,12 +468,15 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, if (old & PATB_HR) { asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); } else { asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); } + /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index ba71c54..328737b 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -308,9 +308,9 @@ void thread_pkey_regs_init(struct thread_struct *thread) if (static_branch_likely(&pkey_disabled)) return; - write_amr(read_amr() & pkey_amr_uamor_mask); - write_iamr(read_iamr() & pkey_iamr_mask); - write_uamor(read_uamor() & pkey_amr_uamor_mask); + thread->amr = read_amr() & pkey_amr_uamor_mask; + thread->iamr = read_iamr() & pkey_iamr_mask; + thread->uamor = read_uamor() & pkey_amr_uamor_mask; } static inline bool pkey_allows_readwrite(int pkey) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 13cfe41..66577cc 100644 --- a/arch/powerpc/mm/slb.c +++ 
b/arch/powerpc/mm/slb.c @@ -22,6 +22,7 @@ #include <asm/cacheflush.h> #include <asm/smp.h> #include <linux/compiler.h> +#include <linux/context_tracking.h> #include <linux/mm_types.h> #include <asm/udbg.h> @@ -340,3 +341,110 @@ void slb_initialize(void) asm volatile("isync":::"memory"); } + +static void insert_slb_entry(unsigned long vsid, unsigned long ea, + int bpsize, int ssize) +{ + unsigned long flags, vsid_data, esid_data; + enum slb_index index; + int slb_cache_index; + + /* + * We are irq disabled, hence should be safe to access PACA. + */ + index = get_paca()->stab_rr; + + /* + * simple round-robin replacement of slb starting at SLB_NUM_BOLTED. + */ + if (index < (mmu_slb_size - 1)) + index++; + else + index = SLB_NUM_BOLTED; + + get_paca()->stab_rr = index; + + flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; + vsid_data = (vsid << slb_vsid_shift(ssize)) | flags | + ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); + esid_data = mk_esid_data(ea, ssize, index); + + asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data) + : "memory"); + + /* + * Now update slb cache entries + */ + slb_cache_index = get_paca()->slb_cache_ptr; + if (slb_cache_index < SLB_CACHE_ENTRIES) { + /* + * We have space in slb cache for optimized switch_slb(). + * Top 36 bits from esid_data as per ISA + */ + get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28; + get_paca()->slb_cache_ptr++; + } else { + /* + * Our cache is full and the current cache content strictly + * doesn't indicate the active SLB conents. Bump the ptr + * so that switch_slb() will ignore the cache. + */ + get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1; + } +} + +static void handle_multi_context_slb_miss(int context_id, unsigned long ea) +{ + struct mm_struct *mm = current->mm; + unsigned long vsid; + int bpsize; + + /* + * We are always above 1TB, hence use high user segment size. 
+ */ + vsid = get_vsid(context_id, ea, mmu_highuser_ssize); + bpsize = get_slice_psize(mm, ea); + insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize); +} + +void slb_miss_large_addr(struct pt_regs *regs) +{ + enum ctx_state prev_state = exception_enter(); + unsigned long ea = regs->dar; + int context; + + if (REGION_ID(ea) != USER_REGION_ID) + goto slb_bad_addr; + + /* + * Are we beyound what the page table layout supports ? + */ + if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) + goto slb_bad_addr; + + /* Lower address should have been handled by asm code */ + if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT)) + goto slb_bad_addr; + + /* + * consider this as bad access if we take a SLB miss + * on an address above addr limit. + */ + if (ea >= current->mm->context.slb_addr_limit) + goto slb_bad_addr; + + context = get_ea_context(&current->mm->context, ea); + if (!context) + goto slb_bad_addr; + + handle_multi_context_slb_miss(context, ea); + exception_exit(prev_state); + return; + +slb_bad_addr: + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + else + bad_page_fault(regs, ea, SIGSEGV); + exception_exit(prev_state); +} diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 2cf5ef3..a83fbd2 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) */ _GLOBAL(slb_allocate) /* - * check for bad kernel/user address - * (ea & ~REGION_MASK) >= PGTABLE_RANGE + * Check if the address falls within the range of the first context, or + * if we may need to handle multi context. For the first context we + * allocate the slb entry via the fast path below. For large address we + * branch out to C-code and see if additional contexts have been + * allocated. + * The test here is: + * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT) */ - rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4) + rldicr. 
r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4) bne- 8f srdi r9,r3,60 /* get region */ @@ -200,10 +205,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) 5: /* * Handle lpsizes - * r9 is get_paca()->context.low_slices_psize, r11 is index + * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index */ - ld r9,PACALOWSLICESPSIZE(r13) - mr r11,r10 + srdi r11,r10,1 /* index */ + addi r9,r11,PACALOWSLICESPSIZE + lbzx r9,r13,r9 /* r9 is lpsizes[r11] */ + rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */ 6: sldi r11,r11,2 /* index * 4 */ /* Extract the psize and multiply to get an array offset */ diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 23ec2c5..9cd87d1 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -37,32 +37,25 @@ #include <asm/hugetlb.h> static DEFINE_SPINLOCK(slice_convert_lock); -/* - * One bit per slice. We have lower slices which cover 256MB segments - * upto 4G range. That gets us 16 low slices. For the rest we track slices - * in 1TB size. - */ -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); -}; #ifdef DEBUG int _slice_debug = 1; -static void slice_print_mask(const char *label, struct slice_mask mask) +static void slice_print_mask(const char *label, const struct slice_mask *mask) { if (!_slice_debug) return; - pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices); - pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices); + pr_devel("%s low_slice: %*pbl\n", label, + (int)SLICE_NUM_LOW, &mask->low_slices); + pr_devel("%s high_slice: %*pbl\n", label, + (int)SLICE_NUM_HIGH, mask->high_slices); } #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0) #else -static void slice_print_mask(const char *label, struct slice_mask mask) {} +static void slice_print_mask(const char *label, const struct slice_mask *mask) {} #define slice_dbg(fmt...) 
#endif @@ -73,10 +66,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len, unsigned long end = start + len - 1; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); if (start < SLICE_LOW_TOP) { - unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); + unsigned long mend = min(end, + (unsigned long)(SLICE_LOW_TOP - 1)); ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) - (1u << GET_LOW_SLICE_INDEX(start)); @@ -113,11 +108,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) unsigned long start = slice << SLICE_HIGH_SHIFT; unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); +#ifdef CONFIG_PPC64 /* Hack, so that each addresses is controlled by exactly one * of the high or low area bitmaps, the first high area starts * at 4GB, not 0 */ if (start == 0) start = SLICE_LOW_TOP; +#endif return !slice_area_is_free(mm, start, end - start); } @@ -128,7 +125,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, unsigned long i; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); for (i = 0; i < SLICE_NUM_LOW; i++) if (!slice_low_has_vma(mm, i)) @@ -142,53 +140,75 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, __set_bit(i, ret->high_slices); } -static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret, - unsigned long high_limit) +#ifdef CONFIG_PPC_BOOK3S_64 +static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) { - unsigned char *hpsizes; - int index, mask_index; - unsigned long i; - u64 lpsizes; - - ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); +#ifdef CONFIG_PPC_64K_PAGES + if (psize == MMU_PAGE_64K) + return &mm->context.mask_64k; +#endif + if (psize == MMU_PAGE_4K) + return &mm->context.mask_4k; +#ifdef 
CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_16M) + return &mm->context.mask_16m; + if (psize == MMU_PAGE_16G) + return &mm->context.mask_16g; +#endif + BUG(); +} +#elif defined(CONFIG_PPC_8xx) +static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) +{ + if (psize == mmu_virtual_psize) + return &mm->context.mask_base_psize; +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_512K) + return &mm->context.mask_512k; + if (psize == MMU_PAGE_8M) + return &mm->context.mask_8m; +#endif + BUG(); +} +#else +#error "Must define the slice masks for page sizes supported by the platform" +#endif - lpsizes = mm->context.low_slices_psize; - for (i = 0; i < SLICE_NUM_LOW; i++) - if (((lpsizes >> (i * 4)) & 0xf) == psize) - ret->low_slices |= 1u << i; +static bool slice_check_range_fits(struct mm_struct *mm, + const struct slice_mask *available, + unsigned long start, unsigned long len) +{ + unsigned long end = start + len - 1; + u64 low_slices = 0; - if (high_limit <= SLICE_LOW_TOP) - return; + if (start < SLICE_LOW_TOP) { + unsigned long mend = min(end, + (unsigned long)(SLICE_LOW_TOP - 1)); - hpsizes = mm->context.high_slices_psize; - for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) { - mask_index = i & 0x1; - index = i >> 1; - if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) - __set_bit(i, ret->high_slices); + low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) + - (1u << GET_LOW_SLICE_INDEX(start)); } -} + if ((low_slices & available->low_slices) != low_slices) + return false; -static int slice_check_fit(struct mm_struct *mm, - struct slice_mask mask, struct slice_mask available) -{ - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - /* - * Make sure we just do bit compare only to the max - * addr limit and not the full bit map size. 
- */ - unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); + if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; + unsigned long i; - bitmap_and(result, mask.high_slices, - available.high_slices, slice_count); + for (i = start_index; i < start_index + count; i++) { + if (!test_bit(i, available->high_slices)) + return false; + } + } - return (mask.low_slices & available.low_slices) == mask.low_slices && - bitmap_equal(result, mask.high_slices, slice_count); + return true; } static void slice_flush_segments(void *parm) { +#ifdef CONFIG_PPC64 struct mm_struct *mm = parm; unsigned long flags; @@ -200,40 +220,64 @@ static void slice_flush_segments(void *parm) local_irq_save(flags); slb_flush_and_rebolt(); local_irq_restore(flags); +#endif } -static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) +static void slice_convert(struct mm_struct *mm, + const struct slice_mask *mask, int psize) { int index, mask_index; /* Write the new slice psize bits */ - unsigned char *hpsizes; - u64 lpsizes; + unsigned char *hpsizes, *lpsizes; + struct slice_mask *psize_mask, *old_mask; unsigned long i, flags; + int old_psize; slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); slice_print_mask(" mask", mask); + psize_mask = slice_mask_for_size(mm, psize); + /* We need to use a spinlock here to protect against * concurrent 64k -> 4k demotion ... 
*/ spin_lock_irqsave(&slice_convert_lock, flags); lpsizes = mm->context.low_slices_psize; - for (i = 0; i < SLICE_NUM_LOW; i++) - if (mask.low_slices & (1u << i)) - lpsizes = (lpsizes & ~(0xful << (i * 4))) | - (((unsigned long)psize) << (i * 4)); + for (i = 0; i < SLICE_NUM_LOW; i++) { + if (!(mask->low_slices & (1u << i))) + continue; + + mask_index = i & 0x1; + index = i >> 1; - /* Assign the value back */ - mm->context.low_slices_psize = lpsizes; + /* Update the slice_mask */ + old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf; + old_mask = slice_mask_for_size(mm, old_psize); + old_mask->low_slices &= ~(1u << i); + psize_mask->low_slices |= 1u << i; + + /* Update the sizes array */ + lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) | + (((unsigned long)psize) << (mask_index * 4)); + } hpsizes = mm->context.high_slices_psize; for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) { + if (!test_bit(i, mask->high_slices)) + continue; + mask_index = i & 0x1; index = i >> 1; - if (test_bit(i, mask.high_slices)) - hpsizes[index] = (hpsizes[index] & - ~(0xf << (mask_index * 4))) | + + /* Update the slice_mask */ + old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf; + old_mask = slice_mask_for_size(mm, old_psize); + __clear_bit(i, old_mask->high_slices); + __set_bit(i, psize_mask->high_slices); + + /* Update the sizes array */ + hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) | (((unsigned long)psize) << (mask_index * 4)); } @@ -254,26 +298,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz * 'available' slice_mark. 
*/ static bool slice_scan_available(unsigned long addr, - struct slice_mask available, - int end, - unsigned long *boundary_addr) + const struct slice_mask *available, + int end, unsigned long *boundary_addr) { unsigned long slice; if (addr < SLICE_LOW_TOP) { slice = GET_LOW_SLICE_INDEX(addr); *boundary_addr = (slice + end) << SLICE_LOW_SHIFT; - return !!(available.low_slices & (1u << slice)); + return !!(available->low_slices & (1u << slice)); } else { slice = GET_HIGH_SLICE_INDEX(addr); *boundary_addr = (slice + end) ? ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; - return !!test_bit(slice, available.high_slices); + return !!test_bit(slice, available->high_slices); } } static unsigned long slice_find_area_bottomup(struct mm_struct *mm, unsigned long len, - struct slice_mask available, + const struct slice_mask *available, int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); @@ -319,7 +362,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm, static unsigned long slice_find_area_topdown(struct mm_struct *mm, unsigned long len, - struct slice_mask available, + const struct slice_mask *available, int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); @@ -377,7 +420,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, - struct slice_mask mask, int psize, + const struct slice_mask *mask, int psize, int topdown, unsigned long high_limit) { if (topdown) @@ -386,23 +429,33 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, return slice_find_area_bottomup(mm, len, mask, psize, high_limit); } -static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src) +static inline void slice_copy_mask(struct slice_mask *dst, + const struct slice_mask *src) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - - 
dst->low_slices |= src->low_slices; - bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); + dst->low_slices = src->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH); } -static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src) +static inline void slice_or_mask(struct slice_mask *dst, + const struct slice_mask *src1, + const struct slice_mask *src2) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - - dst->low_slices &= ~src->low_slices; + dst->low_slices = src1->low_slices | src2->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH); +} - bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); +static inline void slice_andnot_mask(struct slice_mask *dst, + const struct slice_mask *src1, + const struct slice_mask *src2) +{ + dst->low_slices = src1->low_slices & ~src2->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH); } #ifdef CONFIG_PPC_64K_PAGES @@ -415,10 +468,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, int topdown) { - struct slice_mask mask; struct slice_mask good_mask; struct slice_mask potential_mask; - struct slice_mask compat_mask; + const struct slice_mask *maskp; + const struct slice_mask *compat_maskp = NULL; int fixed = (flags & MAP_FIXED); int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long page_size = 1UL << pshift; @@ -442,23 +495,16 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, } if (high_limit > mm->context.slb_addr_limit) { + /* + * Increasing the slb_addr_limit does not require + * slice mask cache to be recalculated 
because it should + * be already initialised beyond the old address limit. + */ mm->context.slb_addr_limit = high_limit; + on_each_cpu(slice_flush_segments, mm, 1); } - /* - * init different masks - */ - mask.low_slices = 0; - bitmap_zero(mask.high_slices, SLICE_NUM_HIGH); - - /* silence stupid warning */; - potential_mask.low_slices = 0; - bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH); - - compat_mask.low_slices = 0; - bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH); - /* Sanity checks */ BUG_ON(mm->task_size == 0); BUG_ON(mm->context.slb_addr_limit == 0); @@ -481,8 +527,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* First make up a "good" mask of slices that have the right size * already */ - slice_mask_for_size(mm, psize, &good_mask, high_limit); - slice_print_mask(" good_mask", good_mask); + maskp = slice_mask_for_size(mm, psize); /* * Here "good" means slices that are already the right page size, @@ -503,40 +548,47 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * search in good | compat | free, found => convert free. */ -#ifdef CONFIG_PPC_64K_PAGES - /* If we support combo pages, we can allow 64k pages in 4k slices */ - if (psize == MMU_PAGE_64K) { - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); + /* + * If we support combo pages, we can allow 64k pages in 4k slices + * The mask copies could be avoided in most cases here if we had + * a pointer to good mask for the next code to use. 
+ */ + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { + compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); if (fixed) - slice_or_mask(&good_mask, &compat_mask); + slice_or_mask(&good_mask, maskp, compat_maskp); + else + slice_copy_mask(&good_mask, maskp); + } else { + slice_copy_mask(&good_mask, maskp); } -#endif + + slice_print_mask(" good_mask", &good_mask); + if (compat_maskp) + slice_print_mask(" compat_mask", compat_maskp); /* First check hint if it's valid or if we have MAP_FIXED */ if (addr != 0 || fixed) { - /* Build a mask for the requested range */ - slice_range_to_mask(addr, len, &mask); - slice_print_mask(" mask", mask); - /* Check if we fit in the good mask. If we do, we just return, * nothing else to do */ - if (slice_check_fit(mm, mask, good_mask)) { + if (slice_check_range_fits(mm, &good_mask, addr, len)) { slice_dbg(" fits good !\n"); - return addr; + newaddr = addr; + goto return_addr; } } else { /* Now let's see if we can find something in the existing * slices for that size */ - newaddr = slice_find_area(mm, len, good_mask, + newaddr = slice_find_area(mm, len, &good_mask, psize, topdown, high_limit); if (newaddr != -ENOMEM) { /* Found within the good mask, we don't have to setup, * we thus return directly */ slice_dbg(" found area at 0x%lx\n", newaddr); - return newaddr; + goto return_addr; } } /* @@ -544,12 +596,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * empty and thus can be converted */ slice_mask_for_free(mm, &potential_mask, high_limit); - slice_or_mask(&potential_mask, &good_mask); - slice_print_mask(" potential", potential_mask); + slice_or_mask(&potential_mask, &potential_mask, &good_mask); + slice_print_mask(" potential", &potential_mask); - if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) { - slice_dbg(" fits potential !\n"); - goto convert; + if (addr != 0 || fixed) { + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { + slice_dbg(" fits 
potential !\n"); + newaddr = addr; + goto convert; + } } /* If we have MAP_FIXED and failed the above steps, then error out */ @@ -562,46 +617,64 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * anywhere in the good area. */ if (addr) { - addr = slice_find_area(mm, len, good_mask, - psize, topdown, high_limit); - if (addr != -ENOMEM) { - slice_dbg(" found area at 0x%lx\n", addr); - return addr; + newaddr = slice_find_area(mm, len, &good_mask, + psize, topdown, high_limit); + if (newaddr != -ENOMEM) { + slice_dbg(" found area at 0x%lx\n", newaddr); + goto return_addr; } } /* Now let's see if we can find something in the existing slices * for that size plus free slices */ - addr = slice_find_area(mm, len, potential_mask, - psize, topdown, high_limit); + newaddr = slice_find_area(mm, len, &potential_mask, + psize, topdown, high_limit); #ifdef CONFIG_PPC_64K_PAGES - if (addr == -ENOMEM && psize == MMU_PAGE_64K) { + if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) { /* retry the search with 4k-page slices included */ - slice_or_mask(&potential_mask, &compat_mask); - addr = slice_find_area(mm, len, potential_mask, - psize, topdown, high_limit); + slice_or_mask(&potential_mask, &potential_mask, compat_maskp); + newaddr = slice_find_area(mm, len, &potential_mask, + psize, topdown, high_limit); } #endif - if (addr == -ENOMEM) + if (newaddr == -ENOMEM) return -ENOMEM; - slice_range_to_mask(addr, len, &mask); - slice_dbg(" found potential area at 0x%lx\n", addr); - slice_print_mask(" mask", mask); + slice_range_to_mask(newaddr, len, &potential_mask); + slice_dbg(" found potential area at 0x%lx\n", newaddr); + slice_print_mask(" mask", &potential_mask); convert: - slice_andnot_mask(&mask, &good_mask); - slice_andnot_mask(&mask, &compat_mask); - if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) { - slice_convert(mm, mask, psize); + /* + * Try to allocate the context before we do slice convert + * so that we handle the 
context allocation failure gracefully. + */ + if (need_extra_context(mm, newaddr)) { + if (alloc_extended_context(mm, newaddr) < 0) + return -ENOMEM; + } + + slice_andnot_mask(&potential_mask, &potential_mask, &good_mask); + if (compat_maskp && !fixed) + slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp); + if (potential_mask.low_slices || + (SLICE_NUM_HIGH && + !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) { + slice_convert(mm, &potential_mask, psize); if (psize > MMU_PAGE_BASE) on_each_cpu(slice_flush_segments, mm, 1); } - return addr; + return newaddr; +return_addr: + if (need_extra_context(mm, newaddr)) { + if (alloc_extended_context(mm, newaddr) < 0) + return -ENOMEM; + } + return newaddr; } EXPORT_SYMBOL_GPL(slice_get_unmapped_area); @@ -627,94 +700,60 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) { - unsigned char *hpsizes; + unsigned char *psizes; int index, mask_index; - /* - * Radix doesn't use slice, but can get enabled along with MMU_SLICE - */ - if (radix_enabled()) { -#ifdef CONFIG_PPC_64K_PAGES - return MMU_PAGE_64K; -#else - return MMU_PAGE_4K; -#endif - } + VM_BUG_ON(radix_enabled()); + if (addr < SLICE_LOW_TOP) { - u64 lpsizes; - lpsizes = mm->context.low_slices_psize; + psizes = mm->context.low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - return (lpsizes >> (index * 4)) & 0xf; + } else { + psizes = mm->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); } - hpsizes = mm->context.high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 0x1; - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf; + return (psizes[index >> 1] >> (mask_index * 4)) & 0xf; } EXPORT_SYMBOL_GPL(get_slice_psize); -/* - * This is called by hash_page when it needs to do a lazy conversion of - * an address space from real 64K pages to combo 4K pages (typically - * when hitting a non cacheable mapping on a processor 
or hypervisor - * that won't allow them for 64K pages). - * - * This is also called in init_new_context() to change back the user - * psize from whatever the parent context had it set to - * N.B. This may be called before mm->context.id has been set. - * - * This function will only change the content of the {low,high)_slice_psize - * masks, it will not flush SLBs as this shall be handled lazily by the - * caller. - */ -void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) +void slice_init_new_context_exec(struct mm_struct *mm) { - int index, mask_index; - unsigned char *hpsizes; - unsigned long flags, lpsizes; - unsigned int old_psize; - int i; + unsigned char *hpsizes, *lpsizes; + struct slice_mask *mask; + unsigned int psize = mmu_virtual_psize; - slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize); + slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm); - VM_BUG_ON(radix_enabled()); - spin_lock_irqsave(&slice_convert_lock, flags); - - old_psize = mm->context.user_psize; - slice_dbg(" old_psize=%d\n", old_psize); - if (old_psize == psize) - goto bail; + /* + * In the case of exec, use the default limit. In the + * case of fork it is just inherited from the mm being + * duplicated. + */ +#ifdef CONFIG_PPC64 + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; +#else + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; +#endif mm->context.user_psize = psize; - wmb(); + /* + * Set all slice psizes to the default. 
+ */ lpsizes = mm->context.low_slices_psize; - for (i = 0; i < SLICE_NUM_LOW; i++) - if (((lpsizes >> (i * 4)) & 0xf) == old_psize) - lpsizes = (lpsizes & ~(0xful << (i * 4))) | - (((unsigned long)psize) << (i * 4)); - /* Assign the value back */ - mm->context.low_slices_psize = lpsizes; + memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1); hpsizes = mm->context.high_slices_psize; - for (i = 0; i < SLICE_NUM_HIGH; i++) { - mask_index = i & 0x1; - index = i >> 1; - if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize) - hpsizes[index] = (hpsizes[index] & - ~(0xf << (mask_index * 4))) | - (((unsigned long)psize) << (mask_index * 4)); - } - - - - - slice_dbg(" lsps=%lx, hsps=%lx\n", - (unsigned long)mm->context.low_slices_psize, - (unsigned long)mm->context.high_slices_psize); + memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1); - bail: - spin_unlock_irqrestore(&slice_convert_lock, flags); + /* + * Slice mask cache starts zeroed, fill the default size cache. + */ + mask = slice_mask_for_size(mm, psize); + mask->low_slices = ~0UL; + if (SLICE_NUM_HIGH) + bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); } void slice_set_range_psize(struct mm_struct *mm, unsigned long start, @@ -725,7 +764,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, VM_BUG_ON(radix_enabled()); slice_range_to_mask(start, len, &mask); - slice_convert(mm, mask, psize); + slice_convert(mm, &mask, psize); } #ifdef CONFIG_HUGETLB_PAGE @@ -748,33 +787,27 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, * for now as we only use slices with hugetlbfs enabled. This should * be fixed as the generic code gets fixed. 
*/ -int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, +int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { - struct slice_mask mask, available; + const struct slice_mask *maskp; unsigned int psize = mm->context.user_psize; - unsigned long high_limit = mm->context.slb_addr_limit; - if (radix_enabled()) - return 0; + VM_BUG_ON(radix_enabled()); - slice_range_to_mask(addr, len, &mask); - slice_mask_for_size(mm, psize, &available, high_limit); + maskp = slice_mask_for_size(mm, psize); #ifdef CONFIG_PPC_64K_PAGES /* We need to account for 4k slices too */ if (psize == MMU_PAGE_64K) { - struct slice_mask compat_mask; - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); - slice_or_mask(&available, &compat_mask); + const struct slice_mask *compat_maskp; + struct slice_mask available; + + compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); + slice_or_mask(&available, maskp, compat_maskp); + return !slice_check_range_fits(mm, &available, addr, len); } #endif -#if 0 /* too verbose */ - slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n", - mm, addr, len); - slice_print_mask(" mask", mask); - slice_print_mask(" available", available); -#endif - return !slice_check_fit(mm, mask, available); + return !slice_check_range_fits(mm, maskp, addr, len); } #endif diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index e6016f4..a8b178d 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -98,7 +98,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set, rb |= set << PPC_BITLSHIFT(51); rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); @@ -112,13 +112,56 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric) rb = PPC_BIT(53); /* IS = 1 */ rs = pid 
<< PPC_BITLSHIFT(31); prs = 1; /* process scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void __tlbiel_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); +} + +static inline void __tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + +static inline void fixup_tlbie(void) +{ + unsigned long pid = 0; + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + /* * We use 128 set in radix mode and 256 set in hpt mode. 
*/ @@ -151,24 +194,25 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { asm volatile("ptesync": : :"memory"); - __tlbie_pid(pid, ric); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); -} - -static inline void __tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 1, rb, rs, ric, prs, r); + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid(pid, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_PWC: + __tlbie_pid(pid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid(pid, RIC_FLUSH_ALL); + } + fixup_tlbie(); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); } static inline void __tlbiel_va_range(unsigned long start, unsigned long end, @@ -203,22 +247,6 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end, asm volatile("ptesync": : :"memory"); } -static inline void __tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} - static inline void __tlbie_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long 
page_size, unsigned long psize) @@ -237,6 +265,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile("ptesync": : :"memory"); __tlbie_va(va, pid, ap, ric); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -248,6 +277,7 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, if (also_pwc) __tlbie_pid(pid, RIC_FLUSH_PWC); __tlbie_va_range(start, end, pid, page_size, psize); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -311,6 +341,16 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); +static bool mm_needs_flush_escalation(struct mm_struct *mm) +{ + /* + * P9 nest MMU has issues with the page walk cache + * caching PTEs and not flushing them properly when + * RIC = 0 for a PID/LPID invalidate + */ + return atomic_read(&mm->context.copros) != 0; +} + #ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { @@ -321,9 +361,12 @@ void radix__flush_tlb_mm(struct mm_struct *mm) return; preempt_disable(); - if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_TLB); - else + if (!mm_is_thread_local(mm)) { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } else _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } @@ -435,10 +478,14 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } if (full) { - if (local) + if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); - else - _tlbie_pid(pid, RIC_FLUSH_TLB); + } else { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } } else { bool hflush = false; unsigned long hstart, hend; @@ -465,6 +512,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, if (hflush) __tlbie_va_range(hstart, hend, pid, HPAGE_PMD_SIZE, MMU_PAGE_2M); + fixup_tlbie(); asm volatile("eieio; 
tlbsync; ptesync": : :"memory"); } } @@ -548,6 +596,9 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, } if (full) { + if (!local && mm_needs_flush_escalation(mm)) + also_pwc = true; + if (local) _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); else @@ -603,46 +654,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size) -{ - unsigned long rb,rs,prs,r; - unsigned long ap; - unsigned long ric = RIC_FLUSH_TLB; - - ap = mmu_get_ap(radix_get_mmu_psize(page_size)); - rb = gpa & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid_va); - -void radix__flush_tlb_lpid(unsigned long lpid) -{ - unsigned long rb,rs,prs,r; - unsigned long ric = RIC_FLUSH_ALL; - - rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* partition scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid); - void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { @@ -657,7 +668,7 @@ void radix__flush_tlb_all(void) rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ prs = 0; /* partition scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ rs = 1 & ((1UL << 32) - 
1); /* any LPID value to flush guest mappings */ asm volatile("ptesync": : :"memory"); diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 881ebd5..87d71dd 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, unsigned int psize; int ssize; real_pte_t rpte; - int i; + int i, offset; i = batch->index; @@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, psize = get_slice_psize(mm, addr); /* Mask the address for the correct page size */ addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); + if (unlikely(psize == MMU_PAGE_16G)) + offset = PTRS_PER_PUD; + else + offset = PTRS_PER_PMD; #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ @@ -78,20 +82,21 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, * support 64k pages, this might be different from the * hardware page size encoded in the slice table. */ addr &= PAGE_MASK; + offset = PTRS_PER_PTE; } /* Build full vaddr */ if (!is_kernel_addr(addr)) { ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); + vsid = get_user_vsid(&mm->context, addr, ssize); } else { vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } WARN_ON(vsid == 0); vpn = hpt_vpn(addr, vsid, ssize); - rpte = __real_pte(__pte(pte), ptep); + rpte = __real_pte(__pte(pte), ptep, offset); /* * Check if we have an active batch on this CPU. 
If not, just diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 872d1f6..a9636d8 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -327,6 +327,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; + case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ + PPC_LWZ_OFFS(r_A, r_skb, K); + break; case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); break; diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 44d67b1..2668cc4 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -208,7 +208,7 @@ prepare_cached_spu_info(struct spu *spu, unsigned long objectId) /* Create cached_info and set spu_info[spu->number] to point to it. * spu->number is a system-wide value, not a per-node value. 
*/ - info = kzalloc(sizeof(struct cached_info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { printk(KERN_ERR "SPU_PROF: " "%s, line %d: create vma_map failed\n", diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c index c579b16..f40e373 100644 --- a/arch/powerpc/oprofile/cell/vma_map.c +++ b/arch/powerpc/oprofile/cell/vma_map.c @@ -69,8 +69,8 @@ vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma, unsigned int size, unsigned int offset, unsigned int guard_ptr, unsigned int guard_val) { - struct vma_to_fileoffset_map *new = - kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL); + struct vma_to_fileoffset_map *new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) { printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n", __func__, __LINE__); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index f89bbd5..e032aef 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -198,6 +198,10 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); + + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && + is_kernel_addr(mfspr(SPRN_SDAR))) + *addrp = 0; } static bool regs_sihv(struct pt_regs *regs) @@ -457,6 +461,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) /* invalid entry */ continue; + /* + * BHRB rolling buffer could very much contain the kernel + * addresses at this point. Check the privileges before + * exporting it to userspace (avoid exposure of regions + * where we could have speculative execution) + */ + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && + is_kernel_addr(addr)) + continue; + /* Branches are read most recent first (ie. mfbhrb 0 is * the most recent branch). 
* There are two types of valid entries: @@ -1226,6 +1240,7 @@ static void power_pmu_disable(struct pmu *pmu) */ write_mmcr0(cpuhw, val); mb(); + isync(); /* * Disable instruction sampling if it was enabled @@ -1234,12 +1249,26 @@ static void power_pmu_disable(struct pmu *pmu) mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mb(); + isync(); } cpuhw->disabled = 1; cpuhw->n_added = 0; ebb_switch_out(mmcr0); + +#ifdef CONFIG_PPC64 + /* + * These are readable by userspace, may contain kernel + * addresses and are not switched by context switch, so clear + * them now to avoid leaking anything to userspace in general + * including to another process. + */ + if (ppmu->flags & PPMU_ARCH_207S) { + mtspr(SPRN_SDAR, 0); + mtspr(SPRN_SIAR, 0); + } +#endif } local_irq_restore(flags); @@ -1810,6 +1839,18 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } +static bool is_event_blacklisted(u64 ev) +{ + int i; + + for (i=0; i < ppmu->n_blacklist_ev; i++) { + if (ppmu->blacklist_ev[i] == ev) + return true; + } + + return false; +} + static int power_pmu_event_init(struct perf_event *event) { u64 ev; @@ -1835,15 +1876,24 @@ static int power_pmu_event_init(struct perf_event *event) ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return -EOPNOTSUPP; + + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) + return -EINVAL; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) return err; + + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) + return -EINVAL; break; case PERF_TYPE_RAW: ev = event->attr.config; + + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) + return -EINVAL; break; default: return -ENOENT; diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index e99c6bf..7de344b 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -69,3 +69,31 @@ 
EVENT(PM_BR_CMPL_ALT, 0x10012) EVENT(PM_BR_2PATH, 0x20036) /* ALternate branch event that are not strongly biased */ EVENT(PM_BR_2PATH_ALT, 0x40036) + +/* Blacklisted events */ +EVENT(PM_MRK_ST_DONE_L2, 0x10134) +EVENT(PM_RADIX_PWC_L1_HIT, 0x1f056) +EVENT(PM_FLOP_CMPL, 0x100f4) +EVENT(PM_MRK_NTF_FIN, 0x20112) +EVENT(PM_RADIX_PWC_L2_HIT, 0x2d024) +EVENT(PM_IFETCH_THROTTLE, 0x3405e) +EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER, 0x3e15c) +EVENT(PM_RADIX_PWC_L3_HIT, 0x3f056) +EVENT(PM_RUN_CYC_SMT2_MODE, 0x3006c) +EVENT(PM_TM_TX_PASS_RUN_INST, 0x4e014) +EVENT(PM_DISP_HELD_SYNC_HOLD, 0x4003c) +EVENT(PM_DTLB_MISS_16G, 0x1c058) +EVENT(PM_DERAT_MISS_2M, 0x1c05a) +EVENT(PM_DTLB_MISS_2M, 0x1c05c) +EVENT(PM_MRK_DTLB_MISS_1G, 0x1d15c) +EVENT(PM_DTLB_MISS_4K, 0x2c056) +EVENT(PM_DERAT_MISS_1G, 0x2c05a) +EVENT(PM_MRK_DERAT_MISS_2M, 0x2d152) +EVENT(PM_MRK_DTLB_MISS_4K, 0x2d156) +EVENT(PM_MRK_DTLB_MISS_16G, 0x2d15e) +EVENT(PM_DTLB_MISS_64K, 0x3c056) +EVENT(PM_MRK_DERAT_MISS_1G, 0x3d152) +EVENT(PM_MRK_DTLB_MISS_64K, 0x3d156) +EVENT(PM_DTLB_MISS_16M, 0x4c056) +EVENT(PM_DTLB_MISS_1G, 0x4c05a) +EVENT(PM_MRK_DTLB_MISS_16M, 0x4c15e) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 24b5b5b..2ca0b33 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -101,9 +101,45 @@ enum { #define POWER9_MMCRA_IFM2 0x0000000080000000UL #define POWER9_MMCRA_IFM3 0x00000000C0000000UL +/* Nasty Power9 specific hack */ +#define PVR_POWER9_CUMULUS 0x00002000 + /* PowerISA v2.07 format attribute structure*/ extern struct attribute_group isa207_pmu_format_group; +int p9_dd21_bl_ev[] = { + PM_MRK_ST_DONE_L2, + PM_RADIX_PWC_L1_HIT, + PM_FLOP_CMPL, + PM_MRK_NTF_FIN, + PM_RADIX_PWC_L2_HIT, + PM_IFETCH_THROTTLE, + PM_MRK_L2_TM_ST_ABORT_SISTER, + PM_RADIX_PWC_L3_HIT, + PM_RUN_CYC_SMT2_MODE, + PM_TM_TX_PASS_RUN_INST, + PM_DISP_HELD_SYNC_HOLD, +}; + +int p9_dd22_bl_ev[] = { + PM_DTLB_MISS_16G, + PM_DERAT_MISS_2M, + PM_DTLB_MISS_2M, + PM_MRK_DTLB_MISS_1G, + 
PM_DTLB_MISS_4K, + PM_DERAT_MISS_1G, + PM_MRK_DERAT_MISS_2M, + PM_MRK_DTLB_MISS_4K, + PM_MRK_DTLB_MISS_16G, + PM_DTLB_MISS_64K, + PM_MRK_DERAT_MISS_1G, + PM_MRK_DTLB_MISS_64K, + PM_DISP_HELD_SYNC_HOLD, + PM_DTLB_MISS_16M, + PM_DTLB_MISS_1G, + PM_MRK_DTLB_MISS_16M, +}; + /* Table of alternatives, sorted by column 0 */ static const unsigned int power9_event_alternatives[][MAX_ALT] = { { PM_INST_DISP, PM_INST_DISP_ALT }, @@ -446,12 +482,24 @@ static struct power_pmu power9_pmu = { static int __init init_power9_pmu(void) { int rc = 0; + unsigned int pvr = mfspr(SPRN_PVR); /* Comes from cpu_specs[] */ if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) return -ENODEV; + /* Blacklist events */ + if (!(pvr & PVR_POWER9_CUMULUS)) { + if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 1)) { + power9_pmu.blacklist_ev = p9_dd21_bl_ev; + power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev); + } else if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 2)) { + power9_pmu.blacklist_ev = p9_dd22_bl_ev; + power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev); + } + } + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { /* * Since PM_INST_CMPL may not provide right counts in all diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c index d50417e..96aaae6 100644 --- a/arch/powerpc/platforms/4xx/msi.c +++ b/arch/powerpc/platforms/4xx/msi.c @@ -223,7 +223,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev) dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); - msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL); + msi = kzalloc(sizeof(*msi), GFP_KERNEL); if (!msi) { dev_err(&dev->dev, "No memory for MSI structure\n"); return -ENOMEM; @@ -241,7 +241,8 @@ static int ppc4xx_msi_probe(struct platform_device *dev) if (!msi_irqs) return -ENODEV; - if (ppc4xx_setup_pcieh_hw(dev, res, msi)) + err = ppc4xx_setup_pcieh_hw(dev, res, msi); + if (err) goto error_out; err = ppc4xx_msi_init_allocator(dev, msi); diff --git 
a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c index 85d9e37..69d9f60 100644 --- a/arch/powerpc/platforms/4xx/ocm.c +++ b/arch/powerpc/platforms/4xx/ocm.c @@ -339,7 +339,7 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align, if (IS_ERR_VALUE(offset)) continue; - ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL); + ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL); if (!ocm_blk) { printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block"); rh_free(ocm_reg->rh, offset); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index e1274db..2188d69 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -217,13 +217,7 @@ void __noreturn mpc8xx_restart(char *cmd) static void cpm_cascade(struct irq_desc *desc) { - struct irq_chip *chip = irq_desc_get_chip(desc); - int cascade_irq = cpm_get_irq(); - - if (cascade_irq >= 0) - generic_handle_irq(cascade_irq); - - chip->irq_eoi(&desc->irq_data); + generic_handle_irq(cpm_get_irq()); } /* Initialize the internal interrupt controllers. 
The number of diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a429d85..5a8b1bf 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -326,6 +326,7 @@ config PPC_BOOK3E_MMU config PPC_MM_SLICES bool default y if PPC_BOOK3S_64 + default y if PPC_8xx && HUGETLB_PAGE default n config PPC_HAVE_PMU_SUPPORT diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 6ea3f24..326d34e 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -342,7 +342,7 @@ static int axon_msi_probe(struct platform_device *device) pr_devel("axon_msi: setting up dn %pOF\n", dn); - msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); + msic = kzalloc(sizeof(*msic), GFP_KERNEL); if (!msic) { printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n", dn); diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index d1e61e2..1200d0d 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -133,7 +133,7 @@ int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data) pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n", np); - priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { pr_err("SPIDERPCI-IOWA:" "Can't allocate struct spiderpci_iowa_private"); diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c index b847e94..d9de848 100644 --- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -36,7 +36,7 @@ int spu_alloc_lscsa(struct spu_state *csa) struct spu_lscsa *lscsa; unsigned char *p; - lscsa = vzalloc(sizeof(struct spu_lscsa)); + lscsa = vzalloc(sizeof(*lscsa)); if (!lscsa) return -ENOMEM; csa->lscsa = lscsa; diff --git 
a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index ade8382..7206f3f 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -132,7 +132,7 @@ static void __flipper_quiesce(void __iomem *io_base) out_be32(io_base + FLIPPER_ICR, 0xffffffff); } -struct irq_domain * __init flipper_pic_init(struct device_node *np) +static struct irq_domain * __init flipper_pic_init(struct device_node *np) { struct device_node *pi; struct irq_domain *irq_domain = NULL; diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c index 7feb325..5c7e7ce 100644 --- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -169,7 +169,7 @@ static int ug_getc(void) /* * Transmits a character. */ -void ug_udbg_putc(char ch) +static void ug_udbg_putc(char ch) { ug_putc(ch); } diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 3408f31..fa89f30 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -492,7 +492,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) const u32 *psteps, *prate, *addrp; u32 steps; - host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL); + host = kzalloc(sizeof(*host), GFP_KERNEL); if (host == NULL) { printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n", np); diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index df3c93b..e0462fe 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -643,7 +643,7 @@ static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata, while (length >= 12) { /* Allocate a structure */ - func = kzalloc(sizeof(struct pmf_function), GFP_KERNEL); + func 
= kzalloc(sizeof(*func), GFP_KERNEL); if (func == NULL) goto bail; kref_init(&func->ref); @@ -719,7 +719,7 @@ int pmf_register_driver(struct device_node *np, return -EBUSY; } - dev = kzalloc(sizeof(struct pmf_device), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) { DBG("pmf: no memory !\n"); return -ENOMEM; diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 6c9d519..703a350 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -16,5 +16,4 @@ obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o -obj-$(CONFIG_PPC_FTW) += nx-ftw.o obj-$(CONFIG_OCXL_BASE) += ocxl.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 33c86c1..ddfc354 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1425,11 +1425,8 @@ static int pnv_eeh_get_pe(struct pci_controller *hose, dev_pe = dev_pe->parent; while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { int ret; - int active_flags = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE); - ret = eeh_ops->get_state(dev_pe, NULL); - if (ret <= 0 || (ret & active_flags) == active_flags) { + if (ret <= 0 || eeh_state_active(ret)) { dev_pe = dev_pe->parent; continue; } @@ -1463,7 +1460,6 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) struct eeh_pe *phb_pe, *parent_pe; __be64 frozen_pe_no; __be16 err_type, severity; - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); long rc; int state, ret = EEH_NEXT_ERR_NONE; @@ -1626,8 +1622,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) /* Frozen parent PE ? 
*/ state = eeh_ops->get_state(parent_pe, NULL); - if (state > 0 && - (state & active_flags) != active_flags) + if (state > 0 && !eeh_state_active(state)) *pe = parent_pe; /* Next parent level */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 5b2ca71..d9e366b 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -24,6 +24,7 @@ #include <asm/code-patching.h> #include <asm/smp.h> #include <asm/runlatch.h> +#include <asm/dbell.h> #include "powernv.h" #include "subcore.h" @@ -387,6 +388,82 @@ void power9_idle(void) power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +/* + * This is used in working around bugs in thread reconfiguration + * on POWER9 (at least up to Nimbus DD2.2) relating to transactional + * memory and the way that XER[SO] is checkpointed. + * This function forces the core into SMT4 in order by asking + * all other threads not to stop, and sending a message to any + * that are in a stop state. + * Must be called with preemption disabled. + * + * DO NOT call this unless cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG) is + * true; otherwise this function will hang the system, due to the + * optimization in power9_idle_stop. 
+ */ +void pnv_power9_force_smt4_catch(void) +{ + int cpu, cpu0, thr; + int awake_threads = 1; /* this thread is awake */ + int poke_threads = 0; + int need_awake = threads_per_core; + + cpu = smp_processor_id(); + cpu0 = cpu & ~(threads_per_core - 1); + for (thr = 0; thr < threads_per_core; ++thr) { + if (cpu != cpu0 + thr) + atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); + } + /* order setting dont_stop vs testing requested_psscr */ + mb(); + for (thr = 0; thr < threads_per_core; ++thr) { + if (!paca_ptrs[cpu0+thr]->requested_psscr) + ++awake_threads; + else + poke_threads |= (1 << thr); + } + + /* If at least 3 threads are awake, the core is in SMT4 already */ + if (awake_threads < need_awake) { + /* We have to wake some threads; we'll use msgsnd */ + for (thr = 0; thr < threads_per_core; ++thr) { + if (poke_threads & (1 << thr)) { + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, + paca_ptrs[cpu0+thr]->hw_cpu_id); + } + } + /* now spin until at least 3 threads are awake */ + do { + for (thr = 0; thr < threads_per_core; ++thr) { + if ((poke_threads & (1 << thr)) && + !paca_ptrs[cpu0+thr]->requested_psscr) { + ++awake_threads; + poke_threads &= ~(1 << thr); + } + } + } while (awake_threads < need_awake); + } +} +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); + +void pnv_power9_force_smt4_release(void) +{ + int cpu, cpu0, thr; + + cpu = smp_processor_id(); + cpu0 = cpu & ~(threads_per_core - 1); + + /* clear all the dont_stop flags */ + for (thr = 0; thr < threads_per_core; ++thr) { + if (cpu != cpu0 + thr) + atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); + } +} +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + #ifdef CONFIG_HOTPLUG_CPU static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) { diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 0a253b6..69a4f9e 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ 
b/arch/powerpc/platforms/powernv/npu-dma.c @@ -410,6 +410,11 @@ struct npu_context { void *priv; }; +struct mmio_atsd_reg { + struct npu *npu; + int reg; +}; + /* * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC * if none are available. @@ -419,7 +424,7 @@ static int get_mmio_atsd_reg(struct npu *npu) int i; for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_and_set_bit(i, &npu->mmio_atsd_usage)) + if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) return i; } @@ -428,86 +433,90 @@ static int get_mmio_atsd_reg(struct npu *npu) static void put_mmio_atsd_reg(struct npu *npu, int reg) { - clear_bit(reg, &npu->mmio_atsd_usage); + clear_bit_unlock(reg, &npu->mmio_atsd_usage); } /* MMIO ATSD register offsets */ #define XTS_ATSD_AVA 1 #define XTS_ATSD_STAT 2 -static int mmio_launch_invalidate(struct npu *npu, unsigned long launch, - unsigned long va) +static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, + unsigned long launch, unsigned long va) { - int mmio_atsd_reg; - - do { - mmio_atsd_reg = get_mmio_atsd_reg(npu); - cpu_relax(); - } while (mmio_atsd_reg < 0); + struct npu *npu = mmio_atsd_reg->npu; + int reg = mmio_atsd_reg->reg; __raw_writeq(cpu_to_be64(va), - npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA); + npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); eieio(); - __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]); - - return mmio_atsd_reg; + __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]); } -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) +static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], + unsigned long pid, bool flush) { + int i; unsigned long launch; - /* IS set to invalidate matching PID */ - launch = PPC_BIT(12); + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; + + /* IS set to invalidate matching PID */ + launch = PPC_BIT(12); - /* PRS set to process-scoped */ - launch |= PPC_BIT(13); 
+ /* PRS set to process-scoped */ + launch |= PPC_BIT(13); - /* AP */ - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); + /* AP */ + launch |= (u64) + mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); + /* PID */ + launch |= pid << PPC_BITLSHIFT(38); - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); - /* Invalidating the entire process doesn't use a va */ - return mmio_launch_invalidate(npu, launch, 0); + /* Invalidating the entire process doesn't use a va */ + mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); + } } -static int mmio_invalidate_va(struct npu *npu, unsigned long va, - unsigned long pid, bool flush) +static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], + unsigned long va, unsigned long pid, bool flush) { + int i; unsigned long launch; - /* IS set to invalidate target VA */ - launch = 0; + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; - /* PRS set to process scoped */ - launch |= PPC_BIT(13); + /* IS set to invalidate target VA */ + launch = 0; - /* AP */ - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); + /* PRS set to process scoped */ + launch |= PPC_BIT(13); - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); + /* AP */ + launch |= (u64) + mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); + /* PID */ + launch |= pid << PPC_BITLSHIFT(38); - return mmio_launch_invalidate(npu, launch, va); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + + mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); + } } #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) -struct mmio_atsd_reg { - struct npu *npu; - int reg; -}; - static void mmio_invalidate_wait( - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) + struct mmio_atsd_reg 
mmio_atsd_reg[NV_MAX_NPUS]) { struct npu *npu; int i, reg; @@ -522,16 +531,67 @@ static void mmio_invalidate_wait( reg = mmio_atsd_reg[i].reg; while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) cpu_relax(); + } +} - put_mmio_atsd_reg(npu, reg); +/* + * Acquires all the address translation shootdown (ATSD) registers required to + * launch an ATSD on all links this npu_context is active on. + */ +static void acquire_atsd_reg(struct npu_context *npu_context, + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) +{ + int i, j; + struct npu *npu; + struct pci_dev *npdev; + struct pnv_phb *nphb; + for (i = 0; i <= max_npu2_index; i++) { + mmio_atsd_reg[i].reg = -1; + for (j = 0; j < NV_MAX_LINKS; j++) { + /* + * There are no ordering requirements with respect to + * the setup of struct npu_context, but to ensure + * consistent behaviour we need to ensure npdev[][] is + * only read once. + */ + npdev = READ_ONCE(npu_context->npdev[i][j]); + if (!npdev) + continue; + + nphb = pci_bus_to_host(npdev->bus)->private_data; + npu = &nphb->npu; + mmio_atsd_reg[i].npu = npu; + mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); + while (mmio_atsd_reg[i].reg < 0) { + mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); + cpu_relax(); + } + break; + } + } +} + +/* + * Release previously acquired ATSD registers. To avoid deadlocks the registers + * must be released in the same order they were acquired above in + * acquire_atsd_reg. + */ +static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) +{ + int i; + + for (i = 0; i <= max_npu2_index; i++) { /* - * The GPU requires two flush ATSDs to ensure all entries have - * been flushed. We use PID 0 as it will never be used for a - * process on the GPU. + * We can't rely on npu_context->npdev[][] being the same here + * as when acquire_atsd_reg() was called, hence we use the + * values stored in mmio_atsd_reg during the acquire phase + * rather than re-reading npdev[][]. 
*/ - if (flush) - mmio_invalidate_pid(npu, 0, true); + if (mmio_atsd_reg[i].reg < 0) + continue; + + put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); } } @@ -542,10 +602,6 @@ static void mmio_invalidate_wait( static void mmio_invalidate(struct npu_context *npu_context, int va, unsigned long address, bool flush) { - int i, j; - struct npu *npu; - struct pnv_phb *nphb; - struct pci_dev *npdev; struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; @@ -561,37 +617,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, * Loop over all the NPUs this process is active on and launch * an invalidate. */ - for (i = 0; i <= max_npu2_index; i++) { - mmio_atsd_reg[i].reg = -1; - for (j = 0; j < NV_MAX_LINKS; j++) { - npdev = npu_context->npdev[i][j]; - if (!npdev) - continue; - - nphb = pci_bus_to_host(npdev->bus)->private_data; - npu = &nphb->npu; - mmio_atsd_reg[i].npu = npu; - - if (va) - mmio_atsd_reg[i].reg = - mmio_invalidate_va(npu, address, pid, - flush); - else - mmio_atsd_reg[i].reg = - mmio_invalidate_pid(npu, pid, flush); - - /* - * The NPU hardware forwards the shootdown to all GPUs - * so we only have to launch one shootdown per NPU. - */ - break; - } + acquire_atsd_reg(npu_context, mmio_atsd_reg); + if (va) + mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); + else + mmio_invalidate_pid(mmio_atsd_reg, pid, flush); + + mmio_invalidate_wait(mmio_atsd_reg); + if (flush) { + /* + * The GPU requires two flush ATSDs to ensure all entries have + * been flushed. We use PID 0 as it will never be used for a + * process on the GPU. 
+ */ + mmio_invalidate_pid(mmio_atsd_reg, 0, true); + mmio_invalidate_wait(mmio_atsd_reg); + mmio_invalidate_pid(mmio_atsd_reg, 0, true); + mmio_invalidate_wait(mmio_atsd_reg); } - - mmio_invalidate_wait(mmio_atsd_reg, flush); - if (flush) - /* Wait for the flush to complete */ - mmio_invalidate_wait(mmio_atsd_reg, false); + release_atsd_reg(mmio_atsd_reg); } static void pnv_npu2_mn_release(struct mmu_notifier *mn, @@ -680,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, /* No nvlink associated with this GPU device */ return ERR_PTR(-ENODEV); + nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); + if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", + &nvlink_index))) + return ERR_PTR(-ENODEV); + if (!mm || mm->context.id == 0) { /* * Kernel thread contexts are not supported and context id 0 is @@ -707,26 +756,40 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, */ npu_context = mm->context.npu_context; if (!npu_context) { + rc = -ENOMEM; npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); - if (!npu_context) - return ERR_PTR(-ENOMEM); + if (npu_context) { + kref_init(&npu_context->kref); + npu_context->mm = mm; + npu_context->mn.ops = &nv_nmmu_notifier_ops; + rc = __mmu_notifier_register(&npu_context->mn, mm); + } + + if (rc) { + kfree(npu_context); + opal_npu_destroy_context(nphb->opal_id, mm->context.id, + PCI_DEVID(gpdev->bus->number, + gpdev->devfn)); + return ERR_PTR(rc); + } mm->context.npu_context = npu_context; - npu_context->mm = mm; - npu_context->mn.ops = &nv_nmmu_notifier_ops; - __mmu_notifier_register(&npu_context->mn, mm); - kref_init(&npu_context->kref); } else { - kref_get(&npu_context->kref); + WARN_ON(!kref_get_unless_zero(&npu_context->kref)); } npu_context->release_cb = cb; npu_context->priv = priv; - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - 
return ERR_PTR(-ENODEV); - npu_context->npdev[npu->index][nvlink_index] = npdev; + + /* + * npdev is a pci_dev pointer set up by the PCI code. We assign it to + * npdev[][] to indicate to the mmu notifiers that an invalidation + * should also be sent over this nvlink. The notifiers don't use any + * other fields in npu_context, so we just need to ensure that when they + * dereference npu_context->npdev[][] it is either a valid pointer or + * NULL. + */ + WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); if (!nphb->npu.nmmu_flush) { /* @@ -778,7 +841,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", &nvlink_index))) return; - npu_context->npdev[npu->index][nvlink_index] = NULL; + WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); kref_put(&npu_context->kref, pnv_npu2_release_context); diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 2fa3ac8..1cb0b89 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -418,12 +418,12 @@ static int alloc_image_buf(char *buffer, size_t count) void *addr; int size; - if (count < sizeof(struct image_header_t)) { + if (count < sizeof(image_header)) { pr_warn("FLASH: Invalid candidate image\n"); return -EINVAL; } - memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t)); + memcpy(&image_header, (void *)buffer, sizeof(image_header)); image_data.size = be32_to_cpu(image_header.size); pr_debug("FLASH: Candidate image size = %u\n", image_data.size); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index c9e1a4f..4efc95b 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -314,7 +314,7 @@
static int opal_handle_hmi_event(struct notifier_block *nb, pr_err("HMI: out of memory, Opal message event not handled\n"); return -ENOMEM; } - memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent)); + memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt)); spin_lock_irqsave(&opal_hmi_evt_lock, flags); list_add(&msg_node->list, &opal_hmi_evt_list); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index dd4c9b8..2a14fda 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -110,11 +110,11 @@ static int imc_get_mem_addr_nest(struct device_node *node, if (nr_chips <= 0) return -ENODEV; - base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL); + base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL); if (!base_addr_arr) return -ENOMEM; - chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL); + chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL); if (!chipid_arr) return -ENOMEM; @@ -125,8 +125,8 @@ static int imc_get_mem_addr_nest(struct device_node *node, nr_chips)) goto error; - pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info), - GFP_KERNEL); + pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info), + GFP_KERNEL); if (!pmu_ptr->mem_info) goto error; @@ -161,7 +161,7 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) u32 offset; /* memory for pmu */ - pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL); + pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL); if (!pmu_ptr) return -ENOMEM; @@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void) const struct cpumask *l_cpumask; get_online_cpus(); - for_each_online_node(nid) { + for_each_node_with_cpus(nid) { l_cpumask = cpumask_of_node(nid); - cpu = cpumask_first(l_cpumask); + cpu = cpumask_first_and(l_cpumask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + continue; opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, 
get_hard_smp_processor_id(cpu)); } diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index 8ddc1ac..dcb42bc 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -112,7 +112,7 @@ static int opal_memory_err_event(struct notifier_block *nb, "handled\n"); return -ENOMEM; } - memcpy(&msg_node->msg, msg, sizeof(struct opal_msg)); + memcpy(&msg_node->msg, msg, sizeof(msg_node->msg)); spin_lock_irqsave(&opal_mem_err_lock, flags); list_add(&msg_node->list, &opal_memory_err_list); diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 9db4398..ba2ff06 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -59,6 +59,10 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) if (rc == OPAL_BUSY_EVENT) opal_poll_events(NULL); } + + if (rc) + return -EIO; + *index += count; return count; } diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c index 7313b7f..74986b3 100644 --- a/arch/powerpc/platforms/powernv/opal-psr.c +++ b/arch/powerpc/platforms/powernv/opal-psr.c @@ -136,7 +136,7 @@ void __init opal_psr_init(void) return; } - psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr), + psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs), GFP_KERNEL); if (!psr_attrs) return; diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index 7e5a235..541c9ea 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -166,13 +166,13 @@ void __init opal_sensor_groups_init(void) if (!nr_attrs) continue; - sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr), + sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs), 
GFP_KERNEL); if (!sgs[i].sgattrs) goto out_sgs_sgattrs; sgs[i].sg.attrs = kcalloc(nr_attrs + 1, - sizeof(struct attribute *), + sizeof(*sgs[i].sg.attrs), GFP_KERNEL); if (!sgs[i].sg.attrs) { diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 1b2936b..3da30c2 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -323,3 +323,5 @@ OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR); OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP); OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); +OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); +OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 81c0a94..22d5e11 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -46,7 +46,7 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count) __func__, dev); return SCOM_MAP_INVALID; } - m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL); + m = kmalloc(sizeof(*m), GFP_KERNEL); if (!m) return NULL; m->chip = be32_to_cpup(gcid); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c151827..516e23d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -490,9 +490,12 @@ void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) * opal to trigger checkstop explicitly for error analysis. * The FSP PRD component would have already got notified * about this error through other channels. + * 4. We are running on a newer skiboot that by default does + * not cause a checkstop, drops us back to the kernel to + * extract context and state at the time of the error. 
*/ - ppc_md.restart(NULL); + panic(msg); } int opal_machine_check(struct pt_regs *regs) diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index 94498a0..cee003d 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -16,14 +16,6 @@ #include "pci.h" -struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - - return of_node_get(hose->dn); -} -EXPORT_SYMBOL(pnv_pci_get_phb_node); - int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) { struct pci_controller *hose = pci_bus_to_host(dev->bus); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 496e476..3f9c69d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) s64 rc; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return -ENODEV;; + return -ENODEV; pe = &phb->ioda.pe_array[pdn->pe_number]; if (pe->tce_bypass_enabled) { @@ -2681,14 +2681,23 @@ static struct pnv_ioda_pe *gpe_table_group_to_npe( static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, int num, struct iommu_table *tbl) { + struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); + int num2 = (num == 0) ? 
1 : 0; long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); if (ret) return ret; - ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl); - if (ret) + if (table_group->tables[num2]) + pnv_npu_unset_window(npe, num2); + + ret = pnv_npu_set_window(npe, num, tbl); + if (ret) { pnv_pci_ioda2_unset_window(table_group, num); + if (table_group->tables[num2]) + pnv_npu_set_window(npe, num2, + table_group->tables[num2]); + } return ret; } @@ -2697,12 +2706,24 @@ static long pnv_pci_ioda2_npu_unset_window( struct iommu_table_group *table_group, int num) { + struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); + int num2 = (num == 0) ? 1 : 0; long ret = pnv_pci_ioda2_unset_window(table_group, num); if (ret) return ret; - return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num); + if (!npe->table_group.tables[num]) + return 0; + + ret = pnv_npu_unset_window(npe, num); + if (ret) + return ret; + + if (table_group->tables[num2]) + ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]); + + return ret; } static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) @@ -3843,7 +3864,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + phb = memblock_virt_alloc(sizeof(*phb), 0); /* Allocate PCI controller */ phb->hose = hose = pcibios_alloc_controller(np); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 69d102c..b265ecc 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -18,6 +18,7 @@ #include <linux/io.h> #include <linux/msi.h> #include <linux/iommu.h> +#include <linux/sched/mm.h> #include <asm/sections.h> #include <asm/io.h> @@ -38,6 +39,7 @@ #include "pci.h" static DEFINE_MUTEX(p2p_mutex); +static DEFINE_MUTEX(tunnel_mutex); int pnv_pci_get_slot_id(struct device_node 
*np, uint64_t *id) { @@ -1092,6 +1094,139 @@ out: } EXPORT_SYMBOL_GPL(pnv_pci_set_p2p); +struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + return of_node_get(hose->dn); +} +EXPORT_SYMBOL(pnv_pci_get_phb_node); + +int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind) +{ + struct device_node *np; + const __be32 *prop; + struct pnv_ioda_pe *pe; + uint16_t window_id; + int rc; + + if (!radix_enabled()) + return -ENXIO; + + if (!(np = pnv_pci_get_phb_node(dev))) + return -ENXIO; + + prop = of_get_property(np, "ibm,phb-indications", NULL); + of_node_put(np); + + if (!prop || !prop[1]) + return -ENXIO; + + *asnind = (u64)be32_to_cpu(prop[1]); + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + /* Increase real window size to accept as_notify messages. */ + window_id = (pe->pe_number << 1 ) + 1; + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number, + window_id, pe->tce_bypass_base, + (uint64_t)1 << 48); + return opal_error_code(rc); +} +EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel); + +int pnv_pci_disable_tunnel(struct pci_dev *dev) +{ + struct pnv_ioda_pe *pe; + + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + /* Restore default real window size. 
*/ + pnv_pci_ioda2_set_bypass(pe, true); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel); + +int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) +{ + __be64 val; + struct pci_controller *hose; + struct pnv_phb *phb; + u64 tunnel_bar; + int rc; + + if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) + return -ENXIO; + if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) + return -ENXIO; + + hose = pci_bus_to_host(dev->bus); + phb = hose->private_data; + + mutex_lock(&tunnel_mutex); + rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto out; + } + tunnel_bar = be64_to_cpu(val); + if (enable) { + /* + * Only one device per PHB can use atomics. + * Our policy is first-come, first-served. + */ + if (tunnel_bar) { + if (tunnel_bar != addr) + rc = -EBUSY; + else + rc = 0; /* Setting same address twice is ok */ + goto out; + } + } else { + /* + * The device that owns atomics and wants to release + * them must pass the same address with enable == 0. 
+ */ + if (tunnel_bar != addr) { + rc = -EPERM; + goto out; + } + addr = 0x0ULL; + } + rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr); + rc = opal_error_code(rc); +out: + mutex_unlock(&tunnel_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar); + +#ifdef CONFIG_PPC64 /* for thread.tidr */ +int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid, + u32 *tid) +{ + struct mm_struct *mm = NULL; + + if (task == NULL) + return -EINVAL; + + mm = get_task_mm(task); + if (mm == NULL) + return -EINVAL; + + *pid = mm->context.id; + mmput(mm); + + *tid = task->thread.tidr; + *lpid = mfspr(SPRN_LPID); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info); +#endif + void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index b62ca02..5f96328 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -38,53 +38,92 @@ #include <asm/smp.h> #include <asm/tm.h> #include <asm/setup.h> +#include <asm/security_features.h> #include "powernv.h" + +static bool fw_feature_is(const char *state, const char *name, + struct device_node *fw_features) +{ + struct device_node *np; + bool rc = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + rc = of_property_read_bool(np, state); + of_node_put(np); + } + + return rc; +} + +static void init_fw_feat_flags(struct device_node *np) +{ + if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); + + if (fw_feature_is("enabled", "fw-bcctrl-serialized", np)) + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); + + if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np)) + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); + + if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np)) + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); + + if (fw_feature_is("enabled", "fw-l1d-thread-split", np)) + 
security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); + + if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + + /* + * The features below are enabled by default, so we instead look to see + * if firmware has *disabled* them, and clear them if so. + */ + if (fw_feature_is("disabled", "speculation-policy-favor-security", np)) + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + + if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + + if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); + + if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np)) + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); +} + static void pnv_setup_rfi_flush(void) { struct device_node *np, *fw_features; enum l1d_flush_type type; - int enable; + bool enable; /* Default to fallback in case fw-features are not available */ type = L1D_FLUSH_FALLBACK; - enable = 1; np = of_find_node_by_name(NULL, "ibm,opal"); fw_features = of_get_child_by_name(np, "fw-features"); of_node_put(np); if (fw_features) { - np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); - if (np && of_property_read_bool(np, "enabled")) - type = L1D_FLUSH_MTTRIG; + init_fw_feat_flags(fw_features); + of_node_put(fw_features); - of_node_put(np); + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) + type = L1D_FLUSH_MTTRIG; - np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); - if (np && of_property_read_bool(np, "enabled")) + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) type = L1D_FLUSH_ORI; - - of_node_put(np); - - /* Enable unless firmware says NOT to */ - enable = 2; - np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); - if (np && of_property_read_bool(np, "disabled")) - enable--; - - of_node_put(np); - - np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); - if (np && 
of_property_read_bool(np, "disabled")) - enable--; - - of_node_put(np); - of_node_put(fw_features); } - setup_rfi_flush(type, enable > 0); + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ + security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); + + setup_rfi_flush(type, enable); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index ca22f1e..4f7276e 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -166,19 +166,20 @@ void vas_window_init_dbgdir(struct vas_window *window) return; -free_name: - kfree(window->dbgname); - window->dbgname = NULL; - remove_dir: debugfs_remove_recursive(window->dbgdir); window->dbgdir = NULL; + +free_name: + kfree(window->dbgname); + window->dbgname = NULL; } void vas_instance_init_dbgdir(struct vas_instance *vinst) { struct dentry *d; + vas_init_dbgdir(); if (!vas_debugfs) return; @@ -201,8 +202,18 @@ free_name: vinst->dbgdir = NULL; } +/* + * Set up the "root" VAS debugfs dir. Return if we already set it up + * (or failed to) in an earlier instance of VAS. 
+ */ void vas_init_dbgdir(void) { + static bool first_time = true; + + if (!first_time) + return; + + first_time = false; vas_debugfs = debugfs_create_dir("vas", NULL); if (IS_ERR(vas_debugfs)) vas_debugfs = NULL; diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h new file mode 100644 index 0000000..a449b9f --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-trace.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vas + +#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) + +#define _VAS_TRACE_H +#include <linux/tracepoint.h> +#include <linux/sched.h> +#include <asm/vas.h> + +TRACE_EVENT( vas_rx_win_open, + + TP_PROTO(struct task_struct *tsk, + int vasid, + int cop, + struct vas_rx_win_attr *rxattr), + + TP_ARGS(tsk, vasid, cop, rxattr), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(int, pid) + __field(int, cop) + __field(int, vasid) + __field(struct vas_rx_win_attr *, rxattr) + __field(int, lnotify_lpid) + __field(int, lnotify_pid) + __field(int, lnotify_tid) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = vasid; + __entry->cop = cop; + __entry->lnotify_lpid = rxattr->lnotify_lpid; + __entry->lnotify_pid = rxattr->lnotify_pid; + __entry->lnotify_tid = rxattr->lnotify_tid; + ), + + TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d", + __entry->pid, __entry->vasid, __entry->cop, + __entry->lnotify_lpid, __entry->lnotify_pid, + __entry->lnotify_tid) +); + +TRACE_EVENT( vas_tx_win_open, + + TP_PROTO(struct task_struct *tsk, + int vasid, + int cop, + struct vas_tx_win_attr *txattr), + + TP_ARGS(tsk, vasid, cop, txattr), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(int, pid) + __field(int, cop) + __field(int, vasid) + __field(struct vas_tx_win_attr *, txattr) + __field(int, lpid) + __field(int, pidr) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = 
vasid; + __entry->cop = cop; + __entry->lpid = txattr->lpid; + __entry->pidr = txattr->pidr; + ), + + TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d", + __entry->pid, __entry->vasid, __entry->cop, + __entry->lpid, __entry->pidr) +); + +TRACE_EVENT( vas_paste_crb, + + TP_PROTO(struct task_struct *tsk, + struct vas_window *win), + + TP_ARGS(tsk, win), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(struct vas_window *, win) + __field(int, pid) + __field(int, vasid) + __field(int, winid) + __field(unsigned long, paste_kaddr) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = win->vinst->vas_id; + __entry->winid = win->winid; + __entry->paste_kaddr = (unsigned long)win->paste_kaddr + ), + + TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx\n", + __entry->pid, __entry->vasid, __entry->winid, + __entry->paste_kaddr) +); + +#endif /* _VAS_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv +#define TRACE_INCLUDE_FILE vas-trace +#include <trace/define_trace.h> diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 2b3eb01..ff9f488 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -21,6 +21,9 @@ #include "vas.h" #include "copy-paste.h" +#define CREATE_TRACE_POINTS +#include "vas-trace.h" + /* * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. 
@@ -880,6 +883,8 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_winctx winctx; struct vas_instance *vinst; + trace_vas_rx_win_open(current, vasid, cop, rxattr); + if (!rx_win_args_valid(cop, rxattr)) return ERR_PTR(-EINVAL); @@ -1008,6 +1013,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, struct vas_winctx winctx; struct vas_instance *vinst; + trace_vas_tx_win_open(current, vasid, cop, attr); + if (!tx_win_args_valid(cop, attr)) return ERR_PTR(-EINVAL); @@ -1063,16 +1070,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rc = PTR_ERR(txwin->paste_kaddr); goto free_window; } + } else { + /* + * A user mapping must ensure that context switch issues + * CP_ABORT for this thread. + */ + rc = set_thread_uses_vas(); + if (rc) + goto free_window; } - /* - * Now that we have a send window, ensure context switch issues - * CP_ABORT for this thread. - */ - rc = -EINVAL; - if (set_thread_uses_vas() < 0) - goto free_window; - set_vinst_win(vinst, txwin); return txwin; @@ -1100,6 +1107,8 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re) void *addr; uint64_t val; + trace_vas_paste_crb(current, txwin); + /* * Only NX windows are supported for now and hardware assumes * report-enable flag is set for NX windows. 
Ensure software diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index aebbe95..5a2b24c 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -160,8 +160,6 @@ static int __init vas_init(void) int found = 0; struct device_node *dn; - vas_init_dbgdir(); - platform_driver_register(&vas_driver); for_each_compatible_node(dn, NULL, "ibm,vas") { @@ -169,8 +167,10 @@ static int __init vas_init(void) found++; } - if (!found) + if (!found) { + platform_driver_unregister(&vas_driver); return -ENODEV; + } pr_devel("Found %d instances\n", found); diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 7f870ec..8c7009d 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -524,8 +524,7 @@ static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, int result; struct dma_chunk *c; - c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC); - + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) { result = -ENOMEM; goto fail_alloc; @@ -570,8 +569,7 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__, phys_addr, ps3_mm_phys_to_lpar(phys_addr), len); - c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC); - + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) { result = -ENOMEM; goto fail_alloc; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 357471a..6ef77ca 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -36,6 +36,7 @@ #include <asm/xics.h> #include <asm/xive.h> #include <asm/plpar_wrappers.h> +#include <asm/topology.h> #include "pseries.h" #include "offline_states.h" @@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np) BUG_ON(cpu_online(cpu)); set_cpu_present(cpu, false); 
set_hard_smp_processor_id(cpu, -1); + update_numa_cpu_lookup_table(cpu, -1); break; } if (cpu >= nr_cpu_ids) @@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_done(); } -extern int find_and_online_cpu_nid(int cpu); - static int dlpar_online_cpu(struct device_node *dn) { int rc = 0; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b6d2ecc..adb996e 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -306,14 +306,14 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, want_v = hpte_encode_avpn(vpn, psize, ssize); - pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", - want_v, slot, flags, psize); - flags = (newpp & 7) | H_AVPN; if (mmu_has_feature(MMU_FTR_KERNEL_RO)) /* Move pp0 into bit 8 (IBM 55) */ flags |= (newpp & HPTE_R_PP0) >> 55; + pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", + want_v, slot, flags, psize); + lpar_rc = plpar_pte_protect(flags, slot, want_v); if (lpar_rc == H_NOT_FOUND) { @@ -726,15 +726,18 @@ static int pseries_lpar_resize_hpt(unsigned long shift) return 0; } -/* Actually only used for radix, so far */ static int pseries_lpar_register_process_table(unsigned long base, unsigned long page_size, unsigned long table_size) { long rc; - unsigned long flags = PROC_TABLE_NEW; + unsigned long flags = 0; + if (table_size) + flags |= PROC_TABLE_NEW; if (radix_enabled()) flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; + else + flags |= PROC_TABLE_HPT_SLB; for (;;) { rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, page_size, table_size); @@ -760,6 +763,7 @@ void __init hpte_init_pseries(void) mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; + register_process_table = pseries_lpar_register_process_table; if 
(firmware_has_feature(FW_FEATURE_HPT_RESIZE)) mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 0f7fb71..8a8033a 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -348,6 +348,9 @@ void post_mobility_fixup(void) printk(KERN_ERR "Post-mobility device tree update " "failed: %d\n", rc); + /* Possibly switch to a new RFI flush type */ + pseries_setup_rfi_flush(); + return; } diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 1ae1d9f..60db2ee 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -27,6 +27,14 @@ extern int pSeries_machine_check_exception(struct pt_regs *regs); #ifdef CONFIG_SMP extern void smp_init_pseries(void); + +/* Get state of physical CPU from query_cpu_stopped */ +int smp_query_cpu_stopped(unsigned int pcpu); +#define QCSS_STOPPED 0 +#define QCSS_STOPPING 1 +#define QCSS_NOT_STOPPED 2 +#define QCSS_HARDWARE_ERROR -1 +#define QCSS_HARDWARE_BUSY -2 #else static inline void smp_init_pseries(void) { }; #endif @@ -100,4 +108,6 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); +void pseries_setup_rfi_flush(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 81d8614..5e1ef91 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id); /* + * Enable the hotplug interrupt late because processing them may touch other + * devices or systems (e.g. hugepages) that have not been initialized at the + * subsys stage. 
+ */ +int __init init_ras_hotplug_IRQ(void) +{ + struct device_node *np; + + /* Hotplug Events */ + np = of_find_node_by_path("/event-sources/hot-plug-events"); + if (np != NULL) { + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, + "RAS_HOTPLUG"); + of_node_put(np); + } + + return 0; +} +machine_late_initcall(pseries, init_ras_hotplug_IRQ); + +/* * Initialize handlers for the set of interrupts caused by hardware errors * and power system events. */ @@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void) of_node_put(np); } - /* Hotplug Events */ - np = of_find_node_by_path("/event-sources/hot-plug-events"); - if (np != NULL) { - if (dlpar_workqueue_init() == 0) - request_event_sources_irqs(np, ras_hotplug_interrupt, - "RAS_HOTPLUG"); - of_node_put(np); - } - /* EPOW Events */ np = of_find_node_by_path("/event-sources/epow-events"); if (np != NULL) { diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a66005a..98bca8d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -68,6 +68,7 @@ #include <asm/plpar_wrappers.h> #include <asm/kexec.h> #include <asm/isa-bridge.h> +#include <asm/security_features.h> #include "pseries.h" @@ -459,35 +460,67 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } -static void pseries_setup_rfi_flush(void) +static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) +{ + if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31) + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); + + if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED) + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); + + if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30) + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); + + if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); + + if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV) + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); + + 
if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED) + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + + /* + * The features below are enabled by default, so we instead look to see + * if firmware has *disabled* them, and clear them if so. + */ + if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + + if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); +} + +void pseries_setup_rfi_flush(void) { struct h_cpu_char_result result; enum l1d_flush_type types; bool enable; long rc; - /* Enable by default */ - enable = true; - rc = plpar_get_cpu_characteristics(&result); - if (rc == H_SUCCESS) { - types = L1D_FLUSH_NONE; + if (rc == H_SUCCESS) + init_cpu_char_feature_flags(&result); + + /* + * We're the guest so this doesn't apply to us, clear it to simplify + * handling of it elsewhere. 
+ */ + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); - if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) - types |= L1D_FLUSH_MTTRIG; - if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) - types |= L1D_FLUSH_ORI; + types = L1D_FLUSH_FALLBACK; - /* Use fallback if nothing set in hcall */ - if (types == L1D_FLUSH_NONE) - types = L1D_FLUSH_FALLBACK; + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) + types |= L1D_FLUSH_MTTRIG; - if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) - enable = false; - } else { - /* Default to fallback if case hcall is not available */ - types = L1D_FLUSH_FALLBACK; - } + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) + types |= L1D_FLUSH_ORI; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ + security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); } @@ -738,7 +771,7 @@ static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx) /* PAPR says we can't set HYP */ dawrx &= ~DAWRX_HYP; - return plapr_set_watchpoint0(dawr, dawrx); + return plpar_set_watchpoint0(dawr, dawrx); } #define CMO_CHARACTERISTICS_TOKEN 44 diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index d506bf6..3df4612 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -215,7 +215,7 @@ static int pseries_cause_nmi_ipi(int cpu) hwcpu = get_hard_smp_processor_id(cpu); } - if (plapr_signal_sys_reset(hwcpu) == H_SUCCESS) + if (plpar_signal_sys_reset(hwcpu) == H_SUCCESS) return 1; return 0; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 40c0611..34590150 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -246,7 +246,7 @@ notrace void xmon_xive_do_dump(int cpu) u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi, val & XIVE_ESB_VAL_P ? 'P' : 'p', - val & XIVE_ESB_VAL_P ? 'Q' : 'q'); + val & XIVE_ESB_VAL_Q ? 
'Q' : 'q'); } #endif } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index d9c4c93..091f1d0 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + pr_err("Error %lld getting queue info CPU %d prio %d\n", rc, + target, prio); rc = -EIO; goto fail; } @@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, /* Configure and enable the queue in HW */ rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order); if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + pr_err("Error %lld setting queue for CPU %d prio %d\n", rc, + target, prio); rc = -EIO; } else { q->qpage = qpage; @@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc, if (IS_ERR(qpage)) return PTR_ERR(qpage); - return xive_spapr_configure_queue(cpu, q, prio, qpage, - xive_queue_shift); + return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu), + q, prio, qpage, xive_queue_shift); } static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, @@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, struct xive_q *q = &xc->queue[prio]; unsigned int alloc_order; long rc; + int hw_cpu = get_hard_smp_processor_id(cpu); - rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0); + rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0); if (rc) - pr_err("Error %ld setting queue for prio %d\n", rc, prio); + pr_err("Error %ld setting queue for CPU %d prio %d\n", rc, + hw_cpu, prio); alloc_order = xive_alloc_order(xive_queue_shift); free_pages((unsigned long)q->qpage, alloc_order); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c 
index b6574b6..a0842f1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -41,6 +41,7 @@ #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> +#include <asm/plpar_wrappers.h> #include <asm/cputable.h> #include <asm/rtas.h> #include <asm/sstep.h> @@ -61,12 +62,6 @@ #include <asm/paca.h> #endif -#if defined(CONFIG_PPC_SPLPAR) -#include <asm/plpar_wrappers.h> -#else -static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; }; -#endif - #include "nonstdio.h" #include "dis-asm.h" @@ -328,7 +323,7 @@ static void write_ciabr(unsigned long ciabr) mtspr(SPRN_CIABR, ciabr); return; } - plapr_set_ciabr(ciabr); + plpar_set_ciabr(ciabr); } /** @@ -1273,6 +1268,16 @@ static long check_bp_loc(unsigned long addr) return 1; } +/* Force enable xmon if not already enabled */ +static inline void force_enable_xmon(void) +{ + /* Enable xmon hooks if needed */ + if (!xmon_on) { + printf("xmon: Enabling debugger hooks\n"); + xmon_on = 1; + } +} + static char *breakpoint_help_string = "Breakpoint command usage:\n" "b show breakpoints\n" @@ -1297,6 +1302,10 @@ bpt_cmds(void) static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; int mode; case 'd': /* bd - hardware data breakpoint */ + if (!ppc_breakpoint_available()) { + printf("Hardware data breakpoint not supported on this cpu\n"); + break; + } mode = 7; cmd = inchar(); if (cmd == 'r') @@ -1315,6 +1324,8 @@ bpt_cmds(void) dabr.address &= ~HW_BRK_TYPE_DABR; dabr.enabled = mode | BP_DABR; } + + force_enable_xmon(); break; case 'i': /* bi - hardware instr breakpoint */ @@ -1335,6 +1346,7 @@ bpt_cmds(void) if (bp != NULL) { bp->enabled |= BP_CIABR; iabr = bp; + force_enable_xmon(); } break; #endif @@ -1399,8 +1411,10 @@ bpt_cmds(void) if (!check_bp_loc(a)) break; bp = new_breakpoint(a); - if (bp != NULL) + if (bp != NULL) { bp->enabled |= BP_TRAP; + force_enable_xmon(); + } break; } } @@ -3649,11 +3663,35 @@ device_initcall(setup_xmon_sysrq); #endif /* 
CONFIG_MAGIC_SYSRQ */ #ifdef CONFIG_DEBUG_FS +static void clear_all_bpt(void) +{ + int i; + + /* clear/unpatch all breakpoints */ + remove_bpts(); + remove_cpu_bpts(); + + /* Disable all breakpoints */ + for (i = 0; i < NBPTS; ++i) + bpts[i].enabled = 0; + + /* Clear any data or iabr breakpoints */ + if (iabr || dabr.enabled) { + iabr = NULL; + dabr.enabled = 0; + } + + printf("xmon: All breakpoints cleared\n"); +} + static int xmon_dbgfs_set(void *data, u64 val) { xmon_on = !!val; xmon_init(xmon_on); + /* make sure all breakpoints removed when disabling */ + if (!xmon_on) + clear_all_bpt(); return 0; } |