Diffstat (limited to 'arch')
766 files changed, 16569 insertions, 13888 deletions
diff --git a/arch/alpha/include/asm/gpio.h b/arch/alpha/include/asm/gpio.h deleted file mode 100644 index b3799d8..0000000 --- a/arch/alpha/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h index 98f2eee..a06c24b 100644 --- a/arch/alpha/include/asm/pci.h +++ b/arch/alpha/include/asm/pci.h @@ -7,7 +7,6 @@ #include <linux/dma-mapping.h> #include <linux/scatterlist.h> #include <asm/machvec.h> -#include <asm-generic/pci-bridge.h> /* * The following structure is used to manage multiple PCI busses. @@ -66,13 +65,6 @@ extern void pcibios_set_master(struct pci_dev *dev); decisions. */ #define PCI_DMA_BUS_IS_PHYS 0 -#ifdef CONFIG_PCI - -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include <asm-generic/pci-dma-compat.h> - -#endif - /* TODO: integrate with include/asm-generic/pci.h ? */ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { diff --git a/arch/alpha/include/asm/serial.h b/arch/alpha/include/asm/serial.h index 22909b8..e31557f 100644 --- a/arch/alpha/include/asm/serial.h +++ b/arch/alpha/include/asm/serial.h @@ -14,11 +14,11 @@ /* Standard COM flags (except for COM4, because of the 8514 problem) */ #ifdef CONFIG_SERIAL_8250_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) -#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) +#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ) #else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) -#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST) +#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF #endif #define SERIAL_PORT_DFNS \ diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 2f24447f..46bf263 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -168,7 +168,7 @@ smp_callin(void) cpuid, current, current->active_mm)); preempt_disable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */ diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8a188bc..07a5cb9 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -17,6 +17,7 @@ config ARC select GENERIC_FIND_FIRST_BIT # for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP select GENERIC_IRQ_SHOW + select GENERIC_PCI_IOMAP select GENERIC_PENDING_IRQ if SMP select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB @@ -37,6 +38,9 @@ config ARC select PERF_USE_VMALLOC select HAVE_DEBUG_STACKOVERFLOW +config MIGHT_HAVE_PCI + bool + config TRACE_IRQFLAGS_SUPPORT def_bool y @@ -569,6 +573,28 @@ config FORCE_MAX_ZONEORDER source "net/Kconfig" source "drivers/Kconfig" + +menu "Bus Support" + +config PCI + bool "PCI support" if MIGHT_HAVE_PCI + help + PCI is the name of a bus system, i.e., the way the CPU talks to + the other stuff inside your box. Find out if your board/platform + has PCI. + + Note: PCIe support for Synopsys Device will be available only + when HAPS DX is configured with PCIe RC bitmap. If you have PCI, + say Y, otherwise N. 
+ +config PCI_SYSCALL + def_bool PCI + +source "drivers/pci/Kconfig" +source "drivers/pci/pcie/Kconfig" + +endmenu + source "fs/Kconfig" source "arch/arc/Kconfig.debug" source "security/Kconfig" diff --git a/arch/arc/include/asm/dma.h b/arch/arc/include/asm/dma.h index ca7c451..01e47a6 100644 --- a/arch/arc/include/asm/dma.h +++ b/arch/arc/include/asm/dma.h @@ -10,5 +10,10 @@ #define ASM_ARC_DMA_H #define MAX_DMA_ADDRESS 0xC0000000 +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy 0 +#endif #endif diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index c5094de..7afe335 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -30,19 +30,16 @@ static inline pmd_t pte_pmd(pte_t pte) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) #define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) -#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) -#define pmd_special(pmd) pte_special(pmd_pte(pmd)) #define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot)) #define pmd_trans_huge(pmd) (pmd_val(pmd) & _PAGE_HW_SZ) -#define pmd_trans_splitting(pmd) (pmd_trans_huge(pmd) && pmd_special(pmd)) #define pfn_pmd(pfn, prot) (__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 694ece8..947bf0c 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -16,6 +16,15 @@ extern void __iomem *ioremap(unsigned long physaddr, unsigned long size); extern void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long flags); +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return (void __iomem *)port; +} + +static inline void ioport_unmap(void __iomem *addr) +{ +} + extern void iounmap(const void __iomem *addr); #define ioremap_nocache(phy, sz) ioremap(phy, sz) diff --git a/arch/arc/include/asm/pci.h b/arch/arc/include/asm/pci.h new file mode 100644 index 0000000..ba56c23 --- /dev/null +++ b/arch/arc/include/asm/pci.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2015-2016 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_ARC_PCI_H +#define _ASM_ARC_PCI_H + +#ifdef __KERNEL__ +#include <linux/ioport.h> + +#define PCIBIOS_MIN_IO 0x100 +#define PCIBIOS_MIN_MEM 0x100000 + +#define pcibios_assign_all_busses() 1 +/* + * The PCI address space does equal the physical memory address space. + * The networking and block device layers use this boolean for bounce + * buffer decisions. 
+ */ +#define PCI_DMA_BUS_IS_PHYS 1 + +#endif /* __KERNEL__ */ + +#endif /* _ASM_ARC_PCI_H */ diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index e7f3625..1bc2036 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -12,6 +12,7 @@ obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o +obj-$(CONFIG_PCI) += pcibios.o obj-$(CONFIG_MODULES) += arcksyms.o module.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/arc/kernel/pcibios.c b/arch/arc/kernel/pcibios.c new file mode 100644 index 0000000..72e1d73 --- /dev/null +++ b/arch/arc/kernel/pcibios.c @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/pci.h> + +/* + * We don't have to worry about legacy ISA devices, so nothing to do here + */ +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + return res->start; +} + +void pcibios_fixup_bus(struct pci_bus *bus) +{ +} diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 424e937..4cb3add 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -142,7 +142,7 @@ void start_kernel_secondary(void) local_irq_enable(); preempt_disable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig index d475f9d..426ac4b 100644 --- a/arch/arc/plat-axs10x/Kconfig +++ b/arch/arc/plat-axs10x/Kconfig @@ -11,6 +11,7 @@ menuconfig ARC_PLAT_AXS10X select DW_APB_ICTL select GPIO_DWAPB select OF_GPIO + select MIGHT_HAVE_PCI select GENERIC_IRQ_CHIP select ARCH_REQUIRE_GPIOLIB help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4f799e5..1d00da1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1212,7 +1212,6 @@ config PCI_HOST_ITE8152 select DMABOUNCE source "drivers/pci/Kconfig" -source "drivers/pci/pcie/Kconfig" source "drivers/pcmcia/Kconfig" diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index a0986c6..592e65c 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -562,8 +562,7 @@ extcon_usb2: tps659038_usb { compatible = "ti,palmas-usb-vid"; ti,enable-vbus-detection; - ti,enable-id-detection; - id-gpios = <&gpio7 24 GPIO_ACTIVE_HIGH>; + vbus-gpio = <&gpio4 21 GPIO_ACTIVE_HIGH>; }; }; diff --git a/arch/arm/boot/dts/armada-xp-axpwifiap.dts b/arch/arm/boot/dts/armada-xp-axpwifiap.dts index 23fc670..5c21b23 100644 --- a/arch/arm/boot/dts/armada-xp-axpwifiap.dts +++ b/arch/arm/boot/dts/armada-xp-axpwifiap.dts @@ -70,8 +70,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts index 3065730..cca3665 100644 --- a/arch/arm/boot/dts/armada-xp-db.dts +++ 
b/arch/arm/boot/dts/armada-xp-db.dts @@ -76,8 +76,8 @@ ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000 + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000 MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>; devbus-bootcs { diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts index a1ded01..061f423 100644 --- a/arch/arm/boot/dts/armada-xp-gp.dts +++ b/arch/arm/boot/dts/armada-xp-gp.dts @@ -95,8 +95,8 @@ ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000 + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000 MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>; devbus-bootcs { diff --git a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts index fb9e1bb..8af463f 100644 --- a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts +++ b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts @@ -65,8 +65,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts index 6e9820e..b89e6cf 100644 --- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts +++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts @@ -70,8 +70,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts index 6ab3383..6522b04 100644 --- a/arch/arm/boot/dts/armada-xp-matrix.dts +++ b/arch/arm/boot/dts/armada-xp-matrix.dts @@ -68,8 +68,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; internal-regs { serial@12000 { diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts index 62175a8..d19f44c 100644 --- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts +++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts @@ -64,8 +64,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index 3aa29a9..ed3b889d 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ 
b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -65,9 +65,9 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000 + MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000 + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000 MBUS_ID(0x0c, 0x04) 0 0 0xd1200000 0x100000>; devbus-bootcs { diff --git a/arch/arm/boot/dts/armada-xp-synology-ds414.dts b/arch/arm/boot/dts/armada-xp-synology-ds414.dts index 2391b11..d17dab0 100644 --- a/arch/arm/boot/dts/armada-xp-synology-ds414.dts +++ b/arch/arm/boot/dts/armada-xp-synology-ds414.dts @@ -78,8 +78,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index c4d9175..f82aa44 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1500,6 +1500,16 @@ 0x48485200 0x2E00>; #address-cells = <1>; #size-cells = <1>; + + /* + * Do not allow gating of cpsw clock as workaround + * for errata i877. Keeping internal clock disabled + * causes the device switching characteristics + * to degrade over time and eventually fail to meet + * the data manual delay time/skew specs. + */ + ti,no-idle; + /* * rx_thresh_pend * rx_pend diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index 1c6c075..302d116 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -569,7 +569,7 @@ }; }; - iio_hwmon { + iio-hwmon { compatible = "iio-hwmon"; io-channels = <&lradc 8>; }; diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index fae7b90..f637ec9 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -1256,7 +1256,7 @@ }; }; - iio_hwmon { + iio-hwmon { compatible = "iio-hwmon"; io-channels = <&lradc 8>; }; diff --git a/arch/arm/boot/dts/mt2701-pinfunc.h b/arch/arm/boot/dts/mt2701-pinfunc.h new file mode 100644 index 0000000..e24ebc8 --- /dev/null +++ b/arch/arm/boot/dts/mt2701-pinfunc.h @@ -0,0 +1,735 @@ +/* + * Copyright (c) 2015 MediaTek Inc. + * Author: Biao Huang <biao.huang@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __DTS_MT2701_PINFUNC_H +#define __DTS_MT2701_PINFUNC_H + +#include <dt-bindings/pinctrl/mt65xx.h> + +#define MT2701_PIN_0_PWRAP_SPI0_MI__FUNC_GPIO0 (MTK_PIN_NO(0) | 0) +#define MT2701_PIN_0_PWRAP_SPI0_MI__FUNC_PWRAP_SPIDO (MTK_PIN_NO(0) | 1) +#define MT2701_PIN_0_PWRAP_SPI0_MI__FUNC_PWRAP_SPIDI (MTK_PIN_NO(0) | 2) + +#define MT2701_PIN_1_PWRAP_SPI0_MO__FUNC_GPIO1 (MTK_PIN_NO(1) | 0) +#define MT2701_PIN_1_PWRAP_SPI0_MO__FUNC_PWRAP_SPIDI (MTK_PIN_NO(1) | 1) +#define MT2701_PIN_1_PWRAP_SPI0_MO__FUNC_PWRAP_SPIDO (MTK_PIN_NO(1) | 2) + +#define MT2701_PIN_2_PWRAP_INT__FUNC_GPIO2 (MTK_PIN_NO(2) | 0) +#define MT2701_PIN_2_PWRAP_INT__FUNC_PWRAP_INT (MTK_PIN_NO(2) | 1) + +#define MT2701_PIN_3_PWRAP_SPI0_CK__FUNC_GPIO3 (MTK_PIN_NO(3) | 0) +#define MT2701_PIN_3_PWRAP_SPI0_CK__FUNC_PWRAP_SPICK_I (MTK_PIN_NO(3) | 1) + +#define MT2701_PIN_4_PWRAP_SPI0_CSN__FUNC_GPIO4 (MTK_PIN_NO(4) | 0) +#define MT2701_PIN_4_PWRAP_SPI0_CSN__FUNC_PWRAP_SPICS_B_I (MTK_PIN_NO(4) | 1) + +#define MT2701_PIN_5_PWRAP_SPI0_CK2__FUNC_GPIO5 (MTK_PIN_NO(5) | 0) +#define MT2701_PIN_5_PWRAP_SPI0_CK2__FUNC_PWRAP_SPICK2_I (MTK_PIN_NO(5) | 1) +#define MT2701_PIN_5_PWRAP_SPI0_CK2__FUNC_ANT_SEL1 (MTK_PIN_NO(5) | 5) + +#define MT2701_PIN_6_PWRAP_SPI0_CSN2__FUNC_GPIO6 (MTK_PIN_NO(6) | 0) +#define MT2701_PIN_6_PWRAP_SPI0_CSN2__FUNC_PWRAP_SPICS2_B_I (MTK_PIN_NO(6) | 1) +#define MT2701_PIN_6_PWRAP_SPI0_CSN2__FUNC_ANT_SEL0 (MTK_PIN_NO(6) | 5) +#define MT2701_PIN_6_PWRAP_SPI0_CSN2__FUNC_DBG_MON_A_0 (MTK_PIN_NO(6) | 7) + +#define MT2701_PIN_7_SPI1_CSN__FUNC_GPIO7 (MTK_PIN_NO(7) | 0) +#define MT2701_PIN_7_SPI1_CSN__FUNC_SPI1_CS (MTK_PIN_NO(7) | 1) +#define MT2701_PIN_7_SPI1_CSN__FUNC_KCOL0 (MTK_PIN_NO(7) | 4) +#define MT2701_PIN_7_SPI1_CSN__FUNC_DBG_MON_B_12 (MTK_PIN_NO(7) | 7) + +#define MT2701_PIN_8_SPI1_MI__FUNC_GPIO8 (MTK_PIN_NO(8) | 0) +#define MT2701_PIN_8_SPI1_MI__FUNC_SPI1_MI (MTK_PIN_NO(8) | 1) +#define MT2701_PIN_8_SPI1_MI__FUNC_SPI1_MO (MTK_PIN_NO(8) | 2) +#define MT2701_PIN_8_SPI1_MI__FUNC_KCOL1 (MTK_PIN_NO(8) | 4) +#define MT2701_PIN_8_SPI1_MI__FUNC_DBG_MON_B_13 (MTK_PIN_NO(8) | 7) + +#define MT2701_PIN_9_SPI1_MO__FUNC_GPIO9 (MTK_PIN_NO(9) | 0) +#define MT2701_PIN_9_SPI1_MO__FUNC_SPI1_MO (MTK_PIN_NO(9) | 1) +#define MT2701_PIN_9_SPI1_MO__FUNC_SPI1_MI (MTK_PIN_NO(9) | 2) +#define MT2701_PIN_9_SPI1_MO__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(9) | 3) +#define MT2701_PIN_9_SPI1_MO__FUNC_KCOL2 (MTK_PIN_NO(9) | 4) +#define MT2701_PIN_9_SPI1_MO__FUNC_DBG_MON_B_14 (MTK_PIN_NO(9) | 7) + +#define MT2701_PIN_10_RTC32K_CK__FUNC_GPIO10 (MTK_PIN_NO(10) | 0) +#define MT2701_PIN_10_RTC32K_CK__FUNC_RTC32K_CK (MTK_PIN_NO(10) | 1) + +#define MT2701_PIN_11_WATCHDOG__FUNC_GPIO11 (MTK_PIN_NO(11) | 0) +#define MT2701_PIN_11_WATCHDOG__FUNC_WATCHDOG (MTK_PIN_NO(11) | 1) + +#define MT2701_PIN_12_SRCLKENA__FUNC_GPIO12 (MTK_PIN_NO(12) | 0) +#define MT2701_PIN_12_SRCLKENA__FUNC_SRCLKENA (MTK_PIN_NO(12) | 1) + +#define MT2701_PIN_13_SRCLKENAI__FUNC_GPIO13 (MTK_PIN_NO(13) | 0) +#define MT2701_PIN_13_SRCLKENAI__FUNC_SRCLKENAI (MTK_PIN_NO(13) | 1) + +#define MT2701_PIN_14_URXD2__FUNC_GPIO14 (MTK_PIN_NO(14) | 0) +#define MT2701_PIN_14_URXD2__FUNC_URXD2 (MTK_PIN_NO(14) | 1) +#define MT2701_PIN_14_URXD2__FUNC_UTXD2 (MTK_PIN_NO(14) | 2) +#define MT2701_PIN_14_URXD2__FUNC_SRCCLKENAI2 (MTK_PIN_NO(14) | 5) +#define MT2701_PIN_14_URXD2__FUNC_DBG_MON_B_30 (MTK_PIN_NO(14) | 7) + +#define MT2701_PIN_15_UTXD2__FUNC_GPIO15 (MTK_PIN_NO(15) | 0) +#define MT2701_PIN_15_UTXD2__FUNC_UTXD2 (MTK_PIN_NO(15) | 1) +#define MT2701_PIN_15_UTXD2__FUNC_URXD2 (MTK_PIN_NO(15) | 2) 
+#define MT2701_PIN_15_UTXD2__FUNC_DBG_MON_B_31 (MTK_PIN_NO(15) | 7) + +#define MT2701_PIN_18_PCM_CLK__FUNC_GPIO18 (MTK_PIN_NO(18) | 0) +#define MT2701_PIN_18_PCM_CLK__FUNC_PCM_CLK0 (MTK_PIN_NO(18) | 1) +#define MT2701_PIN_18_PCM_CLK__FUNC_MRG_CLK (MTK_PIN_NO(18) | 2) +#define MT2701_PIN_18_PCM_CLK__FUNC_MM_TEST_CK (MTK_PIN_NO(18) | 4) +#define MT2701_PIN_18_PCM_CLK__FUNC_CONN_DSP_JCK (MTK_PIN_NO(18) | 5) +#define MT2701_PIN_18_PCM_CLK__FUNC_WCN_PCM_CLKO (MTK_PIN_NO(18) | 6) +#define MT2701_PIN_18_PCM_CLK__FUNC_DBG_MON_A_3 (MTK_PIN_NO(18) | 7) + +#define MT2701_PIN_19_PCM_SYNC__FUNC_GPIO19 (MTK_PIN_NO(19) | 0) +#define MT2701_PIN_19_PCM_SYNC__FUNC_PCM_SYNC (MTK_PIN_NO(19) | 1) +#define MT2701_PIN_19_PCM_SYNC__FUNC_MRG_SYNC (MTK_PIN_NO(19) | 2) +#define MT2701_PIN_19_PCM_SYNC__FUNC_CONN_DSP_JINTP (MTK_PIN_NO(19) | 5) +#define MT2701_PIN_19_PCM_SYNC__FUNC_WCN_PCM_SYNC (MTK_PIN_NO(19) | 6) +#define MT2701_PIN_19_PCM_SYNC__FUNC_DBG_MON_A_5 (MTK_PIN_NO(19) | 7) + +#define MT2701_PIN_20_PCM_RX__FUNC_GPIO20 (MTK_PIN_NO(20) | 0) +#define MT2701_PIN_20_PCM_RX__FUNC_PCM_RX (MTK_PIN_NO(20) | 1) +#define MT2701_PIN_20_PCM_RX__FUNC_MRG_RX (MTK_PIN_NO(20) | 2) +#define MT2701_PIN_20_PCM_RX__FUNC_MRG_TX (MTK_PIN_NO(20) | 3) +#define MT2701_PIN_20_PCM_RX__FUNC_PCM_TX (MTK_PIN_NO(20) | 4) +#define MT2701_PIN_20_PCM_RX__FUNC_CONN_DSP_JDI (MTK_PIN_NO(20) | 5) +#define MT2701_PIN_20_PCM_RX__FUNC_WCN_PCM_RX (MTK_PIN_NO(20) | 6) +#define MT2701_PIN_20_PCM_RX__FUNC_DBG_MON_A_4 (MTK_PIN_NO(20) | 7) + +#define MT2701_PIN_21_PCM_TX__FUNC_GPIO21 (MTK_PIN_NO(21) | 0) +#define MT2701_PIN_21_PCM_TX__FUNC_PCM_TX (MTK_PIN_NO(21) | 1) +#define MT2701_PIN_21_PCM_TX__FUNC_MRG_TX (MTK_PIN_NO(21) | 2) +#define MT2701_PIN_21_PCM_TX__FUNC_MRG_RX (MTK_PIN_NO(21) | 3) +#define MT2701_PIN_21_PCM_TX__FUNC_PCM_RX (MTK_PIN_NO(21) | 4) +#define MT2701_PIN_21_PCM_TX__FUNC_CONN_DSP_JMS (MTK_PIN_NO(21) | 5) +#define MT2701_PIN_21_PCM_TX__FUNC_WCN_PCM_TX (MTK_PIN_NO(21) | 6) +#define MT2701_PIN_21_PCM_TX__FUNC_DBG_MON_A_2 (MTK_PIN_NO(21) | 7) + +#define MT2701_PIN_22_EINT0__FUNC_GPIO22 (MTK_PIN_NO(22) | 0) +#define MT2701_PIN_22_EINT0__FUNC_UCTS0 (MTK_PIN_NO(22) | 1) +#define MT2701_PIN_22_EINT0__FUNC_KCOL3 (MTK_PIN_NO(22) | 3) +#define MT2701_PIN_22_EINT0__FUNC_CONN_DSP_JDO (MTK_PIN_NO(22) | 4) +#define MT2701_PIN_22_EINT0__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(22) | 5) +#define MT2701_PIN_22_EINT0__FUNC_DBG_MON_A_30 (MTK_PIN_NO(22) | 7) +#define MT2701_PIN_22_EINT0__FUNC_PCIE0_PERST_N (MTK_PIN_NO(22) | 10) + +#define MT2701_PIN_23_EINT1__FUNC_GPIO23 (MTK_PIN_NO(23) | 0) +#define MT2701_PIN_23_EINT1__FUNC_URTS0 (MTK_PIN_NO(23) | 1) +#define MT2701_PIN_23_EINT1__FUNC_KCOL2 (MTK_PIN_NO(23) | 3) +#define MT2701_PIN_23_EINT1__FUNC_CONN_MCU_TDO (MTK_PIN_NO(23) | 4) +#define MT2701_PIN_23_EINT1__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(23) | 5) +#define MT2701_PIN_23_EINT1__FUNC_DBG_MON_A_29 (MTK_PIN_NO(23) | 7) +#define MT2701_PIN_23_EINT1__FUNC_PCIE1_PERST_N (MTK_PIN_NO(23) | 10) + +#define MT2701_PIN_24_EINT2__FUNC_GPIO24 (MTK_PIN_NO(24) | 0) +#define MT2701_PIN_24_EINT2__FUNC_UCTS1 (MTK_PIN_NO(24) | 1) +#define MT2701_PIN_24_EINT2__FUNC_KCOL1 (MTK_PIN_NO(24) | 3) +#define MT2701_PIN_24_EINT2__FUNC_CONN_MCU_DBGACK_N (MTK_PIN_NO(24) | 4) +#define MT2701_PIN_24_EINT2__FUNC_DBG_MON_A_28 (MTK_PIN_NO(24) | 7) +#define MT2701_PIN_24_EINT2__FUNC_PCIE2_PERST_N (MTK_PIN_NO(24) | 10) + +#define MT2701_PIN_25_EINT3__FUNC_GPIO25 (MTK_PIN_NO(25) | 0) +#define MT2701_PIN_25_EINT3__FUNC_URTS1 (MTK_PIN_NO(25) | 1) +#define MT2701_PIN_25_EINT3__FUNC_KCOL0 (MTK_PIN_NO(25) 
| 3) +#define MT2701_PIN_25_EINT3__FUNC_CONN_MCU_DBGI_N (MTK_PIN_NO(25) | 4) +#define MT2701_PIN_25_EINT3__FUNC_DBG_MON_A_27 (MTK_PIN_NO(25) | 7) + +#define MT2701_PIN_26_EINT4__FUNC_GPIO26 (MTK_PIN_NO(26) | 0) +#define MT2701_PIN_26_EINT4__FUNC_UCTS3 (MTK_PIN_NO(26) | 1) +#define MT2701_PIN_26_EINT4__FUNC_DRV_VBUS_P1 (MTK_PIN_NO(26) | 2) +#define MT2701_PIN_26_EINT4__FUNC_KROW3 (MTK_PIN_NO(26) | 3) +#define MT2701_PIN_26_EINT4__FUNC_CONN_MCU_TCK0 (MTK_PIN_NO(26) | 4) +#define MT2701_PIN_26_EINT4__FUNC_CONN_MCU_AICE_JCKC (MTK_PIN_NO(26) | 5) +#define MT2701_PIN_26_EINT4__FUNC_PCIE2_WAKE_N (MTK_PIN_NO(26) | 6) +#define MT2701_PIN_26_EINT4__FUNC_DBG_MON_A_26 (MTK_PIN_NO(26) | 7) + +#define MT2701_PIN_27_EINT5__FUNC_GPIO27 (MTK_PIN_NO(27) | 0) +#define MT2701_PIN_27_EINT5__FUNC_URTS3 (MTK_PIN_NO(27) | 1) +#define MT2701_PIN_27_EINT5__FUNC_IDDIG_P1 (MTK_PIN_NO(27) | 2) +#define MT2701_PIN_27_EINT5__FUNC_KROW2 (MTK_PIN_NO(27) | 3) +#define MT2701_PIN_27_EINT5__FUNC_CONN_MCU_TDI (MTK_PIN_NO(27) | 4) +#define MT2701_PIN_27_EINT5__FUNC_PCIE1_WAKE_N (MTK_PIN_NO(27) | 6) +#define MT2701_PIN_27_EINT5__FUNC_DBG_MON_A_25 (MTK_PIN_NO(27) | 7) + +#define MT2701_PIN_28_EINT6__FUNC_GPIO28 (MTK_PIN_NO(28) | 0) +#define MT2701_PIN_28_EINT6__FUNC_DRV_VBUS (MTK_PIN_NO(28) | 1) +#define MT2701_PIN_28_EINT6__FUNC_KROW1 (MTK_PIN_NO(28) | 3) +#define MT2701_PIN_28_EINT6__FUNC_CONN_MCU_TRST_B (MTK_PIN_NO(28) | 4) +#define MT2701_PIN_28_EINT6__FUNC_PCIE0_WAKE_N (MTK_PIN_NO(28) | 6) +#define MT2701_PIN_28_EINT6__FUNC_DBG_MON_A_24 (MTK_PIN_NO(28) | 7) + +#define MT2701_PIN_29_EINT7__FUNC_GPIO29 (MTK_PIN_NO(29) | 0) +#define MT2701_PIN_29_EINT7__FUNC_IDDIG (MTK_PIN_NO(29) | 1) +#define MT2701_PIN_29_EINT7__FUNC_MSDC1_WP (MTK_PIN_NO(29) | 2) +#define MT2701_PIN_29_EINT7__FUNC_KROW0 (MTK_PIN_NO(29) | 3) +#define MT2701_PIN_29_EINT7__FUNC_CONN_MCU_TMS (MTK_PIN_NO(29) | 4) +#define MT2701_PIN_29_EINT7__FUNC_CONN_MCU_AICE_JMSC (MTK_PIN_NO(29) | 5) +#define MT2701_PIN_29_EINT7__FUNC_DBG_MON_A_23 (MTK_PIN_NO(29) | 7) +#define MT2701_PIN_29_EINT7__FUNC_PCIE2_PERST_N (MTK_PIN_NO(29) | 14) + +#define MT2701_PIN_33_I2S1_DATA__FUNC_GPIO33 (MTK_PIN_NO(33) | 0) +#define MT2701_PIN_33_I2S1_DATA__FUNC_I2S1_DATA (MTK_PIN_NO(33) | 1) +#define MT2701_PIN_33_I2S1_DATA__FUNC_I2S1_DATA_BYPS (MTK_PIN_NO(33) | 2) +#define MT2701_PIN_33_I2S1_DATA__FUNC_PCM_TX (MTK_PIN_NO(33) | 3) +#define MT2701_PIN_33_I2S1_DATA__FUNC_IMG_TEST_CK (MTK_PIN_NO(33) | 4) +#define MT2701_PIN_33_I2S1_DATA__FUNC_G1_RXD0 (MTK_PIN_NO(33) | 5) +#define MT2701_PIN_33_I2S1_DATA__FUNC_WCN_PCM_TX (MTK_PIN_NO(33) | 6) +#define MT2701_PIN_33_I2S1_DATA__FUNC_DBG_MON_B_8 (MTK_PIN_NO(33) | 7) + +#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_GPIO34 (MTK_PIN_NO(34) | 0) +#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_I2S1_DATA_IN (MTK_PIN_NO(34) | 1) +#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_PCM_RX (MTK_PIN_NO(34) | 3) +#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_VDEC_TEST_CK (MTK_PIN_NO(34) | 4) +#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_G1_RXD1 (MTK_PIN_NO(34) | 5) +#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_WCN_PCM_RX (MTK_PIN_NO(34) | 6) +#define MT2701_PIN_34_I2S1_DATA_IN__FUNC_DBG_MON_B_7 (MTK_PIN_NO(34) | 7) + +#define MT2701_PIN_35_I2S1_BCK__FUNC_GPIO35 (MTK_PIN_NO(35) | 0) +#define MT2701_PIN_35_I2S1_BCK__FUNC_I2S1_BCK (MTK_PIN_NO(35) | 1) +#define MT2701_PIN_35_I2S1_BCK__FUNC_PCM_CLK0 (MTK_PIN_NO(35) | 3) +#define MT2701_PIN_35_I2S1_BCK__FUNC_G1_RXD2 (MTK_PIN_NO(35) | 5) +#define MT2701_PIN_35_I2S1_BCK__FUNC_WCN_PCM_CLKO (MTK_PIN_NO(35) | 6) +#define 
MT2701_PIN_35_I2S1_BCK__FUNC_DBG_MON_B_9 (MTK_PIN_NO(35) | 7) + +#define MT2701_PIN_36_I2S1_LRCK__FUNC_GPIO36 (MTK_PIN_NO(36) | 0) +#define MT2701_PIN_36_I2S1_LRCK__FUNC_I2S1_LRCK (MTK_PIN_NO(36) | 1) +#define MT2701_PIN_36_I2S1_LRCK__FUNC_PCM_SYNC (MTK_PIN_NO(36) | 3) +#define MT2701_PIN_36_I2S1_LRCK__FUNC_G1_RXD3 (MTK_PIN_NO(36) | 5) +#define MT2701_PIN_36_I2S1_LRCK__FUNC_WCN_PCM_SYNC (MTK_PIN_NO(36) | 6) +#define MT2701_PIN_36_I2S1_LRCK__FUNC_DBG_MON_B_10 (MTK_PIN_NO(36) | 7) + +#define MT2701_PIN_37_I2S1_MCLK__FUNC_GPIO37 (MTK_PIN_NO(37) | 0) +#define MT2701_PIN_37_I2S1_MCLK__FUNC_I2S1_MCLK (MTK_PIN_NO(37) | 1) +#define MT2701_PIN_37_I2S1_MCLK__FUNC_G1_RXDV (MTK_PIN_NO(37) | 5) +#define MT2701_PIN_37_I2S1_MCLK__FUNC_DBG_MON_B_11 (MTK_PIN_NO(37) | 7) + +#define MT2701_PIN_39_JTMS__FUNC_GPIO39 (MTK_PIN_NO(39) | 0) +#define MT2701_PIN_39_JTMS__FUNC_JTMS (MTK_PIN_NO(39) | 1) +#define MT2701_PIN_39_JTMS__FUNC_CONN_MCU_TMS (MTK_PIN_NO(39) | 2) +#define MT2701_PIN_39_JTMS__FUNC_CONN_MCU_AICE_JMSC (MTK_PIN_NO(39) | 3) +#define MT2701_PIN_39_JTMS__FUNC_DFD_TMS_XI (MTK_PIN_NO(39) | 4) + +#define MT2701_PIN_40_JTCK__FUNC_GPIO40 (MTK_PIN_NO(40) | 0) +#define MT2701_PIN_40_JTCK__FUNC_JTCK (MTK_PIN_NO(40) | 1) +#define MT2701_PIN_40_JTCK__FUNC_CONN_MCU_TCK1 (MTK_PIN_NO(40) | 2) +#define MT2701_PIN_40_JTCK__FUNC_CONN_MCU_AICE_JCKC (MTK_PIN_NO(40) | 3) +#define MT2701_PIN_40_JTCK__FUNC_DFD_TCK_XI (MTK_PIN_NO(40) | 4) + +#define MT2701_PIN_41_JTDI__FUNC_GPIO41 (MTK_PIN_NO(41) | 0) +#define MT2701_PIN_41_JTDI__FUNC_JTDI (MTK_PIN_NO(41) | 1) +#define MT2701_PIN_41_JTDI__FUNC_CONN_MCU_TDI (MTK_PIN_NO(41) | 2) +#define MT2701_PIN_41_JTDI__FUNC_DFD_TDI_XI (MTK_PIN_NO(41) | 4) + +#define MT2701_PIN_42_JTDO__FUNC_GPIO42 (MTK_PIN_NO(42) | 0) +#define MT2701_PIN_42_JTDO__FUNC_JTDO (MTK_PIN_NO(42) | 1) +#define MT2701_PIN_42_JTDO__FUNC_CONN_MCU_TDO (MTK_PIN_NO(42) | 2) +#define MT2701_PIN_42_JTDO__FUNC_DFD_TDO (MTK_PIN_NO(42) | 4) + +#define MT2701_PIN_43_NCLE__FUNC_GPIO43 (MTK_PIN_NO(43) | 0) +#define MT2701_PIN_43_NCLE__FUNC_NCLE (MTK_PIN_NO(43) | 1) +#define MT2701_PIN_43_NCLE__FUNC_EXT_XCS2 (MTK_PIN_NO(43) | 2) + +#define MT2701_PIN_44_NCEB1__FUNC_GPIO44 (MTK_PIN_NO(44) | 0) +#define MT2701_PIN_44_NCEB1__FUNC_NCEB1 (MTK_PIN_NO(44) | 1) +#define MT2701_PIN_44_NCEB1__FUNC_IDDIG (MTK_PIN_NO(44) | 2) + +#define MT2701_PIN_45_NCEB0__FUNC_GPIO45 (MTK_PIN_NO(45) | 0) +#define MT2701_PIN_45_NCEB0__FUNC_NCEB0 (MTK_PIN_NO(45) | 1) +#define MT2701_PIN_45_NCEB0__FUNC_DRV_VBUS (MTK_PIN_NO(45) | 2) + +#define MT2701_PIN_46_IR__FUNC_GPIO46 (MTK_PIN_NO(46) | 0) +#define MT2701_PIN_46_IR__FUNC_IR (MTK_PIN_NO(46) | 1) + +#define MT2701_PIN_47_NREB__FUNC_GPIO47 (MTK_PIN_NO(47) | 0) +#define MT2701_PIN_47_NREB__FUNC_NREB (MTK_PIN_NO(47) | 1) +#define MT2701_PIN_47_NREB__FUNC_IDDIG_P1 (MTK_PIN_NO(47) | 2) + +#define MT2701_PIN_48_NRNB__FUNC_GPIO48 (MTK_PIN_NO(48) | 0) +#define MT2701_PIN_48_NRNB__FUNC_NRNB (MTK_PIN_NO(48) | 1) +#define MT2701_PIN_48_NRNB__FUNC_DRV_VBUS_P1 (MTK_PIN_NO(48) | 2) + +#define MT2701_PIN_49_I2S0_DATA__FUNC_GPIO49 (MTK_PIN_NO(49) | 0) +#define MT2701_PIN_49_I2S0_DATA__FUNC_I2S0_DATA (MTK_PIN_NO(49) | 1) +#define MT2701_PIN_49_I2S0_DATA__FUNC_I2S0_DATA_BYPS (MTK_PIN_NO(49) | 2) +#define MT2701_PIN_49_I2S0_DATA__FUNC_PCM_TX (MTK_PIN_NO(49) | 3) +#define MT2701_PIN_49_I2S0_DATA__FUNC_WCN_I2S_DO (MTK_PIN_NO(49) | 6) +#define MT2701_PIN_49_I2S0_DATA__FUNC_DBG_MON_B_3 (MTK_PIN_NO(49) | 7) + +#define MT2701_PIN_53_SPI0_CSN__FUNC_GPIO53 (MTK_PIN_NO(53) | 0) +#define MT2701_PIN_53_SPI0_CSN__FUNC_SPI0_CS 
(MTK_PIN_NO(53) | 1) +#define MT2701_PIN_53_SPI0_CSN__FUNC_SPDIF (MTK_PIN_NO(53) | 3) +#define MT2701_PIN_53_SPI0_CSN__FUNC_ADC_CK (MTK_PIN_NO(53) | 4) +#define MT2701_PIN_53_SPI0_CSN__FUNC_PWM1 (MTK_PIN_NO(53) | 5) +#define MT2701_PIN_53_SPI0_CSN__FUNC_DBG_MON_A_7 (MTK_PIN_NO(53) | 7) + +#define MT2701_PIN_54_SPI0_CK__FUNC_GPIO54 (MTK_PIN_NO(54) | 0) +#define MT2701_PIN_54_SPI0_CK__FUNC_SPI0_CK (MTK_PIN_NO(54) | 1) +#define MT2701_PIN_54_SPI0_CK__FUNC_SPDIF_IN1 (MTK_PIN_NO(54) | 3) +#define MT2701_PIN_54_SPI0_CK__FUNC_ADC_DAT_IN (MTK_PIN_NO(54) | 4) +#define MT2701_PIN_54_SPI0_CK__FUNC_DBG_MON_A_10 (MTK_PIN_NO(54) | 7) + +#define MT2701_PIN_55_SPI0_MI__FUNC_GPIO55 (MTK_PIN_NO(55) | 0) +#define MT2701_PIN_55_SPI0_MI__FUNC_SPI0_MI (MTK_PIN_NO(55) | 1) +#define MT2701_PIN_55_SPI0_MI__FUNC_SPI0_MO (MTK_PIN_NO(55) | 2) +#define MT2701_PIN_55_SPI0_MI__FUNC_MSDC1_WP (MTK_PIN_NO(55) | 3) +#define MT2701_PIN_55_SPI0_MI__FUNC_ADC_WS (MTK_PIN_NO(55) | 4) +#define MT2701_PIN_55_SPI0_MI__FUNC_PWM2 (MTK_PIN_NO(55) | 5) +#define MT2701_PIN_55_SPI0_MI__FUNC_DBG_MON_A_8 (MTK_PIN_NO(55) | 7) + +#define MT2701_PIN_56_SPI0_MO__FUNC_GPIO56 (MTK_PIN_NO(56) | 0) +#define MT2701_PIN_56_SPI0_MO__FUNC_SPI0_MO (MTK_PIN_NO(56) | 1) +#define MT2701_PIN_56_SPI0_MO__FUNC_SPI0_MI (MTK_PIN_NO(56) | 2) +#define MT2701_PIN_56_SPI0_MO__FUNC_SPDIF_IN0 (MTK_PIN_NO(56) | 3) +#define MT2701_PIN_56_SPI0_MO__FUNC_DBG_MON_A_9 (MTK_PIN_NO(56) | 7) + +#define MT2701_PIN_57_SDA1__FUNC_GPIO57 (MTK_PIN_NO(57) | 0) +#define MT2701_PIN_57_SDA1__FUNC_SDA1 (MTK_PIN_NO(57) | 1) + +#define MT2701_PIN_58_SCL1__FUNC_GPIO58 (MTK_PIN_NO(58) | 0) +#define MT2701_PIN_58_SCL1__FUNC_SCL1 (MTK_PIN_NO(58) | 1) + +#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_GPIO72 (MTK_PIN_NO(72) | 0) +#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_I2S0_DATA_IN (MTK_PIN_NO(72) | 1) +#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_PCM_RX (MTK_PIN_NO(72) | 3) +#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_PWM0 (MTK_PIN_NO(72) | 4) +#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_DISP_PWM (MTK_PIN_NO(72) | 5) +#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_WCN_I2S_DI (MTK_PIN_NO(72) | 6) +#define MT2701_PIN_72_I2S0_DATA_IN__FUNC_DBG_MON_B_2 (MTK_PIN_NO(72) | 7) + +#define MT2701_PIN_73_I2S0_LRCK__FUNC_GPIO73 (MTK_PIN_NO(73) | 0) +#define MT2701_PIN_73_I2S0_LRCK__FUNC_I2S0_LRCK (MTK_PIN_NO(73) | 1) +#define MT2701_PIN_73_I2S0_LRCK__FUNC_PCM_SYNC (MTK_PIN_NO(73) | 3) +#define MT2701_PIN_73_I2S0_LRCK__FUNC_WCN_I2S_LRCK (MTK_PIN_NO(73) | 6) +#define MT2701_PIN_73_I2S0_LRCK__FUNC_DBG_MON_B_5 (MTK_PIN_NO(73) | 7) + +#define MT2701_PIN_74_I2S0_BCK__FUNC_GPIO74 (MTK_PIN_NO(74) | 0) +#define MT2701_PIN_74_I2S0_BCK__FUNC_I2S0_BCK (MTK_PIN_NO(74) | 1) +#define MT2701_PIN_74_I2S0_BCK__FUNC_PCM_CLK0 (MTK_PIN_NO(74) | 3) +#define MT2701_PIN_74_I2S0_BCK__FUNC_WCN_I2S_BCK (MTK_PIN_NO(74) | 6) +#define MT2701_PIN_74_I2S0_BCK__FUNC_DBG_MON_B_4 (MTK_PIN_NO(74) | 7) + +#define MT2701_PIN_75_SDA0__FUNC_GPIO75 (MTK_PIN_NO(75) | 0) +#define MT2701_PIN_75_SDA0__FUNC_SDA0 (MTK_PIN_NO(75) | 1) + +#define MT2701_PIN_76_SCL0__FUNC_GPIO76 (MTK_PIN_NO(76) | 0) +#define MT2701_PIN_76_SCL0__FUNC_SCL0 (MTK_PIN_NO(76) | 1) + +#define MT2701_PIN_77_SDA2__FUNC_GPIO77 (MTK_PIN_NO(77) | 0) +#define MT2701_PIN_77_SDA2__FUNC_SDA2 (MTK_PIN_NO(77) | 1) + +#define MT2701_PIN_78_SCL2__FUNC_GPIO78 (MTK_PIN_NO(78) | 0) +#define MT2701_PIN_78_SCL2__FUNC_SCL2 (MTK_PIN_NO(78) | 1) + +#define MT2701_PIN_79_URXD0__FUNC_GPIO79 (MTK_PIN_NO(79) | 0) +#define MT2701_PIN_79_URXD0__FUNC_URXD0 (MTK_PIN_NO(79) | 1) +#define MT2701_PIN_79_URXD0__FUNC_UTXD0 
(MTK_PIN_NO(79) | 2) +#define MT2701_PIN_79_URXD0__FUNC_ (MTK_PIN_NO(79) | 5) + +#define MT2701_PIN_80_UTXD0__FUNC_GPIO80 (MTK_PIN_NO(80) | 0) +#define MT2701_PIN_80_UTXD0__FUNC_UTXD0 (MTK_PIN_NO(80) | 1) +#define MT2701_PIN_80_UTXD0__FUNC_URXD0 (MTK_PIN_NO(80) | 2) + +#define MT2701_PIN_81_URXD1__FUNC_GPIO81 (MTK_PIN_NO(81) | 0) +#define MT2701_PIN_81_URXD1__FUNC_URXD1 (MTK_PIN_NO(81) | 1) +#define MT2701_PIN_81_URXD1__FUNC_UTXD1 (MTK_PIN_NO(81) | 2) + +#define MT2701_PIN_82_UTXD1__FUNC_GPIO82 (MTK_PIN_NO(82) | 0) +#define MT2701_PIN_82_UTXD1__FUNC_UTXD1 (MTK_PIN_NO(82) | 1) +#define MT2701_PIN_82_UTXD1__FUNC_URXD1 (MTK_PIN_NO(82) | 2) + +#define MT2701_PIN_83_LCM_RST__FUNC_GPIO83 (MTK_PIN_NO(83) | 0) +#define MT2701_PIN_83_LCM_RST__FUNC_LCM_RST (MTK_PIN_NO(83) | 1) +#define MT2701_PIN_83_LCM_RST__FUNC_VDAC_CK_XI (MTK_PIN_NO(83) | 2) +#define MT2701_PIN_83_LCM_RST__FUNC_DBG_MON_B_1 (MTK_PIN_NO(83) | 7) + +#define MT2701_PIN_84_DSI_TE__FUNC_GPIO84 (MTK_PIN_NO(84) | 0) +#define MT2701_PIN_84_DSI_TE__FUNC_DSI_TE (MTK_PIN_NO(84) | 1) +#define MT2701_PIN_84_DSI_TE__FUNC_DBG_MON_B_0 (MTK_PIN_NO(84) | 7) + +#define MT2701_PIN_91_TDN3__FUNC_GPI91 (MTK_PIN_NO(91) | 0) +#define MT2701_PIN_91_TDN3__FUNC_TDN3 (MTK_PIN_NO(91) | 1) + +#define MT2701_PIN_92_TDP3__FUNC_GPI92 (MTK_PIN_NO(92) | 0) +#define MT2701_PIN_92_TDP3__FUNC_TDP3 (MTK_PIN_NO(92) | 1) + +#define MT2701_PIN_93_TDN2__FUNC_GPI93 (MTK_PIN_NO(93) | 0) +#define MT2701_PIN_93_TDN2__FUNC_TDN2 (MTK_PIN_NO(93) | 1) + +#define MT2701_PIN_94_TDP2__FUNC_GPI94 (MTK_PIN_NO(94) | 0) +#define MT2701_PIN_94_TDP2__FUNC_TDP2 (MTK_PIN_NO(94) | 1) + +#define MT2701_PIN_95_TCN__FUNC_GPI95 (MTK_PIN_NO(95) | 0) +#define MT2701_PIN_95_TCN__FUNC_TCN (MTK_PIN_NO(95) | 1) + +#define MT2701_PIN_96_TCP__FUNC_GPI96 (MTK_PIN_NO(96) | 0) +#define MT2701_PIN_96_TCP__FUNC_TCP (MTK_PIN_NO(96) | 1) + +#define MT2701_PIN_97_TDN1__FUNC_GPI97 (MTK_PIN_NO(97) | 0) +#define MT2701_PIN_97_TDN1__FUNC_TDN1 (MTK_PIN_NO(97) | 1) + +#define MT2701_PIN_98_TDP1__FUNC_GPI98 (MTK_PIN_NO(98) | 0) +#define MT2701_PIN_98_TDP1__FUNC_TDP1 (MTK_PIN_NO(98) | 1) + +#define MT2701_PIN_99_TDN0__FUNC_GPI99 (MTK_PIN_NO(99) | 0) +#define MT2701_PIN_99_TDN0__FUNC_TDN0 (MTK_PIN_NO(99) | 1) + +#define MT2701_PIN_100_TDP0__FUNC_GPI100 (MTK_PIN_NO(100) | 0) +#define MT2701_PIN_100_TDP0__FUNC_TDP0 (MTK_PIN_NO(100) | 1) + +#define MT2701_PIN_101_SPI2_CSN__FUNC_GPIO101 (MTK_PIN_NO(101) | 0) +#define MT2701_PIN_101_SPI2_CSN__FUNC_SPI2_CS (MTK_PIN_NO(101) | 1) +#define MT2701_PIN_101_SPI2_CSN__FUNC_SCL3 (MTK_PIN_NO(101) | 3) +#define MT2701_PIN_101_SPI2_CSN__FUNC_KROW0 (MTK_PIN_NO(101) | 4) + +#define MT2701_PIN_102_SPI2_MI__FUNC_GPIO102 (MTK_PIN_NO(102) | 0) +#define MT2701_PIN_102_SPI2_MI__FUNC_SPI2_MI (MTK_PIN_NO(102) | 1) +#define MT2701_PIN_102_SPI2_MI__FUNC_SPI2_MO (MTK_PIN_NO(102) | 2) +#define MT2701_PIN_102_SPI2_MI__FUNC_SDA3 (MTK_PIN_NO(102) | 3) +#define MT2701_PIN_102_SPI2_MI__FUNC_KROW1 (MTK_PIN_NO(102) | 4) + +#define MT2701_PIN_103_SPI2_MO__FUNC_GPIO103 (MTK_PIN_NO(103) | 0) +#define MT2701_PIN_103_SPI2_MO__FUNC_SPI2_MO (MTK_PIN_NO(103) | 1) +#define MT2701_PIN_103_SPI2_MO__FUNC_SPI2_MI (MTK_PIN_NO(103) | 2) +#define MT2701_PIN_103_SPI2_MO__FUNC_SCL3 (MTK_PIN_NO(103) | 3) +#define MT2701_PIN_103_SPI2_MO__FUNC_KROW2 (MTK_PIN_NO(103) | 4) + +#define MT2701_PIN_104_SPI2_CLK__FUNC_GPIO104 (MTK_PIN_NO(104) | 0) +#define MT2701_PIN_104_SPI2_CLK__FUNC_SPI2_CK (MTK_PIN_NO(104) | 1) +#define MT2701_PIN_104_SPI2_CLK__FUNC_SDA3 (MTK_PIN_NO(104) | 3) +#define MT2701_PIN_104_SPI2_CLK__FUNC_KROW3 
(MTK_PIN_NO(104) | 4) + +#define MT2701_PIN_105_MSDC1_CMD__FUNC_GPIO105 (MTK_PIN_NO(105) | 0) +#define MT2701_PIN_105_MSDC1_CMD__FUNC_MSDC1_CMD (MTK_PIN_NO(105) | 1) +#define MT2701_PIN_105_MSDC1_CMD__FUNC_ANT_SEL0 (MTK_PIN_NO(105) | 2) +#define MT2701_PIN_105_MSDC1_CMD__FUNC_SDA1 (MTK_PIN_NO(105) | 3) +#define MT2701_PIN_105_MSDC1_CMD__FUNC_I2SOUT_BCK (MTK_PIN_NO(105) | 6) +#define MT2701_PIN_105_MSDC1_CMD__FUNC_DBG_MON_B_27 (MTK_PIN_NO(105) | 7) + +#define MT2701_PIN_106_MSDC1_CLK__FUNC_GPIO106 (MTK_PIN_NO(106) | 0) +#define MT2701_PIN_106_MSDC1_CLK__FUNC_MSDC1_CLK (MTK_PIN_NO(106) | 1) +#define MT2701_PIN_106_MSDC1_CLK__FUNC_ANT_SEL1 (MTK_PIN_NO(106) | 2) +#define MT2701_PIN_106_MSDC1_CLK__FUNC_SCL1 (MTK_PIN_NO(106) | 3) +#define MT2701_PIN_106_MSDC1_CLK__FUNC_I2SOUT_LRCK (MTK_PIN_NO(106) | 6) +#define MT2701_PIN_106_MSDC1_CLK__FUNC_DBG_MON_B_28 (MTK_PIN_NO(106) | 7) + +#define MT2701_PIN_107_MSDC1_DAT0__FUNC_GPIO107 (MTK_PIN_NO(107) | 0) +#define MT2701_PIN_107_MSDC1_DAT0__FUNC_MSDC1_DAT0 (MTK_PIN_NO(107) | 1) +#define MT2701_PIN_107_MSDC1_DAT0__FUNC_ANT_SEL2 (MTK_PIN_NO(107) | 2) +#define MT2701_PIN_107_MSDC1_DAT0__FUNC_UTXD0 (MTK_PIN_NO(107) | 5) +#define MT2701_PIN_107_MSDC1_DAT0__FUNC_I2SOUT_DATA_OUT (MTK_PIN_NO(107) | 6) +#define MT2701_PIN_107_MSDC1_DAT0__FUNC_DBG_MON_B_26 (MTK_PIN_NO(107) | 7) + +#define MT2701_PIN_108_MSDC1_DAT1__FUNC_GPIO108 (MTK_PIN_NO(108) | 0) +#define MT2701_PIN_108_MSDC1_DAT1__FUNC_MSDC1_DAT1 (MTK_PIN_NO(108) | 1) +#define MT2701_PIN_108_MSDC1_DAT1__FUNC_ANT_SEL3 (MTK_PIN_NO(108) | 2) +#define MT2701_PIN_108_MSDC1_DAT1__FUNC_PWM0 (MTK_PIN_NO(108) | 3) +#define MT2701_PIN_108_MSDC1_DAT1__FUNC_URXD0 (MTK_PIN_NO(108) | 5) +#define MT2701_PIN_108_MSDC1_DAT1__FUNC_PWM1 (MTK_PIN_NO(108) | 6) +#define MT2701_PIN_108_MSDC1_DAT1__FUNC_DBG_MON_B_25 (MTK_PIN_NO(108) | 7) + +#define MT2701_PIN_109_MSDC1_DAT2__FUNC_GPIO109 (MTK_PIN_NO(109) | 0) +#define MT2701_PIN_109_MSDC1_DAT2__FUNC_MSDC1_DAT2 (MTK_PIN_NO(109) | 1) +#define MT2701_PIN_109_MSDC1_DAT2__FUNC_ANT_SEL4 (MTK_PIN_NO(109) | 2) +#define MT2701_PIN_109_MSDC1_DAT2__FUNC_SDA2 (MTK_PIN_NO(109) | 3) +#define MT2701_PIN_109_MSDC1_DAT2__FUNC_UTXD1 (MTK_PIN_NO(109) | 5) +#define MT2701_PIN_109_MSDC1_DAT2__FUNC_PWM2 (MTK_PIN_NO(109) | 6) +#define MT2701_PIN_109_MSDC1_DAT2__FUNC_DBG_MON_B_24 (MTK_PIN_NO(109) | 7) + +#define MT2701_PIN_110_MSDC1_DAT3__FUNC_GPIO110 (MTK_PIN_NO(110) | 0) +#define MT2701_PIN_110_MSDC1_DAT3__FUNC_MSDC1_DAT3 (MTK_PIN_NO(110) | 1) +#define MT2701_PIN_110_MSDC1_DAT3__FUNC_ANT_SEL5 (MTK_PIN_NO(110) | 2) +#define MT2701_PIN_110_MSDC1_DAT3__FUNC_SCL2 (MTK_PIN_NO(110) | 3) +#define MT2701_PIN_110_MSDC1_DAT3__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(110) | 4) +#define MT2701_PIN_110_MSDC1_DAT3__FUNC_URXD1 (MTK_PIN_NO(110) | 5) +#define MT2701_PIN_110_MSDC1_DAT3__FUNC_PWM3 (MTK_PIN_NO(110) | 6) +#define MT2701_PIN_110_MSDC1_DAT3__FUNC_DBG_MON_B_23 (MTK_PIN_NO(110) | 7) + +#define MT2701_PIN_111_MSDC0_DAT7__FUNC_GPIO111 (MTK_PIN_NO(111) | 0) +#define MT2701_PIN_111_MSDC0_DAT7__FUNC_MSDC0_DAT7 (MTK_PIN_NO(111) | 1) +#define MT2701_PIN_111_MSDC0_DAT7__FUNC_NLD7 (MTK_PIN_NO(111) | 4) + +#define MT2701_PIN_112_MSDC0_DAT6__FUNC_GPIO112 (MTK_PIN_NO(112) | 0) +#define MT2701_PIN_112_MSDC0_DAT6__FUNC_MSDC0_DAT6 (MTK_PIN_NO(112) | 1) +#define MT2701_PIN_112_MSDC0_DAT6__FUNC_NLD6 (MTK_PIN_NO(112) | 4) + +#define MT2701_PIN_113_MSDC0_DAT5__FUNC_GPIO113 (MTK_PIN_NO(113) | 0) +#define MT2701_PIN_113_MSDC0_DAT5__FUNC_MSDC0_DAT5 (MTK_PIN_NO(113) | 1) +#define MT2701_PIN_113_MSDC0_DAT5__FUNC_NLD5 (MTK_PIN_NO(113) | 4) + 
+#define MT2701_PIN_114_MSDC0_DAT4__FUNC_GPIO114 (MTK_PIN_NO(114) | 0) +#define MT2701_PIN_114_MSDC0_DAT4__FUNC_MSDC0_DAT4 (MTK_PIN_NO(114) | 1) +#define MT2701_PIN_114_MSDC0_DAT4__FUNC_NLD4 (MTK_PIN_NO(114) | 4) + +#define MT2701_PIN_115_MSDC0_RSTB__FUNC_GPIO115 (MTK_PIN_NO(115) | 0) +#define MT2701_PIN_115_MSDC0_RSTB__FUNC_MSDC0_RSTB (MTK_PIN_NO(115) | 1) +#define MT2701_PIN_115_MSDC0_RSTB__FUNC_NLD8 (MTK_PIN_NO(115) | 4) + +#define MT2701_PIN_116_MSDC0_CMD__FUNC_GPIO116 (MTK_PIN_NO(116) | 0) +#define MT2701_PIN_116_MSDC0_CMD__FUNC_MSDC0_CMD (MTK_PIN_NO(116) | 1) +#define MT2701_PIN_116_MSDC0_CMD__FUNC_NALE (MTK_PIN_NO(116) | 4) + +#define MT2701_PIN_117_MSDC0_CLK__FUNC_GPIO117 (MTK_PIN_NO(117) | 0) +#define MT2701_PIN_117_MSDC0_CLK__FUNC_MSDC0_CLK (MTK_PIN_NO(117) | 1) +#define MT2701_PIN_117_MSDC0_CLK__FUNC_NWEB (MTK_PIN_NO(117) | 4) + +#define MT2701_PIN_118_MSDC0_DAT3__FUNC_GPIO118 (MTK_PIN_NO(118) | 0) +#define MT2701_PIN_118_MSDC0_DAT3__FUNC_MSDC0_DAT3 (MTK_PIN_NO(118) | 1) +#define MT2701_PIN_118_MSDC0_DAT3__FUNC_NLD3 (MTK_PIN_NO(118) | 4) + +#define MT2701_PIN_119_MSDC0_DAT2__FUNC_GPIO119 (MTK_PIN_NO(119) | 0) +#define MT2701_PIN_119_MSDC0_DAT2__FUNC_MSDC0_DAT2 (MTK_PIN_NO(119) | 1) +#define MT2701_PIN_119_MSDC0_DAT2__FUNC_NLD2 (MTK_PIN_NO(119) | 4) + +#define MT2701_PIN_120_MSDC0_DAT1__FUNC_GPIO120 (MTK_PIN_NO(120) | 0) +#define MT2701_PIN_120_MSDC0_DAT1__FUNC_MSDC0_DAT1 (MTK_PIN_NO(120) | 1) +#define MT2701_PIN_120_MSDC0_DAT1__FUNC_NLD1 (MTK_PIN_NO(120) | 4) + +#define MT2701_PIN_121_MSDC0_DAT0__FUNC_GPIO121 (MTK_PIN_NO(121) | 0) +#define MT2701_PIN_121_MSDC0_DAT0__FUNC_MSDC0_DAT0 (MTK_PIN_NO(121) | 1) +#define MT2701_PIN_121_MSDC0_DAT0__FUNC_NLD0 (MTK_PIN_NO(121) | 4) +#define MT2701_PIN_121_MSDC0_DAT0__FUNC_WATCHDOG (MTK_PIN_NO(121) | 5) + +#define MT2701_PIN_122_CEC__FUNC_GPIO122 (MTK_PIN_NO(122) | 0) +#define MT2701_PIN_122_CEC__FUNC_CEC (MTK_PIN_NO(122) | 1) +#define MT2701_PIN_122_CEC__FUNC_SDA2 (MTK_PIN_NO(122) | 4) +#define MT2701_PIN_122_CEC__FUNC_URXD0 (MTK_PIN_NO(122) | 5) + +#define MT2701_PIN_123_HTPLG__FUNC_GPIO123 (MTK_PIN_NO(123) | 0) +#define MT2701_PIN_123_HTPLG__FUNC_HTPLG (MTK_PIN_NO(123) | 1) +#define MT2701_PIN_123_HTPLG__FUNC_SCL2 (MTK_PIN_NO(123) | 4) +#define MT2701_PIN_123_HTPLG__FUNC_UTXD0 (MTK_PIN_NO(123) | 5) + +#define MT2701_PIN_124_HDMISCK__FUNC_GPIO124 (MTK_PIN_NO(124) | 0) +#define MT2701_PIN_124_HDMISCK__FUNC_HDMISCK (MTK_PIN_NO(124) | 1) +#define MT2701_PIN_124_HDMISCK__FUNC_SDA1 (MTK_PIN_NO(124) | 4) +#define MT2701_PIN_124_HDMISCK__FUNC_PWM3 (MTK_PIN_NO(124) | 5) + +#define MT2701_PIN_125_HDMISD__FUNC_GPIO125 (MTK_PIN_NO(125) | 0) +#define MT2701_PIN_125_HDMISD__FUNC_HDMISD (MTK_PIN_NO(125) | 1) +#define MT2701_PIN_125_HDMISD__FUNC_SCL1 (MTK_PIN_NO(125) | 4) +#define MT2701_PIN_125_HDMISD__FUNC_PWM4 (MTK_PIN_NO(125) | 5) + +#define MT2701_PIN_126_I2S0_MCLK__FUNC_GPIO126 (MTK_PIN_NO(126) | 0) +#define MT2701_PIN_126_I2S0_MCLK__FUNC_I2S0_MCLK (MTK_PIN_NO(126) | 1) +#define MT2701_PIN_126_I2S0_MCLK__FUNC_WCN_I2S_MCLK (MTK_PIN_NO(126) | 6) +#define MT2701_PIN_126_I2S0_MCLK__FUNC_DBG_MON_B_6 (MTK_PIN_NO(126) | 7) + +#define MT2701_PIN_199_SPI1_CLK__FUNC_GPIO199 (MTK_PIN_NO(199) | 0) +#define MT2701_PIN_199_SPI1_CLK__FUNC_SPI1_CK (MTK_PIN_NO(199) | 1) +#define MT2701_PIN_199_SPI1_CLK__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(199) | 3) +#define MT2701_PIN_199_SPI1_CLK__FUNC_KCOL3 (MTK_PIN_NO(199) | 4) +#define MT2701_PIN_199_SPI1_CLK__FUNC_DBG_MON_B_15 (MTK_PIN_NO(199) | 7) + +#define MT2701_PIN_200_SPDIF_OUT__FUNC_GPIO200 (MTK_PIN_NO(200) | 0) +#define 
MT2701_PIN_200_SPDIF_OUT__FUNC_SPDIF_OUT (MTK_PIN_NO(200) | 1) +#define MT2701_PIN_200_SPDIF_OUT__FUNC_G1_TXD3 (MTK_PIN_NO(200) | 5) +#define MT2701_PIN_200_SPDIF_OUT__FUNC_URXD2 (MTK_PIN_NO(200) | 6) +#define MT2701_PIN_200_SPDIF_OUT__FUNC_DBG_MON_B_16 (MTK_PIN_NO(200) | 7) + +#define MT2701_PIN_201_SPDIF_IN0__FUNC_GPIO201 (MTK_PIN_NO(201) | 0) +#define MT2701_PIN_201_SPDIF_IN0__FUNC_SPDIF_IN0 (MTK_PIN_NO(201) | 1) +#define MT2701_PIN_201_SPDIF_IN0__FUNC_G1_TXEN (MTK_PIN_NO(201) | 5) +#define MT2701_PIN_201_SPDIF_IN0__FUNC_UTXD2 (MTK_PIN_NO(201) | 6) +#define MT2701_PIN_201_SPDIF_IN0__FUNC_DBG_MON_B_17 (MTK_PIN_NO(201) | 7) + +#define MT2701_PIN_202_SPDIF_IN1__FUNC_GPIO202 (MTK_PIN_NO(202) | 0) +#define MT2701_PIN_202_SPDIF_IN1__FUNC_SPDIF_IN1 (MTK_PIN_NO(202) | 1) + +#define MT2701_PIN_203_PWM0__FUNC_GPIO203 (MTK_PIN_NO(203) | 0) +#define MT2701_PIN_203_PWM0__FUNC_PWM0 (MTK_PIN_NO(203) | 1) +#define MT2701_PIN_203_PWM0__FUNC_DISP_PWM (MTK_PIN_NO(203) | 2) +#define MT2701_PIN_203_PWM0__FUNC_G1_TXD2 (MTK_PIN_NO(203) | 5) +#define MT2701_PIN_203_PWM0__FUNC_DBG_MON_B_18 (MTK_PIN_NO(203) | 7) +#define MT2701_PIN_203_PWM0__FUNC_I2S2_DATA (MTK_PIN_NO(203) | 9) + +#define MT2701_PIN_204_PWM1__FUNC_GPIO204 (MTK_PIN_NO(204) | 0) +#define MT2701_PIN_204_PWM1__FUNC_PWM1 (MTK_PIN_NO(204) | 1) +#define MT2701_PIN_204_PWM1__FUNC_CLKM3 (MTK_PIN_NO(204) | 2) +#define MT2701_PIN_204_PWM1__FUNC_G1_TXD1 (MTK_PIN_NO(204) | 5) +#define MT2701_PIN_204_PWM1__FUNC_DBG_MON_B_19 (MTK_PIN_NO(204) | 7) +#define MT2701_PIN_204_PWM1__FUNC_I2S3_DATA (MTK_PIN_NO(204) | 9) + +#define MT2701_PIN_205_PWM2__FUNC_GPIO205 (MTK_PIN_NO(205) | 0) +#define MT2701_PIN_205_PWM2__FUNC_PWM2 (MTK_PIN_NO(205) | 1) +#define MT2701_PIN_205_PWM2__FUNC_CLKM2 (MTK_PIN_NO(205) | 2) +#define MT2701_PIN_205_PWM2__FUNC_G1_TXD0 (MTK_PIN_NO(205) | 5) +#define MT2701_PIN_205_PWM2__FUNC_DBG_MON_B_20 (MTK_PIN_NO(205) | 7) + +#define MT2701_PIN_206_PWM3__FUNC_GPIO206 (MTK_PIN_NO(206) | 0) +#define MT2701_PIN_206_PWM3__FUNC_PWM3 (MTK_PIN_NO(206) | 1) +#define MT2701_PIN_206_PWM3__FUNC_CLKM1 (MTK_PIN_NO(206) | 2) +#define MT2701_PIN_206_PWM3__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(206) | 3) +#define MT2701_PIN_206_PWM3__FUNC_G1_TXC (MTK_PIN_NO(206) | 5) +#define MT2701_PIN_206_PWM3__FUNC_DBG_MON_B_21 (MTK_PIN_NO(206) | 7) + +#define MT2701_PIN_207_PWM4__FUNC_GPIO207 (MTK_PIN_NO(207) | 0) +#define MT2701_PIN_207_PWM4__FUNC_PWM4 (MTK_PIN_NO(207) | 1) +#define MT2701_PIN_207_PWM4__FUNC_CLKM0 (MTK_PIN_NO(207) | 2) +#define MT2701_PIN_207_PWM4__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(207) | 3) +#define MT2701_PIN_207_PWM4__FUNC_G1_RXC (MTK_PIN_NO(207) | 5) +#define MT2701_PIN_207_PWM4__FUNC_DBG_MON_B_22 (MTK_PIN_NO(207) | 7) + +#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_GPIO208 (MTK_PIN_NO(208) | 0) +#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_AUD_EXT_CK1 (MTK_PIN_NO(208) | 1) +#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_PWM0 (MTK_PIN_NO(208) | 2) +#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_ANT_SEL5 (MTK_PIN_NO(208) | 4) +#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_DISP_PWM (MTK_PIN_NO(208) | 5) +#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_DBG_MON_A_31 (MTK_PIN_NO(208) | 7) +#define MT2701_PIN_208_AUD_EXT_CK1__FUNC_PCIE0_PERST_N (MTK_PIN_NO(208) | 11) + +#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_GPIO209 (MTK_PIN_NO(209) | 0) +#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_AUD_EXT_CK2 (MTK_PIN_NO(209) | 1) +#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_MSDC1_WP (MTK_PIN_NO(209) | 2) +#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_PWM1 (MTK_PIN_NO(209) | 5) +#define 
MT2701_PIN_209_AUD_EXT_CK2__FUNC_DBG_MON_A_32 (MTK_PIN_NO(209) | 7) +#define MT2701_PIN_209_AUD_EXT_CK2__FUNC_PCIE1_PERST_N (MTK_PIN_NO(209) | 11) + +#define MT2701_PIN_236_EXT_SDIO3__FUNC_GPIO236 (MTK_PIN_NO(236) | 0) +#define MT2701_PIN_236_EXT_SDIO3__FUNC_EXT_SDIO3 (MTK_PIN_NO(236) | 1) +#define MT2701_PIN_236_EXT_SDIO3__FUNC_IDDIG (MTK_PIN_NO(236) | 2) +#define MT2701_PIN_236_EXT_SDIO3__FUNC_DBG_MON_A_1 (MTK_PIN_NO(236) | 7) + +#define MT2701_PIN_237_EXT_SDIO2__FUNC_GPIO237 (MTK_PIN_NO(237) | 0) +#define MT2701_PIN_237_EXT_SDIO2__FUNC_EXT_SDIO2 (MTK_PIN_NO(237) | 1) +#define MT2701_PIN_237_EXT_SDIO2__FUNC_DRV_VBUS (MTK_PIN_NO(237) | 2) + +#define MT2701_PIN_238_EXT_SDIO1__FUNC_GPIO238 (MTK_PIN_NO(238) | 0) +#define MT2701_PIN_238_EXT_SDIO1__FUNC_EXT_SDIO1 (MTK_PIN_NO(238) | 1) +#define MT2701_PIN_238_EXT_SDIO1__FUNC_IDDIG_P1 (MTK_PIN_NO(238) | 2) + +#define MT2701_PIN_239_EXT_SDIO0__FUNC_GPIO239 (MTK_PIN_NO(239) | 0) +#define MT2701_PIN_239_EXT_SDIO0__FUNC_EXT_SDIO0 (MTK_PIN_NO(239) | 1) +#define MT2701_PIN_239_EXT_SDIO0__FUNC_DRV_VBUS_P1 (MTK_PIN_NO(239) | 2) + +#define MT2701_PIN_240_EXT_XCS__FUNC_GPIO240 (MTK_PIN_NO(240) | 0) +#define MT2701_PIN_240_EXT_XCS__FUNC_EXT_XCS (MTK_PIN_NO(240) | 1) + +#define MT2701_PIN_241_EXT_SCK__FUNC_GPIO241 (MTK_PIN_NO(241) | 0) +#define MT2701_PIN_241_EXT_SCK__FUNC_EXT_SCK (MTK_PIN_NO(241) | 1) + +#define MT2701_PIN_242_URTS2__FUNC_GPIO242 (MTK_PIN_NO(242) | 0) +#define MT2701_PIN_242_URTS2__FUNC_URTS2 (MTK_PIN_NO(242) | 1) +#define MT2701_PIN_242_URTS2__FUNC_UTXD3 (MTK_PIN_NO(242) | 2) +#define MT2701_PIN_242_URTS2__FUNC_URXD3 (MTK_PIN_NO(242) | 3) +#define MT2701_PIN_242_URTS2__FUNC_SCL1 (MTK_PIN_NO(242) | 4) +#define MT2701_PIN_242_URTS2__FUNC_DBG_MON_B_32 (MTK_PIN_NO(242) | 7) + +#define MT2701_PIN_243_UCTS2__FUNC_GPIO243 (MTK_PIN_NO(243) | 0) +#define MT2701_PIN_243_UCTS2__FUNC_UCTS2 (MTK_PIN_NO(243) | 1) +#define MT2701_PIN_243_UCTS2__FUNC_URXD3 (MTK_PIN_NO(243) | 2) +#define MT2701_PIN_243_UCTS2__FUNC_UTXD3 (MTK_PIN_NO(243) | 3) +#define MT2701_PIN_243_UCTS2__FUNC_SDA1 (MTK_PIN_NO(243) | 4) +#define MT2701_PIN_243_UCTS2__FUNC_DBG_MON_A_6 (MTK_PIN_NO(243) | 7) + +#define MT2701_PIN_244_HDMI_SDA_RX__FUNC_GPIO244 (MTK_PIN_NO(244) | 0) +#define MT2701_PIN_244_HDMI_SDA_RX__FUNC_HDMI_SDA_RX (MTK_PIN_NO(244) | 1) + +#define MT2701_PIN_245_HDMI_SCL_RX__FUNC_GPIO245 (MTK_PIN_NO(245) | 0) +#define MT2701_PIN_245_HDMI_SCL_RX__FUNC_HDMI_SCL_RX (MTK_PIN_NO(245) | 1) + +#define MT2701_PIN_246_MHL_SENCE__FUNC_GPIO246 (MTK_PIN_NO(246) | 0) + +#define MT2701_PIN_247_HDMI_HPD_CBUS_RX__FUNC_GPIO247 (MTK_PIN_NO(247) | 0) +#define MT2701_PIN_247_HDMI_HPD_CBUS_RX__FUNC_HDMI_HPD_RX (MTK_PIN_NO(247) | 1) + +#define MT2701_PIN_248_HDMI_TESTOUTP_RX__FUNC_GPIO248 (MTK_PIN_NO(248) | 0) +#define MT2701_PIN_248_HDMI_TESTOUTP_RX__FUNC_HDMI_TESTOUTP_RX (MTK_PIN_NO(248) | 1) + +#define MT2701_PIN_249_MSDC0E_RSTB__FUNC_MSDC0E_RSTB (MTK_PIN_NO(249) | 9) + +#define MT2701_PIN_250_MSDC0E_DAT7__FUNC_MSDC3_DAT7 (MTK_PIN_NO(250) | 9) +#define MT2701_PIN_250_MSDC0E_DAT7__FUNC_PCIE0_CLKREQ_N (MTK_PIN_NO(250) | 14) + +#define MT2701_PIN_251_MSDC0E_DAT6__FUNC_MSDC3_DAT6 (MTK_PIN_NO(251) | 9) +#define MT2701_PIN_251_MSDC0E_DAT6__FUNC_PCIE0_WAKE_N (MTK_PIN_NO(251) | 14) + +#define MT2701_PIN_252_MSDC0E_DAT5__FUNC_MSDC3_DAT5 (MTK_PIN_NO(252) | 9) +#define MT2701_PIN_252_MSDC0E_DAT5__FUNC_PCIE1_CLKREQ_N (MTK_PIN_NO(252) | 14) + +#define MT2701_PIN_253_MSDC0E_DAT4__FUNC_MSDC3_DAT4 (MTK_PIN_NO(253) | 9) +#define MT2701_PIN_253_MSDC0E_DAT4__FUNC_PCIE1_WAKE_N (MTK_PIN_NO(253) | 14) + +#define 
MT2701_PIN_254_MSDC0E_DAT3__FUNC_MSDC3_DAT3 (MTK_PIN_NO(254) | 9) +#define MT2701_PIN_254_MSDC0E_DAT3__FUNC_PCIE2_CLKREQ_N (MTK_PIN_NO(254) | 14) + +#define MT2701_PIN_255_MSDC0E_DAT2__FUNC_MSDC3_DAT2 (MTK_PIN_NO(255) | 9) +#define MT2701_PIN_255_MSDC0E_DAT2__FUNC_PCIE2_WAKE_N (MTK_PIN_NO(255) | 14) + +#define MT2701_PIN_256_MSDC0E_DAT1__FUNC_MSDC3_DAT1 (MTK_PIN_NO(256) | 9) + +#define MT2701_PIN_257_MSDC0E_DAT0__FUNC_MSDC3_DAT0 (MTK_PIN_NO(257) | 9) + +#define MT2701_PIN_258_MSDC0E_CMD__FUNC_MSDC3_CMD (MTK_PIN_NO(258) | 9) + +#define MT2701_PIN_259_MSDC0E_CLK__FUNC_MSDC3_CLK (MTK_PIN_NO(259) | 9) + +#define MT2701_PIN_260_MSDC0E_DSL__FUNC_MSDC3_DSL (MTK_PIN_NO(260) | 9) + +#define MT2701_PIN_261_MSDC1_INS__FUNC_GPIO261 (MTK_PIN_NO(261) | 0) +#define MT2701_PIN_261_MSDC1_INS__FUNC_MSDC1_INS (MTK_PIN_NO(261) | 1) +#define MT2701_PIN_261_MSDC1_INS__FUNC_DBG_MON_B_29 (MTK_PIN_NO(261) | 7) + +#define MT2701_PIN_262_G2_TXEN__FUNC_GPIO262 (MTK_PIN_NO(262) | 0) +#define MT2701_PIN_262_G2_TXEN__FUNC_G2_TXEN (MTK_PIN_NO(262) | 1) + +#define MT2701_PIN_263_G2_TXD3__FUNC_GPIO263 (MTK_PIN_NO(263) | 0) +#define MT2701_PIN_263_G2_TXD3__FUNC_G2_TXD3 (MTK_PIN_NO(263) | 1) +#define MT2701_PIN_263_G2_TXD3__FUNC_ANT_SEL5 (MTK_PIN_NO(263) | 6) + +#define MT2701_PIN_264_G2_TXD2__FUNC_GPIO264 (MTK_PIN_NO(264) | 0) +#define MT2701_PIN_264_G2_TXD2__FUNC_G2_TXD2 (MTK_PIN_NO(264) | 1) +#define MT2701_PIN_264_G2_TXD2__FUNC_ANT_SEL4 (MTK_PIN_NO(264) | 6) + +#define MT2701_PIN_265_G2_TXD1__FUNC_GPIO265 (MTK_PIN_NO(265) | 0) +#define MT2701_PIN_265_G2_TXD1__FUNC_G2_TXD1 (MTK_PIN_NO(265) | 1) +#define MT2701_PIN_265_G2_TXD1__FUNC_ANT_SEL3 (MTK_PIN_NO(265) | 6) + +#define MT2701_PIN_266_G2_TXD0__FUNC_GPIO266 (MTK_PIN_NO(266) | 0) +#define MT2701_PIN_266_G2_TXD0__FUNC_G2_TXD0 (MTK_PIN_NO(266) | 1) +#define MT2701_PIN_266_G2_TXD0__FUNC_ANT_SEL2 (MTK_PIN_NO(266) | 6) + +#define MT2701_PIN_267_G2_TXC__FUNC_GPIO267 (MTK_PIN_NO(267) | 0) +#define MT2701_PIN_267_G2_TXC__FUNC_G2_TXC (MTK_PIN_NO(267) | 1) + +#define MT2701_PIN_268_G2_RXC__FUNC_GPIO268 (MTK_PIN_NO(268) | 0) +#define MT2701_PIN_268_G2_RXC__FUNC_G2_RXC (MTK_PIN_NO(268) | 1) + +#define MT2701_PIN_269_G2_RXD0__FUNC_GPIO269 (MTK_PIN_NO(269) | 0) +#define MT2701_PIN_269_G2_RXD0__FUNC_G2_RXD0 (MTK_PIN_NO(269) | 1) + +#define MT2701_PIN_270_G2_RXD1__FUNC_GPIO270 (MTK_PIN_NO(270) | 0) +#define MT2701_PIN_270_G2_RXD1__FUNC_G2_RXD1 (MTK_PIN_NO(270) | 1) + +#define MT2701_PIN_271_G2_RXD2__FUNC_GPIO271 (MTK_PIN_NO(271) | 0) +#define MT2701_PIN_271_G2_RXD2__FUNC_G2_RXD2 (MTK_PIN_NO(271) | 1) + +#define MT2701_PIN_272_G2_RXD3__FUNC_GPIO272 (MTK_PIN_NO(272) | 0) +#define MT2701_PIN_272_G2_RXD3__FUNC_G2_RXD3 (MTK_PIN_NO(272) | 1) + +#define MT2701_PIN_274_G2_RXDV__FUNC_GPIO274 (MTK_PIN_NO(274) | 0) +#define MT2701_PIN_274_G2_RXDV__FUNC_G2_RXDV (MTK_PIN_NO(274) | 1) + +#define MT2701_PIN_275_MDC__FUNC_GPIO275 (MTK_PIN_NO(275) | 0) +#define MT2701_PIN_275_MDC__FUNC_MDC (MTK_PIN_NO(275) | 1) +#define MT2701_PIN_275_MDC__FUNC_ANT_SEL0 (MTK_PIN_NO(275) | 6) + +#define MT2701_PIN_276_MDIO__FUNC_GPIO276 (MTK_PIN_NO(276) | 0) +#define MT2701_PIN_276_MDIO__FUNC_MDIO (MTK_PIN_NO(276) | 1) +#define MT2701_PIN_276_MDIO__FUNC_ANT_SEL1 (MTK_PIN_NO(276) | 6) + +#define MT2701_PIN_278_JTAG_RESET__FUNC_GPIO278 (MTK_PIN_NO(278) | 0) +#define MT2701_PIN_278_JTAG_RESET__FUNC_JTAG_RESET (MTK_PIN_NO(278) | 1) + +#endif /* __DTS_MT2701_PINFUNC_H */ diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 74d8f7eb..e3bdcf8 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ 
b/arch/arm/boot/dts/omap3-n900.dts @@ -107,8 +107,8 @@ }; }; - isp1704: isp1704 { - compatible = "nxp,isp1704"; + isp1707: isp1707 { + compatible = "nxp,isp1707"; nxp,enable-gpio = <&gpio3 3 GPIO_ACTIVE_HIGH>; usb-phy = <&usb2_phy>; }; @@ -618,7 +618,7 @@ ti,termination-current = <100>; ti,resistor-sense = <68>; - ti,usb-charger-detection = <&isp1704>; + ti,usb-charger-detection = <&isp1707>; }; }; diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 3ed4abd..15cbc74 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -656,6 +656,26 @@ status = "disabled"; }; + eccmgr: eccmgr@ffd08140 { + compatible = "altr,socfpga-ecc-manager"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + l2-ecc@ffd08140 { + compatible = "altr,socfpga-l2-ecc"; + reg = <0xffd08140 0x4>; + interrupts = <0 36 1>, <0 37 1>; + }; + + ocram-ecc@ffd08144 { + compatible = "altr,socfpga-ocram-ecc"; + reg = <0xffd08144 0x4>; + iram = <&ocram>; + interrupts = <0 178 1>, <0 179 1>; + }; + }; + L2: l2-cache@fffef000 { compatible = "arm,pl310-cache"; reg = <0xfffef000 0x1000>; diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi index a9ceb5b..4539f8d 100644 --- a/arch/arm/boot/dts/vfxxx.dtsi +++ b/arch/arm/boot/dts/vfxxx.dtsi @@ -629,5 +629,10 @@ status = "disabled"; }; }; + + iio-hwmon { + compatible = "iio-hwmon"; + io-channels = <&adc0 16>, <&adc1 16>; + }; }; }; diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index e0df333..9ba45ad 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -69,7 +69,7 @@ static void __scoop_gpio_set(struct scoop_dev *sdev, static void scoop_gpio_set(struct gpio_chip *chip, unsigned offset, int value) { - struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio); + struct scoop_dev *sdev = gpiochip_get_data(chip); unsigned long flags; spin_lock_irqsave(&sdev->scoop_lock, flags); @@ -81,7 +81,7 @@ static void scoop_gpio_set(struct gpio_chip *chip, unsigned offset, int value) static int scoop_gpio_get(struct gpio_chip *chip, unsigned offset) { - struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio); + struct scoop_dev *sdev = gpiochip_get_data(chip); /* XXX: I'm unsure, but it seems so */ return !!(ioread16(sdev->base + SCOOP_GPRR) & (1 << (offset + 1))); @@ -90,7 +90,7 @@ static int scoop_gpio_get(struct gpio_chip *chip, unsigned offset) static int scoop_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { - struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio); + struct scoop_dev *sdev = gpiochip_get_data(chip); unsigned long flags; unsigned short gpcr; @@ -108,7 +108,7 @@ static int scoop_gpio_direction_input(struct gpio_chip *chip, static int scoop_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { - struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio); + struct scoop_dev *sdev = gpiochip_get_data(chip); unsigned long flags; unsigned short gpcr; @@ -224,7 +224,7 @@ static int scoop_probe(struct platform_device *pdev) devptr->gpio.direction_input = scoop_gpio_direction_input; devptr->gpio.direction_output = scoop_gpio_direction_output; - ret = gpiochip_add(&devptr->gpio); + ret = gpiochip_add_data(&devptr->gpio, devptr); if (ret) goto err_gpio; } diff --git a/arch/arm/configs/colibri_pxa270_defconfig b/arch/arm/configs/colibri_pxa270_defconfig index 18c311a..0b9211b 100644 --- a/arch/arm/configs/colibri_pxa270_defconfig +++ b/arch/arm/configs/colibri_pxa270_defconfig @@ -166,7 +166,6 @@ 
CONFIG_DEBUG_USER=y CONFIG_DEBUG_ERRORS=y CONFIG_DEBUG_LL=y CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA1=m diff --git a/arch/arm/configs/iop13xx_defconfig b/arch/arm/configs/iop13xx_defconfig index 4fa94a1..652b7bd 100644 --- a/arch/arm/configs/iop13xx_defconfig +++ b/arch/arm/configs/iop13xx_defconfig @@ -95,7 +95,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_NLS=y CONFIG_DEBUG_USER=y CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_LRW=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig index c3058da..aa3af0a 100644 --- a/arch/arm/configs/iop32x_defconfig +++ b/arch/arm/configs/iop32x_defconfig @@ -108,7 +108,6 @@ CONFIG_DEBUG_USER=y CONFIG_DEBUG_LL=y CONFIG_DEBUG_LL_UART_8250=y CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_LRW=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/arm/configs/trizeps4_defconfig b/arch/arm/configs/trizeps4_defconfig index 4bc8700..0ada29d 100644 --- a/arch/arm/configs/trizeps4_defconfig +++ b/arch/arm/configs/trizeps4_defconfig @@ -214,7 +214,6 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_USER=y CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=m diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 89a3a3e..da3c042 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -15,6 +15,7 @@ #include <crypto/ablk_helper.h> #include <crypto/algapi.h> #include <linux/module.h> +#include <crypto/xts.h> MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); @@ -152,6 +153,10 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); int ret; + ret = xts_check_key(tfm, in_key, key_len); + if (ret) + return ret; + ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2); if (!ret) ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2], diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c index 6d68529..0511a6c 100644 --- a/arch/arm/crypto/aesbs-glue.c +++ b/arch/arm/crypto/aesbs-glue.c @@ -13,6 +13,7 @@ #include <crypto/ablk_helper.h> #include <crypto/algapi.h> #include <linux/module.h> +#include <crypto/xts.h> #include "aes_glue.h" @@ -89,6 +90,11 @@ static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, { struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm); int bits = key_len * 4; + int err; + + err = xts_check_key(tfm, in_key, key_len); + if (err) + return err; if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index d5525bf..9156fc3 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif #ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); void set_kernel_text_rw(void); void set_kernel_text_ro(void); #else diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 194c91b..3d5a5cd 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -19,38 +19,7 @@ #ifndef __ARM_KVM_ASM_H__ #define __ARM_KVM_ASM_H__ -/* 0 is reserved as an invalid value. 
*/ -#define c0_MPIDR 1 /* MultiProcessor ID Register */ -#define c0_CSSELR 2 /* Cache Size Selection Register */ -#define c1_SCTLR 3 /* System Control Register */ -#define c1_ACTLR 4 /* Auxiliary Control Register */ -#define c1_CPACR 5 /* Coprocessor Access Control */ -#define c2_TTBR0 6 /* Translation Table Base Register 0 */ -#define c2_TTBR0_high 7 /* TTBR0 top 32 bits */ -#define c2_TTBR1 8 /* Translation Table Base Register 1 */ -#define c2_TTBR1_high 9 /* TTBR1 top 32 bits */ -#define c2_TTBCR 10 /* Translation Table Base Control R. */ -#define c3_DACR 11 /* Domain Access Control Register */ -#define c5_DFSR 12 /* Data Fault Status Register */ -#define c5_IFSR 13 /* Instruction Fault Status Register */ -#define c5_ADFSR 14 /* Auxilary Data Fault Status R */ -#define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */ -#define c6_DFAR 16 /* Data Fault Address Register */ -#define c6_IFAR 17 /* Instruction Fault Address Register */ -#define c7_PAR 18 /* Physical Address Register */ -#define c7_PAR_high 19 /* PAR top 32 bits */ -#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */ -#define c10_PRRR 21 /* Primary Region Remap Register */ -#define c10_NMRR 22 /* Normal Memory Remap Register */ -#define c12_VBAR 23 /* Vector Base Address Register */ -#define c13_CID 24 /* Context ID Register */ -#define c13_TID_URW 25 /* Thread ID, User R/W */ -#define c13_TID_URO 26 /* Thread ID, User R/O */ -#define c13_TID_PRIV 27 /* Thread ID, Privileged */ -#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */ -#define c10_AMAIR0 29 /* Auxilary Memory Attribute Indirection Reg0 */ -#define c10_AMAIR1 30 /* Auxilary Memory Attribute Indirection Reg1 */ -#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */ +#include <asm/virt.h> #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 @@ -79,6 +48,8 @@ #define rr_lo_hi(a1, a2) a1, a2 #endif +#define kvm_ksym_ref(kva) (kva) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; @@ -86,19 +57,15 @@ struct kvm_vcpu; extern char __kvm_hyp_init[]; extern char __kvm_hyp_init_end[]; -extern char __kvm_hyp_exit[]; -extern char __kvm_hyp_exit_end[]; - extern char __kvm_hyp_vector[]; -extern char __kvm_hyp_code_start[]; -extern char __kvm_hyp_code_end[]; - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +extern void __init_stage2_translation(void); #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 3095df0..ee5328f 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -68,12 +68,12 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu) { - return &vcpu->arch.regs.usr_regs.ARM_pc; + return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_pc; } static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu) { - return &vcpu->arch.regs.usr_regs.ARM_cpsr; + return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr; } static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) @@ -83,13 +83,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) static inline bool mode_has_spsr(struct kvm_vcpu *vcpu) { - unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; + unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK; return (cpsr_mode > USR_MODE && cpsr_mode < 
SYSTEM_MODE); } static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) { - unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; + unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK; return cpsr_mode > USR_MODE;; } @@ -108,11 +108,6 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu) return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8; } -static inline unsigned long kvm_vcpu_get_hyp_pc(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.fault.hyp_pc; -} - static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_ISV; @@ -143,6 +138,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW; } +static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & HSR_DABT_CM); +} + /* Get Access Size from a data abort */ static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) { @@ -192,7 +192,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK; + return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index f9f2779..3850701 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -85,20 +85,61 @@ struct kvm_vcpu_fault_info { u32 hsr; /* Hyp Syndrome Register */ u32 hxfar; /* Hyp Data/Inst. Fault Address Register */ u32 hpfar; /* Hyp IPA Fault Address Register */ - u32 hyp_pc; /* PC when exception was taken from Hyp mode */ }; -typedef struct vfp_hard_struct kvm_cpu_context_t; +/* + * 0 is reserved as an invalid value. + * Order should be kept in sync with the save/restore code. + */ +enum vcpu_sysreg { + __INVALID_SYSREG__, + c0_MPIDR, /* MultiProcessor ID Register */ + c0_CSSELR, /* Cache Size Selection Register */ + c1_SCTLR, /* System Control Register */ + c1_ACTLR, /* Auxiliary Control Register */ + c1_CPACR, /* Coprocessor Access Control */ + c2_TTBR0, /* Translation Table Base Register 0 */ + c2_TTBR0_high, /* TTBR0 top 32 bits */ + c2_TTBR1, /* Translation Table Base Register 1 */ + c2_TTBR1_high, /* TTBR1 top 32 bits */ + c2_TTBCR, /* Translation Table Base Control R. */ + c3_DACR, /* Domain Access Control Register */ + c5_DFSR, /* Data Fault Status Register */ + c5_IFSR, /* Instruction Fault Status Register */ + c5_ADFSR, /* Auxilary Data Fault Status R */ + c5_AIFSR, /* Auxilary Instrunction Fault Status R */ + c6_DFAR, /* Data Fault Address Register */ + c6_IFAR, /* Instruction Fault Address Register */ + c7_PAR, /* Physical Address Register */ + c7_PAR_high, /* PAR top 32 bits */ + c9_L2CTLR, /* Cortex A15/A7 L2 Control Register */ + c10_PRRR, /* Primary Region Remap Register */ + c10_NMRR, /* Normal Memory Remap Register */ + c12_VBAR, /* Vector Base Address Register */ + c13_CID, /* Context ID Register */ + c13_TID_URW, /* Thread ID, User R/W */ + c13_TID_URO, /* Thread ID, User R/O */ + c13_TID_PRIV, /* Thread ID, Privileged */ + c14_CNTKCTL, /* Timer Control Register (PL1) */ + c10_AMAIR0, /* Auxilary Memory Attribute Indirection Reg0 */ + c10_AMAIR1, /* Auxilary Memory Attribute Indirection Reg1 */ + NR_CP15_REGS /* Number of regs (incl. 
invalid) */ +}; + +struct kvm_cpu_context { + struct kvm_regs gp_regs; + struct vfp_hard_struct vfp; + u32 cp15[NR_CP15_REGS]; +}; + +typedef struct kvm_cpu_context kvm_cpu_context_t; struct kvm_vcpu_arch { - struct kvm_regs regs; + struct kvm_cpu_context ctxt; int target; /* Processor target */ DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); - /* System control coprocessor (cp15) */ - u32 cp15[NR_CP15_REGS]; - /* The CPU type we expose to the VM */ u32 midr; @@ -111,9 +152,6 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Floating point registers (VFP and Advanced SIMD/NEON) */ - struct vfp_hard_struct vfp_guest; - /* Host FP context */ kvm_cpu_context_t *host_cpu_context; @@ -158,12 +196,14 @@ struct kvm_vcpu_stat { u64 exits; }; +#define vcpu_cp15(v,r) (v)->arch.ctxt.cp15[r] + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -u64 kvm_call_hyp(void *hypfn, ...); +unsigned long kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER @@ -220,6 +260,11 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } +static inline void __cpu_init_stage2(void) +{ + kvm_call_hyp(__init_stage2_translation); +} + static inline int kvm_arch_dev_ioctl_check_extension(long ext) { return 0; @@ -242,5 +287,20 @@ static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} +static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + return -ENXIO; +} +static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + return -ENXIO; +} +static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + return -ENXIO; +} #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h new file mode 100644 index 0000000..f0e8607 --- /dev/null +++ b/arch/arm/include/asm/kvm_hyp.h @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __ARM_KVM_HYP_H__ +#define __ARM_KVM_HYP_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <asm/kvm_mmu.h> +#include <asm/vfp.h> + +#define __hyp_text __section(.hyp.text) notrace + +#define kern_hyp_va(v) (v) +#define hyp_kern_va(v) (v) + +#define __ACCESS_CP15(CRn, Op1, CRm, Op2) \ + "mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32 +#define __ACCESS_CP15_64(Op1, CRm) \ + "mrrc", "mcrr", __stringify(p15, Op1, %Q0, %R0, CRm), u64 +#define __ACCESS_VFP(CRn) \ + "mrc", "mcr", __stringify(p10, 7, %0, CRn, cr0, 0), u32 + +#define __write_sysreg(v, r, w, c, t) asm volatile(w " " c : : "r" ((t)(v))) +#define write_sysreg(v, ...) __write_sysreg(v, __VA_ARGS__) + +#define __read_sysreg(r, w, c, t) ({ \ + t __val; \ + asm volatile(r " " c : "=r" (__val)); \ + __val; \ +}) +#define read_sysreg(...) __read_sysreg(__VA_ARGS__) + +#define write_special(v, r) \ + asm volatile("msr " __stringify(r) ", %0" : : "r" (v)) +#define read_special(r) ({ \ + u32 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define TTBR0 __ACCESS_CP15_64(0, c2) +#define TTBR1 __ACCESS_CP15_64(1, c2) +#define VTTBR __ACCESS_CP15_64(6, c2) +#define PAR __ACCESS_CP15_64(0, c7) +#define CNTV_CVAL __ACCESS_CP15_64(3, c14) +#define CNTVOFF __ACCESS_CP15_64(4, c14) + +#define MIDR __ACCESS_CP15(c0, 0, c0, 0) +#define CSSELR __ACCESS_CP15(c0, 2, c0, 0) +#define VPIDR __ACCESS_CP15(c0, 4, c0, 0) +#define VMPIDR __ACCESS_CP15(c0, 4, c0, 5) +#define SCTLR __ACCESS_CP15(c1, 0, c0, 0) +#define CPACR __ACCESS_CP15(c1, 0, c0, 2) +#define HCR __ACCESS_CP15(c1, 4, c1, 0) +#define HDCR __ACCESS_CP15(c1, 4, c1, 1) +#define HCPTR __ACCESS_CP15(c1, 4, c1, 2) +#define HSTR __ACCESS_CP15(c1, 4, c1, 3) +#define TTBCR __ACCESS_CP15(c2, 0, c0, 2) +#define HTCR __ACCESS_CP15(c2, 4, c0, 2) +#define VTCR __ACCESS_CP15(c2, 4, c1, 2) +#define DACR __ACCESS_CP15(c3, 0, c0, 0) +#define DFSR __ACCESS_CP15(c5, 0, c0, 0) +#define IFSR __ACCESS_CP15(c5, 0, c0, 1) +#define ADFSR __ACCESS_CP15(c5, 0, c1, 0) +#define AIFSR __ACCESS_CP15(c5, 0, c1, 1) +#define HSR __ACCESS_CP15(c5, 4, c2, 0) +#define DFAR __ACCESS_CP15(c6, 0, c0, 0) +#define IFAR __ACCESS_CP15(c6, 0, c0, 2) +#define HDFAR __ACCESS_CP15(c6, 4, c0, 0) +#define HIFAR __ACCESS_CP15(c6, 4, c0, 2) +#define HPFAR __ACCESS_CP15(c6, 4, c0, 4) +#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0) +#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0) +#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0) +#define TLBIALLNSNHIS __ACCESS_CP15(c8, 4, c3, 4) +#define PRRR __ACCESS_CP15(c10, 0, c2, 0) +#define NMRR __ACCESS_CP15(c10, 0, c2, 1) +#define AMAIR0 __ACCESS_CP15(c10, 0, c3, 0) +#define AMAIR1 __ACCESS_CP15(c10, 0, c3, 1) +#define VBAR __ACCESS_CP15(c12, 0, c0, 0) +#define CID __ACCESS_CP15(c13, 0, c0, 1) +#define TID_URW __ACCESS_CP15(c13, 0, c0, 2) +#define TID_URO __ACCESS_CP15(c13, 0, c0, 3) +#define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4) +#define HTPIDR __ACCESS_CP15(c13, 4, c0, 2) +#define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0) +#define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1) +#define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0) + +#define VFP_FPEXC __ACCESS_VFP(FPEXC) + +/* AArch64 compatibility macros, only for the timer so far */ +#define read_sysreg_el0(r) read_sysreg(r##_el0) +#define write_sysreg_el0(v, r) write_sysreg(v, r##_el0) + +#define cntv_ctl_el0 CNTV_CTL +#define cntv_cval_el0 CNTV_CVAL +#define cntvoff_el2 CNTVOFF +#define cnthctl_el2 CNTHCTL + +void __timer_save_state(struct kvm_vcpu *vcpu); +void __timer_restore_state(struct 
kvm_vcpu *vcpu); + +void __vgic_v2_save_state(struct kvm_vcpu *vcpu); +void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); + +void __sysreg_save_state(struct kvm_cpu_context *ctxt); +void __sysreg_restore_state(struct kvm_cpu_context *ctxt); + +void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp); +void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp); +static inline bool __vfp_enabled(void) +{ + return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10))); +} + +void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt); +void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt); + +int asmlinkage __guest_enter(struct kvm_vcpu *vcpu, + struct kvm_cpu_context *host); +int asmlinkage __hyp_do_panic(const char *, int, u32); + +#endif /* __ARM_KVM_HYP_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index a520b79..da44be9 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -179,7 +179,7 @@ struct kvm; static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { - return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; + return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101; } static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h index a563544..057d381 100644 --- a/arch/arm/include/asm/pci.h +++ b/arch/arm/include/asm/pci.h @@ -2,9 +2,6 @@ #define ASMARM_PCI_H #ifdef __KERNEL__ -#include <asm-generic/pci-dma-compat.h> -#include <asm-generic/pci-bridge.h> - #include <asm/mach/pci.h> /* for pci_sys_data */ extern unsigned long pcibios_min_io; @@ -41,5 +38,4 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #endif /* __KERNEL__ */ - #endif diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index 4371f45..d4ceaf5 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -74,6 +74,15 @@ static inline bool is_hyp_mode_mismatched(void) { return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH); } + +static inline bool is_kernel_in_hyp_mode(void) +{ + return false; +} + +/* The section containing the hypervisor text */ +extern char __hyp_text_start[]; +extern char __hyp_text_end[]; #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 871b826..27d0581 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -170,41 +170,11 @@ int main(void) DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE); BLANK(); #ifdef CONFIG_KVM_ARM_HOST - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); - DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); - DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); - DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); - DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context)); - DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); - DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); - DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); - DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs)); - DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs)); - DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs)); - DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs)); - DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc)); - DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, 
arch.regs.usr_regs.ARM_cpsr)); - DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr)); - DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); - DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr)); - DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); - DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); - DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); - DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); - DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); - DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); - DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); - DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); - DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); - DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); - DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); - DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); - DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); - DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); - DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); - DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); - DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); + DEFINE(VCPU_GUEST_CTXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(VCPU_HOST_CTXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(CPU_CTXT_VFP, offsetof(struct kvm_cpu_context, vfp)); + DEFINE(CPU_CTXT_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(GP_REGS_USR, offsetof(struct kvm_regs, usr_regs)); #endif BLANK(); #ifdef CONFIG_VDSO diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 7d0cba6f..139791e 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -176,13 +176,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -851,7 +851,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) res->name = "System RAM"; res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 37312f6..baee702 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void) /* * OK, it's off to the idle thread for us */ - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 8b60fde..b4139cb 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -18,6 +18,11 @@ *(.proc.info.init) \ VMLINUX_SYMBOL(__proc_info_end) = .; +#define HYPERVISOR_TEXT \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + #define IDMAP_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__idmap_text_start) = .; 
\ @@ -108,6 +113,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT KPROBES_TEXT *(.gnu.warning) *(.glue_7) diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index c5eef02c..eb1bf43 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -17,6 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) KVM := ../../../virt/kvm kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp/ obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index dda1959..3e0fb66 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -28,6 +28,7 @@ #include <linux/sched.h> #include <linux/kvm.h> #include <trace/events/kvm.h> +#include <kvm/arm_pmu.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -265,6 +266,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); kvm_vgic_vcpu_destroy(vcpu); + kvm_pmu_vcpu_destroy(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -320,6 +322,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; kvm_arm_set_running_vcpu(NULL); + kvm_timer_vcpu_put(vcpu); } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, @@ -506,18 +509,18 @@ static void kvm_arm_resume_guest(struct kvm *kvm) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) { - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); vcpu->arch.pause = false; - wake_up_interruptible(wq); + swake_up(wq); } } static void vcpu_sleep(struct kvm_vcpu *vcpu) { - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); - wait_event_interruptible(*wq, ((!vcpu->arch.power_off) && + swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && (!vcpu->arch.pause))); } @@ -577,6 +580,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * non-preemptible context. */ preempt_disable(); + kvm_pmu_flush_hwstate(vcpu); kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); @@ -593,6 +597,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || vcpu->arch.power_off || vcpu->arch.pause) { local_irq_enable(); + kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); preempt_enable(); @@ -642,10 +647,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* - * We must sync the timer state before the vgic state so that - * the vgic can properly sample the updated state of the + * We must sync the PMU and timer state before the vgic state so + * that the vgic can properly sample the updated state of the * interrupt line. 
*/ + kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); @@ -823,11 +829,54 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, return 0; } +static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr); + break; + } + + return ret; +} + +static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr); + break; + } + + return ret; +} + +static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr); + break; + } + + return ret; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; switch (ioctl) { case KVM_ARM_VCPU_INIT: { @@ -870,6 +919,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return -E2BIG; return kvm_arm_copy_reg_indices(vcpu, user_list->reg); } + case KVM_SET_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + return kvm_arm_vcpu_set_attr(vcpu, &attr); + } + case KVM_GET_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + return kvm_arm_vcpu_get_attr(vcpu, &attr); + } + case KVM_HAS_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + return kvm_arm_vcpu_has_attr(vcpu, &attr); + } default: return -EINVAL; } @@ -967,6 +1031,11 @@ long kvm_arch_vm_ioctl(struct file *filp, } } +static void cpu_init_stage2(void *dummy) +{ + __cpu_init_stage2(); +} + static void cpu_init_hyp_mode(void *dummy) { phys_addr_t boot_pgd_ptr; @@ -982,9 +1051,10 @@ static void cpu_init_hyp_mode(void *dummy) pgd_ptr = kvm_mmu_get_httbr(); stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; - vector_ptr = (unsigned long)__kvm_hyp_vector; + vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); + __cpu_init_stage2(); kvm_arm_init_debug(); } @@ -1035,6 +1105,82 @@ static inline void hyp_cpu_pm_init(void) } #endif +static void teardown_common_resources(void) +{ + free_percpu(kvm_host_cpu_state); +} + +static int init_common_resources(void) +{ + kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); + if (!kvm_host_cpu_state) { + kvm_err("Cannot allocate host CPU state\n"); + return -ENOMEM; + } + + return 0; +} + +static int init_subsystems(void) +{ + int err; + + /* + * Init HYP view of VGIC + */ + err = kvm_vgic_hyp_init(); + switch (err) { + case 0: + vgic_present = true; + break; + case -ENODEV: + case -ENXIO: + vgic_present = false; + break; + default: + return err; + } + + /* + * Init HYP architected timer support + */ + err = kvm_timer_hyp_init(); + if (err) + return err; + + kvm_perf_init(); + kvm_coproc_table_init(); + + return 0; +} + +static void teardown_hyp_mode(void) +{ + int cpu; + + if (is_kernel_in_hyp_mode()) + return; + + free_hyp_pgds(); + for_each_possible_cpu(cpu) + free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); +} + +static int init_vhe_mode(void) +{ + /* + * Execute the init code on each CPU. 
+ */ + on_each_cpu(cpu_init_stage2, NULL, 1); + + /* set size of VMID supported by CPU */ + kvm_vmid_bits = kvm_get_vmid_bits(); + kvm_info("%d-bit VMID\n", kvm_vmid_bits); + + kvm_info("VHE mode initialized successfully\n"); + return 0; +} + /** * Inits Hyp-mode on all online CPUs */ @@ -1065,7 +1211,7 @@ static int init_hyp_mode(void) stack_page = __get_free_page(GFP_KERNEL); if (!stack_page) { err = -ENOMEM; - goto out_free_stack_pages; + goto out_err; } per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; @@ -1074,16 +1220,18 @@ static int init_hyp_mode(void) /* * Map the Hyp-code called directly from the host */ - err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); + err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start), + kvm_ksym_ref(__hyp_text_end)); if (err) { kvm_err("Cannot map world-switch code\n"); - goto out_free_mappings; + goto out_err; } - err = create_hyp_mappings(__start_rodata, __end_rodata); + err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), + kvm_ksym_ref(__end_rodata)); if (err) { kvm_err("Cannot map rodata section\n"); - goto out_free_mappings; + goto out_err; } /* @@ -1095,20 +1243,10 @@ static int init_hyp_mode(void) if (err) { kvm_err("Cannot map hyp stack\n"); - goto out_free_mappings; + goto out_err; } } - /* - * Map the host CPU structures - */ - kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); - if (!kvm_host_cpu_state) { - err = -ENOMEM; - kvm_err("Cannot allocate host CPU state\n"); - goto out_free_mappings; - } - for_each_possible_cpu(cpu) { kvm_cpu_context_t *cpu_ctxt; @@ -1117,7 +1255,7 @@ static int init_hyp_mode(void) if (err) { kvm_err("Cannot map host CPU state: %d\n", err); - goto out_free_context; + goto out_err; } } @@ -1126,34 +1264,22 @@ static int init_hyp_mode(void) */ on_each_cpu(cpu_init_hyp_mode, NULL, 1); - /* - * Init HYP view of VGIC - */ - err = kvm_vgic_hyp_init(); - switch (err) { - case 0: - vgic_present = true; - break; - case -ENODEV: - case -ENXIO: - vgic_present = false; - break; - default: - goto out_free_context; - } - - /* - * Init HYP architected timer support - */ - err = kvm_timer_hyp_init(); - if (err) - goto out_free_context; - #ifndef CONFIG_HOTPLUG_CPU free_boot_hyp_pgd(); #endif - kvm_perf_init(); + cpu_notifier_register_begin(); + + err = __register_cpu_notifier(&hyp_init_cpu_nb); + + cpu_notifier_register_done(); + + if (err) { + kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); + goto out_err; + } + + hyp_cpu_pm_init(); /* set size of VMID supported by CPU */ kvm_vmid_bits = kvm_get_vmid_bits(); @@ -1162,14 +1288,9 @@ static int init_hyp_mode(void) kvm_info("Hyp mode initialized successfully\n"); return 0; -out_free_context: - free_percpu(kvm_host_cpu_state); -out_free_mappings: - free_hyp_pgds(); -out_free_stack_pages: - for_each_possible_cpu(cpu) - free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); + out_err: + teardown_hyp_mode(); kvm_err("error initializing Hyp mode: %d\n", err); return err; } @@ -1213,26 +1334,27 @@ int kvm_arch_init(void *opaque) } } - cpu_notifier_register_begin(); - - err = init_hyp_mode(); + err = init_common_resources(); if (err) - goto out_err; + return err; - err = __register_cpu_notifier(&hyp_init_cpu_nb); - if (err) { - kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); + if (is_kernel_in_hyp_mode()) + err = init_vhe_mode(); + else + err = init_hyp_mode(); + if (err) goto out_err; - } - - cpu_notifier_register_done(); - hyp_cpu_pm_init(); + err = init_subsystems(); + if (err) + goto out_hyp; - kvm_coproc_table_init(); return 0; + 
+out_hyp: + teardown_hyp_mode(); out_err: - cpu_notifier_register_done(); + teardown_common_resources(); return err; } diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index f3d88dc..1bb2b79 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -16,6 +16,8 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include <linux/bsearch.h> #include <linux/mm.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> @@ -54,8 +56,8 @@ static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu, const struct coproc_reg *r, u64 val) { - vcpu->arch.cp15[r->reg] = val & 0xffffffff; - vcpu->arch.cp15[r->reg + 1] = val >> 32; + vcpu_cp15(vcpu, r->reg) = val & 0xffffffff; + vcpu_cp15(vcpu, r->reg + 1) = val >> 32; } static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, @@ -63,9 +65,9 @@ static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, { u64 val; - val = vcpu->arch.cp15[r->reg + 1]; + val = vcpu_cp15(vcpu, r->reg + 1); val = val << 32; - val = val | vcpu->arch.cp15[r->reg]; + val = val | vcpu_cp15(vcpu, r->reg); return val; } @@ -104,7 +106,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) * vcpu_id, but we read the 'U' bit from the underlying * hardware directly. */ - vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | + vcpu_cp15(vcpu, c0_MPIDR) = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | (vcpu->vcpu_id & 3)); } @@ -117,7 +119,7 @@ static bool access_actlr(struct kvm_vcpu *vcpu, if (p->is_write) return ignore_write(vcpu, p); - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; + *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c1_ACTLR); return true; } @@ -139,7 +141,7 @@ static bool access_l2ctlr(struct kvm_vcpu *vcpu, if (p->is_write) return ignore_write(vcpu, p); - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; + *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c9_L2CTLR); return true; } @@ -156,7 +158,7 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) ncores = min(ncores, 3U); l2ctlr |= (ncores & 3) << 24; - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; + vcpu_cp15(vcpu, c9_L2CTLR) = l2ctlr; } static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) @@ -171,7 +173,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) else actlr &= ~(1U << 6); - vcpu->arch.cp15[c1_ACTLR] = actlr; + vcpu_cp15(vcpu, c1_ACTLR) = actlr; } /* @@ -218,9 +220,9 @@ bool access_vm_reg(struct kvm_vcpu *vcpu, BUG_ON(!p->is_write); - vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); + vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt1); if (p->is_64bit) - vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); + vcpu_cp15(vcpu, r->reg + 1) = *vcpu_reg(vcpu, p->Rt2); kvm_toggle_cache(vcpu, was_enabled); return true; @@ -381,17 +383,26 @@ static const struct coproc_reg cp15_regs[] = { { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; +static int check_reg_table(const struct coproc_reg *table, unsigned int n) +{ + unsigned int i; + + for (i = 1; i < n; i++) { + if (cmp_reg(&table[i-1], &table[i]) >= 0) { + kvm_err("reg table %p out of order (%d)\n", table, i - 1); + return 1; + } + } + + return 0; +} + /* Target specific emulation tables */ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) { - unsigned int i; - - for (i = 1; i < 
table->num; i++) - BUG_ON(cmp_reg(&table->table[i-1], - &table->table[i]) >= 0); - + BUG_ON(check_reg_table(table->table, table->num)); target_tables[table->target] = table; } @@ -405,29 +416,32 @@ static const struct coproc_reg *get_target_table(unsigned target, size_t *num) return table->table; } +#define reg_to_match_value(x) \ + ({ \ + unsigned long val; \ + val = (x)->CRn << 11; \ + val |= (x)->CRm << 7; \ + val |= (x)->Op1 << 4; \ + val |= (x)->Op2 << 1; \ + val |= !(x)->is_64bit; \ + val; \ + }) + +static int match_reg(const void *key, const void *elt) +{ + const unsigned long pval = (unsigned long)key; + const struct coproc_reg *r = elt; + + return pval - reg_to_match_value(r); +} + static const struct coproc_reg *find_reg(const struct coproc_params *params, const struct coproc_reg table[], unsigned int num) { - unsigned int i; - - for (i = 0; i < num; i++) { - const struct coproc_reg *r = &table[i]; - - if (params->is_64bit != r->is_64) - continue; - if (params->CRn != r->CRn) - continue; - if (params->CRm != r->CRm) - continue; - if (params->Op1 != r->Op1) - continue; - if (params->Op2 != r->Op2) - continue; + unsigned long pval = reg_to_match_value(params); - return r; - } - return NULL; + return bsearch((void *)pval, table, num, sizeof(table[0]), match_reg); } static int emulate_cp15(struct kvm_vcpu *vcpu, @@ -645,6 +659,9 @@ static struct coproc_reg invariant_cp15[] = { { CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR }, { CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR }, + { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR }, + { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, + { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 }, { CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 }, { CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 }, @@ -660,9 +677,6 @@ static struct coproc_reg invariant_cp15[] = { { CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 }, { CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 }, { CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 }, - - { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR }, - { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, }; /* @@ -901,7 +915,7 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) if (vfpid < num_fp_regs()) { if (KVM_REG_SIZE(id) != 8) return -ENOENT; - return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpregs[vfpid], + return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpregs[vfpid], id); } @@ -911,13 +925,13 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) switch (vfpid) { case KVM_REG_ARM_VFP_FPEXC: - return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpexc, id); + return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpexc, id); case KVM_REG_ARM_VFP_FPSCR: - return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpscr, id); + return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpscr, id); case KVM_REG_ARM_VFP_FPINST: - return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst, id); + return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst, id); case KVM_REG_ARM_VFP_FPINST2: - return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst2, id); + return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst2, id); case KVM_REG_ARM_VFP_MVFR0: val = fmrx(MVFR0); return reg_to_user(uaddr, &val, id); @@ -945,7 +959,7 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr) if (vfpid < num_fp_regs()) { if (KVM_REG_SIZE(id) != 8) return 
-ENOENT; - return reg_from_user(&vcpu->arch.vfp_guest.fpregs[vfpid], + return reg_from_user(&vcpu->arch.ctxt.vfp.fpregs[vfpid], uaddr, id); } @@ -955,13 +969,13 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr) switch (vfpid) { case KVM_REG_ARM_VFP_FPEXC: - return reg_from_user(&vcpu->arch.vfp_guest.fpexc, uaddr, id); + return reg_from_user(&vcpu->arch.ctxt.vfp.fpexc, uaddr, id); case KVM_REG_ARM_VFP_FPSCR: - return reg_from_user(&vcpu->arch.vfp_guest.fpscr, uaddr, id); + return reg_from_user(&vcpu->arch.ctxt.vfp.fpscr, uaddr, id); case KVM_REG_ARM_VFP_FPINST: - return reg_from_user(&vcpu->arch.vfp_guest.fpinst, uaddr, id); + return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst, uaddr, id); case KVM_REG_ARM_VFP_FPINST2: - return reg_from_user(&vcpu->arch.vfp_guest.fpinst2, uaddr, id); + return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst2, uaddr, id); /* These are invariant. */ case KVM_REG_ARM_VFP_MVFR0: if (reg_from_user(&val, uaddr, id)) @@ -1030,7 +1044,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) val = vcpu_cp15_reg64_get(vcpu, r); ret = reg_to_user(uaddr, &val, reg->id); } else if (KVM_REG_SIZE(reg->id) == 4) { - ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + ret = reg_to_user(uaddr, &vcpu_cp15(vcpu, r->reg), reg->id); } return ret; @@ -1060,7 +1074,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!ret) vcpu_cp15_reg64_set(vcpu, r, val); } else if (KVM_REG_SIZE(reg->id) == 4) { - ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + ret = reg_from_user(&vcpu_cp15(vcpu, r->reg), uaddr, reg->id); } return ret; @@ -1096,7 +1110,7 @@ static int write_demux_regids(u64 __user *uindices) static u64 cp15_to_index(const struct coproc_reg *reg) { u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT); - if (reg->is_64) { + if (reg->is_64bit) { val |= KVM_REG_SIZE_U64; val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT); /* @@ -1210,8 +1224,8 @@ void kvm_coproc_table_init(void) unsigned int i; /* Make sure tables are unique and in order. */ - for (i = 1; i < ARRAY_SIZE(cp15_regs); i++) - BUG_ON(cmp_reg(&cp15_regs[i-1], &cp15_regs[i]) >= 0); + BUG_ON(check_reg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); + BUG_ON(check_reg_table(invariant_cp15, ARRAY_SIZE(invariant_cp15))); /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++) @@ -1248,7 +1262,7 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu) const struct coproc_reg *table; /* Catch someone adding a register without putting in reset entry. */ - memset(vcpu->arch.cp15, 0x42, sizeof(vcpu->arch.cp15)); + memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15)); /* Generic chip reset first (so target could override). */ reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); @@ -1257,6 +1271,6 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu) reset_coproc_regs(vcpu, table, num); for (num = 1; num < NR_CP15_REGS; num++) - if (vcpu->arch.cp15[num] == 0x42424242) - panic("Didn't reset vcpu->arch.cp15[%zi]", num); + if (vcpu_cp15(vcpu, num) == 0x42424242) + panic("Didn't reset vcpu_cp15(vcpu, %zi)", num); } diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 88d24a3..eef1759 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -37,7 +37,7 @@ struct coproc_reg { unsigned long Op1; unsigned long Op2; - bool is_64; + bool is_64bit; /* Trapped access from guest, if non-NULL. 
*/ bool (*access)(struct kvm_vcpu *, @@ -47,7 +47,7 @@ struct coproc_reg { /* Initialization for vcpu. */ void (*reset)(struct kvm_vcpu *, const struct coproc_reg *); - /* Index into vcpu->arch.cp15[], or 0 if we don't need to save it. */ + /* Index into vcpu_cp15(vcpu, ...), or 0 if we don't need to save it. */ unsigned long reg; /* Value (usually reset value) */ @@ -104,25 +104,25 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { BUG_ON(!r->reg); - BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15)); - vcpu->arch.cp15[r->reg] = 0xdecafbad; + BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15)); + vcpu_cp15(vcpu, r->reg) = 0xdecafbad; } static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { BUG_ON(!r->reg); - BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15)); - vcpu->arch.cp15[r->reg] = r->val; + BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15)); + vcpu_cp15(vcpu, r->reg) = r->val; } static inline void reset_unknown64(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { BUG_ON(!r->reg); - BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.cp15)); + BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.ctxt.cp15)); - vcpu->arch.cp15[r->reg] = 0xdecafbad; - vcpu->arch.cp15[r->reg+1] = 0xd0c0ffee; + vcpu_cp15(vcpu, r->reg) = 0xdecafbad; + vcpu_cp15(vcpu, r->reg+1) = 0xd0c0ffee; } static inline int cmp_reg(const struct coproc_reg *i1, @@ -141,7 +141,7 @@ static inline int cmp_reg(const struct coproc_reg *i1, return i1->Op1 - i2->Op1; if (i1->Op2 != i2->Op2) return i1->Op2 - i2->Op2; - return i2->is_64 - i1->is_64; + return i2->is_64bit - i1->is_64bit; } @@ -150,8 +150,8 @@ static inline int cmp_reg(const struct coproc_reg *i1, #define CRm64(_x) .CRn = _x, .CRm = 0 #define Op1(_x) .Op1 = _x #define Op2(_x) .Op2 = _x -#define is64 .is_64 = true -#define is32 .is_64 = false +#define is64 .is_64bit = true +#define is32 .is_64bit = false bool access_vm_reg(struct kvm_vcpu *vcpu, const struct coproc_params *p, diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index dc99159..a494def 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -112,7 +112,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = { */ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) { - unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs; + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs; unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; switch (mode) { @@ -147,15 +147,15 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; switch (mode) { case SVC_MODE: - return &vcpu->arch.regs.KVM_ARM_SVC_spsr; + return &vcpu->arch.ctxt.gp_regs.KVM_ARM_SVC_spsr; case ABT_MODE: - return &vcpu->arch.regs.KVM_ARM_ABT_spsr; + return &vcpu->arch.ctxt.gp_regs.KVM_ARM_ABT_spsr; case UND_MODE: - return &vcpu->arch.regs.KVM_ARM_UND_spsr; + return &vcpu->arch.ctxt.gp_regs.KVM_ARM_UND_spsr; case IRQ_MODE: - return &vcpu->arch.regs.KVM_ARM_IRQ_spsr; + return &vcpu->arch.ctxt.gp_regs.KVM_ARM_IRQ_spsr; case FIQ_MODE: - return &vcpu->arch.regs.KVM_ARM_FIQ_spsr; + return &vcpu->arch.ctxt.gp_regs.KVM_ARM_FIQ_spsr; default: BUG(); } @@ -266,8 +266,8 @@ void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) static u32 exc_vector_base(struct kvm_vcpu *vcpu) { - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; - u32 vbar = vcpu->arch.cp15[c12_VBAR]; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + u32 vbar = vcpu_cp15(vcpu, c12_VBAR); if (sctlr & SCTLR_V) return 0xffff0000; @@ -282,7 +282,7 @@ static u32 
exc_vector_base(struct kvm_vcpu *vcpu) static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) { unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; @@ -357,22 +357,22 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) if (is_pabt) { /* Set IFAR and IFSR */ - vcpu->arch.cp15[c6_IFAR] = addr; - is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); + vcpu_cp15(vcpu, c6_IFAR) = addr; + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); /* Always give debug fault for now - should give guest a clue */ if (is_lpae) - vcpu->arch.cp15[c5_IFSR] = 1 << 9 | 0x22; + vcpu_cp15(vcpu, c5_IFSR) = 1 << 9 | 0x22; else - vcpu->arch.cp15[c5_IFSR] = 2; + vcpu_cp15(vcpu, c5_IFSR) = 2; } else { /* !iabt */ /* Set DFAR and DFSR */ - vcpu->arch.cp15[c6_DFAR] = addr; - is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); + vcpu_cp15(vcpu, c6_DFAR) = addr; + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); /* Always give debug fault for now - should give guest a clue */ if (is_lpae) - vcpu->arch.cp15[c5_DFSR] = 1 << 9 | 0x22; + vcpu_cp15(vcpu, c5_DFSR) = 1 << 9 | 0x22; else - vcpu->arch.cp15[c5_DFSR] = 2; + vcpu_cp15(vcpu, c5_DFSR) = 2; } } diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 99361f1..9093ed0 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -25,7 +25,6 @@ #include <asm/cputype.h> #include <asm/uaccess.h> #include <asm/kvm.h> -#include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> @@ -55,7 +54,7 @@ static u64 core_reg_offset_from_id(u64 id) static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { u32 __user *uaddr = (u32 __user *)(long)reg->addr; - struct kvm_regs *regs = &vcpu->arch.regs; + struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs; u64 off; if (KVM_REG_SIZE(reg->id) != 4) @@ -72,7 +71,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { u32 __user *uaddr = (u32 __user *)(long)reg->addr; - struct kvm_regs *regs = &vcpu->arch.regs; + struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs; u64 off, val; if (KVM_REG_SIZE(reg->id) != 4) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 3ede90d..3f1ef0d 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -147,13 +147,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, switch (exception_index) { case ARM_EXCEPTION_IRQ: return 1; - case ARM_EXCEPTION_UNDEFINED: - kvm_err("Undefined exception in Hyp mode at: %#08lx\n", - kvm_vcpu_get_hyp_pc(vcpu)); - BUG(); - panic("KVM: Hypervisor undefined exception!\n"); - case ARM_EXCEPTION_DATA_ABORT: - case ARM_EXCEPTION_PREF_ABORT: case ARM_EXCEPTION_HVC: /* * See ARM ARM B1.14.1: "Hyp traps on instructions diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile new file mode 100644 index 0000000..8dfa5f7 --- /dev/null +++ b/arch/arm/kvm/hyp/Makefile @@ -0,0 +1,17 @@ +# +# Makefile for Kernel-based Virtual Machine module, HYP part +# + +KVM=../../../../virt/kvm + +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o + +obj-$(CONFIG_KVM_ARM_HOST) += tlb.o +obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += vfp.o +obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += entry.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o 
+obj-$(CONFIG_KVM_ARM_HOST) += switch.o +obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c new file mode 100644 index 0000000..111bda8 --- /dev/null +++ b/arch/arm/kvm/hyp/banked-sr.c @@ -0,0 +1,77 @@ +/* + * Original code: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/kvm_hyp.h> + +__asm__(".arch_extension virt"); + +void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->gp_regs.usr_regs.ARM_sp = read_special(SP_usr); + ctxt->gp_regs.usr_regs.ARM_pc = read_special(ELR_hyp); + ctxt->gp_regs.usr_regs.ARM_cpsr = read_special(SPSR); + ctxt->gp_regs.KVM_ARM_SVC_sp = read_special(SP_svc); + ctxt->gp_regs.KVM_ARM_SVC_lr = read_special(LR_svc); + ctxt->gp_regs.KVM_ARM_SVC_spsr = read_special(SPSR_svc); + ctxt->gp_regs.KVM_ARM_ABT_sp = read_special(SP_abt); + ctxt->gp_regs.KVM_ARM_ABT_lr = read_special(LR_abt); + ctxt->gp_regs.KVM_ARM_ABT_spsr = read_special(SPSR_abt); + ctxt->gp_regs.KVM_ARM_UND_sp = read_special(SP_und); + ctxt->gp_regs.KVM_ARM_UND_lr = read_special(LR_und); + ctxt->gp_regs.KVM_ARM_UND_spsr = read_special(SPSR_und); + ctxt->gp_regs.KVM_ARM_IRQ_sp = read_special(SP_irq); + ctxt->gp_regs.KVM_ARM_IRQ_lr = read_special(LR_irq); + ctxt->gp_regs.KVM_ARM_IRQ_spsr = read_special(SPSR_irq); + ctxt->gp_regs.KVM_ARM_FIQ_r8 = read_special(R8_fiq); + ctxt->gp_regs.KVM_ARM_FIQ_r9 = read_special(R9_fiq); + ctxt->gp_regs.KVM_ARM_FIQ_r10 = read_special(R10_fiq); + ctxt->gp_regs.KVM_ARM_FIQ_fp = read_special(R11_fiq); + ctxt->gp_regs.KVM_ARM_FIQ_ip = read_special(R12_fiq); + ctxt->gp_regs.KVM_ARM_FIQ_sp = read_special(SP_fiq); + ctxt->gp_regs.KVM_ARM_FIQ_lr = read_special(LR_fiq); + ctxt->gp_regs.KVM_ARM_FIQ_spsr = read_special(SPSR_fiq); +} + +void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt) +{ + write_special(ctxt->gp_regs.usr_regs.ARM_sp, SP_usr); + write_special(ctxt->gp_regs.usr_regs.ARM_pc, ELR_hyp); + write_special(ctxt->gp_regs.usr_regs.ARM_cpsr, SPSR_cxsf); + write_special(ctxt->gp_regs.KVM_ARM_SVC_sp, SP_svc); + write_special(ctxt->gp_regs.KVM_ARM_SVC_lr, LR_svc); + write_special(ctxt->gp_regs.KVM_ARM_SVC_spsr, SPSR_svc); + write_special(ctxt->gp_regs.KVM_ARM_ABT_sp, SP_abt); + write_special(ctxt->gp_regs.KVM_ARM_ABT_lr, LR_abt); + write_special(ctxt->gp_regs.KVM_ARM_ABT_spsr, SPSR_abt); + write_special(ctxt->gp_regs.KVM_ARM_UND_sp, SP_und); + write_special(ctxt->gp_regs.KVM_ARM_UND_lr, LR_und); + write_special(ctxt->gp_regs.KVM_ARM_UND_spsr, SPSR_und); + write_special(ctxt->gp_regs.KVM_ARM_IRQ_sp, SP_irq); + write_special(ctxt->gp_regs.KVM_ARM_IRQ_lr, LR_irq); + write_special(ctxt->gp_regs.KVM_ARM_IRQ_spsr, SPSR_irq); + write_special(ctxt->gp_regs.KVM_ARM_FIQ_r8, R8_fiq); + write_special(ctxt->gp_regs.KVM_ARM_FIQ_r9, R9_fiq); 
+ write_special(ctxt->gp_regs.KVM_ARM_FIQ_r10, R10_fiq); + write_special(ctxt->gp_regs.KVM_ARM_FIQ_fp, R11_fiq); + write_special(ctxt->gp_regs.KVM_ARM_FIQ_ip, R12_fiq); + write_special(ctxt->gp_regs.KVM_ARM_FIQ_sp, SP_fiq); + write_special(ctxt->gp_regs.KVM_ARM_FIQ_lr, LR_fiq); + write_special(ctxt->gp_regs.KVM_ARM_FIQ_spsr, SPSR_fiq); +} diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c new file mode 100644 index 0000000..c478281 --- /dev/null +++ b/arch/arm/kvm/hyp/cp15-sr.c @@ -0,0 +1,84 @@ +/* + * Original code: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/kvm_hyp.h> + +static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx) +{ + return (u64 *)(ctxt->cp15 + idx); +} + +void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->cp15[c0_MPIDR] = read_sysreg(VMPIDR); + ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR); + ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR); + ctxt->cp15[c1_CPACR] = read_sysreg(CPACR); + *cp15_64(ctxt, c2_TTBR0) = read_sysreg(TTBR0); + *cp15_64(ctxt, c2_TTBR1) = read_sysreg(TTBR1); + ctxt->cp15[c2_TTBCR] = read_sysreg(TTBCR); + ctxt->cp15[c3_DACR] = read_sysreg(DACR); + ctxt->cp15[c5_DFSR] = read_sysreg(DFSR); + ctxt->cp15[c5_IFSR] = read_sysreg(IFSR); + ctxt->cp15[c5_ADFSR] = read_sysreg(ADFSR); + ctxt->cp15[c5_AIFSR] = read_sysreg(AIFSR); + ctxt->cp15[c6_DFAR] = read_sysreg(DFAR); + ctxt->cp15[c6_IFAR] = read_sysreg(IFAR); + *cp15_64(ctxt, c7_PAR) = read_sysreg(PAR); + ctxt->cp15[c10_PRRR] = read_sysreg(PRRR); + ctxt->cp15[c10_NMRR] = read_sysreg(NMRR); + ctxt->cp15[c10_AMAIR0] = read_sysreg(AMAIR0); + ctxt->cp15[c10_AMAIR1] = read_sysreg(AMAIR1); + ctxt->cp15[c12_VBAR] = read_sysreg(VBAR); + ctxt->cp15[c13_CID] = read_sysreg(CID); + ctxt->cp15[c13_TID_URW] = read_sysreg(TID_URW); + ctxt->cp15[c13_TID_URO] = read_sysreg(TID_URO); + ctxt->cp15[c13_TID_PRIV] = read_sysreg(TID_PRIV); + ctxt->cp15[c14_CNTKCTL] = read_sysreg(CNTKCTL); +} + +void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->cp15[c0_MPIDR], VMPIDR); + write_sysreg(ctxt->cp15[c0_CSSELR], CSSELR); + write_sysreg(ctxt->cp15[c1_SCTLR], SCTLR); + write_sysreg(ctxt->cp15[c1_CPACR], CPACR); + write_sysreg(*cp15_64(ctxt, c2_TTBR0), TTBR0); + write_sysreg(*cp15_64(ctxt, c2_TTBR1), TTBR1); + write_sysreg(ctxt->cp15[c2_TTBCR], TTBCR); + write_sysreg(ctxt->cp15[c3_DACR], DACR); + write_sysreg(ctxt->cp15[c5_DFSR], DFSR); + write_sysreg(ctxt->cp15[c5_IFSR], IFSR); + write_sysreg(ctxt->cp15[c5_ADFSR], ADFSR); + write_sysreg(ctxt->cp15[c5_AIFSR], AIFSR); + write_sysreg(ctxt->cp15[c6_DFAR], DFAR); + write_sysreg(ctxt->cp15[c6_IFAR], IFAR); + write_sysreg(*cp15_64(ctxt, c7_PAR), PAR); + write_sysreg(ctxt->cp15[c10_PRRR], PRRR); + write_sysreg(ctxt->cp15[c10_NMRR], NMRR); + 
write_sysreg(ctxt->cp15[c10_AMAIR0], AMAIR0); + write_sysreg(ctxt->cp15[c10_AMAIR1], AMAIR1); + write_sysreg(ctxt->cp15[c12_VBAR], VBAR); + write_sysreg(ctxt->cp15[c13_CID], CID); + write_sysreg(ctxt->cp15[c13_TID_URW], TID_URW); + write_sysreg(ctxt->cp15[c13_TID_URO], TID_URO); + write_sysreg(ctxt->cp15[c13_TID_PRIV], TID_PRIV); + write_sysreg(ctxt->cp15[c14_CNTKCTL], CNTKCTL); +} diff --git a/arch/arm/kvm/hyp/entry.S b/arch/arm/kvm/hyp/entry.S new file mode 100644 index 0000000..21c2388 --- /dev/null +++ b/arch/arm/kvm/hyp/entry.S @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2016 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/kvm_arm.h> + + .arch_extension virt + + .text + .pushsection .hyp.text, "ax" + +#define USR_REGS_OFFSET (CPU_CTXT_GP_REGS + GP_REGS_USR) + +/* int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host) */ +ENTRY(__guest_enter) + @ Save host registers + add r1, r1, #(USR_REGS_OFFSET + S_R4) + stm r1!, {r4-r12} + str lr, [r1, #4] @ Skip SP_usr (already saved) + + @ Restore guest registers + add r0, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0) + ldr lr, [r0, #S_LR] + ldm r0, {r0-r12} + + clrex + eret +ENDPROC(__guest_enter) + +ENTRY(__guest_exit) + /* + * return convention: + * guest r0, r1, r2 saved on the stack + * r0: vcpu pointer + * r1: exception code + */ + + add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R3) + stm r2!, {r3-r12} + str lr, [r2, #4] + add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0) + pop {r3, r4, r5} @ r0, r1, r2 + stm r2, {r3-r5} + + ldr r0, [r0, #VCPU_HOST_CTXT] + add r0, r0, #(USR_REGS_OFFSET + S_R4) + ldm r0!, {r4-r12} + ldr lr, [r0, #4] + + mov r0, r1 + bx lr +ENDPROC(__guest_exit) + +/* + * If VFPv3 support is not available, then we will not switch the VFP + * registers; however cp10 and cp11 accesses will still trap and fallback + * to the regular coprocessor emulation code, which currently will + * inject an undefined exception to the guest. + */ +#ifdef CONFIG_VFPv3 +ENTRY(__vfp_guest_restore) + push {r3, r4, lr} + + @ NEON/VFP used. Turn on VFP access. 
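[Editor's note: illustrative aside, not part of the patch.] banked-sr.c and cp15-sr.c above are written against the read_special()/write_special() accessors introduced in asm/kvm_hyp.h earlier in this series. Their exact definitions are not shown in this hunk, so the following is only a rough sketch of what such helpers typically look like (mrs/msr on the banked register names, accepted by the assembler thanks to the ".arch_extension virt" directive); treat the details as an assumption:

#include <linux/stringify.h>

/* Read/write a special or banked register by name, e.g. SP_usr, ELR_hyp. */
#define read_special(r) ({                                              \
        u32 __val;                                                      \
        asm volatile("mrs %0, " __stringify(r) : "=r" (__val));        \
        __val;                                                          \
})

#define write_special(v, r)                                             \
        asm volatile("msr " __stringify(r) ", %0" : : "r" (v))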
+ mrc p15, 4, r1, c1, c1, 2 @ HCPTR + bic r1, r1, #(HCPTR_TCP(10) | HCPTR_TCP(11)) + mcr p15, 4, r1, c1, c1, 2 @ HCPTR + isb + + @ Switch VFP/NEON hardware state to the guest's + mov r4, r0 + ldr r0, [r0, #VCPU_HOST_CTXT] + add r0, r0, #CPU_CTXT_VFP + bl __vfp_save_state + add r0, r4, #(VCPU_GUEST_CTXT + CPU_CTXT_VFP) + bl __vfp_restore_state + + pop {r3, r4, lr} + pop {r0, r1, r2} + clrex + eret +ENDPROC(__vfp_guest_restore) +#endif + + .popsection + diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S new file mode 100644 index 0000000..7809138 --- /dev/null +++ b/arch/arm/kvm/hyp/hyp-entry.S @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/linkage.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> + + .arch_extension virt + + .text + .pushsection .hyp.text, "ax" + +.macro load_vcpu reg + mrc p15, 4, \reg, c13, c0, 2 @ HTPIDR +.endm + +/******************************************************************** + * Hypervisor exception vector and handlers + * + * + * The KVM/ARM Hypervisor ABI is defined as follows: + * + * Entry to Hyp mode from the host kernel will happen _only_ when an HVC + * instruction is issued since all traps are disabled when running the host + * kernel as per the Hyp-mode initialization at boot time. + * + * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc + * below) when the HVC instruction is called from SVC mode (i.e. a guest or the + * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC + * instructions are called from within Hyp-mode. + * + * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): + * Switching to Hyp mode is done through a simple HVC #0 instruction. The + * exception vector code will check that the HVC comes from VMID==0. + * - r0 contains a pointer to a HYP function + * - r1, r2, and r3 contain arguments to the above function. + * - The HYP function will be called with its arguments in r0, r1 and r2. + * On HYP function return, we return directly to SVC. + * + * Note that the above is used to execute code in Hyp-mode from a host-kernel + * point of view, and is a different concept from performing a world-switch and + * executing guest code SVC mode (with a VMID != 0). 
+ */ + + .align 5 +__kvm_hyp_vector: + .global __kvm_hyp_vector + + @ Hyp-mode exception vector + W(b) hyp_reset + W(b) hyp_undef + W(b) hyp_svc + W(b) hyp_pabt + W(b) hyp_dabt + W(b) hyp_hvc + W(b) hyp_irq + W(b) hyp_fiq + +.macro invalid_vector label, cause + .align +\label: mov r0, #\cause + b __hyp_panic +.endm + + invalid_vector hyp_reset ARM_EXCEPTION_RESET + invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED + invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE + invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT + invalid_vector hyp_dabt ARM_EXCEPTION_DATA_ABORT + invalid_vector hyp_fiq ARM_EXCEPTION_FIQ + +ENTRY(__hyp_do_panic) + mrs lr, cpsr + bic lr, lr, #MODE_MASK + orr lr, lr, #SVC_MODE +THUMB( orr lr, lr, #PSR_T_BIT ) + msr spsr_cxsf, lr + ldr lr, =panic + msr ELR_hyp, lr + ldr lr, =kvm_call_hyp + clrex + eret +ENDPROC(__hyp_do_panic) + +hyp_hvc: + /* + * Getting here is either because of a trap from a guest, + * or from executing HVC from the host kernel, which means + * "do something in Hyp mode". + */ + push {r0, r1, r2} + + @ Check syndrome register + mrc p15, 4, r1, c5, c2, 0 @ HSR + lsr r0, r1, #HSR_EC_SHIFT + cmp r0, #HSR_EC_HVC + bne guest_trap @ Not HVC instr. + + /* + * Let's check if the HVC came from VMID 0 and allow simple + * switch to Hyp mode + */ + mrrc p15, 6, r0, r2, c2 + lsr r2, r2, #16 + and r2, r2, #0xff + cmp r2, #0 + bne guest_trap @ Guest called HVC + + /* + * Getting here means host called HVC, we shift parameters and branch + * to Hyp function. + */ + pop {r0, r1, r2} + + /* Check for __hyp_get_vectors */ + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + beq 1f + + push {lr} + + mov lr, r0 + mov r0, r1 + mov r1, r2 + mov r2, r3 + +THUMB( orr lr, #1) + blx lr @ Call the HYP function + + pop {lr} +1: eret + +guest_trap: + load_vcpu r0 @ Load VCPU pointer to r0 + +#ifdef CONFIG_VFPv3 + @ Check for a VFP access + lsr r1, r1, #HSR_EC_SHIFT + cmp r1, #HSR_EC_CP_0_13 + beq __vfp_guest_restore +#endif + + mov r1, #ARM_EXCEPTION_HVC + b __guest_exit + +hyp_irq: + push {r0, r1, r2} + mov r1, #ARM_EXCEPTION_IRQ + load_vcpu r0 @ Load VCPU pointer to r0 + b __guest_exit + + .ltorg + + .popsection diff --git a/arch/arm/kvm/hyp/s2-setup.c b/arch/arm/kvm/hyp/s2-setup.c new file mode 100644 index 0000000..7be39af --- /dev/null +++ b/arch/arm/kvm/hyp/s2-setup.c @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2016 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/types.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> + +void __hyp_text __init_stage2_translation(void) +{ + u64 val; + + val = read_sysreg(VTCR) & ~VTCR_MASK; + + val |= read_sysreg(HTCR) & VTCR_HTCR_SH; + val |= KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S; + + write_sysreg(val, VTCR); +} diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c new file mode 100644 index 0000000..b13caa9 --- /dev/null +++ b/arch/arm/kvm/hyp/switch.c @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> + +__asm__(".arch_extension virt"); + +/* + * Activate the traps, saving the host's fpexc register before + * overwriting it. We'll restore it on VM exit. + */ +static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host) +{ + u32 val; + + /* + * We are about to set HCPTR.TCP10/11 to trap all floating point + * register accesses to HYP, however, the ARM ARM clearly states that + * traps are only taken to HYP if the operation would not otherwise + * trap to SVC. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to SVC, when setting the TCP bits. 
+ */ + val = read_sysreg(VFP_FPEXC); + *fpexc_host = val; + if (!(val & FPEXC_EN)) { + write_sysreg(val | FPEXC_EN, VFP_FPEXC); + isb(); + } + + write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR); + /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ + write_sysreg(HSTR_T(15), HSTR); + write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR); + val = read_sysreg(HDCR); + write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR); +} + +static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +{ + u32 val; + + write_sysreg(0, HCR); + write_sysreg(0, HSTR); + val = read_sysreg(HDCR); + write_sysreg(val & ~(HDCR_TPM | HDCR_TPMCR), HDCR); + write_sysreg(0, HCPTR); +} + +static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + write_sysreg(kvm->arch.vttbr, VTTBR); + write_sysreg(vcpu->arch.midr, VPIDR); +} + +static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, VTTBR); + write_sysreg(read_sysreg(MIDR), VPIDR); +} + +static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) +{ + __vgic_v2_save_state(vcpu); +} + +static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) +{ + __vgic_v2_restore_state(vcpu); +} + +static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) +{ + u32 hsr = read_sysreg(HSR); + u8 ec = hsr >> HSR_EC_SHIFT; + u32 hpfar, far; + + vcpu->arch.fault.hsr = hsr; + + if (ec == HSR_EC_IABT) + far = read_sysreg(HIFAR); + else if (ec == HSR_EC_DABT) + far = read_sysreg(HDFAR); + else + return true; + + /* + * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode: + * + * Abort on the stage 2 translation for a memory access from a + * Non-secure PL1 or PL0 mode: + * + * For any Access flag fault or Translation fault, and also for any + * Permission fault on the stage 2 translation of a memory access + * made as part of a translation table walk for a stage 1 translation, + * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR + * is UNKNOWN. + */ + if (!(hsr & HSR_DABT_S1PTW) && (hsr & HSR_FSC_TYPE) == FSC_PERM) { + u64 par, tmp; + + par = read_sysreg(PAR); + write_sysreg(far, ATS1CPR); + isb(); + + tmp = read_sysreg(PAR); + write_sysreg(par, PAR); + + if (unlikely(tmp & 1)) + return false; /* Translation failed, back to guest */ + + hpfar = ((tmp >> 12) & ((1UL << 28) - 1)) << 4; + } else { + hpfar = read_sysreg(HPFAR); + } + + vcpu->arch.fault.hxfar = far; + vcpu->arch.fault.hpfar = hpfar; + return true; +} + +static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; + u64 exit_code; + u32 fpexc; + + vcpu = kern_hyp_va(vcpu); + write_sysreg(vcpu, HTPIDR); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + guest_ctxt = &vcpu->arch.ctxt; + + __sysreg_save_state(host_ctxt); + __banked_save_state(host_ctxt); + + __activate_traps(vcpu, &fpexc); + __activate_vm(vcpu); + + __vgic_restore_state(vcpu); + __timer_restore_state(vcpu); + + __sysreg_restore_state(guest_ctxt); + __banked_restore_state(guest_ctxt); + + /* Jump in the fire! */ +again: + exit_code = __guest_enter(vcpu, host_ctxt); + /* And we're baaack! 
*/ + + if (exit_code == ARM_EXCEPTION_HVC && !__populate_fault_info(vcpu)) + goto again; + + fp_enabled = __vfp_enabled(); + + __banked_save_state(guest_ctxt); + __sysreg_save_state(guest_ctxt); + __timer_save_state(vcpu); + __vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __banked_restore_state(host_ctxt); + __sysreg_restore_state(host_ctxt); + + if (fp_enabled) { + __vfp_save_state(&guest_ctxt->vfp); + __vfp_restore_state(&host_ctxt->vfp); + } + + write_sysreg(fpexc, VFP_FPEXC); + + return exit_code; +} + +__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +static const char * const __hyp_panic_string[] = { + [ARM_EXCEPTION_RESET] = "\nHYP panic: RST PC:%08x CPSR:%08x", + [ARM_EXCEPTION_UNDEFINED] = "\nHYP panic: UNDEF PC:%08x CPSR:%08x", + [ARM_EXCEPTION_SOFTWARE] = "\nHYP panic: SVC PC:%08x CPSR:%08x", + [ARM_EXCEPTION_PREF_ABORT] = "\nHYP panic: PABRT PC:%08x CPSR:%08x", + [ARM_EXCEPTION_DATA_ABORT] = "\nHYP panic: DABRT PC:%08x ADDR:%08x", + [ARM_EXCEPTION_IRQ] = "\nHYP panic: IRQ PC:%08x CPSR:%08x", + [ARM_EXCEPTION_FIQ] = "\nHYP panic: FIQ PC:%08x CPSR:%08x", + [ARM_EXCEPTION_HVC] = "\nHYP panic: HVC PC:%08x CPSR:%08x", +}; + +void __hyp_text __noreturn __hyp_panic(int cause) +{ + u32 elr = read_special(ELR_hyp); + u32 val; + + if (cause == ARM_EXCEPTION_DATA_ABORT) + val = read_sysreg(HDFAR); + else + val = read_special(SPSR); + + if (read_sysreg(VTTBR)) { + struct kvm_vcpu *vcpu; + struct kvm_cpu_context *host_ctxt; + + vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR); + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state(host_ctxt); + } + + /* Call panic for real */ + __hyp_do_panic(__hyp_panic_string[cause], elr, val); + + unreachable(); +} diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c new file mode 100644 index 0000000..a263600 --- /dev/null +++ b/arch/arm/kvm/hyp/tlb.c @@ -0,0 +1,70 @@ +/* + * Original code: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/kvm_hyp.h> + +/** + * Flush per-VMID TLBs + * + * __kvm_tlb_flush_vmid(struct kvm *kvm); + * + * We rely on the hardware to broadcast the TLB invalidation to all CPUs + * inside the inner-shareable domain (which is the case for all v7 + * implementations). If we come across a non-IS SMP implementation, we'll + * have to use an IPI based mechanism. Until then, we stick to the simple + * hardware assisted version. + * + * As v7 does not support flushing per IPA, just nuke the whole TLB + * instead, ignoring the ipa value. 
+ */ +static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, VTTBR); + isb(); + + write_sysreg(0, TLBIALLIS); + dsb(ish); + isb(); + + write_sysreg(0, VTTBR); +} + +__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); + +static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + __tlb_flush_vmid(kvm); +} + +__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, + phys_addr_t ipa); + +static void __hyp_text __tlb_flush_vm_context(void) +{ + write_sysreg(0, TLBIALLNSNHIS); + write_sysreg(0, ICIALLUIS); + dsb(ish); +} + +__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void); diff --git a/arch/arm/kvm/hyp/vfp.S b/arch/arm/kvm/hyp/vfp.S new file mode 100644 index 0000000..7c297e8 --- /dev/null +++ b/arch/arm/kvm/hyp/vfp.S @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <asm/vfpmacros.h> + + .text + .pushsection .hyp.text, "ax" + +/* void __vfp_save_state(struct vfp_hard_struct *vfp); */ +ENTRY(__vfp_save_state) + push {r4, r5} + VFPFMRX r1, FPEXC + + @ Make sure *really* VFP is enabled so we can touch the registers. + orr r5, r1, #FPEXC_EN + tst r5, #FPEXC_EX @ Check for VFP Subarchitecture + bic r5, r5, #FPEXC_EX @ FPEXC_EX disable + VFPFMXR FPEXC, r5 + isb + + VFPFMRX r2, FPSCR + beq 1f + + @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are upredictable, so + @ we only need to save them if FPEXC_EX is set. + VFPFMRX r3, FPINST + tst r5, #FPEXC_FP2V + VFPFMRX r4, FPINST2, ne @ vmrsne +1: + VFPFSTMIA r0, r5 @ Save VFP registers + stm r0, {r1-r4} @ Save FPEXC, FPSCR, FPINST, FPINST2 + pop {r4, r5} + bx lr +ENDPROC(__vfp_save_state) + +/* void __vfp_restore_state(struct vfp_hard_struct *vfp); + * Assume FPEXC_EN is on and FPEXC_EX is off */ +ENTRY(__vfp_restore_state) + VFPFLDMIA r0, r1 @ Load VFP registers + ldm r0, {r0-r3} @ Load FPEXC, FPSCR, FPINST, FPINST2 + + VFPFMXR FPSCR, r1 + tst r0, #FPEXC_EX @ Check for VFP Subarchitecture + beq 1f + VFPFMXR FPINST, r2 + tst r0, #FPEXC_FP2V + VFPFMXR FPINST2, r3, ne +1: + VFPFMXR FPEXC, r0 @ FPEXC (last, in case !EN) + bx lr +ENDPROC(__vfp_restore_state) + + .popsection diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 3988e72..1f9ae17 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -84,14 +84,6 @@ __do_hyp_init: orr r0, r0, r1 mcr p15, 4, r0, c2, c0, 2 @ HTCR - mrc p15, 4, r1, c2, c1, 2 @ VTCR - ldr r2, =VTCR_MASK - bic r1, r1, r2 - bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits - orr r1, r0, r1 - orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S) - mcr p15, 4, r1, c2, c1, 2 @ VTCR - @ Use the same memory attributes for hyp. accesses as the kernel @ (copy MAIRx ro HMAIRx). 
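[Editor's note: illustrative aside, not part of the patch.] The new switch.c and tlb.c keep their implementations private to the .hyp.text section and re-export them under the long-standing symbol names with __alias(). A minimal sketch of that pattern, assuming the usual kernel definition of __alias() (GCC's alias attribute, as in include/linux/compiler-gcc.h); the example names are hypothetical:

/* The declared name resolves to the same code as the target symbol. */
#define __alias(symbol) __attribute__((alias(#symbol)))

static int __impl_add_one(int x)                /* hidden implementation */
{
        return x + 1;
}

__alias(__impl_add_one) int add_one(int x);     /* public name, same code */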
mrc p15, 0, r0, c10, c2, 0 diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 900ef6d..b1bd316 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -17,211 +17,14 @@ */ #include <linux/linkage.h> -#include <linux/const.h> -#include <asm/unified.h> -#include <asm/page.h> -#include <asm/ptrace.h> -#include <asm/asm-offsets.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> -#include <asm/vfpmacros.h> -#include "interrupts_head.S" .text -__kvm_hyp_code_start: - .globl __kvm_hyp_code_start - -/******************************************************************** - * Flush per-VMID TLBs - * - * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); - * - * We rely on the hardware to broadcast the TLB invalidation to all CPUs - * inside the inner-shareable domain (which is the case for all v7 - * implementations). If we come across a non-IS SMP implementation, we'll - * have to use an IPI based mechanism. Until then, we stick to the simple - * hardware assisted version. - * - * As v7 does not support flushing per IPA, just nuke the whole TLB - * instead, ignoring the ipa value. - */ -ENTRY(__kvm_tlb_flush_vmid_ipa) - push {r2, r3} - - dsb ishst - add r0, r0, #KVM_VTTBR - ldrd r2, r3, [r0] - mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR - isb - mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) - dsb ish - isb - mov r2, #0 - mov r3, #0 - mcrr p15, 6, r2, r3, c2 @ Back to VMID #0 - isb @ Not necessary if followed by eret - - pop {r2, r3} - bx lr -ENDPROC(__kvm_tlb_flush_vmid_ipa) - -/** - * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs - * - * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address - * parameter - */ - -ENTRY(__kvm_tlb_flush_vmid) - b __kvm_tlb_flush_vmid_ipa -ENDPROC(__kvm_tlb_flush_vmid) - -/******************************************************************** - * Flush TLBs and instruction caches of all CPUs inside the inner-shareable - * domain, for all VMIDs - * - * void __kvm_flush_vm_context(void); - */ -ENTRY(__kvm_flush_vm_context) - mov r0, #0 @ rn parameter for c15 flushes is SBZ - - /* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */ - mcr p15, 4, r0, c8, c3, 4 - /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */ - mcr p15, 0, r0, c7, c1, 0 - dsb ish - isb @ Not necessary if followed by eret - - bx lr -ENDPROC(__kvm_flush_vm_context) - - -/******************************************************************** - * Hypervisor world-switch code - * - * - * int __kvm_vcpu_run(struct kvm_vcpu *vcpu) - */ -ENTRY(__kvm_vcpu_run) - @ Save the vcpu pointer - mcr p15, 4, vcpu, c13, c0, 2 @ HTPIDR - - save_host_regs - - restore_vgic_state - restore_timer_state - - @ Store hardware CP15 state and load guest state - read_cp15_state store_to_vcpu = 0 - write_cp15_state read_from_vcpu = 1 - - @ If the host kernel has not been configured with VFPv3 support, - @ then it is safer if we deny guests from using it as well. 
-#ifdef CONFIG_VFPv3 - @ Set FPEXC_EN so the guest doesn't trap floating point instructions - VFPFMRX r2, FPEXC @ VMRS - push {r2} - orr r2, r2, #FPEXC_EN - VFPFMXR FPEXC, r2 @ VMSR -#endif - - @ Configure Hyp-role - configure_hyp_role vmentry - - @ Trap coprocessor CRx accesses - set_hstr vmentry - set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) - set_hdcr vmentry - - @ Write configured ID register into MIDR alias - ldr r1, [vcpu, #VCPU_MIDR] - mcr p15, 4, r1, c0, c0, 0 - - @ Write guest view of MPIDR into VMPIDR - ldr r1, [vcpu, #CP15_OFFSET(c0_MPIDR)] - mcr p15, 4, r1, c0, c0, 5 - - @ Set up guest memory translation - ldr r1, [vcpu, #VCPU_KVM] - add r1, r1, #KVM_VTTBR - ldrd r2, r3, [r1] - mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR - - @ We're all done, just restore the GPRs and go to the guest - restore_guest_regs - clrex @ Clear exclusive monitor - eret - -__kvm_vcpu_return: - /* - * return convention: - * guest r0, r1, r2 saved on the stack - * r0: vcpu pointer - * r1: exception code - */ - save_guest_regs - - @ Set VMID == 0 - mov r2, #0 - mov r3, #0 - mcrr p15, 6, r2, r3, c2 @ Write VTTBR - - @ Don't trap coprocessor accesses for host kernel - set_hstr vmexit - set_hdcr vmexit - set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore - -#ifdef CONFIG_VFPv3 - @ Switch VFP/NEON hardware state to the host's - add r7, vcpu, #VCPU_VFP_GUEST - store_vfp_state r7 - add r7, vcpu, #VCPU_VFP_HOST - ldr r7, [r7] - restore_vfp_state r7 - -after_vfp_restore: - @ Restore FPEXC_EN which we clobbered on entry - pop {r2} - VFPFMXR FPEXC, r2 -#else -after_vfp_restore: -#endif - - @ Reset Hyp-role - configure_hyp_role vmexit - - @ Let host read hardware MIDR - mrc p15, 0, r2, c0, c0, 0 - mcr p15, 4, r2, c0, c0, 0 - - @ Back to hardware MPIDR - mrc p15, 0, r2, c0, c0, 5 - mcr p15, 4, r2, c0, c0, 5 - - @ Store guest CP15 state and restore host state - read_cp15_state store_to_vcpu = 1 - write_cp15_state read_from_vcpu = 0 - - save_timer_state - save_vgic_state - - restore_host_regs - clrex @ Clear exclusive monitor -#ifndef CONFIG_CPU_ENDIAN_BE8 - mov r0, r1 @ Return the return code - mov r1, #0 @ Clear upper bits in return value -#else - @ r1 already has return code - mov r0, #0 @ Clear upper bits in return value -#endif /* CONFIG_CPU_ENDIAN_BE8 */ - bx lr @ return to IOCTL - /******************************************************************** * Call function in Hyp mode * * - * u64 kvm_call_hyp(void *hypfn, ...); + * unsigned long kvm_call_hyp(void *hypfn, ...); * * This is not really a variadic function in the classic C-way and care must * be taken when calling this to ensure parameters are passed in registers @@ -232,7 +35,7 @@ after_vfp_restore: * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the * function pointer can be passed). The function being called must be mapped * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in r0 and r1. + * passed in r0 (strictly 32bit). 
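[Editor's note: illustrative aside, not part of the patch.] The convention above is easiest to read from the caller's side. A minimal, hedged sketch of how the host is expected to enter Hyp mode through this trampoline; the wrapper function is hypothetical, only kvm_call_hyp() and the __kvm_* symbols come from this series:

/* Host side (SVC mode): HVC #0 with r0 = HYP function pointer,
 * r1-r3 = up to three arguments, 32-bit result returned in r0.
 */
static int example_run_guest(struct kvm_vcpu *vcpu)
{
        kvm_call_hyp(__kvm_flush_vm_context);          /* no-argument call  */
        return kvm_call_hyp(__kvm_vcpu_run, vcpu);     /* result back in r0 */
}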
* * A function pointer with a value of 0xffffffff has a special meaning, * and is used to implement __hyp_get_vectors in the same way as in @@ -246,281 +49,4 @@ after_vfp_restore: ENTRY(kvm_call_hyp) hvc #0 bx lr - -/******************************************************************** - * Hypervisor exception vector and handlers - * - * - * The KVM/ARM Hypervisor ABI is defined as follows: - * - * Entry to Hyp mode from the host kernel will happen _only_ when an HVC - * instruction is issued since all traps are disabled when running the host - * kernel as per the Hyp-mode initialization at boot time. - * - * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc - * below) when the HVC instruction is called from SVC mode (i.e. a guest or the - * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC - * instructions are called from within Hyp-mode. - * - * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): - * Switching to Hyp mode is done through a simple HVC #0 instruction. The - * exception vector code will check that the HVC comes from VMID==0 and if - * so will push the necessary state (SPSR, lr_usr) on the Hyp stack. - * - r0 contains a pointer to a HYP function - * - r1, r2, and r3 contain arguments to the above function. - * - The HYP function will be called with its arguments in r0, r1 and r2. - * On HYP function return, we return directly to SVC. - * - * Note that the above is used to execute code in Hyp-mode from a host-kernel - * point of view, and is a different concept from performing a world-switch and - * executing guest code SVC mode (with a VMID != 0). - */ - -/* Handle undef, svc, pabt, or dabt by crashing with a user notice */ -.macro bad_exception exception_code, panic_str - push {r0-r2} - mrrc p15, 6, r0, r1, c2 @ Read VTTBR - lsr r1, r1, #16 - ands r1, r1, #0xff - beq 99f - - load_vcpu @ Load VCPU pointer - .if \exception_code == ARM_EXCEPTION_DATA_ABORT - mrc p15, 4, r2, c5, c2, 0 @ HSR - mrc p15, 4, r1, c6, c0, 0 @ HDFAR - str r2, [vcpu, #VCPU_HSR] - str r1, [vcpu, #VCPU_HxFAR] - .endif - .if \exception_code == ARM_EXCEPTION_PREF_ABORT - mrc p15, 4, r2, c5, c2, 0 @ HSR - mrc p15, 4, r1, c6, c0, 2 @ HIFAR - str r2, [vcpu, #VCPU_HSR] - str r1, [vcpu, #VCPU_HxFAR] - .endif - mov r1, #\exception_code - b __kvm_vcpu_return - - @ We were in the host already. Let's craft a panic-ing return to SVC. -99: mrs r2, cpsr - bic r2, r2, #MODE_MASK - orr r2, r2, #SVC_MODE -THUMB( orr r2, r2, #PSR_T_BIT ) - msr spsr_cxsf, r2 - mrs r1, ELR_hyp - ldr r2, =panic - msr ELR_hyp, r2 - ldr r0, =\panic_str - clrex @ Clear exclusive monitor - eret -.endm - - .text - - .align 5 -__kvm_hyp_vector: - .globl __kvm_hyp_vector - - @ Hyp-mode exception vector - W(b) hyp_reset - W(b) hyp_undef - W(b) hyp_svc - W(b) hyp_pabt - W(b) hyp_dabt - W(b) hyp_hvc - W(b) hyp_irq - W(b) hyp_fiq - - .align -hyp_reset: - b hyp_reset - - .align -hyp_undef: - bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str - - .align -hyp_svc: - bad_exception ARM_EXCEPTION_HVC, svc_die_str - - .align -hyp_pabt: - bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str - - .align -hyp_dabt: - bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str - - .align -hyp_hvc: - /* - * Getting here is either becuase of a trap from a guest or from calling - * HVC from the host kernel, which means "switch to Hyp mode". - */ - push {r0, r1, r2} - - @ Check syndrome register - mrc p15, 4, r1, c5, c2, 0 @ HSR - lsr r0, r1, #HSR_EC_SHIFT - cmp r0, #HSR_EC_HVC - bne guest_trap @ Not HVC instr. 
- - /* - * Let's check if the HVC came from VMID 0 and allow simple - * switch to Hyp mode - */ - mrrc p15, 6, r0, r2, c2 - lsr r2, r2, #16 - and r2, r2, #0xff - cmp r2, #0 - bne guest_trap @ Guest called HVC - - /* - * Getting here means host called HVC, we shift parameters and branch - * to Hyp function. - */ - pop {r0, r1, r2} - - /* Check for __hyp_get_vectors */ - cmp r0, #-1 - mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR - beq 1f - - push {lr} - mrs lr, SPSR - push {lr} - - mov lr, r0 - mov r0, r1 - mov r1, r2 - mov r2, r3 - -THUMB( orr lr, #1) - blx lr @ Call the HYP function - - pop {lr} - msr SPSR_csxf, lr - pop {lr} -1: eret - -guest_trap: - load_vcpu @ Load VCPU pointer to r0 - str r1, [vcpu, #VCPU_HSR] - - @ Check if we need the fault information - lsr r1, r1, #HSR_EC_SHIFT -#ifdef CONFIG_VFPv3 - cmp r1, #HSR_EC_CP_0_13 - beq switch_to_guest_vfp -#endif - cmp r1, #HSR_EC_IABT - mrceq p15, 4, r2, c6, c0, 2 @ HIFAR - beq 2f - cmp r1, #HSR_EC_DABT - bne 1f - mrc p15, 4, r2, c6, c0, 0 @ HDFAR - -2: str r2, [vcpu, #VCPU_HxFAR] - - /* - * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode: - * - * Abort on the stage 2 translation for a memory access from a - * Non-secure PL1 or PL0 mode: - * - * For any Access flag fault or Translation fault, and also for any - * Permission fault on the stage 2 translation of a memory access - * made as part of a translation table walk for a stage 1 translation, - * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR - * is UNKNOWN. - */ - - /* Check for permission fault, and S1PTW */ - mrc p15, 4, r1, c5, c2, 0 @ HSR - and r0, r1, #HSR_FSC_TYPE - cmp r0, #FSC_PERM - tsteq r1, #(1 << 7) @ S1PTW - mrcne p15, 4, r2, c6, c0, 4 @ HPFAR - bne 3f - - /* Preserve PAR */ - mrrc p15, 0, r0, r1, c7 @ PAR - push {r0, r1} - - /* Resolve IPA using the xFAR */ - mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR - isb - mrrc p15, 0, r0, r1, c7 @ PAR - tst r0, #1 - bne 4f @ Failed translation - ubfx r2, r0, #12, #20 - lsl r2, r2, #4 - orr r2, r2, r1, lsl #24 - - /* Restore PAR */ - pop {r0, r1} - mcrr p15, 0, r0, r1, c7 @ PAR - -3: load_vcpu @ Load VCPU pointer to r0 - str r2, [r0, #VCPU_HPFAR] - -1: mov r1, #ARM_EXCEPTION_HVC - b __kvm_vcpu_return - -4: pop {r0, r1} @ Failed translation, return to guest - mcrr p15, 0, r0, r1, c7 @ PAR - clrex - pop {r0, r1, r2} - eret - -/* - * If VFPv3 support is not available, then we will not switch the VFP - * registers; however cp10 and cp11 accesses will still trap and fallback - * to the regular coprocessor emulation code, which currently will - * inject an undefined exception to the guest. - */ -#ifdef CONFIG_VFPv3 -switch_to_guest_vfp: - push {r3-r7} - - @ NEON/VFP used. Turn on VFP access. 
- set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11)) - - @ Switch VFP/NEON hardware state to the guest's - add r7, r0, #VCPU_VFP_HOST - ldr r7, [r7] - store_vfp_state r7 - add r7, r0, #VCPU_VFP_GUEST - restore_vfp_state r7 - - pop {r3-r7} - pop {r0-r2} - clrex - eret -#endif - - .align -hyp_irq: - push {r0, r1, r2} - mov r1, #ARM_EXCEPTION_IRQ - load_vcpu @ Load VCPU pointer to r0 - b __kvm_vcpu_return - - .align -hyp_fiq: - b hyp_fiq - - .ltorg - -__kvm_hyp_code_end: - .globl __kvm_hyp_code_end - - .section ".rodata" - -und_die_str: - .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" -pabt_die_str: - .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n" -dabt_die_str: - .ascii "unexpected data abort in Hyp mode at: %#08x\n" -svc_die_str: - .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" +ENDPROC(kvm_call_hyp) diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S deleted file mode 100644 index 51a5950..0000000 --- a/arch/arm/kvm/interrupts_head.S +++ /dev/null @@ -1,648 +0,0 @@ -#include <linux/irqchip/arm-gic.h> -#include <asm/assembler.h> - -#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) -#define VCPU_USR_SP (VCPU_USR_REG(13)) -#define VCPU_USR_LR (VCPU_USR_REG(14)) -#define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4)) - -/* - * Many of these macros need to access the VCPU structure, which is always - * held in r0. These macros should never clobber r1, as it is used to hold the - * exception code on the return path (except of course the macro that switches - * all the registers before the final jump to the VM). - */ -vcpu .req r0 @ vcpu pointer always in r0 - -/* Clobbers {r2-r6} */ -.macro store_vfp_state vfp_base - @ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions - VFPFMRX r2, FPEXC - @ Make sure VFP is enabled so we can touch the registers. - orr r6, r2, #FPEXC_EN - VFPFMXR FPEXC, r6 - - VFPFMRX r3, FPSCR - tst r2, #FPEXC_EX @ Check for VFP Subarchitecture - beq 1f - @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are upredictable, so - @ we only need to save them if FPEXC_EX is set. - VFPFMRX r4, FPINST - tst r2, #FPEXC_FP2V - VFPFMRX r5, FPINST2, ne @ vmrsne - bic r6, r2, #FPEXC_EX @ FPEXC_EX disable - VFPFMXR FPEXC, r6 -1: - VFPFSTMIA \vfp_base, r6 @ Save VFP registers - stm \vfp_base, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2 -.endm - -/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r6} */ -.macro restore_vfp_state vfp_base - VFPFLDMIA \vfp_base, r6 @ Load VFP registers - ldm \vfp_base, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2 - - VFPFMXR FPSCR, r3 - tst r2, #FPEXC_EX @ Check for VFP Subarchitecture - beq 1f - VFPFMXR FPINST, r4 - tst r2, #FPEXC_FP2V - VFPFMXR FPINST2, r5, ne -1: - VFPFMXR FPEXC, r2 @ FPEXC (last, in case !EN) -.endm - -/* These are simply for the macros to work - value don't have meaning */ -.equ usr, 0 -.equ svc, 1 -.equ abt, 2 -.equ und, 3 -.equ irq, 4 -.equ fiq, 5 - -.macro push_host_regs_mode mode - mrs r2, SP_\mode - mrs r3, LR_\mode - mrs r4, SPSR_\mode - push {r2, r3, r4} -.endm - -/* - * Store all host persistent registers on the stack. - * Clobbers all registers, in all modes, except r0 and r1. - */ -.macro save_host_regs - /* Hyp regs. 
Only ELR_hyp (SPSR_hyp already saved) */ - mrs r2, ELR_hyp - push {r2} - - /* usr regs */ - push {r4-r12} @ r0-r3 are always clobbered - mrs r2, SP_usr - mov r3, lr - push {r2, r3} - - push_host_regs_mode svc - push_host_regs_mode abt - push_host_regs_mode und - push_host_regs_mode irq - - /* fiq regs */ - mrs r2, r8_fiq - mrs r3, r9_fiq - mrs r4, r10_fiq - mrs r5, r11_fiq - mrs r6, r12_fiq - mrs r7, SP_fiq - mrs r8, LR_fiq - mrs r9, SPSR_fiq - push {r2-r9} -.endm - -.macro pop_host_regs_mode mode - pop {r2, r3, r4} - msr SP_\mode, r2 - msr LR_\mode, r3 - msr SPSR_\mode, r4 -.endm - -/* - * Restore all host registers from the stack. - * Clobbers all registers, in all modes, except r0 and r1. - */ -.macro restore_host_regs - pop {r2-r9} - msr r8_fiq, r2 - msr r9_fiq, r3 - msr r10_fiq, r4 - msr r11_fiq, r5 - msr r12_fiq, r6 - msr SP_fiq, r7 - msr LR_fiq, r8 - msr SPSR_fiq, r9 - - pop_host_regs_mode irq - pop_host_regs_mode und - pop_host_regs_mode abt - pop_host_regs_mode svc - - pop {r2, r3} - msr SP_usr, r2 - mov lr, r3 - pop {r4-r12} - - pop {r2} - msr ELR_hyp, r2 -.endm - -/* - * Restore SP, LR and SPSR for a given mode. offset is the offset of - * this mode's registers from the VCPU base. - * - * Assumes vcpu pointer in vcpu reg - * - * Clobbers r1, r2, r3, r4. - */ -.macro restore_guest_regs_mode mode, offset - add r1, vcpu, \offset - ldm r1, {r2, r3, r4} - msr SP_\mode, r2 - msr LR_\mode, r3 - msr SPSR_\mode, r4 -.endm - -/* - * Restore all guest registers from the vcpu struct. - * - * Assumes vcpu pointer in vcpu reg - * - * Clobbers *all* registers. - */ -.macro restore_guest_regs - restore_guest_regs_mode svc, #VCPU_SVC_REGS - restore_guest_regs_mode abt, #VCPU_ABT_REGS - restore_guest_regs_mode und, #VCPU_UND_REGS - restore_guest_regs_mode irq, #VCPU_IRQ_REGS - - add r1, vcpu, #VCPU_FIQ_REGS - ldm r1, {r2-r9} - msr r8_fiq, r2 - msr r9_fiq, r3 - msr r10_fiq, r4 - msr r11_fiq, r5 - msr r12_fiq, r6 - msr SP_fiq, r7 - msr LR_fiq, r8 - msr SPSR_fiq, r9 - - @ Load return state - ldr r2, [vcpu, #VCPU_PC] - ldr r3, [vcpu, #VCPU_CPSR] - msr ELR_hyp, r2 - msr SPSR_cxsf, r3 - - @ Load user registers - ldr r2, [vcpu, #VCPU_USR_SP] - ldr r3, [vcpu, #VCPU_USR_LR] - msr SP_usr, r2 - mov lr, r3 - add vcpu, vcpu, #(VCPU_USR_REGS) - ldm vcpu, {r0-r12} -.endm - -/* - * Save SP, LR and SPSR for a given mode. offset is the offset of - * this mode's registers from the VCPU base. - * - * Assumes vcpu pointer in vcpu reg - * - * Clobbers r2, r3, r4, r5. - */ -.macro save_guest_regs_mode mode, offset - add r2, vcpu, \offset - mrs r3, SP_\mode - mrs r4, LR_\mode - mrs r5, SPSR_\mode - stm r2, {r3, r4, r5} -.endm - -/* - * Save all guest registers to the vcpu struct - * Expects guest's r0, r1, r2 on the stack. - * - * Assumes vcpu pointer in vcpu reg - * - * Clobbers r2, r3, r4, r5. 
- */ -.macro save_guest_regs - @ Store usr registers - add r2, vcpu, #VCPU_USR_REG(3) - stm r2, {r3-r12} - add r2, vcpu, #VCPU_USR_REG(0) - pop {r3, r4, r5} @ r0, r1, r2 - stm r2, {r3, r4, r5} - mrs r2, SP_usr - mov r3, lr - str r2, [vcpu, #VCPU_USR_SP] - str r3, [vcpu, #VCPU_USR_LR] - - @ Store return state - mrs r2, ELR_hyp - mrs r3, spsr - str r2, [vcpu, #VCPU_PC] - str r3, [vcpu, #VCPU_CPSR] - - @ Store other guest registers - save_guest_regs_mode svc, #VCPU_SVC_REGS - save_guest_regs_mode abt, #VCPU_ABT_REGS - save_guest_regs_mode und, #VCPU_UND_REGS - save_guest_regs_mode irq, #VCPU_IRQ_REGS -.endm - -/* Reads cp15 registers from hardware and stores them in memory - * @store_to_vcpu: If 0, registers are written in-order to the stack, - * otherwise to the VCPU struct pointed to by vcpup - * - * Assumes vcpu pointer in vcpu reg - * - * Clobbers r2 - r12 - */ -.macro read_cp15_state store_to_vcpu - mrc p15, 0, r2, c1, c0, 0 @ SCTLR - mrc p15, 0, r3, c1, c0, 2 @ CPACR - mrc p15, 0, r4, c2, c0, 2 @ TTBCR - mrc p15, 0, r5, c3, c0, 0 @ DACR - mrrc p15, 0, r6, r7, c2 @ TTBR 0 - mrrc p15, 1, r8, r9, c2 @ TTBR 1 - mrc p15, 0, r10, c10, c2, 0 @ PRRR - mrc p15, 0, r11, c10, c2, 1 @ NMRR - mrc p15, 2, r12, c0, c0, 0 @ CSSELR - - .if \store_to_vcpu == 0 - push {r2-r12} @ Push CP15 registers - .else - str r2, [vcpu, #CP15_OFFSET(c1_SCTLR)] - str r3, [vcpu, #CP15_OFFSET(c1_CPACR)] - str r4, [vcpu, #CP15_OFFSET(c2_TTBCR)] - str r5, [vcpu, #CP15_OFFSET(c3_DACR)] - add r2, vcpu, #CP15_OFFSET(c2_TTBR0) - strd r6, r7, [r2] - add r2, vcpu, #CP15_OFFSET(c2_TTBR1) - strd r8, r9, [r2] - str r10, [vcpu, #CP15_OFFSET(c10_PRRR)] - str r11, [vcpu, #CP15_OFFSET(c10_NMRR)] - str r12, [vcpu, #CP15_OFFSET(c0_CSSELR)] - .endif - - mrc p15, 0, r2, c13, c0, 1 @ CID - mrc p15, 0, r3, c13, c0, 2 @ TID_URW - mrc p15, 0, r4, c13, c0, 3 @ TID_URO - mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV - mrc p15, 0, r6, c5, c0, 0 @ DFSR - mrc p15, 0, r7, c5, c0, 1 @ IFSR - mrc p15, 0, r8, c5, c1, 0 @ ADFSR - mrc p15, 0, r9, c5, c1, 1 @ AIFSR - mrc p15, 0, r10, c6, c0, 0 @ DFAR - mrc p15, 0, r11, c6, c0, 2 @ IFAR - mrc p15, 0, r12, c12, c0, 0 @ VBAR - - .if \store_to_vcpu == 0 - push {r2-r12} @ Push CP15 registers - .else - str r2, [vcpu, #CP15_OFFSET(c13_CID)] - str r3, [vcpu, #CP15_OFFSET(c13_TID_URW)] - str r4, [vcpu, #CP15_OFFSET(c13_TID_URO)] - str r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)] - str r6, [vcpu, #CP15_OFFSET(c5_DFSR)] - str r7, [vcpu, #CP15_OFFSET(c5_IFSR)] - str r8, [vcpu, #CP15_OFFSET(c5_ADFSR)] - str r9, [vcpu, #CP15_OFFSET(c5_AIFSR)] - str r10, [vcpu, #CP15_OFFSET(c6_DFAR)] - str r11, [vcpu, #CP15_OFFSET(c6_IFAR)] - str r12, [vcpu, #CP15_OFFSET(c12_VBAR)] - .endif - - mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL - mrrc p15, 0, r4, r5, c7 @ PAR - mrc p15, 0, r6, c10, c3, 0 @ AMAIR0 - mrc p15, 0, r7, c10, c3, 1 @ AMAIR1 - - .if \store_to_vcpu == 0 - push {r2,r4-r7} - .else - str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] - add r12, vcpu, #CP15_OFFSET(c7_PAR) - strd r4, r5, [r12] - str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] - str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] - .endif -.endm - -/* - * Reads cp15 registers from memory and writes them to hardware - * @read_from_vcpu: If 0, registers are read in-order from the stack, - * otherwise from the VCPU struct pointed to by vcpup - * - * Assumes vcpu pointer in vcpu reg - */ -.macro write_cp15_state read_from_vcpu - .if \read_from_vcpu == 0 - pop {r2,r4-r7} - .else - ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] - add r12, vcpu, #CP15_OFFSET(c7_PAR) - ldrd r4, r5, [r12] - ldr r6, [vcpu, 
#CP15_OFFSET(c10_AMAIR0)] - ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] - .endif - - mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL - mcrr p15, 0, r4, r5, c7 @ PAR - mcr p15, 0, r6, c10, c3, 0 @ AMAIR0 - mcr p15, 0, r7, c10, c3, 1 @ AMAIR1 - - .if \read_from_vcpu == 0 - pop {r2-r12} - .else - ldr r2, [vcpu, #CP15_OFFSET(c13_CID)] - ldr r3, [vcpu, #CP15_OFFSET(c13_TID_URW)] - ldr r4, [vcpu, #CP15_OFFSET(c13_TID_URO)] - ldr r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)] - ldr r6, [vcpu, #CP15_OFFSET(c5_DFSR)] - ldr r7, [vcpu, #CP15_OFFSET(c5_IFSR)] - ldr r8, [vcpu, #CP15_OFFSET(c5_ADFSR)] - ldr r9, [vcpu, #CP15_OFFSET(c5_AIFSR)] - ldr r10, [vcpu, #CP15_OFFSET(c6_DFAR)] - ldr r11, [vcpu, #CP15_OFFSET(c6_IFAR)] - ldr r12, [vcpu, #CP15_OFFSET(c12_VBAR)] - .endif - - mcr p15, 0, r2, c13, c0, 1 @ CID - mcr p15, 0, r3, c13, c0, 2 @ TID_URW - mcr p15, 0, r4, c13, c0, 3 @ TID_URO - mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV - mcr p15, 0, r6, c5, c0, 0 @ DFSR - mcr p15, 0, r7, c5, c0, 1 @ IFSR - mcr p15, 0, r8, c5, c1, 0 @ ADFSR - mcr p15, 0, r9, c5, c1, 1 @ AIFSR - mcr p15, 0, r10, c6, c0, 0 @ DFAR - mcr p15, 0, r11, c6, c0, 2 @ IFAR - mcr p15, 0, r12, c12, c0, 0 @ VBAR - - .if \read_from_vcpu == 0 - pop {r2-r12} - .else - ldr r2, [vcpu, #CP15_OFFSET(c1_SCTLR)] - ldr r3, [vcpu, #CP15_OFFSET(c1_CPACR)] - ldr r4, [vcpu, #CP15_OFFSET(c2_TTBCR)] - ldr r5, [vcpu, #CP15_OFFSET(c3_DACR)] - add r12, vcpu, #CP15_OFFSET(c2_TTBR0) - ldrd r6, r7, [r12] - add r12, vcpu, #CP15_OFFSET(c2_TTBR1) - ldrd r8, r9, [r12] - ldr r10, [vcpu, #CP15_OFFSET(c10_PRRR)] - ldr r11, [vcpu, #CP15_OFFSET(c10_NMRR)] - ldr r12, [vcpu, #CP15_OFFSET(c0_CSSELR)] - .endif - - mcr p15, 0, r2, c1, c0, 0 @ SCTLR - mcr p15, 0, r3, c1, c0, 2 @ CPACR - mcr p15, 0, r4, c2, c0, 2 @ TTBCR - mcr p15, 0, r5, c3, c0, 0 @ DACR - mcrr p15, 0, r6, r7, c2 @ TTBR 0 - mcrr p15, 1, r8, r9, c2 @ TTBR 1 - mcr p15, 0, r10, c10, c2, 0 @ PRRR - mcr p15, 0, r11, c10, c2, 1 @ NMRR - mcr p15, 2, r12, c0, c0, 0 @ CSSELR -.endm - -/* - * Save the VGIC CPU state into memory - * - * Assumes vcpu pointer in vcpu reg - */ -.macro save_vgic_state - /* Get VGIC VCTRL base into r2 */ - ldr r2, [vcpu, #VCPU_KVM] - ldr r2, [r2, #KVM_VGIC_VCTRL] - cmp r2, #0 - beq 2f - - /* Compute the address of struct vgic_cpu */ - add r11, vcpu, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr r4, [r2, #GICH_VMCR] - ldr r5, [r2, #GICH_MISR] - ldr r6, [r2, #GICH_EISR0] - ldr r7, [r2, #GICH_EISR1] - ldr r8, [r2, #GICH_ELRSR0] - ldr r9, [r2, #GICH_ELRSR1] - ldr r10, [r2, #GICH_APR] -ARM_BE8(rev r4, r4 ) -ARM_BE8(rev r5, r5 ) -ARM_BE8(rev r6, r6 ) -ARM_BE8(rev r7, r7 ) -ARM_BE8(rev r8, r8 ) -ARM_BE8(rev r9, r9 ) -ARM_BE8(rev r10, r10 ) - - str r4, [r11, #VGIC_V2_CPU_VMCR] - str r5, [r11, #VGIC_V2_CPU_MISR] -#ifdef CONFIG_CPU_ENDIAN_BE8 - str r6, [r11, #(VGIC_V2_CPU_EISR + 4)] - str r7, [r11, #VGIC_V2_CPU_EISR] - str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)] - str r9, [r11, #VGIC_V2_CPU_ELRSR] -#else - str r6, [r11, #VGIC_V2_CPU_EISR] - str r7, [r11, #(VGIC_V2_CPU_EISR + 4)] - str r8, [r11, #VGIC_V2_CPU_ELRSR] - str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)] -#endif - str r10, [r11, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - mov r5, #0 - str r5, [r2, #GICH_HCR] - - /* Save list registers */ - add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_V2_CPU_LR - ldr r4, [r11, #VGIC_CPU_NR_LR] -1: ldr r6, [r2], #4 -ARM_BE8(rev r6, r6 ) - str r6, [r3], #4 - subs r4, r4, #1 - bne 1b -2: -.endm - -/* - * Restore the VGIC CPU state from memory - * - * Assumes vcpu pointer in vcpu reg - */ -.macro restore_vgic_state - /* Get VGIC VCTRL 
base into r2 */ - ldr r2, [vcpu, #VCPU_KVM] - ldr r2, [r2, #KVM_VGIC_VCTRL] - cmp r2, #0 - beq 2f - - /* Compute the address of struct vgic_cpu */ - add r11, vcpu, #VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr r3, [r11, #VGIC_V2_CPU_HCR] - ldr r4, [r11, #VGIC_V2_CPU_VMCR] - ldr r8, [r11, #VGIC_V2_CPU_APR] -ARM_BE8(rev r3, r3 ) -ARM_BE8(rev r4, r4 ) -ARM_BE8(rev r8, r8 ) - - str r3, [r2, #GICH_HCR] - str r4, [r2, #GICH_VMCR] - str r8, [r2, #GICH_APR] - - /* Restore list registers */ - add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_V2_CPU_LR - ldr r4, [r11, #VGIC_CPU_NR_LR] -1: ldr r6, [r3], #4 -ARM_BE8(rev r6, r6 ) - str r6, [r2], #4 - subs r4, r4, #1 - bne 1b -2: -.endm - -#define CNTHCTL_PL1PCTEN (1 << 0) -#define CNTHCTL_PL1PCEN (1 << 1) - -/* - * Save the timer state onto the VCPU and allow physical timer/counter access - * for the host. - * - * Assumes vcpu pointer in vcpu reg - * Clobbers r2-r5 - */ -.macro save_timer_state - ldr r4, [vcpu, #VCPU_KVM] - ldr r2, [r4, #KVM_TIMER_ENABLED] - cmp r2, #0 - beq 1f - - mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL - str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] - - isb - - mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL - ldr r4, =VCPU_TIMER_CNTV_CVAL - add r5, vcpu, r4 - strd r2, r3, [r5] - - @ Ensure host CNTVCT == CNTPCT - mov r2, #0 - mcrr p15, 4, r2, r2, c14 @ CNTVOFF - -1: - mov r2, #0 @ Clear ENABLE - mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL - - @ Allow physical timer/counter access for the host - mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL - orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) - mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL -.endm - -/* - * Load the timer state from the VCPU and deny physical timer/counter access - * for the host. - * - * Assumes vcpu pointer in vcpu reg - * Clobbers r2-r5 - */ -.macro restore_timer_state - @ Disallow physical timer access for the guest - @ Physical counter access is allowed - mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL - orr r2, r2, #CNTHCTL_PL1PCTEN - bic r2, r2, #CNTHCTL_PL1PCEN - mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL - - ldr r4, [vcpu, #VCPU_KVM] - ldr r2, [r4, #KVM_TIMER_ENABLED] - cmp r2, #0 - beq 1f - - ldr r2, [r4, #KVM_TIMER_CNTVOFF] - ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)] - mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF - - ldr r4, =VCPU_TIMER_CNTV_CVAL - add r5, vcpu, r4 - ldrd r2, r3, [r5] - mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL - isb - - ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL] - and r2, r2, #3 - mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL -1: -.endm - -.equ vmentry, 0 -.equ vmexit, 1 - -/* Configures the HSTR (Hyp System Trap Register) on entry/return - * (hardware reset value is 0) */ -.macro set_hstr operation - mrc p15, 4, r2, c1, c1, 3 - ldr r3, =HSTR_T(15) - .if \operation == vmentry - orr r2, r2, r3 @ Trap CR{15} - .else - bic r2, r2, r3 @ Don't trap any CRx accesses - .endif - mcr p15, 4, r2, c1, c1, 3 -.endm - -/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return - * (hardware reset value is 0). Keep previous value in r2. - * An ISB is emited on vmexit/vmtrap, but executed on vmexit only if - * VFP wasn't already enabled (always executed on vmtrap). - * If a label is specified with vmexit, it is branched to if VFP wasn't - * enabled. 
- */ -.macro set_hcptr operation, mask, label = none - mrc p15, 4, r2, c1, c1, 2 - ldr r3, =\mask - .if \operation == vmentry - orr r3, r2, r3 @ Trap coproc-accesses defined in mask - .else - bic r3, r2, r3 @ Don't trap defined coproc-accesses - .endif - mcr p15, 4, r3, c1, c1, 2 - .if \operation != vmentry - .if \operation == vmexit - tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) - beq 1f - .endif - isb - .if \label != none - b \label - .endif -1: - .endif -.endm - -/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return - * (hardware reset value is 0) */ -.macro set_hdcr operation - mrc p15, 4, r2, c1, c1, 1 - ldr r3, =(HDCR_TPM|HDCR_TPMCR) - .if \operation == vmentry - orr r2, r2, r3 @ Trap some perfmon accesses - .else - bic r2, r2, r3 @ Don't trap any perfmon accesses - .endif - mcr p15, 4, r2, c1, c1, 1 -.endm - -/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */ -.macro configure_hyp_role operation - .if \operation == vmentry - ldr r2, [vcpu, #VCPU_HCR] - ldr r3, [vcpu, #VCPU_IRQ_LINES] - orr r2, r2, r3 - .else - mov r2, #0 - .endif - mcr p15, 4, r2, c1, c1, 0 @ HCR -.endm - -.macro load_vcpu - mrc p15, 4, vcpu, c13, c0, 2 @ HTPIDR -.endm diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index aba61fd..58dbd5c 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -28,6 +28,7 @@ #include <asm/kvm_mmio.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> +#include <asm/virt.h> #include "trace.h" @@ -598,6 +599,9 @@ int create_hyp_mappings(void *from, void *to) unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); + if (is_kernel_in_hyp_mode()) + return 0; + start = start & PAGE_MASK; end = PAGE_ALIGN(end); @@ -630,6 +634,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); + if (is_kernel_in_hyp_mode()) + return 0; + /* Check for a valid kernel IO mapping */ if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) return -EINVAL; @@ -1431,6 +1438,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) } /* + * Check for a cache maintenance operation. Since we + * ended-up here, we know it is outside of any memory + * slot. But we can't find out if that is for a device, + * or if the guest is just being stupid. The only thing + * we know for sure is that this range cannot be cached. + * + * So let's assume that the guest is just being + * cautious, and skip the instruction. + */ + if (kvm_vcpu_dabt_is_cm(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + ret = 1; + goto out_unlock; + } + + /* * The IPA is reported as [MAX:12], so we need to * complement it with the bottom 12 bits from the * faulting VA. 
This is always 12 bits, irrespective diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index a9b3b90..c2b1315 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -70,7 +70,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; - wait_queue_head_t *wq; + struct swait_queue_head *wq; unsigned long cpu_id; unsigned long context_id; phys_addr_t target_pc; @@ -119,7 +119,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) smp_mb(); /* Make sure the above is visible */ wq = kvm_arch_vcpu_wq(vcpu); - wake_up_interruptible(wq); + swake_up(wq); return PSCI_RET_SUCCESS; } diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index eeb8585..0048b5a 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -71,7 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } /* Reset core registers */ - memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); + memcpy(&vcpu->arch.ctxt.gp_regs, reset_regs, sizeof(vcpu->arch.ctxt.gp_regs)); /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c index de1316b..62ebac5 100644 --- a/arch/arm/mach-davinci/board-mityomapl138.c +++ b/arch/arm/mach-davinci/board-mityomapl138.c @@ -115,13 +115,14 @@ static void mityomapl138_cpufreq_init(const char *partnum) static void mityomapl138_cpufreq_init(const char *partnum) { } #endif -static void read_factory_config(struct memory_accessor *a, void *context) +static void read_factory_config(struct nvmem_device *nvmem, void *context) { int ret; const char *partnum = NULL; struct davinci_soc_info *soc_info = &davinci_soc_info; - ret = a->read(a, (char *)&factory_config, 0, sizeof(factory_config)); + ret = nvmem_device_read(nvmem, 0, sizeof(factory_config), + &factory_config); if (ret != sizeof(struct factory_config)) { pr_warn("Read Factory Config Failed: %d\n", ret); goto bad_config; diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c index a794f6d..f55ef2e 100644 --- a/arch/arm/mach-davinci/common.c +++ b/arch/arm/mach-davinci/common.c @@ -28,13 +28,13 @@ EXPORT_SYMBOL(davinci_soc_info); void __iomem *davinci_intc_base; int davinci_intc_type; -void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context) +void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context) { char *mac_addr = davinci_soc_info.emac_pdata->mac_addr; off_t offset = (off_t)context; /* Read MAC addr from EEPROM */ - if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN) + if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN) pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr); } diff --git a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c index 2478d9f..469a76e 100644 --- a/arch/arm/mach-gemini/gpio.c +++ b/arch/arm/mach-gemini/gpio.c @@ -17,7 +17,7 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/irq.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <mach/hardware.h> #include <mach/irqs.h> @@ -227,5 +227,5 @@ void __init gemini_gpio_init(void) (void *)i); } - BUG_ON(gpiochip_add(&gemini_gpio_chip)); + BUG_ON(gpiochip_add_data(&gemini_gpio_chip, NULL)); } diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index cfc696b..fd87205 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -374,8 +374,13 @@ static struct pu_domain imx6q_pu_domain = { .name = "PU", .power_off = 
imx6q_pm_pu_power_off, .power_on = imx6q_pm_pu_power_on, - .power_off_latency_ns = 25000, - .power_on_latency_ns = 2000000, + .states = { + [0] = { + .power_off_latency_ns = 25000, + .power_on_latency_ns = 2000000, + }, + }, + .state_count = 1, }, }; diff --git a/arch/arm/mach-imx/mach-mx27ads.c b/arch/arm/mach-imx/mach-mx27ads.c index eb1c347..f510c43 100644 --- a/arch/arm/mach-imx/mach-mx27ads.c +++ b/arch/arm/mach-imx/mach-mx27ads.c @@ -13,6 +13,8 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ +#include <linux/gpio/driver.h> +/* Needed for gpio_to_irq() */ #include <linux/gpio.h> #include <linux/platform_device.h> #include <linux/mtd/mtd.h> @@ -243,7 +245,7 @@ static void __init mx27ads_regulator_init(void) vchip->ngpio = 1; vchip->direction_output = vgpio_dir_out; vchip->set = vgpio_set; - gpiochip_add(vchip); + gpiochip_add_data(vchip, NULL); platform_device_register_data(NULL, "reg-fixed-voltage", PLATFORM_DEVID_AUTO, diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 1cb6f2f..26874f6 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -27,7 +27,7 @@ #include <linux/clockchips.h> #include <linux/io.h> #include <linux/export.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <linux/cpu.h> #include <linux/pci.h> #include <linux/sched_clock.h> @@ -461,7 +461,7 @@ void __init ixp4xx_sys_init(void) platform_add_devices(ixp4xx_devices, ARRAY_SIZE(ixp4xx_devices)); - gpiochip_add(&ixp4xx_gpio_chip); + gpiochip_add_data(&ixp4xx_gpio_chip, NULL); if (cpu_is_ixp46x()) { int region; diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h index c5bae9c..b7ddd27 100644 --- a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h +++ b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h @@ -395,204 +395,6 @@ #define CRP_AD_CBE_BESL 20 #define CRP_AD_CBE_WRITE 0x00010000 - -/* - * USB Device Controller - * - * These are used by the USB gadget driver, so they don't follow the - * IXP4XX_ naming convetions. 
- * - */ -# define IXP4XX_USB_REG(x) (*((volatile u32 *)(x))) - -/* UDC Undocumented - Reserved1 */ -#define UDC_RES1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0004) -/* UDC Undocumented - Reserved2 */ -#define UDC_RES2 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0008) -/* UDC Undocumented - Reserved3 */ -#define UDC_RES3 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x000C) -/* UDC Control Register */ -#define UDCCR IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0000) -/* UDC Endpoint 0 Control/Status Register */ -#define UDCCS0 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0010) -/* UDC Endpoint 1 (IN) Control/Status Register */ -#define UDCCS1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0014) -/* UDC Endpoint 2 (OUT) Control/Status Register */ -#define UDCCS2 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0018) -/* UDC Endpoint 3 (IN) Control/Status Register */ -#define UDCCS3 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x001C) -/* UDC Endpoint 4 (OUT) Control/Status Register */ -#define UDCCS4 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0020) -/* UDC Endpoint 5 (Interrupt) Control/Status Register */ -#define UDCCS5 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0024) -/* UDC Endpoint 6 (IN) Control/Status Register */ -#define UDCCS6 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0028) -/* UDC Endpoint 7 (OUT) Control/Status Register */ -#define UDCCS7 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x002C) -/* UDC Endpoint 8 (IN) Control/Status Register */ -#define UDCCS8 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0030) -/* UDC Endpoint 9 (OUT) Control/Status Register */ -#define UDCCS9 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0034) -/* UDC Endpoint 10 (Interrupt) Control/Status Register */ -#define UDCCS10 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0038) -/* UDC Endpoint 11 (IN) Control/Status Register */ -#define UDCCS11 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x003C) -/* UDC Endpoint 12 (OUT) Control/Status Register */ -#define UDCCS12 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0040) -/* UDC Endpoint 13 (IN) Control/Status Register */ -#define UDCCS13 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0044) -/* UDC Endpoint 14 (OUT) Control/Status Register */ -#define UDCCS14 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0048) -/* UDC Endpoint 15 (Interrupt) Control/Status Register */ -#define UDCCS15 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x004C) -/* UDC Frame Number Register High */ -#define UFNRH IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0060) -/* UDC Frame Number Register Low */ -#define UFNRL IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0064) -/* UDC Byte Count Reg 2 */ -#define UBCR2 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0068) -/* UDC Byte Count Reg 4 */ -#define UBCR4 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x006c) -/* UDC Byte Count Reg 7 */ -#define UBCR7 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0070) -/* UDC Byte Count Reg 9 */ -#define UBCR9 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0074) -/* UDC Byte Count Reg 12 */ -#define UBCR12 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0078) -/* UDC Byte Count Reg 14 */ -#define UBCR14 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x007c) -/* UDC Endpoint 0 Data Register */ -#define UDDR0 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0080) -/* UDC Endpoint 1 Data Register */ -#define UDDR1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0100) -/* UDC Endpoint 2 Data Register */ -#define UDDR2 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0180) -/* UDC Endpoint 3 Data Register */ -#define UDDR3 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0200) -/* UDC Endpoint 4 Data Register */ -#define UDDR4 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0400) -/* UDC Endpoint 5 Data Register */ -#define UDDR5 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x00A0) -/* UDC 
Endpoint 6 Data Register */ -#define UDDR6 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0600) -/* UDC Endpoint 7 Data Register */ -#define UDDR7 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0680) -/* UDC Endpoint 8 Data Register */ -#define UDDR8 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0700) -/* UDC Endpoint 9 Data Register */ -#define UDDR9 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0900) -/* UDC Endpoint 10 Data Register */ -#define UDDR10 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x00C0) -/* UDC Endpoint 11 Data Register */ -#define UDDR11 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0B00) -/* UDC Endpoint 12 Data Register */ -#define UDDR12 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0B80) -/* UDC Endpoint 13 Data Register */ -#define UDDR13 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0C00) -/* UDC Endpoint 14 Data Register */ -#define UDDR14 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0E00) -/* UDC Endpoint 15 Data Register */ -#define UDDR15 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x00E0) -/* UDC Interrupt Control Register 0 */ -#define UICR0 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0050) -/* UDC Interrupt Control Register 1 */ -#define UICR1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0054) -/* UDC Status Interrupt Register 0 */ -#define USIR0 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x0058) -/* UDC Status Interrupt Register 1 */ -#define USIR1 IXP4XX_USB_REG(IXP4XX_USB_BASE_VIRT+0x005C) - -#define UDCCR_UDE (1 << 0) /* UDC enable */ -#define UDCCR_UDA (1 << 1) /* UDC active */ -#define UDCCR_RSM (1 << 2) /* Device resume */ -#define UDCCR_RESIR (1 << 3) /* Resume interrupt request */ -#define UDCCR_SUSIR (1 << 4) /* Suspend interrupt request */ -#define UDCCR_SRM (1 << 5) /* Suspend/resume interrupt mask */ -#define UDCCR_RSTIR (1 << 6) /* Reset interrupt request */ -#define UDCCR_REM (1 << 7) /* Reset interrupt mask */ - -#define UDCCS0_OPR (1 << 0) /* OUT packet ready */ -#define UDCCS0_IPR (1 << 1) /* IN packet ready */ -#define UDCCS0_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS0_DRWF (1 << 3) /* Device remote wakeup feature */ -#define UDCCS0_SST (1 << 4) /* Sent stall */ -#define UDCCS0_FST (1 << 5) /* Force stall */ -#define UDCCS0_RNE (1 << 6) /* Receive FIFO no empty */ -#define UDCCS0_SA (1 << 7) /* Setup active */ - -#define UDCCS_BI_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_BI_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_BI_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_BI_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_BI_SST (1 << 4) /* Sent stall */ -#define UDCCS_BI_FST (1 << 5) /* Force stall */ -#define UDCCS_BI_TSP (1 << 7) /* Transmit short packet */ - -#define UDCCS_BO_RFS (1 << 0) /* Receive FIFO service */ -#define UDCCS_BO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_BO_DME (1 << 3) /* DMA enable */ -#define UDCCS_BO_SST (1 << 4) /* Sent stall */ -#define UDCCS_BO_FST (1 << 5) /* Force stall */ -#define UDCCS_BO_RNE (1 << 6) /* Receive FIFO not empty */ -#define UDCCS_BO_RSP (1 << 7) /* Receive short packet */ - -#define UDCCS_II_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_II_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_II_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_II_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_II_TSP (1 << 7) /* Transmit short packet */ - -#define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */ -#define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_IO_ROF (1 << 3) /* Receive overflow */ -#define UDCCS_IO_DME (1 << 3) /* DMA enable */ -#define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty 
*/ -#define UDCCS_IO_RSP (1 << 7) /* Receive short packet */ - -#define UDCCS_INT_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_INT_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_INT_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_INT_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_INT_SST (1 << 4) /* Sent stall */ -#define UDCCS_INT_FST (1 << 5) /* Force stall */ -#define UDCCS_INT_TSP (1 << 7) /* Transmit short packet */ - -#define UICR0_IM0 (1 << 0) /* Interrupt mask ep 0 */ -#define UICR0_IM1 (1 << 1) /* Interrupt mask ep 1 */ -#define UICR0_IM2 (1 << 2) /* Interrupt mask ep 2 */ -#define UICR0_IM3 (1 << 3) /* Interrupt mask ep 3 */ -#define UICR0_IM4 (1 << 4) /* Interrupt mask ep 4 */ -#define UICR0_IM5 (1 << 5) /* Interrupt mask ep 5 */ -#define UICR0_IM6 (1 << 6) /* Interrupt mask ep 6 */ -#define UICR0_IM7 (1 << 7) /* Interrupt mask ep 7 */ - -#define UICR1_IM8 (1 << 0) /* Interrupt mask ep 8 */ -#define UICR1_IM9 (1 << 1) /* Interrupt mask ep 9 */ -#define UICR1_IM10 (1 << 2) /* Interrupt mask ep 10 */ -#define UICR1_IM11 (1 << 3) /* Interrupt mask ep 11 */ -#define UICR1_IM12 (1 << 4) /* Interrupt mask ep 12 */ -#define UICR1_IM13 (1 << 5) /* Interrupt mask ep 13 */ -#define UICR1_IM14 (1 << 6) /* Interrupt mask ep 14 */ -#define UICR1_IM15 (1 << 7) /* Interrupt mask ep 15 */ - -#define USIR0_IR0 (1 << 0) /* Interrupt request ep 0 */ -#define USIR0_IR1 (1 << 1) /* Interrupt request ep 1 */ -#define USIR0_IR2 (1 << 2) /* Interrupt request ep 2 */ -#define USIR0_IR3 (1 << 3) /* Interrupt request ep 3 */ -#define USIR0_IR4 (1 << 4) /* Interrupt request ep 4 */ -#define USIR0_IR5 (1 << 5) /* Interrupt request ep 5 */ -#define USIR0_IR6 (1 << 6) /* Interrupt request ep 6 */ -#define USIR0_IR7 (1 << 7) /* Interrupt request ep 7 */ - -#define USIR1_IR8 (1 << 0) /* Interrupt request ep 8 */ -#define USIR1_IR9 (1 << 1) /* Interrupt request ep 9 */ -#define USIR1_IR10 (1 << 2) /* Interrupt request ep 10 */ -#define USIR1_IR11 (1 << 3) /* Interrupt request ep 11 */ -#define USIR1_IR12 (1 << 4) /* Interrupt request ep 12 */ -#define USIR1_IR13 (1 << 5) /* Interrupt request ep 13 */ -#define USIR1_IR14 (1 << 6) /* Interrupt request ep 14 */ -#define USIR1_IR15 (1 << 7) /* Interrupt request ep 15 */ - #define DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ /* "fuse" bits of IXP_EXP_CFG2 */ diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c index 77d6b1b..ee06fab 100644 --- a/arch/arm/mach-lpc32xx/phy3250.c +++ b/arch/arm/mach-lpc32xx/phy3250.c @@ -86,8 +86,8 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb) { dma_addr_t dma; - fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, - PANEL_SIZE, &dma, GFP_KERNEL); + fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, PANEL_SIZE, &dma, + GFP_KERNEL); if (!fb->fb.screen_base) { printk(KERN_ERR "CLCD: unable to map framebuffer\n"); return -ENOMEM; @@ -116,15 +116,14 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb) static int lpc32xx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma) { - return dma_mmap_writecombine(&fb->dev->dev, vma, - fb->fb.screen_base, fb->fb.fix.smem_start, - fb->fb.fix.smem_len); + return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base, + fb->fb.fix.smem_start, fb->fb.fix.smem_len); } static void lpc32xx_clcd_remove(struct clcd_fb *fb) { - dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, - fb->fb.screen_base, fb->fb.fix.smem_start); + dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base, + fb->fb.fix.smem_start); } /* 
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 64e3d2c..b003e3a 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -3,7 +3,6 @@ menuconfig ARCH_MVEBU depends on ARCH_MULTI_V7 || ARCH_MULTI_V5 select ARCH_SUPPORTS_BIG_ENDIAN select CLKSRC_MMIO - select GENERIC_IRQ_CHIP select PINCTRL select PLAT_ORION select SOC_BUS @@ -29,6 +28,7 @@ config MACH_ARMADA_370 bool "Marvell Armada 370 boards" depends on ARCH_MULTI_V7 select ARMADA_370_CLK + select ARMADA_370_XP_IRQ select CPU_PJ4B select MACH_MVEBU_V7 select PINCTRL_ARMADA_370 @@ -39,6 +39,7 @@ config MACH_ARMADA_370 config MACH_ARMADA_375 bool "Marvell Armada 375 boards" depends on ARCH_MULTI_V7 + select ARMADA_370_XP_IRQ select ARM_ERRATA_720789 select ARM_ERRATA_753970 select ARM_GIC @@ -58,6 +59,7 @@ config MACH_ARMADA_38X select ARM_ERRATA_720789 select ARM_ERRATA_753970 select ARM_GIC + select ARMADA_370_XP_IRQ select ARMADA_38X_CLK select HAVE_ARM_SCU select HAVE_ARM_TWD if SMP @@ -72,6 +74,7 @@ config MACH_ARMADA_39X bool "Marvell Armada 39x boards" depends on ARCH_MULTI_V7 select ARM_GIC + select ARMADA_370_XP_IRQ select ARMADA_39X_CLK select CACHE_L2X0 select HAVE_ARM_SCU @@ -86,6 +89,7 @@ config MACH_ARMADA_39X config MACH_ARMADA_XP bool "Marvell Armada XP boards" depends on ARCH_MULTI_V7 + select ARMADA_370_XP_IRQ select ARMADA_XP_CLK select CPU_PJ4B select MACH_MVEBU_V7 diff --git a/arch/arm/mach-netx/fb.c b/arch/arm/mach-netx/fb.c index d122ee6..8814ee5 100644 --- a/arch/arm/mach-netx/fb.c +++ b/arch/arm/mach-netx/fb.c @@ -42,8 +42,8 @@ int netx_clcd_setup(struct clcd_fb *fb) fb->panel = netx_panel; - fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, 1024*1024, - &dma, GFP_KERNEL); + fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, 1024 * 1024, &dma, + GFP_KERNEL); if (!fb->fb.screen_base) { printk(KERN_ERR "CLCD: unable to map framebuffer\n"); return -ENOMEM; @@ -57,16 +57,14 @@ int netx_clcd_setup(struct clcd_fb *fb) int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma) { - return dma_mmap_writecombine(&fb->dev->dev, vma, - fb->fb.screen_base, - fb->fb.fix.smem_start, - fb->fb.fix.smem_len); + return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base, + fb->fb.fix.smem_start, fb->fb.fix.smem_len); } void netx_clcd_remove(struct clcd_fb *fb) { - dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, - fb->fb.screen_base, fb->fb.fix.smem_start); + dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base, + fb->fb.fix.smem_start); } static AMBA_AHB_DEVICE(fb, "fb", 0, 0x00104000, { NETX_IRQ_LCD }, NULL); diff --git a/arch/arm/mach-nspire/clcd.c b/arch/arm/mach-nspire/clcd.c index abea126..ea0e5b2 100644 --- a/arch/arm/mach-nspire/clcd.c +++ b/arch/arm/mach-nspire/clcd.c @@ -90,8 +90,8 @@ int nspire_clcd_setup(struct clcd_fb *fb) panel_size = ((panel->mode.xres * panel->mode.yres) * panel->bpp) / 8; panel_size = ALIGN(panel_size, PAGE_SIZE); - fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, - panel_size, &dma, GFP_KERNEL); + fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, panel_size, &dma, + GFP_KERNEL); if (!fb->fb.screen_base) { pr_err("CLCD: unable to map framebuffer\n"); @@ -107,13 +107,12 @@ int nspire_clcd_setup(struct clcd_fb *fb) int nspire_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma) { - return dma_mmap_writecombine(&fb->dev->dev, vma, - fb->fb.screen_base, fb->fb.fix.smem_start, - fb->fb.fix.smem_len); + return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base, + fb->fb.fix.smem_start, fb->fb.fix.smem_len); 
} void nspire_clcd_remove(struct clcd_fb *fb) { - dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, - fb->fb.screen_base, fb->fb.fix.smem_start); + dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base, + fb->fb.fix.smem_start); } diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index e9f65fe..b6d62e4 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2200,6 +2200,11 @@ static int _enable(struct omap_hwmod *oh) */ static int _idle(struct omap_hwmod *oh) { + if (oh->flags & HWMOD_NO_IDLE) { + oh->_int_flags |= _HWMOD_SKIP_ENABLE; + return 0; + } + pr_debug("omap_hwmod: %s: idling\n", oh->name); if (oh->_state != _HWMOD_STATE_ENABLED) { @@ -2504,6 +2509,8 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->flags |= HWMOD_INIT_NO_RESET; if (of_find_property(np, "ti,no-idle-on-init", NULL)) oh->flags |= HWMOD_INIT_NO_IDLE; + if (of_find_property(np, "ti,no-idle", NULL)) + oh->flags |= HWMOD_NO_IDLE; } oh->_state = _HWMOD_STATE_INITIALIZED; @@ -2630,7 +2637,7 @@ static void __init _setup_postsetup(struct omap_hwmod *oh) * XXX HWMOD_INIT_NO_IDLE does not belong in hwmod data - * it should be set by the core code as a runtime flag during startup */ - if ((oh->flags & HWMOD_INIT_NO_IDLE) && + if ((oh->flags & (HWMOD_INIT_NO_IDLE | HWMOD_NO_IDLE)) && (postsetup_state == _HWMOD_STATE_IDLE)) { oh->_int_flags |= _HWMOD_SKIP_ENABLE; postsetup_state = _HWMOD_STATE_ENABLED; diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index 76bce11..7c7a311 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -525,6 +525,8 @@ struct omap_hwmod_omap4_prcm { * or idled. * HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to * operate and they need to be handled at the same time as the main_clk. + * HWMOD_NO_IDLE: Do not idle the hwmod at all. Useful to handle certain + * IPs like CPSW on DRA7, where clocks to this module cannot be disabled. 
*/ #define HWMOD_SWSUP_SIDLE (1 << 0) #define HWMOD_SWSUP_MSTANDBY (1 << 1) @@ -541,6 +543,7 @@ struct omap_hwmod_omap4_prcm { #define HWMOD_SWSUP_SIDLE_ACT (1 << 12) #define HWMOD_RECONFIG_IO_CHAIN (1 << 13) #define HWMOD_OPT_CLKS_NEEDED (1 << 14) +#define HWMOD_NO_IDLE (1 << 15) /* * omap_hwmod._int_flags definitions diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index f164c6b..8e072de 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -252,7 +252,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata, info = omap_serial_default_info; oh = uart->oh; - name = DRIVER_NAME; + name = OMAP_SERIAL_DRIVER_NAME; omap_up.dma_enabled = info->dma_enabled; omap_up.uartclk = OMAP24XX_BASE_BAUD * 16; diff --git a/arch/arm/mach-pxa/include/mach/pxa25x-udc.h b/arch/arm/mach-pxa/include/mach/pxa25x-udc.h index 1b80a48..e69de29 100644 --- a/arch/arm/mach-pxa/include/mach/pxa25x-udc.h +++ b/arch/arm/mach-pxa/include/mach/pxa25x-udc.h @@ -1,163 +0,0 @@ -#ifndef _ASM_ARCH_PXA25X_UDC_H -#define _ASM_ARCH_PXA25X_UDC_H - -#ifdef _ASM_ARCH_PXA27X_UDC_H -#error "You can't include both PXA25x and PXA27x UDC support" -#endif - -#define UDC_RES1 __REG(0x40600004) /* UDC Undocumented - Reserved1 */ -#define UDC_RES2 __REG(0x40600008) /* UDC Undocumented - Reserved2 */ -#define UDC_RES3 __REG(0x4060000C) /* UDC Undocumented - Reserved3 */ - -#define UDCCR __REG(0x40600000) /* UDC Control Register */ -#define UDCCR_UDE (1 << 0) /* UDC enable */ -#define UDCCR_UDA (1 << 1) /* UDC active */ -#define UDCCR_RSM (1 << 2) /* Device resume */ -#define UDCCR_RESIR (1 << 3) /* Resume interrupt request */ -#define UDCCR_SUSIR (1 << 4) /* Suspend interrupt request */ -#define UDCCR_SRM (1 << 5) /* Suspend/resume interrupt mask */ -#define UDCCR_RSTIR (1 << 6) /* Reset interrupt request */ -#define UDCCR_REM (1 << 7) /* Reset interrupt mask */ - -#define UDCCS0 __REG(0x40600010) /* UDC Endpoint 0 Control/Status Register */ -#define UDCCS0_OPR (1 << 0) /* OUT packet ready */ -#define UDCCS0_IPR (1 << 1) /* IN packet ready */ -#define UDCCS0_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS0_DRWF (1 << 3) /* Device remote wakeup feature */ -#define UDCCS0_SST (1 << 4) /* Sent stall */ -#define UDCCS0_FST (1 << 5) /* Force stall */ -#define UDCCS0_RNE (1 << 6) /* Receive FIFO no empty */ -#define UDCCS0_SA (1 << 7) /* Setup active */ - -/* Bulk IN - Endpoint 1,6,11 */ -#define UDCCS1 __REG(0x40600014) /* UDC Endpoint 1 (IN) Control/Status Register */ -#define UDCCS6 __REG(0x40600028) /* UDC Endpoint 6 (IN) Control/Status Register */ -#define UDCCS11 __REG(0x4060003C) /* UDC Endpoint 11 (IN) Control/Status Register */ - -#define UDCCS_BI_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_BI_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_BI_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_BI_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_BI_SST (1 << 4) /* Sent stall */ -#define UDCCS_BI_FST (1 << 5) /* Force stall */ -#define UDCCS_BI_TSP (1 << 7) /* Transmit short packet */ - -/* Bulk OUT - Endpoint 2,7,12 */ -#define UDCCS2 __REG(0x40600018) /* UDC Endpoint 2 (OUT) Control/Status Register */ -#define UDCCS7 __REG(0x4060002C) /* UDC Endpoint 7 (OUT) Control/Status Register */ -#define UDCCS12 __REG(0x40600040) /* UDC Endpoint 12 (OUT) Control/Status Register */ - -#define UDCCS_BO_RFS (1 << 0) /* Receive FIFO service */ -#define UDCCS_BO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_BO_DME (1 << 3) /* DMA enable */ -#define 
UDCCS_BO_SST (1 << 4) /* Sent stall */ -#define UDCCS_BO_FST (1 << 5) /* Force stall */ -#define UDCCS_BO_RNE (1 << 6) /* Receive FIFO not empty */ -#define UDCCS_BO_RSP (1 << 7) /* Receive short packet */ - -/* Isochronous IN - Endpoint 3,8,13 */ -#define UDCCS3 __REG(0x4060001C) /* UDC Endpoint 3 (IN) Control/Status Register */ -#define UDCCS8 __REG(0x40600030) /* UDC Endpoint 8 (IN) Control/Status Register */ -#define UDCCS13 __REG(0x40600044) /* UDC Endpoint 13 (IN) Control/Status Register */ - -#define UDCCS_II_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_II_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_II_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_II_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_II_TSP (1 << 7) /* Transmit short packet */ - -/* Isochronous OUT - Endpoint 4,9,14 */ -#define UDCCS4 __REG(0x40600020) /* UDC Endpoint 4 (OUT) Control/Status Register */ -#define UDCCS9 __REG(0x40600034) /* UDC Endpoint 9 (OUT) Control/Status Register */ -#define UDCCS14 __REG(0x40600048) /* UDC Endpoint 14 (OUT) Control/Status Register */ - -#define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */ -#define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_IO_ROF (1 << 2) /* Receive overflow */ -#define UDCCS_IO_DME (1 << 3) /* DMA enable */ -#define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty */ -#define UDCCS_IO_RSP (1 << 7) /* Receive short packet */ - -/* Interrupt IN - Endpoint 5,10,15 */ -#define UDCCS5 __REG(0x40600024) /* UDC Endpoint 5 (Interrupt) Control/Status Register */ -#define UDCCS10 __REG(0x40600038) /* UDC Endpoint 10 (Interrupt) Control/Status Register */ -#define UDCCS15 __REG(0x4060004C) /* UDC Endpoint 15 (Interrupt) Control/Status Register */ - -#define UDCCS_INT_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_INT_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_INT_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_INT_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_INT_SST (1 << 4) /* Sent stall */ -#define UDCCS_INT_FST (1 << 5) /* Force stall */ -#define UDCCS_INT_TSP (1 << 7) /* Transmit short packet */ - -#define UFNRH __REG(0x40600060) /* UDC Frame Number Register High */ -#define UFNRL __REG(0x40600064) /* UDC Frame Number Register Low */ -#define UBCR2 __REG(0x40600068) /* UDC Byte Count Reg 2 */ -#define UBCR4 __REG(0x4060006c) /* UDC Byte Count Reg 4 */ -#define UBCR7 __REG(0x40600070) /* UDC Byte Count Reg 7 */ -#define UBCR9 __REG(0x40600074) /* UDC Byte Count Reg 9 */ -#define UBCR12 __REG(0x40600078) /* UDC Byte Count Reg 12 */ -#define UBCR14 __REG(0x4060007c) /* UDC Byte Count Reg 14 */ -#define UDDR0 __REG(0x40600080) /* UDC Endpoint 0 Data Register */ -#define UDDR1 __REG(0x40600100) /* UDC Endpoint 1 Data Register */ -#define UDDR2 __REG(0x40600180) /* UDC Endpoint 2 Data Register */ -#define UDDR3 __REG(0x40600200) /* UDC Endpoint 3 Data Register */ -#define UDDR4 __REG(0x40600400) /* UDC Endpoint 4 Data Register */ -#define UDDR5 __REG(0x406000A0) /* UDC Endpoint 5 Data Register */ -#define UDDR6 __REG(0x40600600) /* UDC Endpoint 6 Data Register */ -#define UDDR7 __REG(0x40600680) /* UDC Endpoint 7 Data Register */ -#define UDDR8 __REG(0x40600700) /* UDC Endpoint 8 Data Register */ -#define UDDR9 __REG(0x40600900) /* UDC Endpoint 9 Data Register */ -#define UDDR10 __REG(0x406000C0) /* UDC Endpoint 10 Data Register */ -#define UDDR11 __REG(0x40600B00) /* UDC Endpoint 11 Data Register */ -#define UDDR12 __REG(0x40600B80) /* UDC Endpoint 12 Data Register */ -#define 
UDDR13 __REG(0x40600C00) /* UDC Endpoint 13 Data Register */ -#define UDDR14 __REG(0x40600E00) /* UDC Endpoint 14 Data Register */ -#define UDDR15 __REG(0x406000E0) /* UDC Endpoint 15 Data Register */ - -#define UICR0 __REG(0x40600050) /* UDC Interrupt Control Register 0 */ - -#define UICR0_IM0 (1 << 0) /* Interrupt mask ep 0 */ -#define UICR0_IM1 (1 << 1) /* Interrupt mask ep 1 */ -#define UICR0_IM2 (1 << 2) /* Interrupt mask ep 2 */ -#define UICR0_IM3 (1 << 3) /* Interrupt mask ep 3 */ -#define UICR0_IM4 (1 << 4) /* Interrupt mask ep 4 */ -#define UICR0_IM5 (1 << 5) /* Interrupt mask ep 5 */ -#define UICR0_IM6 (1 << 6) /* Interrupt mask ep 6 */ -#define UICR0_IM7 (1 << 7) /* Interrupt mask ep 7 */ - -#define UICR1 __REG(0x40600054) /* UDC Interrupt Control Register 1 */ - -#define UICR1_IM8 (1 << 0) /* Interrupt mask ep 8 */ -#define UICR1_IM9 (1 << 1) /* Interrupt mask ep 9 */ -#define UICR1_IM10 (1 << 2) /* Interrupt mask ep 10 */ -#define UICR1_IM11 (1 << 3) /* Interrupt mask ep 11 */ -#define UICR1_IM12 (1 << 4) /* Interrupt mask ep 12 */ -#define UICR1_IM13 (1 << 5) /* Interrupt mask ep 13 */ -#define UICR1_IM14 (1 << 6) /* Interrupt mask ep 14 */ -#define UICR1_IM15 (1 << 7) /* Interrupt mask ep 15 */ - -#define USIR0 __REG(0x40600058) /* UDC Status Interrupt Register 0 */ - -#define USIR0_IR0 (1 << 0) /* Interrupt request ep 0 */ -#define USIR0_IR1 (1 << 1) /* Interrupt request ep 1 */ -#define USIR0_IR2 (1 << 2) /* Interrupt request ep 2 */ -#define USIR0_IR3 (1 << 3) /* Interrupt request ep 3 */ -#define USIR0_IR4 (1 << 4) /* Interrupt request ep 4 */ -#define USIR0_IR5 (1 << 5) /* Interrupt request ep 5 */ -#define USIR0_IR6 (1 << 6) /* Interrupt request ep 6 */ -#define USIR0_IR7 (1 << 7) /* Interrupt request ep 7 */ - -#define USIR1 __REG(0x4060005C) /* UDC Status Interrupt Register 1 */ - -#define USIR1_IR8 (1 << 0) /* Interrupt request ep 8 */ -#define USIR1_IR9 (1 << 1) /* Interrupt request ep 9 */ -#define USIR1_IR10 (1 << 2) /* Interrupt request ep 10 */ -#define USIR1_IR11 (1 << 3) /* Interrupt request ep 11 */ -#define USIR1_IR12 (1 << 4) /* Interrupt request ep 12 */ -#define USIR1_IR13 (1 << 5) /* Interrupt request ep 13 */ -#define USIR1_IR14 (1 << 6) /* Interrupt request ep 14 */ -#define USIR1_IR15 (1 << 7) /* Interrupt request ep 15 */ - -#endif diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index 8347d87..5a941bd 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -18,12 +18,13 @@ #include <linux/init.h> #include <linux/kernel.h> +#include <linux/property.h> #include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/smsc911x.h> #include <linux/input.h> -#include <linux/rotary_encoder.h> #include <linux/gpio_keys.h> #include <linux/input/eeti_ts.h> #include <linux/leds.h> @@ -366,22 +367,31 @@ static struct pxaohci_platform_data raumfeld_ohci_info = { * Rotary encoder input device */ -static struct rotary_encoder_platform_data raumfeld_rotary_encoder_info = { - .steps = 24, - .axis = REL_X, - .relative_axis = 1, - .gpio_a = GPIO_VOLENC_A, - .gpio_b = GPIO_VOLENC_B, - .inverted_a = 1, - .inverted_b = 0, +static struct gpiod_lookup_table raumfeld_rotary_gpios_table = { + .dev_id = "rotary-encoder.0", + .table = { + GPIO_LOOKUP_IDX("gpio-0", + GPIO_VOLENC_A, NULL, 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-0", + GPIO_VOLENC_B, NULL, 1, GPIO_ACTIVE_HIGH), + { }, + }, +}; + +static struct property_entry raumfeld_rotary_properties[] = { 
+ PROPERTY_ENTRY_INTEGER("rotary-encoder,steps-per-period", u32, 24), + PROPERTY_ENTRY_INTEGER("linux,axis", u32, REL_X), + PROPERTY_ENTRY_INTEGER("rotary-encoder,relative_axis", u32, 1), + { }, +}; + +static struct property_set raumfeld_rotary_property_set = { + .properties = raumfeld_rotary_properties, }; static struct platform_device rotary_encoder_device = { .name = "rotary-encoder", .id = 0, - .dev = { - .platform_data = &raumfeld_rotary_encoder_info, - } }; /** @@ -1051,7 +1061,12 @@ static void __init __maybe_unused raumfeld_controller_init(void) int ret; pxa3xx_mfp_config(ARRAY_AND_SIZE(raumfeld_controller_pin_config)); + + gpiod_add_lookup_table(&raumfeld_rotary_gpios_table); + device_add_property_set(&rotary_encoder_device.dev, + &raumfeld_rotary_property_set); platform_device_register(&rotary_encoder_device); + spi_register_board_info(ARRAY_AND_SIZE(controller_spi_devices)); i2c_register_board_info(0, &raumfeld_controller_i2c_board_info, 1); @@ -1086,6 +1101,10 @@ static void __init __maybe_unused raumfeld_speaker_init(void) i2c_register_board_info(0, &raumfeld_connector_i2c_board_info, 1); platform_device_register(&smc91x_device); + + gpiod_add_lookup_table(&raumfeld_rotary_gpios_table); + device_add_property_set(&rotary_encoder_device.dev, + &raumfeld_rotary_property_set); platform_device_register(&rotary_encoder_device); raumfeld_audio_init(); diff --git a/arch/arm/mach-s3c24xx/mach-h1940.c b/arch/arm/mach-s3c24xx/mach-h1940.c index 9f54300..7ed7861 100644 --- a/arch/arm/mach-s3c24xx/mach-h1940.c +++ b/arch/arm/mach-s3c24xx/mach-h1940.c @@ -664,7 +664,7 @@ static void __init h1940_map_io(void) /* Add latch gpio chip, set latch initial value */ h1940_latch_control(0, 0); - WARN_ON(gpiochip_add(&h1940_latch_gpiochip)); + WARN_ON(gpiochip_add_data(&h1940_latch_gpiochip, NULL)); } static void __init h1940_init_time(void) diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index d8965c6..bb3ca9c 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -15,7 +15,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <linux/io.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <mach/hardware.h> #include <asm/setup.h> @@ -369,7 +369,7 @@ static int __init simpad_init(void) cs3_gpio.get = cs3_gpio_get; cs3_gpio.direction_input = cs3_gpio_direction_input; cs3_gpio.direction_output = cs3_gpio_direction_output; - ret = gpiochip_add(&cs3_gpio); + ret = gpiochip_add_data(&cs3_gpio, NULL); if (ret) printk(KERN_WARNING "simpad: Unable to register cs3 GPIO device"); diff --git a/arch/arm/mach-socfpga/Makefile b/arch/arm/mach-socfpga/Makefile index b8f9e23..ed15db1 100644 --- a/arch/arm/mach-socfpga/Makefile +++ b/arch/arm/mach-socfpga/Makefile @@ -5,3 +5,5 @@ obj-y := socfpga.o obj-$(CONFIG_SMP) += headsmp.o platsmp.o obj-$(CONFIG_SOCFPGA_SUSPEND) += pm.o self-refresh.o +obj-$(CONFIG_EDAC_ALTERA_L2C) += l2_cache.o +obj-$(CONFIG_EDAC_ALTERA_OCRAM) += ocram.o diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index 5bc6ea8..575195b 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -36,6 +36,8 @@ extern void socfpga_init_clocks(void); extern void socfpga_sysmgr_init(void); +void socfpga_init_l2_ecc(void); +void socfpga_init_ocram_ecc(void); extern void __iomem *sys_manager_base_addr; extern void __iomem *rst_manager_base_addr; diff --git a/arch/arm/mach-socfpga/l2_cache.c b/arch/arm/mach-socfpga/l2_cache.c new file mode 100644 index 0000000..e3907ab --- 
/dev/null +++ b/arch/arm/mach-socfpga/l2_cache.c @@ -0,0 +1,41 @@ +/* + * Copyright Altera Corporation (C) 2016. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/io.h> +#include <linux/of_platform.h> +#include <linux/of_address.h> + +void socfpga_init_l2_ecc(void) +{ + struct device_node *np; + void __iomem *mapped_l2_edac_addr; + + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-l2-ecc"); + if (!np) { + pr_err("Unable to find socfpga-l2-ecc in dtb\n"); + return; + } + + mapped_l2_edac_addr = of_iomap(np, 0); + of_node_put(np); + if (!mapped_l2_edac_addr) { + pr_err("Unable to find L2 ECC mapping in dtb\n"); + return; + } + + /* Enable ECC */ + writel(0x01, mapped_l2_edac_addr); + iounmap(mapped_l2_edac_addr); +} diff --git a/arch/arm/mach-socfpga/ocram.c b/arch/arm/mach-socfpga/ocram.c new file mode 100644 index 0000000..60ec643 --- /dev/null +++ b/arch/arm/mach-socfpga/ocram.c @@ -0,0 +1,49 @@ +/* + * Copyright Altera Corporation (C) 2016. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include <linux/io.h> +#include <linux/genalloc.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> + +#define ALTR_OCRAM_CLEAR_ECC 0x00000018 +#define ALTR_OCRAM_ECC_EN 0x00000019 + +void socfpga_init_ocram_ecc(void) +{ + struct device_node *np; + void __iomem *mapped_ocr_edac_addr; + + /* Find the OCRAM EDAC device tree node */ + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-ocram-ecc"); + if (!np) { + pr_err("Unable to find socfpga-ocram-ecc\n"); + return; + } + + mapped_ocr_edac_addr = of_iomap(np, 0); + of_node_put(np); + if (!mapped_ocr_edac_addr) { + pr_err("Unable to map OCRAM ecc regs.\n"); + return; + } + + /* Clear any pending OCRAM ECC interrupts, then enable ECC */ + writel(ALTR_OCRAM_CLEAR_ECC, mapped_ocr_edac_addr); + writel(ALTR_OCRAM_ECC_EN, mapped_ocr_edac_addr); + + iounmap(mapped_ocr_edac_addr); +} diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index a1c0efa..7e0aad2 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -59,6 +59,11 @@ static void __init socfpga_init_irq(void) { irqchip_init(); socfpga_sysmgr_init(); + if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C)) + socfpga_init_l2_ecc(); + + if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM)) + socfpga_init_ocram_ecc(); } static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd) diff --git a/arch/arm/mach-w90x900/gpio.c b/arch/arm/mach-w90x900/gpio.c index ba05aec..55d1a00 100644 --- a/arch/arm/mach-w90x900/gpio.c +++ b/arch/arm/mach-w90x900/gpio.c @@ -20,7 +20,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/io.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <mach/hardware.h> @@ -30,7 +30,6 @@ #define GPIO_IN (0x0C) #define GROUPINERV (0x10) #define GPIO_GPIO(Nb) (0x00000001 << (Nb)) -#define to_nuc900_gpio_chip(c) container_of(c, struct nuc900_gpio_chip, chip) #define NUC900_GPIO_CHIP(name, base_gpio, nr_gpio) \ { \ @@ -53,7 +52,7 @@ struct nuc900_gpio_chip { static int nuc900_gpio_get(struct gpio_chip *chip, unsigned offset) { - struct nuc900_gpio_chip *nuc900_gpio = to_nuc900_gpio_chip(chip); + struct nuc900_gpio_chip *nuc900_gpio = gpiochip_get_data(chip); void __iomem *pio = nuc900_gpio->regbase + GPIO_IN; unsigned int regval; @@ -65,7 +64,7 @@ static int nuc900_gpio_get(struct gpio_chip *chip, unsigned offset) static void nuc900_gpio_set(struct gpio_chip *chip, unsigned offset, int val) { - struct nuc900_gpio_chip *nuc900_gpio = to_nuc900_gpio_chip(chip); + struct nuc900_gpio_chip *nuc900_gpio = gpiochip_get_data(chip); void __iomem *pio = nuc900_gpio->regbase + GPIO_OUT; unsigned int regval; unsigned long flags; @@ -86,7 +85,7 @@ static void nuc900_gpio_set(struct gpio_chip *chip, unsigned offset, int val) static int nuc900_dir_input(struct gpio_chip *chip, unsigned offset) { - struct nuc900_gpio_chip *nuc900_gpio = to_nuc900_gpio_chip(chip); + struct nuc900_gpio_chip *nuc900_gpio = gpiochip_get_data(chip); void __iomem *pio = nuc900_gpio->regbase + GPIO_DIR; unsigned int regval; unsigned long flags; @@ -104,7 +103,7 @@ static int nuc900_dir_input(struct gpio_chip *chip, unsigned offset) static int nuc900_dir_output(struct gpio_chip *chip, unsigned offset, int val) { - struct nuc900_gpio_chip *nuc900_gpio = to_nuc900_gpio_chip(chip); + struct nuc900_gpio_chip *nuc900_gpio = gpiochip_get_data(chip); void __iomem *outreg = nuc900_gpio->regbase + GPIO_OUT; void __iomem *pio = nuc900_gpio->regbase + GPIO_DIR; unsigned int regval; @@ -149,6 +148,6 @@ 
void __init nuc900_init_gpio(int nr_group) gpio_chip = &nuc900_gpio[i]; spin_lock_init(&gpio_chip->gpio_lock); gpio_chip->regbase = GPIO_BASE + i * GROUPINERV; - gpiochip_add(&gpio_chip->chip); + gpiochip_add_data(&gpio_chip->chip, gpio_chip); } } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index daafcf1..ad58418 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -346,7 +346,7 @@ retry: up_read(&mm->mmap_sem); /* - * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR + * Handle the "normal" case first - VM_FAULT_MAJOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 434d76f..88fbe0d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -732,7 +732,7 @@ static void *__init late_alloc(unsigned long sz) return ptr; } -static pte_t * __init pte_alloc(pmd_t *pmd, unsigned long addr, +static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot, void *(*alloc)(unsigned long sz)) { @@ -747,7 +747,7 @@ static pte_t * __init pte_alloc(pmd_t *pmd, unsigned long addr, static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot) { - return pte_alloc(pmd, addr, prot, early_alloc); + return arm_pte_alloc(pmd, addr, prot, early_alloc); } static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, @@ -756,7 +756,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, void *(*alloc)(unsigned long sz), bool ng) { - pte_t *pte = pte_alloc(pmd, addr, type->prot_l1, alloc); + pte_t *pte = arm_pte_alloc(pmd, addr, type->prot_l1, alloc); do { set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), ng ? PTE_EXT_NG : 0); diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index e683db1..b8d4773 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -80,7 +80,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (!new_pmd) goto no_pmd; - new_pte = pte_alloc_map(mm, NULL, new_pmd, 0); + new_pte = pte_alloc_map(mm, new_pmd, 0); if (!new_pte) goto no_pte; diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c index 7bd22d8..f740693 100644 --- a/arch/arm/plat-orion/gpio.c +++ b/arch/arm/plat-orion/gpio.c @@ -154,8 +154,7 @@ err_out: */ static int orion_gpio_request(struct gpio_chip *chip, unsigned pin) { - struct orion_gpio_chip *ochip = - container_of(chip, struct orion_gpio_chip, chip); + struct orion_gpio_chip *ochip = gpiochip_get_data(chip); if (orion_gpio_is_valid(ochip, pin, GPIO_INPUT_OK) || orion_gpio_is_valid(ochip, pin, GPIO_OUTPUT_OK)) @@ -166,8 +165,7 @@ static int orion_gpio_request(struct gpio_chip *chip, unsigned pin) static int orion_gpio_direction_input(struct gpio_chip *chip, unsigned pin) { - struct orion_gpio_chip *ochip = - container_of(chip, struct orion_gpio_chip, chip); + struct orion_gpio_chip *ochip = gpiochip_get_data(chip); unsigned long flags; if (!orion_gpio_is_valid(ochip, pin, GPIO_INPUT_OK)) @@ -182,8 +180,7 @@ static int orion_gpio_direction_input(struct gpio_chip *chip, unsigned pin) static int orion_gpio_get(struct gpio_chip *chip, unsigned pin) { - struct orion_gpio_chip *ochip = - container_of(chip, struct orion_gpio_chip, chip); + struct orion_gpio_chip *ochip = gpiochip_get_data(chip); int val; if (readl(GPIO_IO_CONF(ochip)) & (1 << pin)) { @@ -198,8 +195,7 @@ static int orion_gpio_get(struct gpio_chip *chip, unsigned pin) static int orion_gpio_direction_output(struct gpio_chip *chip, unsigned pin, int value) { - struct orion_gpio_chip *ochip = - 
container_of(chip, struct orion_gpio_chip, chip); + struct orion_gpio_chip *ochip = gpiochip_get_data(chip); unsigned long flags; if (!orion_gpio_is_valid(ochip, pin, GPIO_OUTPUT_OK)) @@ -216,8 +212,7 @@ orion_gpio_direction_output(struct gpio_chip *chip, unsigned pin, int value) static void orion_gpio_set(struct gpio_chip *chip, unsigned pin, int value) { - struct orion_gpio_chip *ochip = - container_of(chip, struct orion_gpio_chip, chip); + struct orion_gpio_chip *ochip = gpiochip_get_data(chip); unsigned long flags; spin_lock_irqsave(&ochip->lock, flags); @@ -227,8 +222,7 @@ static void orion_gpio_set(struct gpio_chip *chip, unsigned pin, int value) static int orion_gpio_to_irq(struct gpio_chip *chip, unsigned pin) { - struct orion_gpio_chip *ochip = - container_of(chip, struct orion_gpio_chip, chip); + struct orion_gpio_chip *ochip = gpiochip_get_data(chip); return irq_create_mapping(ochip->domain, ochip->secondary_irq_base + pin); @@ -445,8 +439,8 @@ static void gpio_irq_handler(struct irq_desc *desc) static void orion_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) { - struct orion_gpio_chip *ochip = - container_of(chip, struct orion_gpio_chip, chip); + + struct orion_gpio_chip *ochip = gpiochip_get_data(chip); u32 out, io_conf, blink, in_pol, data_in, cause, edg_msk, lvl_msk; int i; @@ -567,7 +561,7 @@ void __init orion_gpio_init(struct device_node *np, ochip->mask_offset = mask_offset; ochip->secondary_irq_base = secondary_irq_base; - gpiochip_add(&ochip->chip); + gpiochip_add_data(&ochip->chip, ochip); /* * Mask and clear GPIO interrupts. diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c index 04aff2c..70f2f69 100644 --- a/arch/arm/plat-samsung/pm-check.c +++ b/arch/arm/plat-samsung/pm-check.c @@ -53,8 +53,8 @@ static void s3c_pm_run_res(struct resource *ptr, run_fn_t fn, u32 *arg) if (ptr->child != NULL) s3c_pm_run_res(ptr->child, fn, arg); - if ((ptr->flags & IORESOURCE_MEM) && - strcmp(ptr->name, "System RAM") == 0) { + if ((ptr->flags & IORESOURCE_SYSTEM_RAM) + == IORESOURCE_SYSTEM_RAM) { S3C_PMDBG("Found system RAM at %08lx..%08lx\n", (unsigned long)ptr->start, (unsigned long)ptr->end); diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S index b2b97e3..a62a7b6 100644 --- a/arch/arm/vdso/vdso.S +++ b/arch/arm/vdso/vdso.S @@ -23,9 +23,8 @@ #include <linux/const.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .data..ro_after_init .balign PAGE_SIZE vdso_start: .incbin "arch/arm/vdso/vdso.so" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8cc6228..4f43622 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -14,6 +14,7 @@ config ARM64 select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS + select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC @@ -49,6 +50,7 @@ config ARM64 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE + select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB @@ -235,8 +237,6 @@ config PCI_SYSCALL def_bool PCI source "drivers/pci/Kconfig" -source "drivers/pci/pcie/Kconfig" -source "drivers/pci/hotplug/Kconfig" endmenu @@ -393,6 +393,7 @@ config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" depends on MODULES default y + select ARM64_MODULE_CMODEL_LARGE help This option 
builds kernel modules using the large memory model in order to avoid the use of the ADRP instruction, which can cause @@ -432,6 +433,17 @@ config CAVIUM_ERRATUM_23154 If unsure, say Y. +config CAVIUM_ERRATUM_27456 + bool "Cavium erratum 27456: Broadcast TLBI instructions may cause icache corruption" + default y + help + On ThunderX T88 pass 1.x through 2.1 parts, broadcast TLBI + instructions may cause the icache to become corrupted if it + contains data for a non-current ASID. The fix is to + invalidate the icache when changing the mm context. + + If unsure, say Y. + endmenu @@ -537,6 +549,9 @@ config HOTPLUG_CPU source kernel/Kconfig.preempt source kernel/Kconfig.hz +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + config ARCH_HAS_HOLES_MEMORYMODEL def_bool y if SPARSEMEM @@ -750,12 +765,112 @@ config ARM64_LSE_ATOMICS not support these instructions and requires the kernel to be built with binutils >= 2.25. +config ARM64_VHE + bool "Enable support for Virtualization Host Extensions (VHE)" + default y + help + Virtualization Host Extensions (VHE) allow the kernel to run + directly at EL2 (instead of EL1) on processors that support + it. This leads to better performance for KVM, as they reduce + the cost of the world switch. + + Selecting this option allows the VHE feature to be detected + at runtime, and does not affect processors that do not + implement this feature. + +endmenu + +menu "ARMv8.2 architectural features" + +config ARM64_UAO + bool "Enable support for User Access Override (UAO)" + default y + help + User Access Override (UAO; part of the ARMv8.2 Extensions) + causes the 'unprivileged' variant of the load/store instructions to + be overriden to be privileged. + + This option changes get_user() and friends to use the 'unprivileged' + variant of the load/store instructions. This ensures that user-space + really did have access to the supplied memory. When addr_limit is + set to kernel memory the UAO bit will be set, allowing privileged + access to kernel memory. + + Choosing this option will cause copy_to_user() et al to use user-space + memory permissions. + + The feature is detected at runtime, the kernel will use the + regular load/store instructions if the cpu does not implement the + feature. + endmenu +config ARM64_MODULE_CMODEL_LARGE + bool + +config ARM64_MODULE_PLTS + bool + select ARM64_MODULE_CMODEL_LARGE + select HAVE_MOD_ARCH_SPECIFIC + +config RELOCATABLE + bool + help + This builds the kernel as a Position Independent Executable (PIE), + which retains all relocation metadata required to relocate the + kernel binary at runtime to a different virtual address than the + address it was linked at. + Since AArch64 uses the RELA relocation format, this requires a + relocation pass at runtime even if the kernel is loaded at the + same address it was linked at. + +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + select ARM64_MODULE_PLTS + select RELOCATABLE + help + Randomizes the virtual address at which the kernel image is + loaded, as a security feature that deters exploit attempts + relying on knowledge of the location of kernel internals. + + It is the bootloader's job to provide entropy, by passing a + random u64 value in /chosen/kaslr-seed at kernel entry. + + When booting via the UEFI stub, it will invoke the firmware's + EFI_RNG_PROTOCOL implementation (if available) to supply entropy + to the kernel proper. In addition, it will randomise the physical + location of the kernel Image as well. + + If unsure, say N. 
+ +config RANDOMIZE_MODULE_REGION_FULL + bool "Randomize the module region independently from the core kernel" + depends on RANDOMIZE_BASE + default y + help + Randomizes the location of the module region without considering the + location of the core kernel. This way, it is impossible for modules + to leak information about the location of core kernel data structures + but it does imply that function calls between modules and the core + kernel will need to be resolved via veneers in the module PLT. + + When this option is not set, the module region will be randomized over + a limited range that contains the [_stext, _etext] interval of the + core kernel, so branch relocations are always in range. + endmenu menu "Boot options" +config ARM64_ACPI_PARKING_PROTOCOL + bool "Enable support for the ARM64 ACPI parking protocol" + depends on ACPI + help + Enable support for the ARM64 ACPI parking protocol. If disabled + the kernel will not allow booting through the ARM64 ACPI parking + protocol even if the corresponding data is present in the ACPI + MADT table. + config CMDLINE string "Default kernel command string" default "" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index e13c4bf..7e76845 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -50,13 +50,13 @@ config DEBUG_SET_MODULE_RONX config DEBUG_RODATA bool "Make kernel text and rodata read-only" + default y help If this is set, kernel text and rodata will be made read-only. This is to help catch accidental or malicious attempts to change the - kernel's executable code. Additionally splits rodata from kernel - text so it can be made explicitly non-executable. + kernel's executable code. - If in doubt, say Y + If in doubt, say Y config DEBUG_ALIGN_RODATA depends on DEBUG_RODATA && ARM64_4K_PAGES diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b5e3f6d..354d754 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,6 +15,10 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 +ifneq ($(CONFIG_RELOCATABLE),) +LDFLAGS_vmlinux += -pie +endif + KBUILD_DEFCONFIG := defconfig # Check for binutils support for specific extensions @@ -43,10 +47,14 @@ endif CHECKFLAGS += -D__aarch64__ -ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large endif +ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index cafb2c2..b7d7109 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -493,6 +493,11 @@ reg = <0x0 0x1054a000 0x0 0x20>; }; + rb: rb@7e000000 { + compatible = "apm,xgene-rb", "syscon"; + reg = <0x0 0x7e000000 0x0 0x10>; + }; + edac@78800000 { compatible = "apm,xgene-edac"; #address-cells = <2>; @@ -502,6 +507,7 @@ regmap-mcba = <&mcba>; regmap-mcbb = <&mcbb>; regmap-efuse = <&efuse>; + regmap-rb = <&rb>; reg = <0x0 0x78800000 0x0 0x100>; interrupts = <0x0 0x20 0x4>, <0x0 0x21 0x4>, diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi index e8bb460..6e28e41 100644 --- a/arch/arm64/boot/dts/nvidia/tegra132.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra132.dtsi @@ -313,7 +313,7 @@ /* * There are two serial driver i.e. 
8250 based simple serial * driver and APB DMA based serial driver for higher baudrate - * and performace. To enable the 8250 based driver, the compatible + * and performance. To enable the 8250 based driver, the compatible * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable * the APB DMA based serial driver, the comptible is * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart". diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi index bc23f4d..23b0630 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi @@ -345,7 +345,7 @@ /* * There are two serial driver i.e. 8250 based simple serial * driver and APB DMA based serial driver for higher baudrate - * and performace. To enable the 8250 based driver, the compatible + * and performance. To enable the 8250 based driver, the compatible * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable * the APB DMA based serial driver, the comptible is * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart". diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 7a3d22a..5c88804 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -15,6 +15,7 @@ #include <crypto/algapi.h> #include <linux/module.h> #include <linux/cpufeature.h> +#include <crypto/xts.h> #include "aes-ce-setkey.h" @@ -85,6 +86,10 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); int ret; + ret = xts_check_key(tfm, in_key, key_len); + if (ret) + return ret; + ret = aes_expandkey(&ctx->key1, in_key, key_len / 2); if (!ret) ret = aes_expandkey(&ctx->key2, &in_key[key_len / 2], diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 70fd9ff..cff532a 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,5 +1,3 @@ - - generic-y += bug.h generic-y += bugs.h generic-y += checksum.h @@ -31,7 +29,6 @@ generic-y += msgbuf.h generic-y += msi.h generic-y += mutex.h generic-y += pci.h -generic-y += pci-bridge.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index caafd63..aee323b 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -87,9 +87,26 @@ void __init acpi_init_cpus(void); static inline void acpi_init_cpus(void) { } #endif /* CONFIG_ACPI */ +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +bool acpi_parking_protocol_valid(int cpu); +void __init +acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor); +#else +static inline bool acpi_parking_protocol_valid(int cpu) { return false; } +static inline void +acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor) +{} +#endif + static inline const char *acpi_get_enable_method(int cpu) { - return acpi_psci_present() ? 
"psci" : NULL; + if (acpi_psci_present()) + return "psci"; + + if (acpi_parking_protocol_valid(cpu)) + return "parking-protocol"; + + return NULL; } #ifdef CONFIG_ACPI_APEI diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index e4962f0..beccbde 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -1,6 +1,8 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H +#include <asm/cpufeature.h> + #ifndef __ASSEMBLY__ #include <linux/init.h> @@ -63,6 +65,8 @@ void apply_alternatives(void *start, size_t length); #else +#include <asm/assembler.h> + .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len .word \orig_offset - . .word \alt_offset - . @@ -136,6 +140,65 @@ void apply_alternatives(void *start, size_t length); alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +/* + * Generate the assembly for UAO alternatives with exception table entries. + * This is complicated as there is no post-increment or pair versions of the + * unprivileged instructions, and USER() only works for single instructions. + */ +#ifdef CONFIG_ARM64_UAO + .macro uao_ldp l, reg1, reg2, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: ldp \reg1, \reg2, [\addr], \post_inc; +8889: nop; + nop; + alternative_else + ldtr \reg1, [\addr]; + ldtr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + alternative_endif + + _asm_extable 8888b,\l; + _asm_extable 8889b,\l; + .endm + + .macro uao_stp l, reg1, reg2, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: stp \reg1, \reg2, [\addr], \post_inc; +8889: nop; + nop; + alternative_else + sttr \reg1, [\addr]; + sttr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + alternative_endif + + _asm_extable 8888b,\l; + _asm_extable 8889b,\l; + .endm + + .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: \inst \reg, [\addr], \post_inc; + nop; + alternative_else + \alt_inst \reg, [\addr]; + add \addr, \addr, \post_inc; + alternative_endif + + _asm_extable 8888b,\l; + .endm +#else + .macro uao_ldp l, reg1, reg2, addr, post_inc + USER(\l, ldp \reg1, \reg2, [\addr], \post_inc) + .endm + .macro uao_stp l, reg1, reg2, addr, post_inc + USER(\l, stp \reg1, \reg2, [\addr], \post_inc) + .endm + .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc + USER(\l, \inst \reg, [\addr], \post_inc) + .endm +#endif + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bb7b727..70f7b9e 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -94,12 +94,19 @@ dmb \opt .endm +/* + * Emit an entry into the exception table + */ + .macro _asm_extable, from, to + .pushsection __ex_table, "a" + .align 3 + .long (\from - .), (\to - .) + .popsection + .endm + #define USER(l, x...) \ 9999: x; \ - .section __ex_table,"a"; \ - .align 3; \ - .quad 9999b,l; \ - .previous + _asm_extable 9999b, l /* * Register aliases. @@ -215,4 +222,15 @@ lr .req x30 // link register .size __pi_##x, . - x; \ ENDPROC(x) + /* + * Emit a 64-bit absolute little endian symbol reference in a way that + * ensures that it will be resolved at build time, even when building a + * PIE binary. This requires cooperation from the linker script, which + * must emit the lo32/hi32 halves individually. 
+ */ + .macro le64sym, sym + .long \sym\()_lo32 + .long \sym\()_hi32 + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 197e06a..39c1d34 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v) " stclr %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_or(int i, atomic_t *v) @@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v) " stset %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_xor(int i, atomic_t *v) @@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v) " steor %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_add(int i, atomic_t *v) @@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v) " stadd %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ @@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \ " add %w[i], %w[i], w30") \ : [i] "+r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return w0; \ } @@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v) " stclr %w[i], %[v]") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_sub(int i, atomic_t *v) @@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v) " stadd %w[i], %[v]") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \ @@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " add %w[i], %w[i], w30") \ : [i] "+r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS , ##cl); \ \ return w0; \ } @@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v) " stclr %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_or(long i, atomic64_t *v) @@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v) " stset %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_xor(long i, atomic64_t *v) @@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v) " steor %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_add(long i, atomic64_t *v) @@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v) " stadd %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) 
\ @@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ " add %[i], %[i], x30") \ : [i] "+r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v) " stclr %[i], %[v]") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_sub(long i, atomic64_t *v) @@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) " stadd %[i], %[v]") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ @@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " add %[i], %[i], x30") \ : [i] "+r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) "2:") : [ret] "+&r" (x0), [v] "+Q" (v->counter) : - : "x30", "cc", "memory"); + : __LL_SC_CLOBBERS, "cc", "memory"); return x0; } @@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ " mov %" #w "[ret], " #w "30") \ : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \ : [old] "r" (x1), [new] "r" (x2) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index 81151b6..ebf2481 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -11,4 +11,10 @@ #define MIN_FDT_ALIGN 8 #define MAX_FDT_SIZE SZ_2M +/* + * arm64 requires the kernel image to placed + * TEXT_OFFSET bytes beyond a 2 MB aligned base + */ +#define MIN_KIMG_ALIGN SZ_2M + #endif diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h new file mode 100644 index 0000000..ed693c5 --- /dev/null +++ b/arch/arm64/include/asm/brk-imm.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __ASM_BRK_IMM_H +#define __ASM_BRK_IMM_H + +/* + * #imm16 values used for BRK instruction generation + * Allowed values for kgdb are 0x400 - 0x7ff + * 0x100: for triggering a fault on purpose (reserved) + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + * 0x800: kernel-mode BUG() and WARN() traps + */ +#define FAULT_BRK_IMM 0x100 +#define KGDB_DYN_DBG_BRK_IMM 0x400 +#define KGDB_COMPILED_DBG_BRK_IMM 0x401 +#define BUG_BRK_IMM 0x800 + +#endif diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 4a748ce..561190d 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -18,7 +18,7 @@ #ifndef _ARCH_ARM64_ASM_BUG_H #define _ARCH_ARM64_ASM_BUG_H -#include <asm/debug-monitors.h> +#include <asm/brk-imm.h> #ifdef CONFIG_GENERIC_BUG #define HAVE_ARCH_BUG diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 7fc294c..22dda61 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -156,8 +156,4 @@ int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #endif diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index b5e9cee..13a6103 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -36,6 +36,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64isar1; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; + u64 reg_id_aa64mmfr2; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8f271b8..b9b6494 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -30,8 +30,13 @@ #define ARM64_HAS_LSE_ATOMICS 5 #define ARM64_WORKAROUND_CAVIUM_23154 6 #define ARM64_WORKAROUND_834220 7 +#define ARM64_HAS_NO_HW_PREFETCH 8 +#define ARM64_HAS_UAO 9 +#define ARM64_ALT_PAN_NOT_UAO 10 +#define ARM64_HAS_VIRT_HOST_EXTN 11 +#define ARM64_WORKAROUND_CAVIUM_27456 12 -#define ARM64_NCAPS 8 +#define ARM64_NCAPS 13 #ifndef __ASSEMBLY__ @@ -85,9 +90,10 @@ struct arm64_cpu_capabilities { struct { /* Feature register checking */ u32 sys_reg; - int field_pos; - int min_field_value; - int hwcap_type; + u8 field_pos; + u8 min_field_value; + u8 hwcap_type; + bool sign; unsigned long hwcap; }; }; @@ -117,15 +123,15 @@ static inline void cpus_set_cap(unsigned int num) } static inline int __attribute_const__ -cpuid_feature_extract_field_width(u64 features, int field, int width) +cpuid_feature_extract_signed_field_width(u64 features, int field, int width) { return (s64)(features << (64 - width - field)) >> (64 - width); } static inline int __attribute_const__ -cpuid_feature_extract_field(u64 features, int field) +cpuid_feature_extract_signed_field(u64 features, int field) { - return cpuid_feature_extract_field_width(features, field, 4); + return cpuid_feature_extract_signed_field_width(features, field, 4); } static inline unsigned int __attribute_const__ @@ -145,17 +151,23 @@ static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); } +static inline int __attribute_const__ +cpuid_feature_extract_field(u64 features, int field, bool sign) +{ + return (sign) ? 
+ cpuid_feature_extract_signed_field(features, field) : + cpuid_feature_extract_unsigned_field(features, field); +} + static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) { - return ftrp->sign ? - cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) : - cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width); + return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign); } static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) { - return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || - cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; + return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || + cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; } void __init setup_cpu_features(void); @@ -164,13 +176,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); -#ifdef CONFIG_HOTPLUG_CPU void verify_local_cpu_capabilities(void); -#else -static inline void verify_local_cpu_capabilities(void) -{ -} -#endif u64 read_system_reg(u32 id); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 1a59493..f2309a2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -32,12 +32,6 @@ #define MPIDR_AFFINITY_LEVEL(mpidr, level) \ ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) -#define read_cpuid(reg) ({ \ - u64 __val; \ - asm("mrs %0, " #reg : "=r" (__val)); \ - __val; \ -}) - #define MIDR_REVISION_MASK 0xf #define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) #define MIDR_PARTNUM_SHIFT 4 @@ -57,11 +51,22 @@ #define MIDR_IMPLEMENTOR(midr) \ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) -#define MIDR_CPU_PART(imp, partnum) \ +#define MIDR_CPU_MODEL(imp, partnum) \ (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) +#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ + MIDR_ARCHITECTURE_MASK) + +#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \ +({ \ + u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \ + u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \ + \ + _model == (model) && rv >= (rv_min) && rv <= (rv_max); \ + }) + #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_IMP_CAVIUM 0x43 @@ -75,8 +80,20 @@ #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) +#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) + #ifndef __ASSEMBLY__ +#include <asm/sysreg.h> + +#define read_cpuid(reg) ({ \ + u64 __val; \ + asm("mrs_s %0, " __stringify(SYS_ ## reg) : "=r" (__val)); \ + __val; \ +}) + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. 
Use this function to read the CPU ID diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 279c85b5..2fcb9b7 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -20,6 +20,7 @@ #include <linux/errno.h> #include <linux/types.h> +#include <asm/brk-imm.h> #include <asm/esr.h> #include <asm/insn.h> #include <asm/ptrace.h> @@ -47,19 +48,6 @@ #define BREAK_INSTR_SIZE AARCH64_INSN_SIZE /* - * #imm16 values used for BRK instruction generation - * Allowed values for kgbd are 0x400 - 0x7ff - * 0x100: for triggering a fault on purpose (reserved) - * 0x400: for dynamic BRK instruction - * 0x401: for compile time BRK instruction - * 0x800: kernel-mode BUG() and WARN() traps - */ -#define FAULT_BRK_IMM 0x100 -#define KGDB_DYN_DBG_BRK_IMM 0x400 -#define KGDB_COMPILED_DBG_BRK_IMM 0x401 -#define BUG_BRK_IMM 0x800 - -/* * BRK instruction encoding * The #imm16 value should be placed at bits[20:5] within BRK ins */ diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index faad6df..24ed037 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -24,15 +24,6 @@ #include <asm/ptrace.h> #include <asm/user.h> -typedef unsigned long elf_greg_t; - -#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t)) -#define ELF_CORE_COPY_REGS(dest, regs) \ - *(struct user_pt_regs *)&(dest) = (regs)->user_regs; - -typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -typedef struct user_fpsimd_state elf_fpregset_t; - /* * AArch64 static relocation types. */ @@ -86,6 +77,8 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define R_AARCH64_MOVW_PREL_G2_NC 292 #define R_AARCH64_MOVW_PREL_G3 293 +#define R_AARCH64_RELATIVE 1027 + /* * These are used to set parameters in the core dumps. */ @@ -127,6 +120,17 @@ typedef struct user_fpsimd_state elf_fpregset_t; */ #define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) +#ifndef __ASSEMBLY__ + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t)) +#define ELF_CORE_COPY_REGS(dest, regs) \ + *(struct user_pt_regs *)&(dest) = (regs)->user_regs; + +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; +typedef struct user_fpsimd_state elf_fpregset_t; + /* * When the program starts, a1 contains a pointer to a function to be * registered with atexit, as per the SVR4 ABI. A value of 0 means we have no @@ -186,4 +190,6 @@ extern int aarch32_setup_vectors_page(struct linux_binprm *bprm, #endif /* CONFIG_COMPAT */ +#endif /* !__ASSEMBLY__ */ + #endif diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 3097045..caf86be 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -20,6 +20,7 @@ #include <linux/sizes.h> #include <asm/boot.h> #include <asm/page.h> +#include <asm/pgtable-prot.h> /* * Here we define all the compile-time 'special' virtual @@ -62,6 +63,16 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + + /* + * Used for kernel page table creation, so unmapped memory may be used + * for tables. 
+ */ + FIX_PTE, + FIX_PMD, + FIX_PUD, + FIX_PGD, + __end_of_fixed_addresses }; diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 3c60f37..caa955f 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -48,7 +48,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) * See kernel/trace/trace_syscalls.c * * x86 code says: - * If the user realy wants these, then they should use the + * If the user really wants these, then they should use the * raw syscall tracepoints with filtering. */ #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 5f3ab8c..f2585cd 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -42,10 +42,8 @@ "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection\n" \ -" .pushsection __ex_table,\"a\"\n" \ -" .align 3\n" \ -" .quad 1b, 4b, 2b, 4b\n" \ -" .popsection\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ @@ -134,10 +132,8 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) "4: mov %w0, %w6\n" " b 3b\n" " .popsection\n" -" .pushsection __ex_table,\"a\"\n" -" .align 3\n" -" .quad 1b, 4b, 2b, 4b\n" -" .popsection\n" + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index a57601f..8740297 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 5 +#define NR_IPI 6 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 9732908..115ea2a 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -18,6 +18,7 @@ #include <asm/cputype.h> #include <asm/cpufeature.h> +#include <asm/virt.h> #ifdef __KERNEL__ @@ -35,10 +36,21 @@ struct arch_hw_breakpoint { struct arch_hw_breakpoint_ctrl ctrl; }; +/* Privilege Levels */ +#define AARCH64_BREAKPOINT_EL1 1 +#define AARCH64_BREAKPOINT_EL0 2 + +#define DBG_HMC_HYP (1 << 13) + static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl) { - return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) | + u32 val = (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) | ctrl.enabled; + + if (is_kernel_in_hyp_mode() && ctrl.privilege == AARCH64_BREAKPOINT_EL1) + val |= DBG_HMC_HYP; + + return val; } static inline void decode_ctrl_reg(u32 reg, @@ -61,10 +73,6 @@ static inline void decode_ctrl_reg(u32 reg, #define ARM_BREAKPOINT_STORE 2 #define AARCH64_ESR_ACCESS_MASK (1 << 6) -/* Privilege Levels */ -#define AARCH64_BREAKPOINT_EL1 1 -#define AARCH64_BREAKPOINT_EL0 2 - /* Lengths */ #define ARM_BREAKPOINT_LEN_1 0x1 #define ARM_BREAKPOINT_LEN_2 0x3 diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 2774fa3..71ad0f9 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -7,13 +7,14 @@ #include <linux/linkage.h> #include <asm/memory.h> +#include <asm/pgtable-types.h> /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. 
* KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. */ #define KASAN_SHADOW_START (VA_START) -#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3))) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) /* * This value is used to map an address to the corresponding shadow @@ -28,10 +29,12 @@ #define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) void kasan_init(void); +void kasan_copy_shadow(pgd_t *pgdir); asmlinkage void kasan_early_init(void); #else static inline void kasan_init(void) { } +static inline void kasan_copy_shadow(pgd_t *pgdir) { } #endif #endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a459714..5c6375d 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -79,5 +79,17 @@ #define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) #endif +/* + * To make optimal use of block mappings when laying out the linear + * mapping, round down the base of physical memory to a size that can + * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE + * (64k granule), or a multiple that can be mapped using contiguous bits + * in the page tables: 32 * PMD_SIZE (16k granule) + */ +#ifdef CONFIG_ARM64_64K_PAGES +#define ARM64_MEMSTART_ALIGN SZ_512M +#else +#define ARM64_MEMSTART_ALIGN SZ_1G +#endif #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index d201d4b..0e391db 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -23,6 +23,7 @@ #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ +#define HCR_E2H (UL(1) << 34) #define HCR_ID (UL(1) << 33) #define HCR_CD (UL(1) << 32) #define HCR_RW_SHIFT 31 @@ -61,7 +62,7 @@ /* * The bits we set in HCR: - * RW: 64bit by default, can be overriden for 32bit VMs + * RW: 64bit by default, can be overridden for 32bit VMs * TAC: Trap ACTLR * TSC: Trap SMC * TVM: Trap VM ops (until M+C set in SCTLR_EL1) @@ -81,7 +82,7 @@ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) - +#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* Hyp System Control Register (SCTLR_EL2) bits */ #define SCTLR_EL2_EE (1 << 25) @@ -216,4 +217,7 @@ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) +#define CPACR_EL1_FPEN (3 << 20) +#define CPACR_EL1_TTA (1 << 28) + #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 52b777b..226f49d 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -26,6 +26,8 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +#define kvm_ksym_ref(sym) phys_to_virt((u64)&sym - kimage_voffset) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; @@ -35,9 +37,6 @@ extern char __kvm_hyp_init_end[]; extern char __kvm_hyp_vector[]; -#define __kvm_hyp_code_start __hyp_text_start -#define __kvm_hyp_code_end __hyp_text_end - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); @@ -45,9 +44,12 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern void 
__vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); +extern void __init_stage2_translation(void); + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 779a587..40bc168 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -29,6 +29,7 @@ #include <asm/kvm_mmio.h> #include <asm/ptrace.h> #include <asm/cputype.h> +#include <asm/virt.h> unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); @@ -43,6 +44,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; + if (is_kernel_in_hyp_mode()) + vcpu->arch.hcr_el2 |= HCR_E2H; if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) vcpu->arch.hcr_el2 &= ~HCR_RW; } @@ -189,6 +192,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); } +static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); +} + static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 689d4c9..227ed47 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -25,7 +25,9 @@ #include <linux/types.h> #include <linux/kvm_types.h> #include <asm/kvm.h> +#include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> +#include <asm/kvm_perf_event.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -36,10 +38,11 @@ #include <kvm/arm_vgic.h> #include <kvm/arm_arch_timer.h> +#include <kvm/arm_pmu.h> #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS -#define KVM_VCPU_MAX_FEATURES 3 +#define KVM_VCPU_MAX_FEATURES 4 int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -99,8 +102,8 @@ enum vcpu_sysreg { TTBR1_EL1, /* Translation Table Base Register 1 */ TCR_EL1, /* Translation Control Register */ ESR_EL1, /* Exception Syndrome Register */ - AFSR0_EL1, /* Auxilary Fault Status Register 0 */ - AFSR1_EL1, /* Auxilary Fault Status Register 1 */ + AFSR0_EL1, /* Auxiliary Fault Status Register 0 */ + AFSR1_EL1, /* Auxiliary Fault Status Register 1 */ FAR_EL1, /* Fault Address Register */ MAIR_EL1, /* Memory Attribute Indirection Register */ VBAR_EL1, /* Vector Base Address Register */ @@ -114,6 +117,21 @@ enum vcpu_sysreg { MDSCR_EL1, /* Monitor Debug System Control Register */ MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ + /* Performance Monitors Registers */ + PMCR_EL0, /* Control Register */ + PMSELR_EL0, /* Event Counter Selection Register */ + PMEVCNTR0_EL0, /* Event Counter Register (0-30) */ + PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30, + PMCCNTR_EL0, /* Cycle Counter Register */ + PMEVTYPER0_EL0, /* Event Type Register (0-30) */ + PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30, + PMCCFILTR_EL0, /* Cycle Count Filter Register */ + PMCNTENSET_EL0, /* Count Enable Set Register */ + PMINTENSET_EL1, /* Interrupt Enable Set Register */ + PMOVSSET_EL0, /* Overflow Flag Status Set Register */ + PMSWINC_EL0, /* Software Increment Register */ + PMUSERENR_EL0, /* User Enable Register */ + /* 32bit specific registers. 
Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -211,6 +229,7 @@ struct kvm_vcpu_arch { /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; + struct kvm_pmu pmu; /* * Anything that is not used directly from assembly code goes @@ -307,7 +326,9 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -u64 kvm_call_hyp(void *hypfn, ...); +u64 __kvm_call_hyp(void *hypfn, ...); +#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) + void force_vm_exit(const cpumask_t *mask); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); @@ -328,8 +349,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, * Call initialization code, and switch to the full blown * HYP code. */ - kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, - hyp_stack_ptr, vector_ptr); + __kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, + hyp_stack_ptr, vector_ptr); } static inline void kvm_arch_hardware_disable(void) {} @@ -342,5 +363,18 @@ void kvm_arm_init_debug(void); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); + +/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */ + +static inline void __cpu_init_stage2(void) +{ + kvm_call_hyp(__init_stage2_translation); +} #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h new file mode 100644 index 0000000..a46b019 --- /dev/null +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __ARM64_KVM_HYP_H__ +#define __ARM64_KVM_HYP_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_perf_event.h> +#include <asm/sysreg.h> + +#define __hyp_text __section(.hyp.text) notrace + +static inline unsigned long __kern_hyp_va(unsigned long v) +{ + asm volatile(ALTERNATIVE("and %0, %0, %1", + "nop", + ARM64_HAS_VIRT_HOST_EXTN) + : "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK)); + return v; +} + +#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v))) + +static inline unsigned long __hyp_kern_va(unsigned long v) +{ + u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET; + asm volatile(ALTERNATIVE("add %0, %0, %1", + "nop", + ARM64_HAS_VIRT_HOST_EXTN) + : "+r" (v) : "r" (offset)); + return v; +} + +#define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v))) + +#define read_sysreg_elx(r,nvh,vh) \ + ({ \ + u64 reg; \ + asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ + "mrs_s %0, " __stringify(r##vh),\ + ARM64_HAS_VIRT_HOST_EXTN) \ + : "=r" (reg)); \ + reg; \ + }) + +#define write_sysreg_elx(v,r,nvh,vh) \ + do { \ + u64 __val = (u64)(v); \ + asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ + "msr_s " __stringify(r##vh) ", %x0",\ + ARM64_HAS_VIRT_HOST_EXTN) \ + : : "rZ" (__val)); \ + } while (0) + +/* + * Unified accessors for registers that have a different encoding + * between VHE and non-VHE. They must be specified without their "ELx" + * encoding. + */ +#define read_sysreg_el2(r) \ + ({ \ + u64 reg; \ + asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\ + "mrs %0, " __stringify(r##_EL1),\ + ARM64_HAS_VIRT_HOST_EXTN) \ + : "=r" (reg)); \ + reg; \ + }) + +#define write_sysreg_el2(v,r) \ + do { \ + u64 __val = (u64)(v); \ + asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\ + "msr " __stringify(r##_EL1) ", %x0",\ + ARM64_HAS_VIRT_HOST_EXTN) \ + : : "rZ" (__val)); \ + } while (0) + +#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02) +#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02) +#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12) +#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12) + +/* The VHE specific system registers and their encoding */ +#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0) +#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2) +#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0) +#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1) +#define tcr_EL12 sys_reg(3, 5, 2, 0, 2) +#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0) +#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1) +#define esr_EL12 sys_reg(3, 5, 5, 2, 0) +#define far_EL12 sys_reg(3, 5, 6, 0, 0) +#define mair_EL12 sys_reg(3, 5, 10, 2, 0) +#define amair_EL12 sys_reg(3, 5, 10, 3, 0) +#define vbar_EL12 sys_reg(3, 5, 12, 0, 0) +#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1) +#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0) +#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0) +#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1) +#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2) +#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0) +#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1) +#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2) +#define spsr_EL12 sys_reg(3, 5, 4, 0, 0) +#define elr_EL12 sys_reg(3, 5, 4, 0, 1) + +/** + * hyp_alternate_select - Generates patchable code sequences that are + * used to switch between two implementations of a function, depending + * on the availability of a feature. 
+ * + * @fname: a symbol name that will be defined as a function returning a + * function pointer whose type will match @orig and @alt + * @orig: A pointer to the default function, as returned by @fname when + * @cond doesn't hold + * @alt: A pointer to the alternate function, as returned by @fname + * when @cond holds + * @cond: a CPU feature (as described in asm/cpufeature.h) + */ +#define hyp_alternate_select(fname, orig, alt, cond) \ +typeof(orig) * __hyp_text fname(void) \ +{ \ + typeof(alt) *val = orig; \ + asm volatile(ALTERNATIVE("nop \n", \ + "mov %0, %1 \n", \ + cond) \ + : "+r" (val) : "r" (alt)); \ + return val; \ +} + +void __vgic_v2_save_state(struct kvm_vcpu *vcpu); +void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); + +void __vgic_v3_save_state(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); + +void __timer_save_state(struct kvm_vcpu *vcpu); +void __timer_restore_state(struct kvm_vcpu *vcpu); + +void __sysreg_save_host_state(struct kvm_cpu_context *ctxt); +void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt); +void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt); +void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt); +void __sysreg32_save_state(struct kvm_vcpu *vcpu); +void __sysreg32_restore_state(struct kvm_vcpu *vcpu); + +void __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); +void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); + +void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); +void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +bool __fpsimd_enabled(void); + +u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +void __noreturn __hyp_do_panic(unsigned long, ...); + +#endif /* __ARM64_KVM_HYP_H__ */ + diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7364339..22732a5 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -23,13 +23,16 @@ #include <asm/cpufeature.h> /* - * As we only have the TTBR0_EL2 register, we cannot express + * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express * "negative" addresses. This makes it impossible to directly share * mappings with the kernel. * * Instead, give the HYP mode its own VA region at a fixed offset from * the kernel by just masking the top bits (which are all ones for a * kernel address). + * + * ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these + * macros (the entire kernel runs at EL2). */ #define HYP_PAGE_OFFSET_SHIFT VA_BITS #define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) @@ -56,12 +59,19 @@ #ifdef __ASSEMBLY__ +#include <asm/alternative.h> +#include <asm/cpufeature.h> + /* * Convert a kernel VA into a HYP VA. * reg: VA to be converted. */ .macro kern_hyp_va reg +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN and \reg, \reg, #HYP_PAGE_OFFSET_MASK +alternative_else + nop +alternative_endif .endm #else @@ -307,7 +317,7 @@ static inline unsigned int kvm_get_vmid_bits(void) { int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); - return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; + return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 
16 : 8; } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/kvm_perf_event.h b/arch/arm64/include/asm/kvm_perf_event.h new file mode 100644 index 0000000..c18fdeb --- /dev/null +++ b/arch/arm64/include/asm/kvm_perf_event.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_KVM_PERF_EVENT_H +#define __ASM_KVM_PERF_EVENT_H + +#define ARMV8_PMU_MAX_COUNTERS 32 +#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) + +/* + * Per-CPU PMCR: config reg + */ +#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ +#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ +#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ +#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ +#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +/* Determines which bit of PMCCNTR_EL0 generates an overflow */ +#define ARMV8_PMU_PMCR_LC (1 << 6) +#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ +#define ARMV8_PMU_PMCR_N_MASK 0x1f +#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ + +/* + * PMOVSR: counters overflow flag status reg + */ +#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV8_PMU_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */ +#define ARMV8_PMU_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */ + +#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */ + +/* + * Event filters for PMUv3 + */ +#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30) +#define ARMV8_PMU_INCLUDE_EL2 (1 << 27) + +/* + * PMUSERENR: user enable reg + */ +#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ +#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ +#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ +#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ +#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ + +#endif diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 3de42d6..23acc00 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -26,6 +26,7 @@ __asm__(".arch_extension lse"); /* Macro for constructing calls to out-of-line ll/sc atomics */ #define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n" +#define __LL_SC_CLOBBERS "x16", "x17", "x30" /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 853953c..12f8a00 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -24,6 +24,7 @@ #include <linux/compiler.h> #include <linux/const.h> #include <linux/types.h> +#include <asm/bug.h> 
#include <asm/sizes.h> /* @@ -45,15 +46,15 @@ * VA_START - the first kernel virtual address. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. - * The module space lives between the addresses given by TASK_SIZE - * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) -#define MODULES_END (PAGE_OFFSET) -#define MODULES_VADDR (MODULES_END - SZ_64M) -#define PCI_IO_END (MODULES_VADDR - SZ_2M) +#define KIMAGE_VADDR (MODULES_END) +#define MODULES_END (MODULES_VADDR + MODULES_VSIZE) +#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) +#define MODULES_VSIZE (SZ_128M) +#define PCI_IO_END (PAGE_OFFSET - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) #define TASK_SIZE_64 (UL(1) << VA_BITS) @@ -71,12 +72,27 @@ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) /* + * The size of the KASAN shadow region. This should be 1/8th of the + * size of the entire kernel virtual address space. + */ +#ifdef CONFIG_KASAN +#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3)) +#else +#define KASAN_SHADOW_SIZE (0) +#endif + +/* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. */ -#define __virt_to_phys(x) (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET)) -#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) +#define __virt_to_phys(x) ({ \ + phys_addr_t __x = (phys_addr_t)(x); \ + __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \ + (__x - kimage_voffset); }) + +#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) +#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* * Convert a page to/from a physical address @@ -100,19 +116,40 @@ #define MT_S2_NORMAL 0xf #define MT_S2_DEVICE_nGnRE 0x1 +#ifdef CONFIG_ARM64_4K_PAGES +#define IOREMAP_MAX_ORDER (PUD_SHIFT) +#else +#define IOREMAP_MAX_ORDER (PMD_SHIFT) +#endif + +#ifdef CONFIG_BLK_DEV_INITRD +#define __early_init_dt_declare_initrd(__start, __end) \ + do { \ + initrd_start = (__start); \ + initrd_end = (__end); \ + } while (0) +#endif + #ifndef __ASSEMBLY__ -extern phys_addr_t memstart_addr; +#include <linux/bitops.h> +#include <linux/mmdebug.h> + +extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ -#define PHYS_OFFSET ({ memstart_addr; }) +#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) + +/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +extern u64 kimage_vaddr; + +/* the offset between the kernel virtual and physical mappings */ +extern u64 kimage_voffset; /* - * The maximum physical address that the linear direct mapping - * of system RAM can cover. (PAGE_OFFSET can be interpreted as - * a 2's complement signed quantity and negated to derive the - * maximum size of the linear mapping.) + * Allow all memory at the discovery stage. We will clip it later. 
*/ -#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; }) +#define MIN_MEMBLOCK_ADDR 0 +#define MAX_MEMBLOCK_ADDR U64_MAX /* * PFNs are used to describe any physical page; this means diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 2416578..b1892a0 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -27,6 +27,7 @@ #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> #include <asm/pgtable.h> +#include <asm/tlbflush.h> #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) @@ -48,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = page_to_phys(empty_zero_page); + unsigned long ttbr = virt_to_phys(empty_zero_page); asm( " msr ttbr0_el1, %0 // set TTBR0\n" @@ -73,7 +74,7 @@ static inline bool __cpu_uses_extended_idmap(void) /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ -static inline void cpu_set_default_tcr_t0sz(void) +static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr; @@ -86,7 +87,62 @@ static inline void cpu_set_default_tcr_t0sz(void) " msr tcr_el1, %0 ;" " isb" : "=&r" (tcr) - : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); + : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); +} + +#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) +#define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz) + +/* + * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm. + * + * The idmap lives in the same VA range as userspace, but uses global entries + * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from + * speculative TLB fetches, we must temporarily install the reserved page + * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ. + * + * If current is a not a user task, the mm covers the TTBR1_EL1 page tables, + * which should not be installed in TTBR0_EL1. In this case we can leave the + * reserved page tables in place. + */ +static inline void cpu_uninstall_idmap(void) +{ + struct mm_struct *mm = current->active_mm; + + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_default_tcr_t0sz(); + + if (mm != &init_mm) + cpu_switch_mm(mm->pgd, mm); +} + +static inline void cpu_install_idmap(void) +{ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_idmap_tcr_t0sz(); + + cpu_switch_mm(idmap_pg_dir, &init_mm); +} + +/* + * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, + * avoiding the possibility of conflicting TLB entries being allocated. 
+ */ +static inline void cpu_replace_ttbr1(pgd_t *pgd) +{ + typedef void (ttbr_replace_func)(phys_addr_t); + extern ttbr_replace_func idmap_cpu_replace_ttbr1; + ttbr_replace_func *replace_phys; + + phys_addr_t pgd_phys = virt_to_phys(pgd); + + replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1); + + cpu_install_idmap(); + replace_phys(pgd_phys); + cpu_uninstall_idmap(); } /* @@ -147,4 +203,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #define deactivate_mm(tsk,mm) do { } while (0) #define activate_mm(prev,next) switch_mm(prev, next, NULL) +void verify_cpu_asid_bits(void); + #endif diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index e80e232..e12af67 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -20,4 +20,21 @@ #define MODULE_ARCH_VERMAGIC "aarch64" +#ifdef CONFIG_ARM64_MODULE_PLTS +struct mod_arch_specific { + struct elf64_shdr *plt; + int plt_num_entries; + int plt_max_entries; +}; +#endif + +u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, + Elf64_Sym *sym); + +#ifdef CONFIG_RANDOMIZE_BASE +extern u64 module_alloc_base; +#else +#define module_alloc_base ((u64)_etext - MODULES_VSIZE) +#endif + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index b008a72..b9a7ba9 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -7,8 +7,6 @@ #include <linux/dma-mapping.h> #include <asm/io.h> -#include <asm-generic/pci-bridge.h> -#include <asm-generic/pci-dma-compat.h> #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0 diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index c150539..ff98585 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -42,11 +42,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) { - set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); + set_pud(pud, __pud(pmd | prot)); } +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE); +} +#else +static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) +{ + BUILD_BUG(); +} #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -62,11 +71,20 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) free_page((unsigned long)pud); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) { - set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); + set_pgd(pgdp, __pgd(pud | prot)); } +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE); +} +#else +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) +{ + BUILD_BUG(); +} #endif /* CONFIG_PGTABLE_LEVELS > 3 */ extern pgd_t *pgd_alloc(struct mm_struct *mm); diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h new file mode 100644 index 0000000..29fcb33 --- /dev/null +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016 ARM Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PGTABLE_PROT_H +#define __ASM_PGTABLE_PROT_H + +#include <asm/memory.h> +#include <asm/pgtable-hwdef.h> + +#include <linux/const.h> + +/* + * Software defined PTE bits definition. + */ +#define PTE_VALID (_AT(pteval_t, 1) << 0) +#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ +#define PTE_DIRTY (_AT(pteval_t, 1) << 55) +#define PTE_SPECIAL (_AT(pteval_t, 1) << 56) +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ + +#ifndef __ASSEMBLY__ + +#include <asm/pgtable-types.h> + +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) + +#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) +#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) + +#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) + +#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) + +#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) + +#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) +#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) + +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | 
PTE_USER | PTE_NG | PTE_PXN) + +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_EXEC +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_EXEC +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_PGTABLE_PROT_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f506086..989fef1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -21,43 +21,31 @@ #include <asm/memory.h> #include <asm/pgtable-hwdef.h> - -/* - * Software defined PTE bits definition. - */ -#define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ -#define PTE_DIRTY (_AT(pteval_t, 1) << 55) -#define PTE_SPECIAL (_AT(pteval_t, 1) << 56) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ +#include <asm/pgtable-prot.h> /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. * * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array * (rounded up to PUD_SIZE). - * VMALLOC_START: beginning of the kernel VA space + * VMALLOC_START: beginning of the kernel vmalloc space * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, * fixed mappings and modules */ -#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT - 1)) * sizeof(struct page), PUD_SIZE) - -#ifndef CONFIG_KASAN -#define VMALLOC_START (VA_START) -#else -#include <asm/kasan.h> -#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K) -#endif +#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) +#define VMALLOC_START (MODULES_END) #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define VMEMMAP_START (VMALLOC_END + SZ_64K) -#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) +#define vmemmap ((struct page *)VMEMMAP_START - \ + SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT)) #define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ +#include <asm/fixmap.h> #include <linux/mmdebug.h> extern void __pte_error(const char *file, int line, unsigned long val); @@ -65,65 +53,12 @@ extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) - -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) - -#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | 
PMD_ATTRINDX(MT_NORMAL)) -#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) - -#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) - -#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) - -#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) -#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) - -#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) -#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) - -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) -#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) - -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC -#define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC -#define __S101 PAGE_READONLY_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC - /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) @@ -135,16 +70,6 @@ extern struct page *empty_zero_page; #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -/* Find an entry in the third-level page table. */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) - /* * The following only work if pte_present(). Undefined behaviour otherwise. 
*/ @@ -278,7 +203,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_valid(pte)) { + if (pte_present(pte)) { if (pte_sw_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; else @@ -411,7 +336,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) -#ifdef CONFIG_ARM64_64K_PAGES +#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 #define pud_sect(pud) (0) #define pud_table(pud) (1) #else @@ -433,13 +358,31 @@ static inline void pmd_clear(pmd_t *pmdp) set_pmd(pmdp, __pmd(0)); } -static inline pte_t *pmd_page_vaddr(pmd_t pmd) +static inline phys_addr_t pmd_page_paddr(pmd_t pmd) { - return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK); + return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK; } +/* Find an entry in the third-level page table. */ +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + +#define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t)) +#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) + +#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) +#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) +#define pte_clear_fixmap() clear_fixmap(FIX_PTE) + #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -466,21 +409,37 @@ static inline void pud_clear(pud_t *pudp) set_pud(pudp, __pud(0)); } -static inline pmd_t *pud_page_vaddr(pud_t pud) +static inline phys_addr_t pud_page_paddr(pud_t pud) { - return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); + return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the second-level page table. 
*/ #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); -} +#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t)) +#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) + +#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) +#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) +#define pmd_clear_fixmap() clear_fixmap(FIX_PMD) #define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) + +#else + +#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) + +/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */ +#define pmd_set_fixmap(addr) NULL +#define pmd_set_fixmap_offset(pudp, addr) ((pmd_t *)pudp) +#define pmd_clear_fixmap() + +#define pmd_offset_kimg(dir,addr) ((pmd_t *)dir) + #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -502,21 +461,37 @@ static inline void pgd_clear(pgd_t *pgdp) set_pgd(pgdp, __pgd(0)); } -static inline pud_t *pgd_page_vaddr(pgd_t pgd) +static inline phys_addr_t pgd_page_paddr(pgd_t pgd) { - return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); + return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the frst-level page table. */ #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) -{ - return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); -} +#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t)) +#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) + +#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) +#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) +#define pud_clear_fixmap() clear_fixmap(FIX_PUD) #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) + +#else + +#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) + +/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */ +#define pud_set_fixmap(addr) NULL +#define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)pgdp) +#define pud_clear_fixmap() + +#define pud_offset_kimg(dir,addr) ((pud_t *)dir) + #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) @@ -524,11 +499,16 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) -#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr)) +#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) + +#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr))) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) +#define pgd_clear_fixmap() clear_fixmap(FIX_PGD) + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | @@ -648,6 +628,7 @@ extern 
pgd_t idmap_pg_dir[PTRS_PER_PGD]; * bits 0-1: present (must be zero) * bits 2-7: swap type * bits 8-57: swap offset + * bit 58: PTE_PROT_NONE (must be zero) */ #define __SWP_TYPE_SHIFT 2 #define __SWP_TYPE_BITS 6 diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 4acb7ca..cef1cf3 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -29,8 +29,10 @@ #include <linux/string.h> +#include <asm/alternative.h> #include <asm/fpsimd.h> #include <asm/hw_breakpoint.h> +#include <asm/lse.h> #include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> #include <asm/types.h> @@ -177,9 +179,11 @@ static inline void prefetchw(const void *ptr) } #define ARCH_HAS_SPINLOCK_PREFETCH -static inline void spin_lock_prefetch(const void *x) +static inline void spin_lock_prefetch(const void *ptr) { - prefetchw(x); + asm volatile(ARM64_LSE_ATOMIC_INSN( + "prfm pstl1strm, %a0", + "nop") : : "p" (ptr)); } #define HAVE_ARCH_PICK_MMAP_LAYOUT @@ -187,5 +191,6 @@ static inline void spin_lock_prefetch(const void *x) #endif void cpu_enable_pan(void *__unused); +void cpu_enable_uao(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index e9e5467..a307eb6 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -58,6 +58,7 @@ #define COMPAT_PSR_Z_BIT 0x40000000 #define COMPAT_PSR_N_BIT 0x80000000 #define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */ +#define COMPAT_PSR_GE_MASK 0x000f0000 #ifdef CONFIG_CPU_BIG_ENDIAN #define COMPAT_PSR_ENDSTATE COMPAT_PSR_E_BIT @@ -151,35 +152,9 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->regs[0]; } -/* - * Are the current registers suitable for user mode? (used to maintain - * security in signal handlers) - */ -static inline int valid_user_regs(struct user_pt_regs *regs) -{ - if (user_mode(regs) && (regs->pstate & PSR_I_BIT) == 0) { - regs->pstate &= ~(PSR_F_BIT | PSR_A_BIT); - - /* The T bit is reserved for AArch64 */ - if (!(regs->pstate & PSR_MODE32_BIT)) - regs->pstate &= ~COMPAT_PSR_T_BIT; - - return 1; - } - - /* - * Force PSR to something logical... - */ - regs->pstate &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | \ - COMPAT_PSR_T_BIT | PSR_MODE32_BIT; - - if (!(regs->pstate & PSR_MODE32_BIT)) { - regs->pstate &= ~COMPAT_PSR_T_BIT; - regs->pstate |= PSR_MODE_EL0t; - } - - return 0; -} +/* We must avoid circular header include via sched.h */ +struct task_struct; +int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); #define instruction_pointer(regs) ((unsigned long)(regs)->pc) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d9c3d6a..817a067 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -16,6 +16,19 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H +/* Values for secondary_data.status */ + +#define CPU_MMU_OFF (-1) +#define CPU_BOOT_SUCCESS (0) +/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */ +#define CPU_KILL_ME (1) +/* The cpu couldn't die gracefully and is looping in the kernel */ +#define CPU_STUCK_IN_KERNEL (2) +/* Fatal system error detected by secondary CPU, crash the system */ +#define CPU_PANIC_KERNEL (3) + +#ifndef __ASSEMBLY__ + #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/thread_info.h> @@ -54,19 +67,52 @@ asmlinkage void secondary_start_kernel(void); /* * Initial data for bringing up a secondary CPU. 
+ * @stack - sp for the secondary CPU + * @status - Result passed back from the secondary CPU to + * indicate failure. */ struct secondary_data { void *stack; + long status; }; + extern struct secondary_data secondary_data; +extern long __early_cpu_boot_status; extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +#else +static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +{ + BUILD_BUG(); +} +#endif + extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern void cpu_die(void); +extern void cpu_die_early(void); + +static inline void cpu_park_loop(void) +{ + for (;;) { + wfe(); + wfi(); + } +} + +static inline void update_cpu_boot_status(int val) +{ + WRITE_ONCE(secondary_data.status, val); + /* Ensure the visibility of the status update */ + dsb(ishst); +} + +#endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4aeebec..1a78d6e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -72,15 +72,19 @@ #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) +#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) #define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) #define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) +#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ (!!x)<<8 | 0x1f) +#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\ + (!!x)<<8 | 0x1f) /* SCTLR_EL1 */ #define SCTLR_EL1_CP15BEN (0x1 << 5) @@ -137,6 +141,9 @@ #define ID_AA64MMFR1_VMIDBITS_SHIFT 4 #define ID_AA64MMFR1_HADBS_SHIFT 0 +/* id_aa64mmfr2 */ +#define ID_AA64MMFR2_UAO_SHIFT 4 + /* id_aa64dfr0 */ #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_WRPS_SHIFT 20 @@ -196,16 +203,16 @@ #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 - .equ __reg_num_x\num, \num + .equ .L__reg_num_x\num, \num .endr - .equ __reg_num_xzr, 31 + .equ .L__reg_num_xzr, 31 .macro mrs_s, rt, sreg - .inst 0xd5200000|(\sreg)|(__reg_num_\rt) + .inst 0xd5200000|(\sreg)|(.L__reg_num_\rt) .endm .macro msr_s, sreg, rt - .inst 0xd5000000|(\sreg)|(__reg_num_\rt) + .inst 0xd5000000|(\sreg)|(.L__reg_num_\rt) .endm #else @@ -214,16 +221,16 @@ asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" -" .equ __reg_num_x\\num, \\num\n" +" .equ .L__reg_num_x\\num, \\num\n" " .endr\n" -" .equ __reg_num_xzr, 31\n" +" .equ .L__reg_num_xzr, 31\n" "\n" " .macro mrs_s, rt, sreg\n" -" .inst 0xd5200000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n" " .endm\n" "\n" " .macro msr_s, sreg, rt\n" -" .inst 0xd5000000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n" " .endm\n" ); diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index b2ede967..0685d74 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -36,11 +36,11 @@ #define VERIFY_WRITE 1 /* - * The exception table consists of pairs of addresses: the first is the - * address of an 
instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -50,9 +50,11 @@ struct exception_table_entry { - unsigned long insn, fixup; + int insn, fixup; }; +#define ARCH_HAS_RELATIVE_EXTABLE + extern int fixup_exception(struct pt_regs *regs); #define KERNEL_DS (-1UL) @@ -64,6 +66,16 @@ extern int fixup_exception(struct pt_regs *regs); static inline void set_fs(mm_segment_t fs) { current_thread_info()->addr_limit = fs; + + /* + * Enable/disable UAO so that copy_to_user() etc can access + * kernel memory with the unprivileged instructions. + */ + if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO, + CONFIG_ARM64_UAO)); } #define segment_eq(a, b) ((a) == (b)) @@ -105,6 +117,12 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(type, addr, size) __range_ok(addr, size) #define user_addr_max get_fs +#define _ASM_EXTABLE(from, to) \ + " .pushsection __ex_table, \"a\"\n" \ + " .align 3\n" \ + " .long (" #from " - .), (" #to " - .)\n" \ + " .popsection\n" + /* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" @@ -113,9 +131,10 @@ static inline void set_fs(mm_segment_t fs) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. 
*/ -#define __get_user_asm(instr, reg, x, addr, err) \ +#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ - "1: " instr " " reg "1, [%2]\n" \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ + alt_instr " " reg "1, [%2]\n", feature) \ "2:\n" \ " .section .fixup, \"ax\"\n" \ " .align 2\n" \ @@ -123,10 +142,7 @@ static inline void set_fs(mm_segment_t fs) " mov %1, #0\n" \ " b 2b\n" \ " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 1b, 3b\n" \ - " .previous" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) @@ -134,26 +150,30 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 2: \ - __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 4: \ - __get_user_asm("ldr", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __get_user_asm("ldr", "%", __gu_val, (ptr), (err)); \ + __get_user_asm("ldr", "ldtr", "%", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ default: \ BUILD_BUG(); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ } while (0) @@ -181,19 +201,17 @@ do { \ ((x) = 0, -EFAULT); \ }) -#define __put_user_asm(instr, reg, x, addr, err) \ +#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ - "1: " instr " " reg "1, [%2]\n" \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ + alt_instr " " reg "1, [%2]\n", feature) \ "2:\n" \ " .section .fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %w0, %3\n" \ " b 2b\n" \ " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 1b, 3b\n" \ - " .previous" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) @@ -201,25 +219,29 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 2: \ - __put_user_asm("strh", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 4: \ - __put_user_asm("str", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __put_user_asm("str", "%", __pu_val, (ptr), (err)); \ + __put_user_asm("str", "sttr", "%", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ default: \ BUILD_BUG(); \ } \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ } while (0) diff --git a/arch/arm64/include/asm/virt.h 
b/arch/arm64/include/asm/virt.h index 7a5df52..9f22dd6 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -23,6 +23,8 @@ #ifndef __ASSEMBLY__ +#include <asm/ptrace.h> + /* * __boot_cpu_mode records what mode CPUs were booted in. * A correctly-implemented bootloader must start all CPUs in the same mode: @@ -50,6 +52,14 @@ static inline bool is_hyp_mode_mismatched(void) return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } +static inline bool is_kernel_in_hyp_mode(void) +{ + u64 el; + + asm("mrs %0, CurrentEL" : "=r" (el)); + return el == CurrentEL_EL2; +} + /* The section containing the hypervisor text */ extern char __hyp_text_start[]; extern char __hyp_text_end[]; diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index aab5bf0..2b79b8a 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -16,6 +16,8 @@ #ifndef __ASM_WORD_AT_A_TIME_H #define __ASM_WORD_AT_A_TIME_H +#include <asm/uaccess.h> + #ifndef __AARCH64EB__ #include <linux/kernel.h> @@ -81,10 +83,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) #endif " b 2b\n" " .popsection\n" - " .pushsection __ex_table,\"a\"\n" - " .align 3\n" - " .quad 1b, 3b\n" - " .popsection" + _ASM_EXTABLE(1b, 3b) : "=&r" (ret), "=&r" (offset) : "r" (addr), "Q" (*(unsigned long *)addr)); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 361c8a8..a739287 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -28,5 +28,7 @@ #define HWCAP_SHA2 (1 << 6) #define HWCAP_CRC32 (1 << 7) #define HWCAP_ATOMICS (1 << 8) +#define HWCAP_FPHP (1 << 9) +#define HWCAP_ASIMDHP (1 << 10) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 2d4ca4b..f209ea1 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -94,6 +94,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ +#define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */ struct kvm_vcpu_init { __u32 target; @@ -204,6 +205,11 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +/* Device Control API on vcpu fd */ +#define KVM_ARM_VCPU_PMU_V3_CTRL 0 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 208db3d..b5c3933 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -45,6 +45,7 @@ #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 #define PSR_PAN_BIT 0x00400000 +#define PSR_UAO_BIT 0x00800000 #define PSR_Q_BIT 0x08000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 83cd7e6..3793003 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o +arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o 
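Note on the exception-table change above: the uaccess.h and word-at-a-time.h hunks replace the absolute ".quad 1b, 3b" __ex_table pairs with 32-bit offsets that are relative to the table entry itself (ARCH_HAS_RELATIVE_EXTABLE, struct exception_table_entry now holding two ints). A minimal sketch of how such an entry is turned back into absolute addresses at fault time — the helper names are illustrative, not part of this patch:

struct exception_table_entry {
	int insn, fixup;	/* self-relative offsets, see the uaccess.h hunk */
};

/* absolute address of the instruction that is allowed to fault */
static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
{
	return (unsigned long)&x->insn + x->insn;
}

/* absolute address at which execution continues after the fixup */
static inline unsigned long ex_to_fixup(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}

Because each entry encodes only distances, the table halves in size and needs no dynamic relocations of its own, which matters once the image can be loaded at a randomized address (see the RANDOMIZE_BASE pieces later in this diff).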
arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o @@ -41,7 +42,9 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o +arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c new file mode 100644 index 0000000..a32b401 --- /dev/null +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -0,0 +1,141 @@ +/* + * ARM64 ACPI Parking Protocol implementation + * + * Authors: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/acpi.h> +#include <linux/types.h> + +#include <asm/cpu_ops.h> + +struct parking_protocol_mailbox { + __le32 cpu_id; + __le32 reserved; + __le64 entry_point; +}; + +struct cpu_mailbox_entry { + struct parking_protocol_mailbox __iomem *mailbox; + phys_addr_t mailbox_addr; + u8 version; + u8 gic_cpu_id; +}; + +static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS]; + +void __init acpi_set_mailbox_entry(int cpu, + struct acpi_madt_generic_interrupt *p) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + + cpu_entry->mailbox_addr = p->parked_address; + cpu_entry->version = p->parking_version; + cpu_entry->gic_cpu_id = p->cpu_interface_number; +} + +bool acpi_parking_protocol_valid(int cpu) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + + return cpu_entry->mailbox_addr && cpu_entry->version; +} + +static int acpi_parking_protocol_cpu_init(unsigned int cpu) +{ + pr_debug("%s: ACPI parked addr=%llx\n", __func__, + cpu_mailbox_entries[cpu].mailbox_addr); + + return 0; +} + +static int acpi_parking_protocol_cpu_prepare(unsigned int cpu) +{ + return 0; +} + +static int acpi_parking_protocol_cpu_boot(unsigned int cpu) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + struct parking_protocol_mailbox __iomem *mailbox; + __le32 cpu_id; + + /* + * Map mailbox memory with attribute device nGnRE (ie ioremap - + * this deviates from the parking protocol specifications since + * the mailboxes are required to be mapped nGnRnE; the attribute + * discrepancy is harmless insofar as the protocol specification + * is concerned). + * If the mailbox is mistakenly allocated in the linear mapping + * by FW ioremap will fail since the mapping will be prevented + * by the kernel (it clashes with the linear mapping attributes + * specifications). 
+ */ + mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox)); + if (!mailbox) + return -EIO; + + cpu_id = readl_relaxed(&mailbox->cpu_id); + /* + * Check if firmware has set-up the mailbox entry properly + * before kickstarting the respective cpu. + */ + if (cpu_id != ~0U) { + iounmap(mailbox); + return -ENXIO; + } + + /* + * stash the mailbox address mapping to use it for further FW + * checks in the postboot method + */ + cpu_entry->mailbox = mailbox; + + /* + * We write the entry point and cpu id as LE regardless of the + * native endianness of the kernel. Therefore, any boot-loaders + * that read this address need to convert this address to the + * Boot-Loader's endianness before jumping. + */ + writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point); + writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id); + + arch_send_wakeup_ipi_mask(cpumask_of(cpu)); + + return 0; +} + +static void acpi_parking_protocol_cpu_postboot(void) +{ + int cpu = smp_processor_id(); + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox; + __le64 entry_point; + + entry_point = readl_relaxed(&mailbox->entry_point); + /* + * Check if firmware has cleared the entry_point as expected + * by the protocol specification. + */ + WARN_ON(entry_point); +} + +const struct cpu_operations acpi_parking_protocol_ops = { + .name = "parking-protocol", + .cpu_init = acpi_parking_protocol_cpu_init, + .cpu_prepare = acpi_parking_protocol_cpu_prepare, + .cpu_boot = acpi_parking_protocol_cpu_boot, + .cpu_postboot = acpi_parking_protocol_cpu_postboot +}; diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 3e01207..c37202c 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -297,11 +297,8 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 0b, 4b\n" \ - " .quad 1b, 4b\n" \ - " .popsection\n" \ + _ASM_EXTABLE(0b, 4b) \ + _ASM_EXTABLE(1b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index fffa4ac6..3ae6b31 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -104,15 +104,14 @@ int main(void) DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); BLANK(); + DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); + BLANK(); #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); - DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); - DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); - DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); #endif #ifdef CONFIG_CPU_PM diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index feb6b4e..06afd04 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ 
b/arch/arm64/kernel/cpu_errata.c @@ -21,24 +21,12 @@ #include <asm/cputype.h> #include <asm/cpufeature.h> -#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) -#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) -#define MIDR_THUNDERX MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) - -#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ - MIDR_ARCHITECTURE_MASK) - static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry) { - u32 midr = read_cpuid_id(); - - if ((midr & CPU_MODEL_MASK) != entry->midr_model) - return false; - - midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; - - return (midr >= entry->midr_range_min && midr <= entry->midr_range_max); + return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, + entry->midr_range_min, + entry->midr_range_max); } #define MIDR_RANGE(model, min, max) \ @@ -100,6 +88,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01), }, #endif +#ifdef CONFIG_CAVIUM_ERRATUM_27456 + { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 1), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index b6bd7d4..c7cfb8f 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -25,19 +25,30 @@ #include <asm/smp_plat.h> extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations acpi_parking_protocol_ops; extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS]; -static const struct cpu_operations *supported_cpu_ops[] __initconst = { +static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; +static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL + &acpi_parking_protocol_ops, +#endif + &cpu_psci_ops, + NULL, +}; + static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops = supported_cpu_ops; + const struct cpu_operations **ops; + + ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; while (*ops) { if (!strcmp(name, (*ops)->name)) @@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu) } } else { enable_method = acpi_get_enable_method(cpu); - if (!enable_method) - pr_err("Unsupported ACPI enable-method\n"); + if (!enable_method) { + /* + * In ACPI systems the boot CPU does not require + * checking the enable method since for some + * boot protocol (ie parking protocol) it need not + * be initialized. Don't warn spuriously. 
+ */ + if (cpu != 0) + pr_err("Unsupported ACPI enable-method\n"); + } } return enable_method; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5c90aa4..943f514 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -24,8 +24,10 @@ #include <asm/cpu.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> +#include <asm/mmu_context.h> #include <asm/processor.h> #include <asm/sysreg.h> +#include <asm/virt.h> unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); @@ -54,19 +56,23 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); .safe_val = SAFE_VAL, \ } -/* Define a feature with signed values */ +/* Define a feature with unsigned values */ #define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ - __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) - -/* Define a feature with unsigned value */ -#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ __ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) +/* Define a feature with a signed value */ +#define S_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + #define ARM64_FTR_END \ { \ .width = 0, \ } +/* meta feature for alternatives */ +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry); + static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), @@ -84,8 +90,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), /* Linux doesn't care about the EL3 */ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), @@ -96,8 +102,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ @@ -108,7 +114,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs */ - U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -123,29 +129,34 @@ static struct 
arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_END, }; +static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static struct arm64_ftr_bits ftr_ctr[] = { - U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine */ - U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; static struct arm64_ftr_bits ftr_id_mmfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */ + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */ + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ ARM64_FTR_END, @@ -153,12 +164,12 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = { static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), ARM64_FTR_END, }; @@ -204,6 +215,18 @@ static struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_END, }; +static struct arm64_ftr_bits ftr_id_dfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + 
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_END, +}; + /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -249,7 +272,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 1 */ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits), - ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0), ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits), @@ -284,6 +307,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 3, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), @@ -408,6 +432,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); @@ -517,6 +542,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, + info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); /* * EL3 is not our concern. 
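Aside on the cpufeature.c hunks above: the default ARM64_FTR_BITS() encoding becomes unsigned, and the genuinely signed ID-register fields (FP, AdvSIMD, TGRAN4/64, PerfMon, Inner/OuterShr) are now tagged with S_ARM64_FTR_BITS(), with 0xf used as the safe value where 0xf means "not implemented". Roughly, the difference when a 4-bit field is extracted — the helper names below are illustrative, assuming u64/s64 from <linux/types.h>:

#include <linux/types.h>

/* unsigned view: 0xf is simply the largest implemented level */
static inline unsigned int field_extract_unsigned(u64 reg, int shift, int width)
{
	return (reg >> shift) & ((1UL << width) - 1);
}

/* signed view: 0xf sign-extends to -1, i.e. "feature not implemented" */
static inline int field_extract_signed(u64 reg, int shift, int width)
{
	return (s64)(reg << (64 - shift - width)) >> (64 - width);
}

Whether a field is read as signed or unsigned decides which CPU's value counts as "lower" and therefore system-safe, hence the explicit .sign member threaded through arm64_cpu_capabilities and HWCAP_CAP() in the following hunks.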
@@ -592,7 +619,7 @@ u64 read_system_reg(u32 id) static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { - int val = cpuid_feature_extract_field(reg, entry->field_pos); + int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign); return val >= entry->min_field_value; } @@ -621,6 +648,23 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) return has_sre; } +static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) +{ + u32 midr = read_cpuid_id(); + u32 rv_min, rv_max; + + /* Cavium ThunderX pass 1.x and 2.x */ + rv_min = 0; + rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK; + + return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); +} + +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry) +{ + return is_kernel_in_hyp_mode(); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -628,6 +672,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .sign = FTR_UNSIGNED, .min_field_value = 1, }, #ifdef CONFIG_ARM64_PAN @@ -637,6 +682,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .sign = FTR_UNSIGNED, .min_field_value = 1, .enable = cpu_enable_pan, }, @@ -648,38 +694,69 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, + .sign = FTR_UNSIGNED, .min_field_value = 2, }, #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ + { + .desc = "Software prefetching using PRFM", + .capability = ARM64_HAS_NO_HW_PREFETCH, + .matches = has_no_hw_prefetch, + }, +#ifdef CONFIG_ARM64_UAO + { + .desc = "User Access Override", + .capability = ARM64_HAS_UAO, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .field_pos = ID_AA64MMFR2_UAO_SHIFT, + .min_field_value = 1, + .enable = cpu_enable_uao, + }, +#endif /* CONFIG_ARM64_UAO */ +#ifdef CONFIG_ARM64_PAN + { + .capability = ARM64_ALT_PAN_NOT_UAO, + .matches = cpufeature_pan_not_uao, + }, +#endif /* CONFIG_ARM64_PAN */ + { + .desc = "Virtualization Host Extensions", + .capability = ARM64_HAS_VIRT_HOST_EXTN, + .matches = runs_at_el2, + }, {}, }; -#define HWCAP_CAP(reg, field, min_value, type, cap) \ +#define HWCAP_CAP(reg, field, s, min_value, type, cap) \ { \ .desc = #cap, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ + .sign = s, \ .min_field_value = min_value, \ .hwcap_type = type, \ .hwcap = cap, \ } static const struct arm64_cpu_capabilities arm64_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, 
ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), #ifdef CONFIG_COMPAT - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), #endif {}, }; @@ -734,7 +811,7 @@ static void __init setup_cpu_hwcaps(void) int i; const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; - for (i = 0; hwcaps[i].desc; i++) + for (i = 0; hwcaps[i].matches; i++) if (hwcaps[i].matches(&hwcaps[i])) cap_set_hwcap(&hwcaps[i]); } @@ -744,11 +821,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, { int i; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!caps[i].matches(&caps[i])) continue; - if (!cpus_have_cap(caps[i].capability)) + if (!cpus_have_cap(caps[i].capability) && caps[i].desc) pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } @@ -763,13 +840,11 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; - for (i = 0; caps[i].desc; i++) + for (i = 0; caps[i].matches; i++) if (caps[i].enable && cpus_have_cap(caps[i].capability)) on_each_cpu(caps[i].enable, NULL, true); } -#ifdef CONFIG_HOTPLUG_CPU - /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. 
This @@ -791,35 +866,36 @@ static inline void set_sys_caps_initialised(void) static u64 __raw_read_system_reg(u32 sys_id) { switch (sys_id) { - case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1); - case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1); - case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1); - case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1); - case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1); - case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1); - case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1); - case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1); - case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1); - case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1); - case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1); - case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); - case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1); - case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1); - case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1); - - case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1); - case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1); - case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1); - case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1); - - case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0); - case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0); - case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0); + case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); + case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); + case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); + case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); + case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); + + case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); + case SYS_CTR_EL0: return read_cpuid(CTR_EL0); + case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); default: BUG(); return 0; @@ -827,25 +903,12 @@ 
static u64 __raw_read_system_reg(u32 sys_id) } /* - * Park the CPU which doesn't have the capability as advertised - * by the system. + * Check for CPU features that are used in early boot + * based on the Boot CPU value. */ -static void fail_incapable_cpu(char *cap_type, - const struct arm64_cpu_capabilities *cap) +static void check_early_cpu_features(void) { - int cpu = smp_processor_id(); - - pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc); - /* Mark this CPU absent */ - set_cpu_present(cpu, 0); - - /* Check if we can park ourselves */ - if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die) - cpu_ops[cpu]->cpu_die(cpu); - asm( - "1: wfe\n" - " wfi\n" - " b 1b"); + verify_cpu_asid_bits(); } /* @@ -861,6 +924,8 @@ void verify_local_cpu_capabilities(void) int i; const struct arm64_cpu_capabilities *caps; + check_early_cpu_features(); + /* * If we haven't computed the system capabilities, there is nothing * to verify. @@ -869,35 +934,33 @@ void verify_local_cpu_capabilities(void) return; caps = arm64_features; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) continue; /* * If the new CPU misses an advertised feature, we cannot proceed * further, park the cpu. */ - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) - fail_incapable_cpu("arm64_features", &caps[i]); + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { + pr_crit("CPU%d: missing feature: %s\n", + smp_processor_id(), caps[i].desc); + cpu_die_early(); + } if (caps[i].enable) caps[i].enable(NULL); } - for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) { + for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) { if (!cpus_have_hwcap(&caps[i])) continue; - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) - fail_incapable_cpu("arm64_hwcaps", &caps[i]); + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { + pr_crit("CPU%d: missing HWCAP: %s\n", + smp_processor_id(), caps[i].desc); + cpu_die_early(); + } } } -#else /* !CONFIG_HOTPLUG_CPU */ - -static inline void set_sys_caps_initialised(void) -{ -} - -#endif /* CONFIG_HOTPLUG_CPU */ - static void __init setup_feature_capabilities(void) { update_cpu_capabilities(arm64_features, "detected feature:"); @@ -928,3 +991,9 @@ void __init setup_cpu_features(void) pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", L1_CACHE_BYTES, cls); } + +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry) +{ + return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); +} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 212ae63..84c8684 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -59,6 +59,8 @@ static const char *const hwcap_str[] = { "sha2", "crc32", "atomics", + "fphp", + "asimdhp", NULL }; @@ -210,6 +212,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index c536c9e..c45f296 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ 
-34,7 +34,7 @@ /* Determine debug architecture. */ u8 debug_monitors_arch(void) { - return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1), ID_AA64DFR0_DEBUGVER_SHIFT); } @@ -186,20 +186,21 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); -static DEFINE_RWLOCK(step_hook_lock); +static DEFINE_SPINLOCK(step_hook_lock); void register_step_hook(struct step_hook *hook) { - write_lock(&step_hook_lock); - list_add(&hook->node, &step_hook); - write_unlock(&step_hook_lock); + spin_lock(&step_hook_lock); + list_add_rcu(&hook->node, &step_hook); + spin_unlock(&step_hook_lock); } void unregister_step_hook(struct step_hook *hook) { - write_lock(&step_hook_lock); - list_del(&hook->node); - write_unlock(&step_hook_lock); + spin_lock(&step_hook_lock); + list_del_rcu(&hook->node); + spin_unlock(&step_hook_lock); + synchronize_rcu(); } /* @@ -213,15 +214,15 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) struct step_hook *hook; int retval = DBG_HOOK_ERROR; - read_lock(&step_hook_lock); + rcu_read_lock(); - list_for_each_entry(hook, &step_hook, node) { + list_for_each_entry_rcu(hook, &step_hook, node) { retval = hook->fn(regs, esr); if (retval == DBG_HOOK_HANDLED) break; } - read_unlock(&step_hook_lock); + rcu_read_unlock(); return retval; } diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index a773db9..cae3112 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -35,6 +35,7 @@ ENTRY(entry) * for image_addr variable passed to efi_entry(). */ stp x29, x30, [sp, #-32]! + mov x29, sp /* * Call efi_entry to do the real work. @@ -61,7 +62,7 @@ ENTRY(entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - ldr x21, =stext_offset + movz x21, #:abs_g0:stext_offset add x21, x0, x21 /* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index acc1afd..975b274 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -45,7 +45,7 @@ * been used to perform kernel mode NEON in the meantime. * * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to - * the id of the current CPU everytime the state is loaded onto a CPU. For (b), + * the id of the current CPU every time the state is loaded onto a CPU. For (b), * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the * address of the userland FPSIMD state of the task that was loaded onto the CPU * the most recently, or NULL if kernel mode NEON has been performed after that. diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 917d981..6ebd204 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,11 +29,14 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/cputype.h> +#include <asm/elf.h> #include <asm/kernel-pgtable.h> +#include <asm/kvm_arm.h> #include <asm/memory.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> +#include <asm/smp.h> #include <asm/sysreg.h> #include <asm/thread_info.h> #include <asm/virt.h> @@ -67,12 +70,11 @@ * in the entry routines. */ __HEAD - +_head: /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ #ifdef CONFIG_EFI -efi_head: /* * This add instruction has no meaningful effect except that * its opcode forms the magic "MZ" signature required by UEFI. 
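On the debug-monitors.c change above: the step-hook list moves from a rwlock to a plain spinlock for writers plus RCU on the read side, so call_step_hook() can walk the list from the single-step exception path without taking a lock. A hypothetical client, only to show the shape of the resulting contract (the demo_* names are made up):

#include <linux/ptrace.h>
#include <asm/debug-monitors.h>

static int demo_step_fn(struct pt_regs *regs, unsigned int esr)
{
	/* claim the step only if it is ours; otherwise let the next hook try */
	return DBG_HOOK_ERROR;
}

static struct step_hook demo_hook = { .fn = demo_step_fn };

static void demo_install(void)
{
	register_step_hook(&demo_hook);		/* list_add_rcu() under step_hook_lock */
}

static void demo_remove(void)
{
	/* list_del_rcu() + synchronize_rcu(): only free the hook after this returns */
	unregister_step_hook(&demo_hook);
}

The synchronize_rcu() added to unregister_step_hook() is what makes the lockless readers safe: a hook is guaranteed not to be running once unregistration returns.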
@@ -83,9 +85,9 @@ efi_head: b stext // branch to kernel start, magic .long 0 // reserved #endif - .quad _kernel_offset_le // Image load offset from start of RAM, little-endian - .quad _kernel_size_le // Effective size of kernel image, little-endian - .quad _kernel_flags_le // Informative flags, little-endian + le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + le64sym _kernel_size_le // Effective size of kernel image, little-endian + le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved @@ -94,14 +96,14 @@ efi_head: .byte 0x4d .byte 0x64 #ifdef CONFIG_EFI - .long pe_header - efi_head // Offset to the PE header. + .long pe_header - _head // Offset to the PE header. #else .word 0 // reserved #endif #ifdef CONFIG_EFI .globl __efistub_stext_offset - .set __efistub_stext_offset, stext - efi_head + .set __efistub_stext_offset, stext - _head .align 3 pe_header: .ascii "PE" @@ -124,7 +126,7 @@ optional_header: .long _end - stext // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long __efistub_entry - efi_head // AddressOfEntryPoint + .long __efistub_entry - _head // AddressOfEntryPoint .long __efistub_stext_offset // BaseOfCode extra_header_fields: @@ -139,7 +141,7 @@ extra_header_fields: .short 0 // MinorSubsystemVersion .long 0 // Win32VersionValue - .long _end - efi_head // SizeOfImage + .long _end - _head // SizeOfImage // Everything before the kernel image is considered part of the header .long __efistub_stext_offset // SizeOfHeaders @@ -210,6 +212,7 @@ section_table: ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode + mov x23, xzr // KASLR offset, defaults to 0 adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag bl __create_page_tables // x25=TTBR0, x26=TTBR1 @@ -219,11 +222,13 @@ ENTRY(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, =__mmap_switched // address to jump to after + ldr x27, 0f // address to jump to after // MMU has been enabled adr_l lr, __enable_mmu // return (PIC) address b __cpu_setup // initialise processor ENDPROC(stext) + .align 3 +0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -311,7 +316,7 @@ ENDPROC(preserve_boot_args) __create_page_tables: adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir - mov x27, lr + mov x28, lr /* * Invalidate the idmap and swapper page tables to avoid potential @@ -389,9 +394,11 @@ __create_page_tables: * Map the kernel image (starting with PHYS_OFFSET). 
*/ mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET + ldr x5, =KIMAGE_VADDR + add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END // __va(KERNEL_END) + ldr w6, kernel_img_size + add x6, x6, x5 mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -405,9 +412,11 @@ __create_page_tables: dmb sy bl __inval_cache_range - mov lr, x27 - ret + ret x28 ENDPROC(__create_page_tables) + +kernel_img_size: + .long _end - (_head - TEXT_OFFSET) .ltorg /* @@ -415,23 +424,81 @@ ENDPROC(__create_page_tables) */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: + mov x28, lr // preserve LR + adr_l x8, vectors // load VBAR_EL1 with virtual + msr vbar_el1, x8 // vector table address + isb + // Clear BSS adr_l x0, __bss_start mov x1, xzr adr_l x2, __bss_stop sub x2, x2, x0 bl __pi_memset + dsb ishst // Make zero page visible to PTW + +#ifdef CONFIG_RELOCATABLE + + /* + * Iterate over each entry in the relocation table, and apply the + * relocations in place. + */ + adr_l x8, __dynsym_start // start of symbol table + adr_l x9, __reloc_start // start of reloc table + adr_l x10, __reloc_end // end of reloc table + +0: cmp x9, x10 + b.hs 2f + ldp x11, x12, [x9], #24 + ldr x13, [x9, #-8] + cmp w12, #R_AARCH64_RELATIVE + b.ne 1f + add x13, x13, x23 // relocate + str x13, [x11, x23] + b 0b + +1: cmp w12, #R_AARCH64_ABS64 + b.ne 0b + add x12, x12, x12, lsl #1 // symtab offset: 24x top word + add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx + ldr x15, [x12, #8] // Elf64_Sym::st_value + cmp w14, #-0xf // SHN_ABS (0xfff1) ? + add x14, x15, x23 // relocate + csel x15, x14, x15, ne + add x15, x13, x15 + str x15, [x11, x23] + b 0b + +2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr + dc cvac, x8 // value visible to secondaries + dsb sy // with MMU off +#endif adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer - str_l x24, memstart_addr, x6 // Save PHYS_OFFSET + + ldr_l x4, kimage_vaddr // Save the offset between + sub x4, x4, x24 // the kernel virtual and + str_l x4, kimage_voffset, x5 // physical mappings + mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init #endif +#ifdef CONFIG_RANDOMIZE_BASE + cbnz x23, 0f // already running randomized? + mov x0, x21 // pass FDT address in x0 + bl kaslr_early_init // parse FDT for KASLR options + cbz x0, 0f // KASLR disabled? just proceed + mov x23, x0 // record KASLR offset + ret x28 // we must enable KASLR, return + // to __enable_mmu() +0: +#endif b start_kernel ENDPROC(__mmap_switched) @@ -440,6 +507,10 @@ ENDPROC(__mmap_switched) * hotplug and needs to have the same protections as the text region */ .section ".text","ax" + +ENTRY(kimage_vaddr) + .quad _text - TEXT_OFFSET + /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. @@ -464,9 +535,27 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 isb ret +2: +#ifdef CONFIG_ARM64_VHE + /* + * Check for VHE being present. For the rest of the EL2 setup, + * x2 being non-zero indicates that we do have VHE, and that the + * kernel is intended to run at EL2. + */ + mrs x2, id_aa64mmfr1_el1 + ubfx x2, x2, #8, #4 +#else + mov x2, xzr +#endif + /* Hyp configuration. 
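(For readers who find the assembler relocation loop in __mmap_switched above hard to follow, the R_AARCH64_RELATIVE half of it is roughly the C below. The helper name is invented; only the arithmetic mirrors the patch.)

#include <linux/elf.h>
#include <linux/types.h>

/* Each RELATIVE entry patches one 64-bit word with "link-time value + KASLR offset". */
static void apply_relative_relocs(Elf64_Rela *rela, Elf64_Rela *end, u64 offset)
{
        for (; rela < end; rela++) {
                if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
                        continue;
                /* r_offset and r_addend hold link-time VAs, so both get displaced. */
                *(u64 *)(rela->r_offset + offset) = rela->r_addend + offset;
        }
}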
*/ -2: mov x0, #(1 << 31) // 64-bit EL1 + mov x0, #HCR_RW // 64-bit EL1 + cbz x2, set_hcr + orr x0, x0, #HCR_TGE // Enable Host Extensions + orr x0, x0, #HCR_E2H +set_hcr: msr hcr_el2, x0 + isb /* Generic timers. */ mrs x0, cnthctl_el2 @@ -526,6 +615,13 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems /* Stage-2 translation */ msr vttbr_el2, xzr + cbz x2, install_el2_stub + + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + isb + ret + +install_el2_stub: /* Hypervisor stub */ adrp x0, __hyp_stub_vectors add x0, x0, #:lo12:__hyp_stub_vectors @@ -605,13 +701,20 @@ ENTRY(secondary_startup) adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x21, =secondary_data - ldr x27, =__secondary_switched // address to jump to after enabling the MMU + ldr x8, kimage_vaddr + ldr w9, 0f + sub x27, x8, w9, sxtw // address to jump to after enabling the MMU b __enable_mmu ENDPROC(secondary_startup) +0: .long (_text - TEXT_OFFSET) - __secondary_switched ENTRY(__secondary_switched) - ldr x0, [x21] // get secondary_data.stack + adr_l x5, vectors + msr vbar_el1, x5 + isb + + adr_l x0, secondary_data + ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack mov sp, x0 and x0, x0, #~(THREAD_SIZE - 1) msr sp_el0, x0 // save thread_info @@ -620,6 +723,29 @@ ENTRY(__secondary_switched) ENDPROC(__secondary_switched) /* + * The booting CPU updates the failed status @__early_cpu_boot_status, + * with MMU turned off. + * + * update_early_cpu_boot_status tmp, status + * - Corrupts tmp1, tmp2 + * - Writes 'status' to __early_cpu_boot_status and makes sure + * it is committed to memory. + */ + + .macro update_early_cpu_boot_status status, tmp1, tmp2 + mov \tmp2, #\status + str_l \tmp2, __early_cpu_boot_status, \tmp1 + dmb sy + dc ivac, \tmp1 // Invalidate potentially stale cache line + .endm + + .pushsection .data..cacheline_aligned + .align L1_CACHE_SHIFT +ENTRY(__early_cpu_boot_status) + .long 0 + .popsection + +/* * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. @@ -632,12 +758,12 @@ ENDPROC(__secondary_switched) */ .section ".idmap.text", "ax" __enable_mmu: + mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - ldr x5, =vectors - msr vbar_el1, x5 + update_early_cpu_boot_status 0, x1, x2 msr ttbr0_el1, x25 // load TTBR0 msr ttbr1_el1, x26 // load TTBR1 isb @@ -651,10 +777,33 @@ __enable_mmu: ic iallu dsb nsh isb +#ifdef CONFIG_RANDOMIZE_BASE + mov x19, x0 // preserve new SCTLR_EL1 value + blr x27 + + /* + * If we return here, we have a KASLR displacement in x23 which we need + * to take into account by discarding the current kernel mapping and + * creating a new one. 
+ */ + msr sctlr_el1, x18 // disable the MMU + isb + bl __create_page_tables // recreate kernel mapping + + msr sctlr_el1, x19 // re-enable the MMU + isb + ic ialluis // flush instructions fetched + isb // via old mapping + add x27, x27, x23 // relocated __mmap_switched +#endif br x27 ENDPROC(__enable_mmu) __no_granule_support: + /* Indicate that this CPU can't boot and is stuck in the kernel */ + update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2 +1: wfe - b __no_granule_support + wfi + b 1b ENDPROC(__no_granule_support) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 352f7ab..5e360ce 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -26,31 +26,40 @@ * There aren't any ELF relocations we can use to endian-swap values known only * at link time (e.g. the subtraction of two symbol addresses), so we must get * the linker to endian-swap certain values before emitting them. + * + * Note that, in order for this to work when building the ELF64 PIE executable + * (for KASLR), these values should not be referenced via R_AARCH64_ABS64 + * relocations, since these are fixed up at runtime rather than at build time + * when PIE is in effect. So we need to split them up in 32-bit high and low + * words. */ #ifdef CONFIG_CPU_BIG_ENDIAN -#define DATA_LE64(data) \ - ((((data) & 0x00000000000000ff) << 56) | \ - (((data) & 0x000000000000ff00) << 40) | \ - (((data) & 0x0000000000ff0000) << 24) | \ - (((data) & 0x00000000ff000000) << 8) | \ - (((data) & 0x000000ff00000000) >> 8) | \ - (((data) & 0x0000ff0000000000) >> 24) | \ - (((data) & 0x00ff000000000000) >> 40) | \ - (((data) & 0xff00000000000000) >> 56)) +#define DATA_LE32(data) \ + ((((data) & 0x000000ff) << 24) | \ + (((data) & 0x0000ff00) << 8) | \ + (((data) & 0x00ff0000) >> 8) | \ + (((data) & 0xff000000) >> 24)) #else -#define DATA_LE64(data) ((data) & 0xffffffffffffffff) +#define DATA_LE32(data) ((data) & 0xffffffff) #endif +#define DEFINE_IMAGE_LE64(sym, data) \ + sym##_lo32 = DATA_LE32((data) & 0xffffffff); \ + sym##_hi32 = DATA_LE32((data) >> 32) + #ifdef CONFIG_CPU_BIG_ENDIAN -#define __HEAD_FLAG_BE 1 +#define __HEAD_FLAG_BE 1 #else -#define __HEAD_FLAG_BE 0 +#define __HEAD_FLAG_BE 0 #endif -#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) +#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) + +#define __HEAD_FLAG_PHYS_BASE 1 -#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ - (__HEAD_FLAG_PAGE_SIZE << 1)) +#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ + (__HEAD_FLAG_PAGE_SIZE << 1) | \ + (__HEAD_FLAG_PHYS_BASE << 3)) /* * These will output as part of the Image header, which should be little-endian @@ -58,9 +67,9 @@ * endian swapped in head.S, all are done here for consistency. */ #define HEAD_SYMBOLS \ - _kernel_size_le = DATA_LE64(_end - _text); \ - _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ - _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); + DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ + DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ + DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #ifdef CONFIG_EFI diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c new file mode 100644 index 0000000..5829839 --- /dev/null +++ b/arch/arm64/kernel/kaslr.c @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
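(A quick worked example of the DEFINE_IMAGE_LE64() change above, assuming a little-endian kernel with 4 KB pages; other configurations change the numbers.)

/*
 * __HEAD_FLAG_BE        = 0                       (little-endian)
 * __HEAD_FLAG_PAGE_SIZE = (12 - 10) / 2           = 1
 * __HEAD_FLAG_PHYS_BASE = 1
 * __HEAD_FLAGS          = (0 << 0) | (1 << 1) | (1 << 3) = 0xa
 *
 * DEFINE_IMAGE_LE64(_kernel_flags_le, 0xa) then yields
 *   _kernel_flags_le_lo32 = 0xa and _kernel_flags_le_hi32 = 0,
 * and the le64sym lines in head.S presumably emit those two 32-bit words in
 * place of a single 64-bit quantity, so no R_AARCH64_ABS64 relocation is
 * needed for the image header fields when the kernel is linked as a PIE.
 */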
+ */ + +#include <linux/crc32.h> +#include <linux/init.h> +#include <linux/libfdt.h> +#include <linux/mm_types.h> +#include <linux/sched.h> +#include <linux/types.h> + +#include <asm/fixmap.h> +#include <asm/kernel-pgtable.h> +#include <asm/memory.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/sections.h> + +u64 __read_mostly module_alloc_base; +u16 __initdata memstart_offset_seed; + +static __init u64 get_kaslr_seed(void *fdt) +{ + int node, len; + u64 *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +static __init const u8 *get_cmdline(void *fdt) +{ + static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + int node; + const u8 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + return prop; + } +out: + return default_cmdline; +} + +extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, + pgprot_t prot); + +/* + * This routine will be executed with the kernel mapped at its default virtual + * address, and if it returns successfully, the kernel will be remapped, and + * start_kernel() will be executed from a randomized virtual offset. The + * relocation will result in all absolute references (e.g., static variables + * containing function pointers) to be reinitialized, and zero-initialized + * .bss variables will be reset to 0. + */ +u64 __init kaslr_early_init(u64 dt_phys) +{ + void *fdt; + u64 seed, offset, mask, module_range; + const u8 *cmdline, *str; + int size; + + /* + * Set a reasonable default for module_alloc_base in case + * we end up running with module randomization disabled. + */ + module_alloc_base = (u64)_etext - MODULES_VSIZE; + + /* + * Try to map the FDT early. If this fails, we simply bail, + * and proceed with KASLR disabled. We will make another + * attempt at mapping the FDT in setup_machine() + */ + early_fixmap_init(); + fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + if (!fdt) + return 0; + + /* + * Retrieve (and wipe) the seed from the FDT + */ + seed = get_kaslr_seed(fdt); + if (!seed) + return 0; + + /* + * Check if 'nokaslr' appears on the command line, and + * return 0 if that is the case. + */ + cmdline = get_cmdline(fdt); + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + return 0; + + /* + * OK, so we are proceeding with KASLR enabled. Calculate a suitable + * kernel image offset from the seed. Let's place the kernel in the + * lower half of the VMALLOC area (VA_BITS - 2). + * Even if we could randomize at page granularity for 16k and 64k pages, + * let's always round to 2 MB so we don't interfere with the ability to + * map using contiguous PTEs + */ + mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); + offset = seed & mask; + + /* use the top 16 bits to randomize the linear region */ + memstart_offset_seed = seed >> 48; + + /* + * The kernel Image should not extend across a 1GB/32MB/512MB alignment + * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this + * happens, increase the KASLR offset by the size of the kernel image. 
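(To make the seed/mask arithmetic in kaslr_early_init() concrete: the seed value below is hypothetical and VA_BITS == 48 is assumed for the example.)

/*
 * seed       = 0x0123456789abcdef            (from the "kaslr-seed" DT property)
 * mask       = ((1UL << 46) - 1) & ~(SZ_2M - 1)
 *            = 0x3fffffe00000                (2 MB-aligned offsets below 64 TB)
 * offset     = seed & mask = 0x56789a00000   (added to KIMAGE_VADDR by head.S)
 * seed >> 48 = 0x0123                        (memstart_offset_seed, used later
 *                                             to randomize the linear region)
 */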
+ */ + if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + offset = (offset + (u64)(_end - _text)) & mask; + + if (IS_ENABLED(CONFIG_KASAN)) + /* + * KASAN does not expect the module region to intersect the + * vmalloc region, since shadow memory is allocated for each + * module at load time, whereas the vmalloc region is shadowed + * by KASAN zero pages. So keep modules out of the vmalloc + * region if KASAN is enabled. + */ + return offset; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + /* + * Randomize the module region independently from the core + * kernel. This prevents modules from leaking any information + * about the address of the kernel itself, but results in + * branches between modules and the core kernel that are + * resolved via PLTs. (Branches between modules will be + * resolved normally.) + */ + module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE; + module_alloc_base = VMALLOC_START; + } else { + /* + * Randomize the module region by setting module_alloc_base to + * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, + * _stext) . This guarantees that the resulting region still + * covers [_stext, _etext], and that all relative branches can + * be resolved without veneers. + */ + module_range = MODULES_VSIZE - (u64)(_etext - _stext); + module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; + } + + /* use the lower 21 bits to randomize the base of the module region */ + module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; + module_alloc_base &= PAGE_MASK; + + return offset; +} diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index bcac81e..b67531a 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -292,8 +292,8 @@ static struct notifier_block kgdb_notifier = { }; /* - * kgdb_arch_init - Perform any architecture specific initalization. - * This function will handle the initalization of any architecture + * kgdb_arch_init - Perform any architecture specific initialization. + * This function will handle the initialization of any architecture * specific callbacks. */ int kgdb_arch_init(void) diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c new file mode 100644 index 0000000..1ce90d8 --- /dev/null +++ b/arch/arm64/kernel/module-plts.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sort.h> + +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... 
*/ + __le32 mov1; /* movk x16, #0x...., lsl #16 */ + __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 br; /* br x16 */ +}; + +u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, + Elf64_Sym *sym) +{ + struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr; + int i = mod->arch.plt_num_entries; + u64 val = sym->st_value + rela->r_addend; + + /* + * We only emit PLT entries against undefined (SHN_UNDEF) symbols, + * which are listed in the ELF symtab section, but without a type + * or a size. + * So, similar to how the module loader uses the Elf64_Sym::st_value + * field to store the resolved addresses of undefined symbols, let's + * borrow the Elf64_Sym::st_size field (whose value is never used by + * the module loader, even for symbols that are defined) to record + * the address of a symbol's associated PLT entry as we emit it for a + * zero addend relocation (which is the only kind we have to deal with + * in practice). This allows us to find duplicates without having to + * go through the table every time. + */ + if (rela->r_addend == 0 && sym->st_size != 0) { + BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]); + return sym->st_size; + } + + mod->arch.plt_num_entries++; + BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries); + + /* + * MOVK/MOVN/MOVZ opcode: + * +--------+------------+--------+-----------+-------------+---------+ + * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | + * +--------+------------+--------+-----------+-------------+---------+ + * + * Rd := 0x10 (x16) + * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) + * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) + * sf := 1 (64-bit variant) + */ + plt[i] = (struct plt_entry){ + cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), + cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), + cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), + cpu_to_le32(0xd61f0200) + }; + + if (rela->r_addend == 0) + sym->st_size = (u64)&plt[i]; + + return (u64)&plt[i]; +} + +#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) + +static int cmp_rela(const void *a, const void *b) +{ + const Elf64_Rela *x = a, *y = b; + int i; + + /* sort by type, symbol index and addend */ + i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info)); + if (i == 0) + i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info)); + if (i == 0) + i = cmp_3way(x->r_addend, y->r_addend); + return i; +} + +static bool duplicate_rel(const Elf64_Rela *rela, int num) +{ + /* + * Entries are sorted by type, symbol index and addend. That means + * that, if a duplicate entry exists, it must be in the preceding + * slot. + */ + return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0; +} + +static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) +{ + unsigned int ret = 0; + Elf64_Sym *s; + int i; + + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + /* + * We only have to consider branch targets that resolve + * to undefined symbols. This is not simply a heuristic, + * it is a fundamental limitation, since the PLT itself + * is part of the module, and needs to be within 128 MB + * as well, so modules can never grow beyond that limit. 
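(Why three moves suffice in the veneer built by module_emit_plt_entry() above: movn x16, #(~val & 0xffff) leaves x16 = 0xffffffffffff0000 | val[15:0], the two movk instructions then patch in val[31:16] and val[47:32], and bits [63:48] stay all-ones, which is exactly how kernel and module virtual addresses look with 48-bit or smaller VAs, so the final br x16 lands on the intended target.)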
+ */ + s = syms + ELF64_R_SYM(rela[i].r_info); + if (s->st_shndx != SHN_UNDEF) + break; + + /* + * Jump relocations with non-zero addends against + * undefined symbols are supported by the ELF spec, but + * do not occur in practice (e.g., 'jump n bytes past + * the entry point of undefined function symbol f'). + * So we need to support them, but there is no need to + * take them into consideration when trying to optimize + * this code. So let's only check for duplicates when + * the addend is zero: this allows us to record the PLT + * entry address in the symbol table itself, rather than + * having to search the list for duplicates each time we + * emit one. + */ + if (rela[i].r_addend != 0 || !duplicate_rel(rela, i)) + ret++; + break; + } + } + return ret; +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned long plt_max_entries = 0; + Elf64_Sym *syms = NULL; + int i; + + /* + * Find the empty .plt section so we can expand it to store the PLT + * entries. Record the symtab address as well. + */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0) + mod->arch.plt = sechdrs + i; + else if (sechdrs[i].sh_type == SHT_SYMTAB) + syms = (Elf64_Sym *)sechdrs[i].sh_addr; + } + + if (!mod->arch.plt) { + pr_err("%s: module PLT section missing\n", mod->name); + return -ENOEXEC; + } + if (!syms) { + pr_err("%s: module symtab section missing\n", mod->name); + return -ENOEXEC; + } + + for (i = 0; i < ehdr->e_shnum; i++) { + Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset; + int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela); + Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info; + + if (sechdrs[i].sh_type != SHT_RELA) + continue; + + /* ignore relocations that operate on non-exec sections */ + if (!(dstsec->sh_flags & SHF_EXECINSTR)) + continue; + + /* sort by type, symbol index and addend */ + sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); + + plt_max_entries += count_plts(syms, rels, numrels); + } + + mod->arch.plt->sh_type = SHT_NOBITS; + mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry); + mod->arch.plt_num_entries = 0; + mod->arch.plt_max_entries = plt_max_entries; + return 0; +} diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 93e9702..7f31698 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -34,10 +34,26 @@ void *module_alloc(unsigned long size) { void *p; - p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, + module_alloc_base + MODULES_VSIZE, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); + if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + !IS_ENABLED(CONFIG_KASAN)) + /* + * KASAN can only deal with module allocations being served + * from the reserved module region, since the remainder of + * the vmalloc region is already backed by zero shadow pages, + * and punching holes into it is non-trivial. Since the module + * region is not randomized when KASAN is enabled, it is even + * less likely that the module region gets exhausted, so we + * can simply omit this fallback in that case. 
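(The 128 MB limit cited in count_plts() above follows directly from the instruction encoding: B and BL carry a signed 26-bit word offset, i.e. a reach of plus or minus 2^25 instructions of 4 bytes = 2^27 bytes = 128 MB, which is why calls that overflow that range are redirected through the PLT entries emitted here.)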
+ */ + p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, + VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_module_alloc(p, size) < 0)) { vfree(p); return NULL; @@ -361,6 +377,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); + + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + ovf == -ERANGE) { + val = module_emit_plt_entry(me, &rel[i], sym); + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, + 26, AARCH64_INSN_IMM_26); + } break; default: diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds new file mode 100644 index 0000000..8949f6c --- /dev/null +++ b/arch/arm64/kernel/module.lds @@ -0,0 +1,3 @@ +SECTIONS { + .plt (NOLOAD) : { BYTE(0) } +} diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index b3d098b..c72de66 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -19,8 +19,6 @@ #include <linux/of_platform.h> #include <linux/slab.h> -#include <asm/pci-bridge.h> - /* * Called after each bus is probed, but before its children are examined */ diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f7ab14c..1b52269 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -20,6 +20,7 @@ */ #include <asm/irq_regs.h> +#include <asm/virt.h> #include <linux/of.h> #include <linux/perf/arm_pmu.h> @@ -691,9 +692,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, if (attr->exclude_idle) return -EPERM; + if (is_kernel_in_hyp_mode() && + attr->exclude_kernel != attr->exclude_hv) + return -EINVAL; if (attr->exclude_user) config_base |= ARMV8_EXCLUDE_EL0; - if (attr->exclude_kernel) + if (!is_kernel_in_hyp_mode() && attr->exclude_kernel) config_base |= ARMV8_EXCLUDE_EL1; if (!attr->exclude_hv) config_base |= ARMV8_INCLUDE_EL2; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 88d742b..8062482 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -46,6 +46,7 @@ #include <linux/notifier.h> #include <trace/events/power.h> +#include <asm/alternative.h> #include <asm/compat.h> #include <asm/cacheflush.h> #include <asm/fpsimd.h> @@ -280,6 +281,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } else { memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; + if (IS_ENABLED(CONFIG_ARM64_UAO) && + cpus_have_cap(ARM64_HAS_UAO)) + childregs->pstate |= PSR_UAO_BIT; p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } @@ -308,6 +312,17 @@ static void tls_thread_switch(struct task_struct *next) : : "r" (tpidr), "r" (tpidrro)); } +/* Restore the UAO state depending on next's addr_limit */ +static void uao_thread_switch(struct task_struct *next) +{ + if (IS_ENABLED(CONFIG_ARM64_UAO)) { + if (task_thread_info(next)->addr_limit == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO)); + } +} + /* * Thread switching. 
*/ @@ -320,6 +335,7 @@ struct task_struct *__switch_to(struct task_struct *prev, tls_thread_switch(next); hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); + uao_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index ff7f132..3f6cd5c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -500,7 +500,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - if (!valid_user_regs(&newregs)) + if (!valid_user_regs(&newregs, target)) return -EINVAL; task_pt_regs(target)->user_regs = newregs; @@ -770,7 +770,7 @@ static int compat_gpr_set(struct task_struct *target, } - if (valid_user_regs(&newregs.user_regs)) + if (valid_user_regs(&newregs.user_regs, target)) *task_pt_regs(target) = newregs; else ret = -EINVAL; @@ -1272,3 +1272,79 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); } + +/* + * Bits which are always architecturally RES0 per ARM DDI 0487A.h + * Userspace cannot use these until they have an architectural meaning. + * We also reserve IL for the kernel; SS is handled dynamically. + */ +#define SPSR_EL1_AARCH64_RES0_BITS \ + (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \ + GENMASK_ULL(5, 5)) +#define SPSR_EL1_AARCH32_RES0_BITS \ + (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20)) + +static int valid_compat_regs(struct user_pt_regs *regs) +{ + regs->pstate &= ~SPSR_EL1_AARCH32_RES0_BITS; + + if (!system_supports_mixed_endian_el0()) { + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + regs->pstate |= COMPAT_PSR_E_BIT; + else + regs->pstate &= ~COMPAT_PSR_E_BIT; + } + + if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) && + (regs->pstate & COMPAT_PSR_A_BIT) == 0 && + (regs->pstate & COMPAT_PSR_I_BIT) == 0 && + (regs->pstate & COMPAT_PSR_F_BIT) == 0) { + return 1; + } + + /* + * Force PSR to a valid 32-bit EL0t, preserving the same bits as + * arch/arm. + */ + regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT | + COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT | + COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK | + COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT | + COMPAT_PSR_T_BIT; + regs->pstate |= PSR_MODE32_BIT; + + return 0; +} + +static int valid_native_regs(struct user_pt_regs *regs) +{ + regs->pstate &= ~SPSR_EL1_AARCH64_RES0_BITS; + + if (user_mode(regs) && !(regs->pstate & PSR_MODE32_BIT) && + (regs->pstate & PSR_D_BIT) == 0 && + (regs->pstate & PSR_A_BIT) == 0 && + (regs->pstate & PSR_I_BIT) == 0 && + (regs->pstate & PSR_F_BIT) == 0) { + return 1; + } + + /* Force PSR to a valid 64-bit EL0t */ + regs->pstate &= PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT; + + return 0; +} + +/* + * Are the current registers suitable for user mode? 
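(For reference, GENMASK_ULL(h, l) is the 64-bit mask with bits l..h set, so SPSR_EL1_AARCH64_RES0_BITS above works out to 0xffffffff00000000 | 0x0fc00000 | 0x001ffc00 | 0x20 = 0xffffffff0fdffc20; those are the bits silently cleared before the EL0 PSTATE checks in valid_native_regs().)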
(used to maintain + * security in signal handlers) + */ +int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task) +{ + if (!test_tsk_thread_flag(task, TIF_SINGLESTEP)) + regs->pstate &= ~DBG_SPSR_SS; + + if (is_compat_thread(task_thread_info(task))) + return valid_compat_regs(regs); + else + return valid_native_regs(regs); +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 8119479..9dc6776 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -62,6 +62,7 @@ #include <asm/memblock.h> #include <asm/efi.h> #include <asm/xen/hypervisor.h> +#include <asm/mmu_context.h> phys_addr_t __fdt_pointer __initdata; @@ -73,13 +74,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -210,7 +211,7 @@ static void __init request_standard_resources(void) res->name = "System RAM"; res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); @@ -313,6 +314,12 @@ void __init setup_arch(char **cmdline_p) */ local_async_enable(); + /* + * TTBR0 is only used for the identity mapping at this stage. Make it + * point to zero page to avoid speculatively fetching new entries. + */ + cpu_uninstall_idmap(); + efi_init(); arm64_memblock_init(); @@ -381,3 +388,32 @@ static int __init topology_init(void) return 0; } subsys_initcall(topology_init); + +/* + * Dump out kernel offset information on panic. + */ +static int dump_kernel_offset(struct notifier_block *self, unsigned long v, + void *p) +{ + u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { + pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", + kaslr_offset, KIMAGE_VADDR); + } else { + pr_emerg("Kernel Offset: disabled\n"); + } + return 0; +} + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e18c48c..a8eafdb 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -115,7 +115,7 @@ static int restore_sigframe(struct pt_regs *regs, */ regs->syscallno = ~0UL; - err |= !valid_user_regs(&regs->user_regs); + err |= !valid_user_regs(&regs->user_regs, current); if (err == 0) { struct fpsimd_context *fpsimd_ctx = @@ -307,7 +307,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Check that the resulting registers are actually sane.
*/ - ret |= !valid_user_regs(&regs->user_regs); + ret |= !valid_user_regs(&regs->user_regs, current); /* * Fast forward the stepping logic so we step into the signal diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 71ef6dc..b7063de 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -166,7 +166,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) #ifdef BUS_MCEERR_AO /* * Other callers might not initialize the si_lsb field, - * so check explicitely for the right codes here. + * so check explicitly for the right codes here. */ if (from->si_signo == SIGBUS && (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) @@ -356,7 +356,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, */ regs->syscallno = ~0UL; - err |= !valid_user_regs(&regs->user_regs); + err |= !valid_user_regs(&regs->user_regs, current); aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; if (err == 0) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index e33fe33..fd10eb6 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -145,6 +145,10 @@ ENTRY(cpu_resume_mmu) ENDPROC(cpu_resume_mmu) .popsection cpu_resume_after_mmu: +#ifdef CONFIG_KASAN + mov x0, sp + bl kasan_unpoison_remaining_stack +#endif mov x0, #0 // return zero on success ldp x19, x20, [sp, #16] ldp x21, x22, [sp, #32] diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b1adc51..b2d5f4e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -63,6 +63,8 @@ * where to place its SVC stack */ struct secondary_data secondary_data; +/* Number of CPUs which aren't online, but looping in kernel text. */ +int cpus_stuck_in_kernel; enum ipi_msg_type { IPI_RESCHEDULE, @@ -70,8 +72,19 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_TIMER, IPI_IRQ_WORK, + IPI_WAKEUP }; +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_kill(unsigned int cpu); +#else +static inline int op_cpu_kill(unsigned int cpu) +{ + return -ENOSYS; +} +#endif + + /* * Boot a secondary CPU, and assign it the specified idle task. This also gives us the initial stack to use for this CPU. @@ -89,12 +102,14 @@ static DECLARE_COMPLETION(cpu_running); int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; + long status; /* * We need to tell the secondary core where to find its stack and the * page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; + update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); /* @@ -118,6 +133,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) } secondary_data.stack = NULL; + status = READ_ONCE(secondary_data.status); + if (ret && status) { + + if (status == CPU_MMU_OFF) + status = READ_ONCE(__early_cpu_boot_status); + + switch (status) { + default: + pr_err("CPU%u: failed in unknown state : 0x%lx\n", + cpu, status); + break; + case CPU_KILL_ME: + if (!op_cpu_kill(cpu)) { + pr_crit("CPU%u: died during early boot\n", cpu); + break; + } + /* Fall through */ + pr_crit("CPU%u: may not have shut down cleanly\n", cpu); + case CPU_STUCK_IN_KERNEL: + pr_crit("CPU%u: is stuck in kernel\n", cpu); + cpus_stuck_in_kernel++; + break; + case CPU_PANIC_KERNEL: + panic("CPU%u detected unsupported configuration\n", cpu); + } + } return ret; } @@ -149,9 +190,7 @@ asmlinkage void secondary_start_kernel(void) * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries.
*/ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + cpu_uninstall_idmap(); preempt_disable(); trace_hardirqs_off(); @@ -185,6 +224,9 @@ asmlinkage void secondary_start_kernel(void) */ pr_info("CPU%u: Booted secondary processor [%08x]\n", cpu, read_cpuid_id()); + update_cpu_boot_status(CPU_BOOT_SUCCESS); + /* Make sure the status update is visible before we complete */ + smp_wmb(); set_cpu_online(cpu, true); complete(&cpu_running); @@ -195,7 +237,7 @@ asmlinkage void secondary_start_kernel(void) /* * OK, it's off to the idle thread for us */ - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } #ifdef CONFIG_HOTPLUG_CPU @@ -313,6 +355,30 @@ void cpu_die(void) } #endif +/* + * Kill the calling secondary CPU, early in bringup before it is turned + * online. + */ +void cpu_die_early(void) +{ + int cpu = smp_processor_id(); + + pr_crit("CPU%d: will not boot\n", cpu); + + /* Mark this CPU absent */ + set_cpu_present(cpu, 0); + +#ifdef CONFIG_HOTPLUG_CPU + update_cpu_boot_status(CPU_KILL_ME); + /* Check if we can park ourselves */ + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die) + cpu_ops[cpu]->cpu_die(cpu); +#endif + update_cpu_boot_status(CPU_STUCK_IN_KERNEL); + + cpu_park_loop(); +} + static void __init hyp_mode_check(void) { if (is_hyp_mode_available()) @@ -445,6 +511,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) /* map the logical cpu id to cpu MPIDR */ cpu_logical_map(cpu_count) = hwid; + /* + * Set-up the ACPI parking protocol cpu entries + * while initializing the cpu_logical_map to + * avoid parsing MADT entries multiple times for + * nothing (ie a valid cpu_logical_map entry should + * contain a valid parking protocol data set to + * initialize the cpu if the parking protocol is + * the only available enable method). 
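(cpu_uninstall_idmap() itself is not shown in this section; judging by the sequence it replaces in secondary_start_kernel() and cpu_suspend() above, it is expected to look roughly like the sketch below. The real helper lives in asm/mmu_context.h and may differ in detail.)

static inline void cpu_uninstall_idmap_sketch(void)
{
        struct mm_struct *mm = current->active_mm;

        cpu_set_reserved_ttbr0();       /* point TTBR0 at the zero page       */
        local_flush_tlb_all();          /* drop any stale idmap TLB entries   */
        cpu_set_default_tcr_t0sz();     /* restore the default TTBR0 VA size  */

        if (mm != &init_mm)             /* re-install the caller's mappings   */
                cpu_switch_mm(mm->pgd, mm);
}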
+ */ + acpi_set_mailbox_entry(cpu_count, processor); + cpu_count++; } @@ -627,6 +704,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), + S(IPI_WAKEUP, "CPU wake-up interrupts"), }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) @@ -670,6 +748,13 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_WAKEUP); +} +#endif + #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { @@ -747,6 +832,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL + case IPI_WAKEUP: + WARN_ONCE(!acpi_parking_protocol_valid(cpu), + "CPU%u: Wake-up IPI outside the ACPI parking protocol\n", + cpu); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1095aa4..6605539 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - struct mm_struct *mm = current->active_mm; int ret; unsigned long flags; @@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) ret = __cpu_suspend_enter(arg, fn); if (ret == 0) { /* - * We are resuming from reset with TTBR0_EL1 set to the - * idmap to enable the MMU; set the TTBR0 to the reserved - * page tables to prevent speculative TLB allocations, flush - * the local tlb and set the default tcr_el1.t0sz so that - * the TTBR0 address space set-up is properly restored. - * If the current active_mm != &init_mm we entered cpu_suspend - * with mappings in TTBR0 that must be restored, so we switch - * them back to complete the address space configuration - * restoration before returning. + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); - - if (mm != &init_mm) - cpu_switch_mm(mm->pgd, mm); + cpu_uninstall_idmap(); /* * Restore per-cpu offset before any kernel diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S index 60c1db5..82379a7 100644 --- a/arch/arm64/kernel/vdso/vdso.S +++ b/arch/arm64/kernel/vdso/vdso.S @@ -21,9 +21,8 @@ #include <linux/const.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .rodata .balign PAGE_SIZE vdso_start: .incbin "arch/arm64/kernel/vdso/vdso.so" diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index e3928f5..4c56e7a 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -87,15 +87,16 @@ SECTIONS EXIT_CALL *(.discard) *(.discard.*) + *(.interp .dynamic) } - . = PAGE_OFFSET + TEXT_OFFSET; + . 
= KIMAGE_VADDR + TEXT_OFFSET; .head.text : { _text = .; HEAD_TEXT } - ALIGN_DEBUG_RO + ALIGN_DEBUG_RO_MIN(PAGE_SIZE) .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -113,13 +114,13 @@ SECTIONS *(.got) /* Global offset table */ } - RO_DATA(PAGE_SIZE) - EXCEPTION_TABLE(8) + ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + RO_DATA(PAGE_SIZE) /* everything from this point to */ + EXCEPTION_TABLE(8) /* _etext will be marked RO NX */ NOTES - ALIGN_DEBUG_RO - _etext = .; /* End of text and rodata section */ ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + _etext = .; /* End of text and rodata section */ __init_begin = .; INIT_TEXT_SECTION(8) @@ -150,6 +151,21 @@ SECTIONS .altinstr_replacement : { *(.altinstr_replacement) } + .rela : ALIGN(8) { + __reloc_start = .; + *(.rela .rela*) + __reloc_end = .; + } + .dynsym : ALIGN(8) { + __dynsym_start = .; + *(.dynsym) + } + .dynstr : { + *(.dynstr) + } + .hash : { + *(.hash) + } . = ALIGN(PAGE_SIZE); __init_end = .; @@ -187,4 +203,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a5272c0..de7450d 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -36,6 +36,7 @@ config KVM select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD select KVM_ARM_VGIC_V3 + select KVM_ARM_PMU if HW_PERF_EVENTS ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple @@ -48,6 +49,12 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. +config KVM_ARM_PMU + bool + ---help--- + Adds support for a virtual Performance Monitoring Unit (PMU) in + virtual machines. 
+ source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index caee9ee..122cff4 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o +kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 9e54ad7..32fad75 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -380,3 +380,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, } return 0; } + +int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_PMU_V3_CTRL: + ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_PMU_V3_CTRL: + ret = kvm_arm_pmu_v3_get_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_PMU_V3_CTRL: + ret = kvm_arm_pmu_v3_has_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index d073b5a..7d8747c 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -87,26 +87,13 @@ __do_hyp_init: #endif /* * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in - * TCR_EL2 and VTCR_EL2. + * TCR_EL2. */ mrs x5, ID_AA64MMFR0_EL1 bfi x4, x5, #16, #3 msr tcr_el2, x4 - ldr x4, =VTCR_EL2_FLAGS - bfi x4, x5, #16, #3 - /* - * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in - * VTCR_EL2. - */ - mrs x5, ID_AA64MMFR1_EL1 - ubfx x5, x5, #5, #1 - lsl x5, x5, #VTCR_EL2_VS - orr x4, x4, x5 - - msr vtcr_el2, x4 - mrs x4, mair_el1 msr mair_el2, x4 isb diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 0ccdcbb..48f19a3 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -17,10 +17,12 @@ #include <linux/linkage.h> +#include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/cpufeature.h> /* - * u64 kvm_call_hyp(void *hypfn, ...); + * u64 __kvm_call_hyp(void *hypfn, ...); * * This is not really a variadic function in the classic C-way and care must * be taken when calling this to ensure parameters are passed in registers @@ -37,7 +39,12 @@ * used to implement __hyp_get_vectors in the same way as in * arch/arm64/kernel/hyp_stub.S. 
*/ -ENTRY(kvm_call_hyp) +ENTRY(__kvm_call_hyp) +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN hvc #0 ret -ENDPROC(kvm_call_hyp) +alternative_else + b __vhe_hyp_call + nop +alternative_endif +ENDPROC(__kvm_call_hyp) diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 826032b..b6a8fc5 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -2,9 +2,12 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o +KVM=../../../../virt/kvm + +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o + obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o @@ -12,3 +15,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o +obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index c9c1e97..33342a7 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -18,10 +18,9 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> +#include <asm/debug-monitors.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> - -#include "hyp.h" +#include <asm/kvm_hyp.h> #define read_debug(r,n) read_sysreg(r##n##_el1) #define write_debug(v,r,n) write_sysreg(v, r##n##_el1) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index fd0fbe9..ce9e5e5 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -130,9 +130,15 @@ ENDPROC(__guest_exit) ENTRY(__fpsimd_guest_restore) stp x4, lr, [sp, #-16]! +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs x2, cptr_el2 bic x2, x2, #CPTR_EL2_TFP msr cptr_el2, x2 +alternative_else + mrs x2, cpacr_el1 + orr x2, x2, #CPACR_EL1_FPEN + msr cpacr_el1, x2 +alternative_endif isb mrs x3, tpidr_el2 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 93e8d983..3488894 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -19,7 +19,6 @@ #include <asm/alternative.h> #include <asm/assembler.h> -#include <asm/asm-offsets.h> #include <asm/cpufeature.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> @@ -38,10 +37,42 @@ ldp x0, x1, [sp], #16 .endm +.macro do_el2_call + /* + * Shuffle the parameters before calling the function + * pointed to in x0. Assumes parameters in x[1,2,3]. + */ + sub sp, sp, #16 + str lr, [sp] + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr + ldr lr, [sp] + add sp, sp, #16 +.endm + +ENTRY(__vhe_hyp_call) + do_el2_call + /* + * We used to rely on having an exception return to get + * an implicit isb. In the E2H case, we don't have it anymore. + * rather than changing all the leaf functions, just do it here + * before returning to the rest of the kernel. + */ + isb + ret +ENDPROC(__vhe_hyp_call) + el1_sync: // Guest trapped into EL2 save_x0_to_x3 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs x1, esr_el2 +alternative_else + mrs x1, esr_el1 +alternative_endif lsr x2, x1, #ESR_ELx_EC_SHIFT cmp x2, #ESR_ELx_EC_HVC64 @@ -58,19 +89,13 @@ el1_sync: // Guest trapped into EL2 mrs x0, vbar_el2 b 2f -1: stp lr, xzr, [sp, #-16]! - +1: /* - * Compute the function address in EL2, and shuffle the parameters. 
+ * Perform the EL2 call */ kern_hyp_va x0 - mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr + do_el2_call - ldp lr, xzr, [sp], #16 2: eret el1_trap: @@ -83,72 +108,10 @@ el1_trap: cmp x2, #ESR_ELx_EC_FP_ASIMD b.eq __fpsimd_guest_restore - cmp x2, #ESR_ELx_EC_DABT_LOW - mov x0, #ESR_ELx_EC_IABT_LOW - ccmp x2, x0, #4, ne - b.ne 1f // Not an abort we care about - - /* This is an abort. Check for permission fault */ -alternative_if_not ARM64_WORKAROUND_834220 - and x2, x1, #ESR_ELx_FSC_TYPE - cmp x2, #FSC_PERM - b.ne 1f // Not a permission fault -alternative_else - nop // Use the permission fault path to - nop // check for a valid S1 translation, - nop // regardless of the ESR value. -alternative_endif - - /* - * Check for Stage-1 page table walk, which is guaranteed - * to give a valid HPFAR_EL2. - */ - tbnz x1, #7, 1f // S1PTW is set - - /* Preserve PAR_EL1 */ - mrs x3, par_el1 - stp x3, xzr, [sp, #-16]! - - /* - * Permission fault, HPFAR_EL2 is invalid. - * Resolve the IPA the hard way using the guest VA. - * Stage-1 translation already validated the memory access rights. - * As such, we can use the EL1 translation regime, and don't have - * to distinguish between EL0 and EL1 access. - */ - mrs x2, far_el2 - at s1e1r, x2 - isb - - /* Read result */ - mrs x3, par_el1 - ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack - msr par_el1, x0 - tbnz x3, #0, 3f // Bail out if we failed the translation - ubfx x3, x3, #12, #36 // Extract IPA - lsl x3, x3, #4 // and present it like HPFAR - b 2f - -1: mrs x3, hpfar_el2 - mrs x2, far_el2 - -2: mrs x0, tpidr_el2 - str w1, [x0, #VCPU_ESR_EL2] - str x2, [x0, #VCPU_FAR_EL2] - str x3, [x0, #VCPU_HPFAR_EL2] - + mrs x0, tpidr_el2 mov x1, #ARM_EXCEPTION_TRAP b __guest_exit - /* - * Translation failed. Just return to the guest and - * let it fault again. Another CPU is probably playing - * behind our back. - */ -3: restore_x0_to_x3 - - eret - el1_irq: save_x0_to_x3 mrs x0, tpidr_el2 diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h deleted file mode 100644 index fb27517..0000000 --- a/arch/arm64/kvm/hyp/hyp.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef __ARM64_KVM_HYP_H__ -#define __ARM64_KVM_HYP_H__ - -#include <linux/compiler.h> -#include <linux/kvm_host.h> -#include <asm/kvm_mmu.h> -#include <asm/sysreg.h> - -#define __hyp_text __section(.hyp.text) notrace - -#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK) -#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ - + PAGE_OFFSET) - -/** - * hyp_alternate_select - Generates patchable code sequences that are - * used to switch between two implementations of a function, depending - * on the availability of a feature. 
- * - * @fname: a symbol name that will be defined as a function returning a - * function pointer whose type will match @orig and @alt - * @orig: A pointer to the default function, as returned by @fname when - * @cond doesn't hold - * @alt: A pointer to the alternate function, as returned by @fname - * when @cond holds - * @cond: a CPU feature (as described in asm/cpufeature.h) - */ -#define hyp_alternate_select(fname, orig, alt, cond) \ -typeof(orig) * __hyp_text fname(void) \ -{ \ - typeof(alt) *val = orig; \ - asm volatile(ALTERNATIVE("nop \n", \ - "mov %0, %1 \n", \ - cond) \ - : "+r" (val) : "r" (alt)); \ - return val; \ -} - -void __vgic_v2_save_state(struct kvm_vcpu *vcpu); -void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); - -void __vgic_v3_save_state(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); - -void __timer_save_state(struct kvm_vcpu *vcpu); -void __timer_restore_state(struct kvm_vcpu *vcpu); - -void __sysreg_save_state(struct kvm_cpu_context *ctxt); -void __sysreg_restore_state(struct kvm_cpu_context *ctxt); -void __sysreg32_save_state(struct kvm_vcpu *vcpu); -void __sysreg32_restore_state(struct kvm_vcpu *vcpu); - -void __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt); -void __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt); -void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); -void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); - -void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); -void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); -static inline bool __fpsimd_enabled(void) -{ - return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); -} - -u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); -void __noreturn __hyp_do_panic(unsigned long, ...); - -#endif /* __ARM64_KVM_HYP_H__ */ - diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c new file mode 100644 index 0000000..bfc54fd --- /dev/null +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2016 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/types.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> + +void __hyp_text __init_stage2_translation(void) +{ + u64 val = VTCR_EL2_FLAGS; + u64 tmp; + + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS + * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while + * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... + */ + val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16; + + /* + * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS + * bit in VTCR_EL2. + */ + tmp = (read_sysreg(id_aa64mmfr1_el1) >> 4) & 0xf; + val |= (tmp == 2) ? 
VTCR_EL2_VS : 0; + + write_sysreg(val, vtcr_el2); +} diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index f0e7bdf..437cfad 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -15,7 +15,53 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "hyp.h" +#include <linux/types.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> + +static bool __hyp_text __fpsimd_enabled_nvhe(void) +{ + return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); +} + +static bool __hyp_text __fpsimd_enabled_vhe(void) +{ + return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); +} + +static hyp_alternate_select(__fpsimd_is_enabled, + __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +bool __hyp_text __fpsimd_enabled(void) +{ + return __fpsimd_is_enabled()(); +} + +static void __hyp_text __activate_traps_vhe(void) +{ + u64 val; + + val = read_sysreg(cpacr_el1); + val |= CPACR_EL1_TTA; + val &= ~CPACR_EL1_FPEN; + write_sysreg(val, cpacr_el1); + + write_sysreg(__kvm_hyp_vector, vbar_el1); +} + +static void __hyp_text __activate_traps_nvhe(void) +{ + u64 val; + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TFP; + write_sysreg(val, cptr_el2); +} + +static hyp_alternate_select(__activate_traps_arch, + __activate_traps_nvhe, __activate_traps_vhe, + ARM64_HAS_VIRT_HOST_EXTN); static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) { @@ -36,20 +82,37 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(val, hcr_el2); /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); + /* Make sure we trap PMU access from EL0 to EL2 */ + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + __activate_traps_arch()(); +} - val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TFP; - write_sysreg(val, cptr_el2); +static void __hyp_text __deactivate_traps_vhe(void) +{ + extern char vectors[]; /* kernel exception vectors */ - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + write_sysreg(CPACR_EL1_FPEN, cpacr_el1); + write_sysreg(vectors, vbar_el1); } -static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +static void __hyp_text __deactivate_traps_nvhe(void) { write_sysreg(HCR_RW, hcr_el2); + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); +} + +static hyp_alternate_select(__deactivate_traps_arch, + __deactivate_traps_nvhe, __deactivate_traps_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +{ + __deactivate_traps_arch()(); write_sysreg(0, hstr_el2); write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); - write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); + write_sysreg(0, pmuserenr_el0); } static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) @@ -89,6 +152,86 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) __vgic_call_restore_state()(vcpu); } +static bool __hyp_text __true_value(void) +{ + return true; +} + +static bool __hyp_text __false_value(void) +{ + return false; +} + +static hyp_alternate_select(__check_arm_834220, + __false_value, __true_value, + ARM64_WORKAROUND_834220); + +static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. 
As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... + */ + par = read_sysreg(par_el1); + asm volatile("at s1e1r, %0" : : "r" (far)); + isb(); + + tmp = read_sysreg(par_el1); + write_sysreg(par, par_el1); + + if (unlikely(tmp & 1)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4; + return true; +} + +static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) +{ + u64 esr = read_sysreg_el2(esr); + u8 ec = esr >> ESR_ELx_EC_SHIFT; + u64 hpfar, far; + + vcpu->arch.fault.esr_el2 = esr; + + if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) + return true; + + far = read_sysreg_el2(far); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + vcpu->arch.fault.far_el2 = far; + vcpu->arch.fault.hpfar_el2 = hpfar; + return true; +} + static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; @@ -102,7 +245,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); guest_ctxt = &vcpu->arch.ctxt; - __sysreg_save_state(host_ctxt); + __sysreg_save_host_state(host_ctxt); __debug_cond_save_host_state(vcpu); __activate_traps(vcpu); @@ -116,16 +259,20 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) * to Cortex-A57 erratum #852523. */ __sysreg32_restore_state(vcpu); - __sysreg_restore_state(guest_ctxt); + __sysreg_restore_guest_state(guest_ctxt); __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); /* Jump in the fire! */ +again: exit_code = __guest_enter(vcpu, host_ctxt); /* And we're baaack! 
*/ + if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) + goto again; + fp_enabled = __fpsimd_enabled(); - __sysreg_save_state(guest_ctxt); + __sysreg_save_guest_state(guest_ctxt); __sysreg32_save_state(vcpu); __timer_save_state(vcpu); __vgic_save_state(vcpu); @@ -133,7 +280,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); __deactivate_vm(vcpu); - __sysreg_restore_state(host_ctxt); + __sysreg_restore_host_state(host_ctxt); if (fp_enabled) { __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); @@ -150,11 +297,34 @@ __alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; -void __hyp_text __noreturn __hyp_panic(void) +static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) { unsigned long str_va = (unsigned long)__hyp_panic_string; - u64 spsr = read_sysreg(spsr_el2); - u64 elr = read_sysreg(elr_el2); + + __hyp_do_panic(hyp_kern_va(str_va), + spsr, elr, + read_sysreg(esr_el2), read_sysreg_el2(far), + read_sysreg(hpfar_el2), par, + (void *)read_sysreg(tpidr_el2)); +} + +static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par) +{ + panic(__hyp_panic_string, + spsr, elr, + read_sysreg_el2(esr), read_sysreg_el2(far), + read_sysreg(hpfar_el2), par, + (void *)read_sysreg(tpidr_el2)); +} + +static hyp_alternate_select(__hyp_call_panic, + __hyp_call_panic_nvhe, __hyp_call_panic_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +void __hyp_text __noreturn __hyp_panic(void) +{ + u64 spsr = read_sysreg_el2(spsr); + u64 elr = read_sysreg_el2(elr); u64 par = read_sysreg(par_el1); if (read_sysreg(vttbr_el2)) { @@ -165,15 +335,11 @@ void __hyp_text __noreturn __hyp_panic(void) host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); __deactivate_traps(vcpu); __deactivate_vm(vcpu); - __sysreg_restore_state(host_ctxt); + __sysreg_restore_host_state(host_ctxt); } /* Call panic for real */ - __hyp_do_panic(hyp_kern_va(str_va), - spsr, elr, - read_sysreg(esr_el2), read_sysreg(far_el2), - read_sysreg(hpfar_el2), par, - (void *)read_sysreg(tpidr_el2)); + __hyp_call_panic()(spsr, elr, par); unreachable(); } diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 42563098..0f7c40e 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -19,75 +19,122 @@ #include <linux/kvm_host.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> +#include <asm/kvm_hyp.h> -#include "hyp.h" +/* Yes, this does nothing, on purpose */ +static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { } -/* ctxt is already in the HYP VA space */ -void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +/* + * Non-VHE: Both host and guest must save everything. + * + * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and + * guest must save everything. 
+ */ + +static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { - ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); - ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1); ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); - ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1); - ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1); - ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1); - ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1); - ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1); - ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1); ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); + ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); + ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); + ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); +} + +static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); + ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); + ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1); + ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr); + ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1); + ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far); + ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair); + ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl); ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); - ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); - ctxt->gp_regs.regs.pc = read_sysreg(elr_el2); - ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2); ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); + ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); +} + +static hyp_alternate_select(__sysreg_call_save_host_state, + __sysreg_save_state, __sysreg_do_nothing, + ARM64_HAS_VIRT_HOST_EXTN); + +void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt) +{ + __sysreg_call_save_host_state()(ctxt); + __sysreg_save_common_state(ctxt); +} + +void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_state(ctxt); + __sysreg_save_common_state(ctxt); } -void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) { - write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); - write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1); 
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); - write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1); - write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1); - write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1); - write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1); - write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1); - write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1); - write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1); - write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1); - write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1); - write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1); - write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); - write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); - write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); - - write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); - write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); - write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); - write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); - write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); + write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); + write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); + write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); +} + +static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); + write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr); + write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr); + write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0); + write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr); + write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr); + write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0); + write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1); + write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far); + write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair); + write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar); + write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr); + write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair); + write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl); + write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); + + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); + write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); + write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); +} + +static hyp_alternate_select(__sysreg_call_restore_host_state, + __sysreg_restore_state, __sysreg_do_nothing, + ARM64_HAS_VIRT_HOST_EXTN); + +void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt) +{ + __sysreg_call_restore_host_state()(ctxt); + __sysreg_restore_common_state(ctxt); +} + +void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_state(ctxt); + __sysreg_restore_common_state(ctxt); } void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c deleted file mode 100644 index 1051e5d..0000000 --- a/arch/arm64/kvm/hyp/timer-sr.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or 
modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <clocksource/arm_arch_timer.h> -#include <linux/compiler.h> -#include <linux/kvm_host.h> - -#include <asm/kvm_mmu.h> - -#include "hyp.h" - -/* vcpu is already in the HYP VA space */ -void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 val; - - if (kvm->arch.timer.enabled) { - timer->cntv_ctl = read_sysreg(cntv_ctl_el0); - timer->cntv_cval = read_sysreg(cntv_cval_el0); - } - - /* Disable the virtual timer */ - write_sysreg(0, cntv_ctl_el0); - - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); - - /* Clear cntvoff for the host */ - write_sysreg(0, cntvoff_el2); -} - -void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 val; - - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); - - if (kvm->arch.timer.enabled) { - write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); - write_sysreg(timer->cntv_cval, cntv_cval_el0); - isb(); - write_sysreg(timer->cntv_ctl, cntv_ctl_el0); - } -} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 2a7e0d8..be8177c 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -15,7 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "hyp.h" +#include <asm/kvm_hyp.h> static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c deleted file mode 100644 index e717612..0000000 --- a/arch/arm64/kvm/hyp/vgic-v2-sr.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <linux/compiler.h> -#include <linux/irqchip/arm-gic.h> -#include <linux/kvm_host.h> - -#include <asm/kvm_mmu.h> - -#include "hyp.h" - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - u32 eisr0, eisr1, elrsr0, elrsr1; - int i, nr_lr; - - if (!base) - return; - - nr_lr = vcpu->arch.vgic_cpu.nr_lr; - cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); - cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); - eisr0 = readl_relaxed(base + GICH_EISR0); - elrsr0 = readl_relaxed(base + GICH_ELRSR0); - if (unlikely(nr_lr > 32)) { - eisr1 = readl_relaxed(base + GICH_EISR1); - elrsr1 = readl_relaxed(base + GICH_ELRSR1); - } else { - eisr1 = elrsr1 = 0; - } -#ifdef CONFIG_CPU_BIG_ENDIAN - cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; - cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; -#else - cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; - cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; -#endif - cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - - writel_relaxed(0, base + GICH_HCR); - - for (i = 0; i < nr_lr; i++) - cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int i, nr_lr; - - if (!base) - return; - - writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); - writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); - writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); - - nr_lr = vcpu->arch.vgic_cpu.nr_lr; - for (i = 0; i < nr_lr; i++) - writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); -} diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 5dd2a26..fff7cd4 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -19,9 +19,7 @@ #include <linux/irqchip/arm-gic-v3.h> #include <linux/kvm_host.h> -#include <asm/kvm_mmu.h> - -#include "hyp.h" +#include <asm/kvm_hyp.h> #define vtr_to_max_lr_idx(v) ((v) & 0xf) #define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) @@ -39,12 +37,133 @@ asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ } while (0) -/* vcpu is already in the HYP VA space */ +static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) +{ + switch (lr & 0xf) { + case 0: + return read_gicreg(ICH_LR0_EL2); + case 1: + return read_gicreg(ICH_LR1_EL2); + case 2: + return read_gicreg(ICH_LR2_EL2); + case 3: + return read_gicreg(ICH_LR3_EL2); + case 4: + return read_gicreg(ICH_LR4_EL2); + case 5: + return read_gicreg(ICH_LR5_EL2); + case 6: + return read_gicreg(ICH_LR6_EL2); + case 7: + return read_gicreg(ICH_LR7_EL2); + case 8: + return read_gicreg(ICH_LR8_EL2); + case 9: + return read_gicreg(ICH_LR9_EL2); + case 10: + return read_gicreg(ICH_LR10_EL2); + case 11: + return read_gicreg(ICH_LR11_EL2); + case 12: + return read_gicreg(ICH_LR12_EL2); + case 13: + return read_gicreg(ICH_LR13_EL2); + case 14: + return read_gicreg(ICH_LR14_EL2); + case 15: + return read_gicreg(ICH_LR15_EL2); + } + + unreachable(); +} + +static void __hyp_text __gic_v3_set_lr(u64 val, int lr) +{ + switch (lr & 0xf) { + case 0: + 
write_gicreg(val, ICH_LR0_EL2); + break; + case 1: + write_gicreg(val, ICH_LR1_EL2); + break; + case 2: + write_gicreg(val, ICH_LR2_EL2); + break; + case 3: + write_gicreg(val, ICH_LR3_EL2); + break; + case 4: + write_gicreg(val, ICH_LR4_EL2); + break; + case 5: + write_gicreg(val, ICH_LR5_EL2); + break; + case 6: + write_gicreg(val, ICH_LR6_EL2); + break; + case 7: + write_gicreg(val, ICH_LR7_EL2); + break; + case 8: + write_gicreg(val, ICH_LR8_EL2); + break; + case 9: + write_gicreg(val, ICH_LR9_EL2); + break; + case 10: + write_gicreg(val, ICH_LR10_EL2); + break; + case 11: + write_gicreg(val, ICH_LR11_EL2); + break; + case 12: + write_gicreg(val, ICH_LR12_EL2); + break; + case 13: + write_gicreg(val, ICH_LR13_EL2); + break; + case 14: + write_gicreg(val, ICH_LR14_EL2); + break; + case 15: + write_gicreg(val, ICH_LR15_EL2); + break; + } +} + +static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + int i; + bool expect_mi; + + expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE); + + for (i = 0; i < nr_lr; i++) { + if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) + continue; + + expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) && + (cpu_if->vgic_lr[i] & ICH_LR_EOI)); + } + + if (expect_mi) { + cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); + + if (cpu_if->vgic_misr & ICH_MISR_EOI) + cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); + else + cpu_if->vgic_eisr = 0; + } else { + cpu_if->vgic_misr = 0; + cpu_if->vgic_eisr = 0; + } +} + void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 val; - u32 max_lr_idx, nr_pri_bits; /* * Make sure stores to the GIC via the memory mapped interface @@ -53,68 +172,66 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) dsb(st); cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); - cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); - cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); - cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); - write_gicreg(0, ICH_HCR_EL2); - val = read_gicreg(ICH_VTR_EL2); - max_lr_idx = vtr_to_max_lr_idx(val); - nr_pri_bits = vtr_to_nr_pri_bits(val); + if (vcpu->arch.vgic_cpu.live_lrs) { + int i; + u32 max_lr_idx, nr_pri_bits; - switch (max_lr_idx) { - case 15: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2); - case 14: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2); - case 13: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2); - case 12: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2); - case 11: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2); - case 10: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2); - case 9: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2); - case 8: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2); - case 7: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2); - case 6: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2); - case 5: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2); - case 4: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2); - case 3: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2); - case 2: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2); - case 1: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2); - case 0: - cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2); - 
} + cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); - switch (nr_pri_bits) { - case 7: - cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); - cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); - case 6: - cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); - default: - cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); - } + write_gicreg(0, ICH_HCR_EL2); + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); - switch (nr_pri_bits) { - case 7: - cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); - cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); - case 6: - cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); - default: - cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + save_maint_int_state(vcpu, max_lr_idx + 1); + + for (i = 0; i <= max_lr_idx; i++) { + if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) + continue; + + if (cpu_if->vgic_elrsr & (1 << i)) { + cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; + continue; + } + + cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); + __gic_v3_set_lr(0, i); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); + cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); + case 6: + cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); + default: + cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); + cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); + case 6: + cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); + default: + cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + } + + vcpu->arch.vgic_cpu.live_lrs = 0; + } else { + cpu_if->vgic_misr = 0; + cpu_if->vgic_eisr = 0; + cpu_if->vgic_elrsr = 0xffff; + cpu_if->vgic_ap0r[0] = 0; + cpu_if->vgic_ap0r[1] = 0; + cpu_if->vgic_ap0r[2] = 0; + cpu_if->vgic_ap0r[3] = 0; + cpu_if->vgic_ap1r[0] = 0; + cpu_if->vgic_ap1r[1] = 0; + cpu_if->vgic_ap1r[2] = 0; + cpu_if->vgic_ap1r[3] = 0; } val = read_gicreg(ICC_SRE_EL2); @@ -128,6 +245,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 val; u32 max_lr_idx, nr_pri_bits; + u16 live_lrs = 0; + int i; /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. 
This causes a @@ -140,66 +259,46 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); isb(); - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); - val = read_gicreg(ICH_VTR_EL2); max_lr_idx = vtr_to_max_lr_idx(val); nr_pri_bits = vtr_to_nr_pri_bits(val); - switch (nr_pri_bits) { - case 7: - write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); - write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); - case 6: - write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); - default: - write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + for (i = 0; i <= max_lr_idx; i++) { + if (cpu_if->vgic_lr[i] & ICH_LR_STATE) + live_lrs |= (1 << i); } - switch (nr_pri_bits) { - case 7: - write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); - write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); - case 6: - write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); - default: - write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); - } + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); - switch (max_lr_idx) { - case 15: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); - case 14: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); - case 13: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); - case 12: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); - case 11: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); - case 10: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); - case 9: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); - case 8: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); - case 7: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); - case 6: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); - case 5: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); - case 4: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); - case 3: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); - case 2: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); - case 1: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2); - case 0: - write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2); + if (live_lrs) { + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); + write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + } + + for (i = 0; i <= max_lr_idx; i++) { + if (!(live_lrs & (1 << i))) + continue; + + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } } /* @@ -209,6 +308,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) */ isb(); dsb(sy); + vcpu->arch.vgic_cpu.live_lrs = live_lrs; /* * Prevent the guest from touching the GIC system registers if @@ -220,6 +320,15 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) } } +void __hyp_text __vgic_v3_init_lrs(void) +{ + int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); + int i; + + for (i = 0; i <= max_lr_idx; i++) + __gic_v3_set_lr(0, i); +} 
+ static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f34745c..9677bf0 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -77,7 +77,11 @@ int kvm_arch_dev_ioctl_check_extension(long ext) case KVM_CAP_GUEST_DEBUG_HW_WPS: r = get_num_wrps(); break; + case KVM_CAP_ARM_PMU_V3: + r = kvm_arm_support_pmu_v3(); + break; case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_VCPU_ATTRIBUTES: r = 1; break; default: @@ -120,6 +124,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset system registers */ kvm_reset_sys_regs(vcpu); + /* Reset PMU */ + kvm_pmu_vcpu_reset(vcpu); + /* Reset timer */ return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2e90371..7bbe3ff 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -20,6 +20,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/bsearch.h> #include <linux/kvm_host.h> #include <linux/mm.h> #include <linux/uaccess.h> @@ -34,6 +35,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_host.h> #include <asm/kvm_mmu.h> +#include <asm/perf_event.h> #include <trace/events/kvm.h> @@ -439,6 +441,344 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; } +static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 pmcr, val; + + asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr)); + /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN + * except PMCR.E resetting to zero. + */ + val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) + | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + vcpu_sys_reg(vcpu, PMCR_EL0) = val; +} + +static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu) +{ + u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + + return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu)); +} + +static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu) +{ + u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + + return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN)) + || vcpu_mode_priv(vcpu)); +} + +static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu) +{ + u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + + return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN)) + || vcpu_mode_priv(vcpu)); +} + +static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu) +{ + u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + + return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN)) + || vcpu_mode_priv(vcpu)); +} + +static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 val; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if (pmu_access_el0_disabled(vcpu)) + return false; + + if (p->is_write) { + /* Only update writeable bits of PMCR */ + val = vcpu_sys_reg(vcpu, PMCR_EL0); + val &= ~ARMV8_PMU_PMCR_MASK; + val |= p->regval & ARMV8_PMU_PMCR_MASK; + vcpu_sys_reg(vcpu, PMCR_EL0) = val; + kvm_pmu_handle_pmcr(vcpu, val); + } else { + /* PMCR.P & PMCR.C are RAZ */ + val = vcpu_sys_reg(vcpu, PMCR_EL0) + & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C); + p->regval = val; + } + + return true; +} + +static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if 
(pmu_access_event_counter_el0_disabled(vcpu)) + return false; + + if (p->is_write) + vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; + else + /* return PMSELR.SEL field */ + p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0) + & ARMV8_PMU_COUNTER_MASK; + + return true; +} + +static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 pmceid; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + BUG_ON(p->is_write); + + if (pmu_access_el0_disabled(vcpu)) + return false; + + if (!(p->Op2 & 1)) + asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid)); + else + asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid)); + + p->regval = pmceid; + + return true; +} + +static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) +{ + u64 pmcr, val; + + pmcr = vcpu_sys_reg(vcpu, PMCR_EL0); + val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; + if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) + return false; + + return true; +} + +static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 idx; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if (r->CRn == 9 && r->CRm == 13) { + if (r->Op2 == 2) { + /* PMXEVCNTR_EL0 */ + if (pmu_access_event_counter_el0_disabled(vcpu)) + return false; + + idx = vcpu_sys_reg(vcpu, PMSELR_EL0) + & ARMV8_PMU_COUNTER_MASK; + } else if (r->Op2 == 0) { + /* PMCCNTR_EL0 */ + if (pmu_access_cycle_counter_el0_disabled(vcpu)) + return false; + + idx = ARMV8_PMU_CYCLE_IDX; + } else { + BUG(); + } + } else if (r->CRn == 14 && (r->CRm & 12) == 8) { + /* PMEVCNTRn_EL0 */ + if (pmu_access_event_counter_el0_disabled(vcpu)) + return false; + + idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); + } else { + BUG(); + } + + if (!pmu_counter_idx_valid(vcpu, idx)) + return false; + + if (p->is_write) { + if (pmu_access_el0_disabled(vcpu)) + return false; + + kvm_pmu_set_counter_value(vcpu, idx, p->regval); + } else { + p->regval = kvm_pmu_get_counter_value(vcpu, idx); + } + + return true; +} + +static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 idx, reg; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if (pmu_access_el0_disabled(vcpu)) + return false; + + if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) { + /* PMXEVTYPER_EL0 */ + idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; + reg = PMEVTYPER0_EL0 + idx; + } else if (r->CRn == 14 && (r->CRm & 12) == 12) { + idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); + if (idx == ARMV8_PMU_CYCLE_IDX) + reg = PMCCFILTR_EL0; + else + /* PMEVTYPERn_EL0 */ + reg = PMEVTYPER0_EL0 + idx; + } else { + BUG(); + } + + if (!pmu_counter_idx_valid(vcpu, idx)) + return false; + + if (p->is_write) { + kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); + vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; + } else { + p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; + } + + return true; +} + +static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 val, mask; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if (pmu_access_el0_disabled(vcpu)) + return false; + + mask = kvm_pmu_valid_counter_mask(vcpu); + if (p->is_write) { + val = p->regval & mask; + if (r->Op2 & 0x1) { + /* accessing PMCNTENSET_EL0 */ + vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; + kvm_pmu_enable_counter(vcpu, val); + } else { + /* 
accessing PMCNTENCLR_EL0 */ + vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; + kvm_pmu_disable_counter(vcpu, val); + } + } else { + p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; + } + + return true; +} + +static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 mask = kvm_pmu_valid_counter_mask(vcpu); + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if (!vcpu_mode_priv(vcpu)) + return false; + + if (p->is_write) { + u64 val = p->regval & mask; + + if (r->Op2 & 0x1) + /* accessing PMINTENSET_EL1 */ + vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; + else + /* accessing PMINTENCLR_EL1 */ + vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; + } else { + p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; + } + + return true; +} + +static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 mask = kvm_pmu_valid_counter_mask(vcpu); + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if (pmu_access_el0_disabled(vcpu)) + return false; + + if (p->is_write) { + if (r->CRm & 0x2) + /* accessing PMOVSSET_EL0 */ + kvm_pmu_overflow_set(vcpu, p->regval & mask); + else + /* accessing PMOVSCLR_EL0 */ + vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); + } else { + p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; + } + + return true; +} + +static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 mask; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if (pmu_write_swinc_el0_disabled(vcpu)) + return false; + + if (p->is_write) { + mask = kvm_pmu_valid_counter_mask(vcpu); + kvm_pmu_software_increment(vcpu, p->regval & mask); + return true; + } + + return false; +} + +static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!kvm_arm_pmu_v3_ready(vcpu)) + return trap_raz_wi(vcpu, p, r); + + if (p->is_write) { + if (!vcpu_mode_priv(vcpu)) + return false; + + vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval + & ARMV8_PMU_USERENR_MASK; + } else { + p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0) + & ARMV8_PMU_USERENR_MASK; + } + + return true; +} + /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ /* DBGBVRn_EL1 */ \ @@ -454,6 +794,20 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } +/* Macro to expand the PMEVCNTRn_EL0 register */ +#define PMU_PMEVCNTR_EL0(n) \ + /* PMEVCNTRn_EL0 */ \ + { Op0(0b11), Op1(0b011), CRn(0b1110), \ + CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ + access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), } + +/* Macro to expand the PMEVTYPERn_EL0 register */ +#define PMU_PMEVTYPER_EL0(n) \ + /* PMEVTYPERn_EL0 */ \ + { Op0(0b11), Op1(0b011), CRn(0b1110), \ + CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ + access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } + /* * Architected system registers. 
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -583,10 +937,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* PMINTENSET_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), - trap_raz_wi }, + access_pminten, reset_unknown, PMINTENSET_EL1 }, /* PMINTENCLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), - trap_raz_wi }, + access_pminten, NULL, PMINTENSET_EL1 }, /* MAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), @@ -623,43 +977,46 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* PMCR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), - trap_raz_wi }, + access_pmcr, reset_pmcr, }, /* PMCNTENSET_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), - trap_raz_wi }, + access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, /* PMCNTENCLR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), - trap_raz_wi }, + access_pmcnten, NULL, PMCNTENSET_EL0 }, /* PMOVSCLR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), - trap_raz_wi }, + access_pmovs, NULL, PMOVSSET_EL0 }, /* PMSWINC_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), - trap_raz_wi }, + access_pmswinc, reset_unknown, PMSWINC_EL0 }, /* PMSELR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), - trap_raz_wi }, + access_pmselr, reset_unknown, PMSELR_EL0 }, /* PMCEID0_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), - trap_raz_wi }, + access_pmceid }, /* PMCEID1_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), - trap_raz_wi }, + access_pmceid }, /* PMCCNTR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), - trap_raz_wi }, + access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 }, /* PMXEVTYPER_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), - trap_raz_wi }, + access_pmu_evtyper }, /* PMXEVCNTR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), - trap_raz_wi }, - /* PMUSERENR_EL0 */ + access_pmu_evcntr }, + /* PMUSERENR_EL0 + * This register resets as unknown in 64bit mode while it resets as zero + * in 32bit mode. Here we choose to reset it as zero for consistency. 
+ */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), - trap_raz_wi }, + access_pmuserenr, reset_val, PMUSERENR_EL0, 0 }, /* PMOVSSET_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), - trap_raz_wi }, + access_pmovs, reset_unknown, PMOVSSET_EL0 }, /* TPIDR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), @@ -668,6 +1025,77 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), NULL, reset_unknown, TPIDRRO_EL0 }, + /* PMEVCNTRn_EL0 */ + PMU_PMEVCNTR_EL0(0), + PMU_PMEVCNTR_EL0(1), + PMU_PMEVCNTR_EL0(2), + PMU_PMEVCNTR_EL0(3), + PMU_PMEVCNTR_EL0(4), + PMU_PMEVCNTR_EL0(5), + PMU_PMEVCNTR_EL0(6), + PMU_PMEVCNTR_EL0(7), + PMU_PMEVCNTR_EL0(8), + PMU_PMEVCNTR_EL0(9), + PMU_PMEVCNTR_EL0(10), + PMU_PMEVCNTR_EL0(11), + PMU_PMEVCNTR_EL0(12), + PMU_PMEVCNTR_EL0(13), + PMU_PMEVCNTR_EL0(14), + PMU_PMEVCNTR_EL0(15), + PMU_PMEVCNTR_EL0(16), + PMU_PMEVCNTR_EL0(17), + PMU_PMEVCNTR_EL0(18), + PMU_PMEVCNTR_EL0(19), + PMU_PMEVCNTR_EL0(20), + PMU_PMEVCNTR_EL0(21), + PMU_PMEVCNTR_EL0(22), + PMU_PMEVCNTR_EL0(23), + PMU_PMEVCNTR_EL0(24), + PMU_PMEVCNTR_EL0(25), + PMU_PMEVCNTR_EL0(26), + PMU_PMEVCNTR_EL0(27), + PMU_PMEVCNTR_EL0(28), + PMU_PMEVCNTR_EL0(29), + PMU_PMEVCNTR_EL0(30), + /* PMEVTYPERn_EL0 */ + PMU_PMEVTYPER_EL0(0), + PMU_PMEVTYPER_EL0(1), + PMU_PMEVTYPER_EL0(2), + PMU_PMEVTYPER_EL0(3), + PMU_PMEVTYPER_EL0(4), + PMU_PMEVTYPER_EL0(5), + PMU_PMEVTYPER_EL0(6), + PMU_PMEVTYPER_EL0(7), + PMU_PMEVTYPER_EL0(8), + PMU_PMEVTYPER_EL0(9), + PMU_PMEVTYPER_EL0(10), + PMU_PMEVTYPER_EL0(11), + PMU_PMEVTYPER_EL0(12), + PMU_PMEVTYPER_EL0(13), + PMU_PMEVTYPER_EL0(14), + PMU_PMEVTYPER_EL0(15), + PMU_PMEVTYPER_EL0(16), + PMU_PMEVTYPER_EL0(17), + PMU_PMEVTYPER_EL0(18), + PMU_PMEVTYPER_EL0(19), + PMU_PMEVTYPER_EL0(20), + PMU_PMEVTYPER_EL0(21), + PMU_PMEVTYPER_EL0(22), + PMU_PMEVTYPER_EL0(23), + PMU_PMEVTYPER_EL0(24), + PMU_PMEVTYPER_EL0(25), + PMU_PMEVTYPER_EL0(26), + PMU_PMEVTYPER_EL0(27), + PMU_PMEVTYPER_EL0(28), + PMU_PMEVTYPER_EL0(29), + PMU_PMEVTYPER_EL0(30), + /* PMCCFILTR_EL0 + * This register resets as unknown in 64bit mode while it resets as zero + * in 32bit mode. Here we choose to reset it as zero for consistency. + */ + { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111), + access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 }, + /* DACR32_EL2 */ { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), NULL, reset_unknown, DACR32_EL2 }, @@ -688,7 +1116,7 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, } else { u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1); u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); - u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT); + u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT); p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | @@ -857,6 +1285,20 @@ static const struct sys_reg_desc cp14_64_regs[] = { { Op1( 0), CRm( 2), .access = trap_raz_wi }, }; +/* Macro to expand the PMEVCNTRn register */ +#define PMU_PMEVCNTR(n) \ + /* PMEVCNTRn */ \ + { Op1(0), CRn(0b1110), \ + CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ + access_pmu_evcntr } + +/* Macro to expand the PMEVTYPERn register */ +#define PMU_PMEVTYPER(n) \ + /* PMEVTYPERn */ \ + { Op1(0), CRn(0b1110), \ + CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ + access_pmu_evtyper } + /* * Trapped cp15 registers. 
TTBR0/TTBR1 get a double encoding, * depending on the way they are accessed (as a 32bit or a 64bit @@ -885,19 +1327,21 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, /* PMU */ - { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi }, - { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs }, + { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten }, + { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs }, { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, @@ -908,10 +1352,78 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, + + /* PMEVCNTRn */ + PMU_PMEVCNTR(0), + PMU_PMEVCNTR(1), + PMU_PMEVCNTR(2), + PMU_PMEVCNTR(3), + PMU_PMEVCNTR(4), + PMU_PMEVCNTR(5), + PMU_PMEVCNTR(6), + PMU_PMEVCNTR(7), + PMU_PMEVCNTR(8), + PMU_PMEVCNTR(9), + PMU_PMEVCNTR(10), + PMU_PMEVCNTR(11), + PMU_PMEVCNTR(12), + PMU_PMEVCNTR(13), + PMU_PMEVCNTR(14), + PMU_PMEVCNTR(15), + PMU_PMEVCNTR(16), + PMU_PMEVCNTR(17), + PMU_PMEVCNTR(18), + PMU_PMEVCNTR(19), + PMU_PMEVCNTR(20), + PMU_PMEVCNTR(21), + PMU_PMEVCNTR(22), + PMU_PMEVCNTR(23), + PMU_PMEVCNTR(24), + PMU_PMEVCNTR(25), + PMU_PMEVCNTR(26), + PMU_PMEVCNTR(27), + PMU_PMEVCNTR(28), + PMU_PMEVCNTR(29), + PMU_PMEVCNTR(30), + /* PMEVTYPERn */ + PMU_PMEVTYPER(0), + PMU_PMEVTYPER(1), + PMU_PMEVTYPER(2), + PMU_PMEVTYPER(3), + PMU_PMEVTYPER(4), + PMU_PMEVTYPER(5), + PMU_PMEVTYPER(6), + PMU_PMEVTYPER(7), + PMU_PMEVTYPER(8), + PMU_PMEVTYPER(9), + PMU_PMEVTYPER(10), + PMU_PMEVTYPER(11), + PMU_PMEVTYPER(12), + PMU_PMEVTYPER(13), + PMU_PMEVTYPER(14), + PMU_PMEVTYPER(15), + PMU_PMEVTYPER(16), + PMU_PMEVTYPER(17), + PMU_PMEVTYPER(18), + PMU_PMEVTYPER(19), + PMU_PMEVTYPER(20), + PMU_PMEVTYPER(21), + PMU_PMEVTYPER(22), + PMU_PMEVTYPER(23), + PMU_PMEVTYPER(24), + PMU_PMEVTYPER(25), + PMU_PMEVTYPER(26), + PMU_PMEVTYPER(27), + PMU_PMEVTYPER(28), + PMU_PMEVTYPER(29), + PMU_PMEVTYPER(30), + /* PMCCFILTR */ + { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper }, }; static const struct 
sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, }; @@ -942,29 +1454,32 @@ static const struct sys_reg_desc *get_target_table(unsigned target, } } +#define reg_to_match_value(x) \ + ({ \ + unsigned long val; \ + val = (x)->Op0 << 14; \ + val |= (x)->Op1 << 11; \ + val |= (x)->CRn << 7; \ + val |= (x)->CRm << 3; \ + val |= (x)->Op2; \ + val; \ + }) + +static int match_sys_reg(const void *key, const void *elt) +{ + const unsigned long pval = (unsigned long)key; + const struct sys_reg_desc *r = elt; + + return pval - reg_to_match_value(r); +} + static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[], unsigned int num) { - unsigned int i; - - for (i = 0; i < num; i++) { - const struct sys_reg_desc *r = &table[i]; - - if (params->Op0 != r->Op0) - continue; - if (params->Op1 != r->Op1) - continue; - if (params->CRn != r->CRn) - continue; - if (params->CRm != r->CRm) - continue; - if (params->Op2 != r->Op2) - continue; + unsigned long pval = reg_to_match_value(params); - return r; - } - return NULL; + return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); } int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 1a811ec..c86b790 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -4,15 +4,16 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ strchr.o strrchr.o -# Tell the compiler to treat all general purpose registers as -# callee-saved, which allows for efficient runtime patching of the bl -# instruction in the caller with an atomic instruction when supported by -# the CPU. Result and argument registers are handled correctly, based on -# the function prototype. +# Tell the compiler to treat all general purpose registers (with the +# exception of the IP registers, which are already handled by the caller +# in case of a PLT) as callee-saved, which allows for efficient runtime +# patching of the bl instruction in the caller with an atomic instruction +# when supported by the CPU. Result and argument registers are handled +# correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ - -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18 + -fcall-saved-x18 diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a9723c7..5d1cad3 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -33,28 +33,28 @@ * Alignment fixed up by hardware. 
*/ ENTRY(__clear_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f 1: -USER(9f, str xzr, [x0], #8 ) +uao_user_alternative 9f, str, sttr, xzr, x0, 8 subs x1, x1, #8 b.pl 1b 2: adds x1, x1, #4 b.mi 3f -USER(9f, str wzr, [x0], #4 ) +uao_user_alternative 9f, str, sttr, wzr, x0, 4 sub x1, x1, #4 3: adds x1, x1, #2 b.mi 4f -USER(9f, strh wzr, [x0], #2 ) +uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 sub x1, x1, #2 4: adds x1, x1, #1 b.mi 5f -USER(9f, strb wzr, [x0] ) +uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 4699cd7..17e8306 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -34,7 +34,7 @@ */ .macro ldrb1 ptr, regB, val - USER(9998f, ldrb \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val .endm .macro strb1 ptr, regB, val @@ -42,7 +42,7 @@ .endm .macro ldrh1 ptr, regB, val - USER(9998f, ldrh \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val .endm .macro strh1 ptr, regB, val @@ -50,7 +50,7 @@ .endm .macro ldr1 ptr, regB, val - USER(9998f, ldr \ptr, [\regB], \val) + uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val .endm .macro str1 ptr, regB, val @@ -58,7 +58,7 @@ .endm .macro ldp1 ptr, regB, regC, val - USER(9998f, ldp \ptr, \regB, [\regC], \val) + uao_ldp 9998f, \ptr, \regB, \regC, \val .endm .macro stp1 ptr, regB, regC, val @@ -67,11 +67,11 @@ end .req x5 ENTRY(__copy_from_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 // Nothing to copy ret diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 81c8fc9..f7292dd0 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -35,44 +35,44 @@ * x0 - bytes not copied */ .macro ldrb1 ptr, regB, val - USER(9998f, ldrb \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val .endm .macro strb1 ptr, regB, val - USER(9998f, strb \ptr, [\regB], \val) + uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val .endm .macro ldrh1 ptr, regB, val - USER(9998f, ldrh \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val .endm .macro strh1 ptr, regB, val - USER(9998f, strh \ptr, [\regB], \val) + uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val .endm .macro ldr1 ptr, regB, val - USER(9998f, ldr \ptr, [\regB], \val) + uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val .endm .macro str1 ptr, regB, val - USER(9998f, str \ptr, [\regB], \val) + uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val .endm .macro ldp1 ptr, regB, regC, val - USER(9998f, ldp \ptr, \regB, [\regC], \val) + uao_ldp 9998f, \ptr, \regB, \regC, \val .endm .macro stp1 ptr, regB, regC, val - USER(9998f, stp \ptr, \regB, [\regC], \val) + uao_stp 9998f, \ptr, 
\regB, \regC, \val .endm end .req x5 ENTRY(__copy_in_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 512b9a7..4c1e700 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -18,6 +18,8 @@ #include <linux/const.h> #include <asm/assembler.h> #include <asm/page.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> /* * Copy a page from src to dest (both are page aligned) @@ -27,20 +29,65 @@ * x1 - src */ ENTRY(copy_page) - /* Assume cache line size is 64 bytes. */ - prfm pldl1strm, [x1, #64] -1: ldp x2, x3, [x1] +alternative_if_not ARM64_HAS_NO_HW_PREFETCH + nop + nop +alternative_else + # Prefetch two cache lines ahead. + prfm pldl1strm, [x1, #128] + prfm pldl1strm, [x1, #256] +alternative_endif + + ldp x2, x3, [x1] ldp x4, x5, [x1, #16] ldp x6, x7, [x1, #32] ldp x8, x9, [x1, #48] - add x1, x1, #64 - prfm pldl1strm, [x1, #64] + ldp x10, x11, [x1, #64] + ldp x12, x13, [x1, #80] + ldp x14, x15, [x1, #96] + ldp x16, x17, [x1, #112] + + mov x18, #(PAGE_SIZE - 128) + add x1, x1, #128 +1: + subs x18, x18, #128 + +alternative_if_not ARM64_HAS_NO_HW_PREFETCH + nop +alternative_else + prfm pldl1strm, [x1, #384] +alternative_endif + stnp x2, x3, [x0] + ldp x2, x3, [x1] stnp x4, x5, [x0, #16] + ldp x4, x5, [x1, #16] stnp x6, x7, [x0, #32] + ldp x6, x7, [x1, #32] stnp x8, x9, [x0, #48] - add x0, x0, #64 - tst x1, #(PAGE_SIZE - 1) - b.ne 1b + ldp x8, x9, [x1, #48] + stnp x10, x11, [x0, #64] + ldp x10, x11, [x1, #64] + stnp x12, x13, [x0, #80] + ldp x12, x13, [x1, #80] + stnp x14, x15, [x0, #96] + ldp x14, x15, [x1, #96] + stnp x16, x17, [x0, #112] + ldp x16, x17, [x1, #112] + + add x0, x0, #128 + add x1, x1, #128 + + b.gt 1b + + stnp x2, x3, [x0] + stnp x4, x5, [x0, #16] + stnp x6, x7, [x0, #32] + stnp x8, x9, [x0, #48] + stnp x10, x11, [x0, #64] + stnp x12, x13, [x0, #80] + stnp x14, x15, [x0, #96] + stnp x16, x17, [x0, #112] + ret ENDPROC(copy_page) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 7512bbb..21faae6 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -37,7 +37,7 @@ .endm .macro strb1 ptr, regB, val - USER(9998f, strb \ptr, [\regB], \val) + uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val .endm .macro ldrh1 ptr, regB, val @@ -45,7 +45,7 @@ .endm .macro strh1 ptr, regB, val - USER(9998f, strh \ptr, [\regB], \val) + uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val .endm .macro ldr1 ptr, regB, val @@ -53,7 +53,7 @@ .endm .macro str1 ptr, regB, val - USER(9998f, str \ptr, [\regB], \val) + uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val .endm .macro ldp1 ptr, regB, regC, val @@ -61,16 +61,16 @@ .endm .macro stp1 ptr, regB, regC, val - USER(9998f, stp \ptr, \regB, [\regC], \val) + uao_stp 9998f, \ptr, \regB, \regC, \val .endm end .req x5 ENTRY(__copy_to_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", 
__stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index ffbdec0..2a4e239 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -211,7 +211,7 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) .Lunequal_proc: cbz diff, .Lremain8 -/*There is differnence occured in the latest comparison.*/ +/* There is difference occurred in the latest comparison. */ .Lnot_limit: /* * For little endian,reverse the low significant equal bits into MSB,then diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e87f53f..c90c3c5 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -24,6 +24,7 @@ #include <asm/cpufeature.h> #include <asm/mmu_context.h> +#include <asm/smp.h> #include <asm/tlbflush.h> static u32 asid_bits; @@ -40,6 +41,45 @@ static cpumask_t tlb_flush_pending; #define ASID_FIRST_VERSION (1UL << asid_bits) #define NUM_USER_ASIDS ASID_FIRST_VERSION +/* Get the ASIDBits supported by the current CPU */ +static u32 get_cpu_asid_bits(void) +{ + u32 asid; + int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1), + ID_AA64MMFR0_ASID_SHIFT); + + switch (fld) { + default: + pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n", + smp_processor_id(), fld); + /* Fallthrough */ + case 0: + asid = 8; + break; + case 2: + asid = 16; + } + + return asid; +} + +/* Check if the current cpu's ASIDBits is compatible with asid_bits */ +void verify_cpu_asid_bits(void) +{ + u32 asid = get_cpu_asid_bits(); + + if (asid < asid_bits) { + /* + * We cannot decrease the ASID size at runtime, so panic if we support + * fewer ASID bits than the boot CPU. + */ + pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n", + smp_processor_id(), asid, asid_bits); + update_cpu_boot_status(CPU_PANIC_KERNEL); + cpu_park_loop(); + } +} + static void flush_context(unsigned int cpu) { int i; @@ -187,19 +227,7 @@ switch_mm_fastpath: static int asids_init(void) { - int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4); - - switch (fld) { - default: - pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld); - /* Fallthrough */ - case 0: - asid_bits = 8; - break; - case 2: - asid_bits = 16; - } - + asid_bits = get_cpu_asid_bits(); /* If we end up with more CPUs than ASIDs, expect things to crash */ WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); atomic64_set(&asid_generation, ASID_FIRST_VERSION); diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 0adbebb..f9271cb 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -27,15 +27,15 @@ #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> -#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS) - struct addr_marker { unsigned long start_address; const char *name; }; enum address_markers_idx { - VMALLOC_START_NR = 0, + MODULES_START_NR = 0, + MODULES_END_NR, + VMALLOC_START_NR, VMALLOC_END_NR, #ifdef CONFIG_SPARSEMEM_VMEMMAP VMEMMAP_START_NR, @@ -45,12 +45,12 @@ enum address_markers_idx { FIXADDR_END_NR, PCI_START_NR, PCI_END_NR, - MODULES_START_NR, - MODULES_END_NR, KERNEL_SPACE_NR, }; static struct addr_marker address_markers[] = { + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, { VMALLOC_START, "vmalloc() Area" }, { VMALLOC_END, "vmalloc() End" }, #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -61,9 +61,7 @@ static struct addr_marker address_markers[] = { { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, { PCI_IO_END, "PCI I/O end" }, - { MODULES_VADDR, "Modules start" 
}, - { MODULES_END, "Modules end" }, - { PAGE_OFFSET, "Kernel Mapping" }, + { PAGE_OFFSET, "Linear Mapping" }, { -1, NULL }, }; @@ -90,6 +88,11 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { + .mask = PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "F", + }, { .mask = PTE_USER, .val = PTE_USER, .set = "USR", @@ -316,7 +319,7 @@ static int ptdump_show(struct seq_file *m, void *v) .marker = address_markers, }; - walk_pgd(&st, &init_mm, LOWEST_ADDR); + walk_pgd(&st, &init_mm, VA_START); note_page(&st, 0, 0, 0); return 0; diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 7944427..81acd47 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs) fixup = search_exception_tables(instruction_pointer(regs)); if (fixup) - regs->pc = fixup->fixup; + regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; return fixup != NULL; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index abe2a95..95df28b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -192,6 +192,14 @@ out: return fault; } +static inline int permission_fault(unsigned int esr) +{ + unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; + + return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -225,12 +233,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - /* - * PAN bit set implies the fault happened in kernel space, but not - * in the arch's user access functions. - */ - if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT)) - goto no_context; + if (permission_fault(esr) && (addr < USER_DS)) { + if (get_fs() == KERNEL_DS) + die("Accessing user space memory with fs=KERNEL_DS", regs, esr); + + if (!search_exception_tables(regs->pc)) + die("Accessing user space memory outside uaccess.h routines", regs, esr); + } /* * As per x86, we may deadlock here. However, since the kernel only @@ -295,7 +304,7 @@ retry: up_read(&mm->mmap_sem); /* - * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR + * Handle the "normal" case first - VM_FAULT_MAJOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) @@ -568,3 +577,16 @@ void cpu_enable_pan(void *__unused) config_sctlr_el1(SCTLR_EL1_SPAN, 0); } #endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_UAO +/* + * Kernel threads have fs=KERNEL_DS by default, and don't need to call + * set_fs(), devtmpfs in particular relies on this behaviour. + * We need to enable the feature at runtime (instead of adding it to + * PSR_MODE_EL1h) as the feature may not be implemented by the cpu. + */ +void cpu_enable_uao(void *__unused) +{ + asm(SET_PSTATE_UAO(1)); +} +#endif /* CONFIG_ARM64_UAO */ diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 82d607c..589fd28 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -124,7 +124,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, * will be no pte_unmap() to correspond with this * pte_alloc_map(). 
*/ - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); } else if (sz == PMD_SIZE) { if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && pud_none(*pud)) @@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt) hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else if (ps == (PAGE_SIZE * CONT_PTES)) { - hugetlb_add_hstate(CONT_PTE_SHIFT); - } else if (ps == (PMD_SIZE * CONT_PMDS)) { - hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); return 0; @@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); - -#ifdef CONFIG_ARM64_64K_PAGES -static __init int add_default_hugepagesz(void) -{ - if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) - hugetlb_add_hstate(CONT_PMD_SHIFT); - return 0; -} -arch_initcall(add_default_hugepagesz); -#endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7802f21..61a38ea 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,7 +35,10 @@ #include <linux/efi.h> #include <linux/swiotlb.h> +#include <asm/boot.h> #include <asm/fixmap.h> +#include <asm/kasan.h> +#include <asm/kernel-pgtable.h> #include <asm/memory.h> #include <asm/sections.h> #include <asm/setup.h> @@ -45,7 +48,13 @@ #include "mm.h" -phys_addr_t memstart_addr __read_mostly = 0; +/* + * We need to be able to catch inadvertent references to memstart_addr + * that occur (potentially in generic code) before arm64_memblock_init() + * executes, which assigns it its actual value. So use a default value + * that cannot be mistaken for a real physical address. + */ +s64 memstart_addr __read_mostly = -1; phys_addr_t arm64_dma_phys_limit __read_mostly; #ifdef CONFIG_BLK_DEV_INITRD @@ -58,8 +67,8 @@ static int __init early_initrd(char *p) if (*endp == ',') { size = memparse(endp + 1, NULL); - initrd_start = (unsigned long)__va(start); - initrd_end = (unsigned long)__va(start + size); + initrd_start = start; + initrd_end = start + size; } return 0; } @@ -159,7 +168,57 @@ early_param("mem", early_mem); void __init arm64_memblock_init(void) { - memblock_enforce_memory_limit(memory_limit); + const s64 linear_region_size = -(s64)PAGE_OFFSET; + + /* + * Ensure that the linear region takes up exactly half of the kernel + * virtual address space. This way, we can distinguish a linear address + * from a kernel/module/vmalloc address by testing a single bit. + */ + BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1)); + + /* + * Select a suitable value for the base of physical memory. + */ + memstart_addr = round_down(memblock_start_of_DRAM(), + ARM64_MEMSTART_ALIGN); + + /* + * Remove the memory that we will not be able to cover with the + * linear mapping. Take care not to clip the kernel which may be + * high in memory. + */ + memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)), + ULLONG_MAX); + if (memblock_end_of_DRAM() > linear_region_size) + memblock_remove(0, memblock_end_of_DRAM() - linear_region_size); + + /* + * Apply the memory limit if it was set. Since the kernel may be loaded + * high up in memory, add back the kernel region that must be accessible + * via the linear mapping. 
+ */ + if (memory_limit != (phys_addr_t)ULLONG_MAX) { + memblock_enforce_memory_limit(memory_limit); + memblock_add(__pa(_text), (u64)(_end - _text)); + } + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + extern u16 memstart_offset_seed; + u64 range = linear_region_size - + (memblock_end_of_DRAM() - memblock_start_of_DRAM()); + + /* + * If the size of the linear region exceeds, by a sufficient + * margin, the size of the region that the available physical + * memory spans, randomize the linear region as well. + */ + if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) { + range = range / ARM64_MEMSTART_ALIGN + 1; + memstart_addr -= ARM64_MEMSTART_ALIGN * + ((range * memstart_offset_seed) >> 16); + } + } /* * Register the kernel text, kernel data, initrd, and initial @@ -167,8 +226,13 @@ void __init arm64_memblock_init(void) */ memblock_reserve(__pa(_text), _end - _text); #ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start); + if (initrd_start) { + memblock_reserve(initrd_start, initrd_end - initrd_start); + + /* the generic initrd code expects virtual addresses */ + initrd_start = __phys_to_virt(initrd_start); + initrd_end = __phys_to_virt(initrd_end); + } #endif early_init_fdt_scan_reserved_mem(); @@ -302,35 +366,38 @@ void __init mem_init(void) #ifdef CONFIG_KASAN " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n" #endif + " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" " vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n" + " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .data : 0x%p" " - 0x%p" " (%6ld KB)\n" #ifdef CONFIG_SPARSEMEM_VMEMMAP " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" " 0x%16lx - 0x%16lx (%6ld MB actual)\n" #endif " fixed : 0x%16lx - 0x%16lx (%6ld KB)\n" " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" - " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" - " memory : 0x%16lx - 0x%16lx (%6ld MB)\n" - " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .data : 0x%p" " - 0x%p" " (%6ld KB)\n", + " memory : 0x%16lx - 0x%16lx (%6ld MB)\n", #ifdef CONFIG_KASAN MLG(KASAN_SHADOW_START, KASAN_SHADOW_END), #endif + MLM(MODULES_VADDR, MODULES_END), MLG(VMALLOC_START, VMALLOC_END), + MLK_ROUNDUP(_text, __start_rodata), + MLK_ROUNDUP(__start_rodata, _etext), + MLK_ROUNDUP(__init_begin, __init_end), + MLK_ROUNDUP(_sdata, _edata), #ifdef CONFIG_SPARSEMEM_VMEMMAP MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE), - MLM((unsigned long)virt_to_page(PAGE_OFFSET), + MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), (unsigned long)virt_to_page(high_memory)), #endif MLK(FIXADDR_START, FIXADDR_TOP), MLM(PCI_IO_START, PCI_IO_END), - MLM(MODULES_VADDR, MODULES_END), - MLM(PAGE_OFFSET, (unsigned long)high_memory), - MLK_ROUNDUP(__init_begin, __init_end), - MLK_ROUNDUP(_text, _etext), - MLK_ROUNDUP(_sdata, _edata)); + MLM(__phys_to_virt(memblock_start_of_DRAM()), + (unsigned long)high_memory)); #undef MLK #undef MLM @@ -343,8 +410,6 @@ void __init mem_init(void) #ifdef CONFIG_COMPAT BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64); #endif - BUILD_BUG_ON(TASK_SIZE_64 > MODULES_VADDR); - BUG_ON(TASK_SIZE_64 > MODULES_VADDR); if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; @@ -358,8 +423,8 @@ void __init mem_init(void) void free_initmem(void) { - fixup_init(); free_initmem_default(0); + fixup_init(); } #ifdef CONFIG_BLK_DEV_INITRD @@ -380,3 +445,28 @@ static int __init keepinitrd_setup(char 
*__unused) __setup("keepinitrd", keepinitrd_setup); #endif + +/* + * Dump out memory limit information on panic. + */ +static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p) +{ + if (memory_limit != (phys_addr_t)ULLONG_MAX) { + pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20); + } else { + pr_emerg("Memory Limit: none\n"); + } + return 0; +} + +static struct notifier_block mem_limit_notifier = { + .notifier_call = dump_mem_limit, +}; + +static int __init register_mem_limit_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &mem_limit_notifier); + return 0; +} +__initcall(register_mem_limit_dumper); diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index cab7a5b..757009d 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -16,9 +16,12 @@ #include <linux/memblock.h> #include <linux/start_kernel.h> +#include <asm/mmu_context.h> +#include <asm/kernel-pgtable.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> +#include <asm/sections.h> #include <asm/tlbflush.h> static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); @@ -32,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, if (pmd_none(*pmd)) pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); - pte = pte_offset_kernel(pmd, addr); + pte = pte_offset_kimg(pmd, addr); do { next = addr + PAGE_SIZE; set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page), @@ -50,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud, if (pud_none(*pud)) pud_populate(&init_mm, pud, kasan_zero_pmd); - pmd = pmd_offset(pud, addr); + pmd = pmd_offset_kimg(pud, addr); do { next = pmd_addr_end(addr, end); kasan_early_pte_populate(pmd, addr, next); @@ -67,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd, if (pgd_none(*pgd)) pgd_populate(&init_mm, pgd, kasan_zero_pud); - pud = pud_offset(pgd, addr); + pud = pud_offset_kimg(pgd, addr); do { next = pud_addr_end(addr, end); kasan_early_pmd_populate(pud, addr, next); @@ -96,6 +99,21 @@ asmlinkage void __init kasan_early_init(void) kasan_map_early_shadow(); } +/* + * Copy the current shadow region into a new pgdir. + */ +void __init kasan_copy_shadow(pgd_t *pgdir) +{ + pgd_t *pgd, *pgd_new, *pgd_end; + + pgd = pgd_offset_k(KASAN_SHADOW_START); + pgd_end = pgd_offset_k(KASAN_SHADOW_END); + pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START); + do { + set_pgd(pgd_new, *pgd); + } while (pgd++, pgd_new++, pgd != pgd_end); +} + static void __init clear_pgds(unsigned long start, unsigned long end) { @@ -108,20 +126,19 @@ static void __init clear_pgds(unsigned long start, set_pgd(pgd_offset_k(start), __pgd(0)); } -static void __init cpu_set_ttbr1(unsigned long ttbr1) -{ - asm( - " msr ttbr1_el1, %0\n" - " isb" - : - : "r" (ttbr1)); -} - void __init kasan_init(void) { + u64 kimg_shadow_start, kimg_shadow_end; + u64 mod_shadow_start, mod_shadow_end; struct memblock_region *reg; int i; + kimg_shadow_start = (u64)kasan_mem_to_shadow(_text); + kimg_shadow_end = (u64)kasan_mem_to_shadow(_end); + + mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); + mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); + /* * We are going to perform proper setup of shadow memory. * At first we should unmap early shadow (clear_pgds() call bellow). @@ -130,13 +147,33 @@ void __init kasan_init(void) * setup will be finished. 
*/ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); - cpu_set_ttbr1(__pa(tmp_pg_dir)); - flush_tlb_all(); + dsb(ishst); + cpu_replace_ttbr1(tmp_pg_dir); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + vmemmap_populate(kimg_shadow_start, kimg_shadow_end, + pfn_to_nid(virt_to_pfn(_text))); + + /* + * vmemmap_populate() has populated the shadow region that covers the + * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round + * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent + * kasan_populate_zero_shadow() from replacing the page table entries + * (PMD or PTE) at the edges of the shadow region for the kernel + * image. + */ + kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE); + kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, - kasan_mem_to_shadow((void *)MODULES_VADDR)); + (void *)mod_shadow_start); + kasan_populate_zero_shadow((void *)kimg_shadow_end, + kasan_mem_to_shadow((void *)PAGE_OFFSET)); + + if (kimg_shadow_start > mod_shadow_end) + kasan_populate_zero_shadow((void *)mod_shadow_end, + (void *)kimg_shadow_start); for_each_memblock(memory, reg) { void *start = (void *)__phys_to_virt(reg->base); @@ -165,8 +202,7 @@ void __init kasan_init(void) pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); memset(kasan_zero_page, 0, PAGE_SIZE); - cpu_set_ttbr1(__pa(swapper_pg_dir)); - flush_tlb_all(); + cpu_replace_ttbr1(swapper_pg_dir); /* At this point kasan is fully initialized. Enable error messages */ init_task.kasan_depth = 0; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 58faeaa..d2d8b8c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -30,8 +30,10 @@ #include <linux/slab.h> #include <linux/stop_machine.h> +#include <asm/barrier.h> #include <asm/cputype.h> #include <asm/fixmap.h> +#include <asm/kasan.h> #include <asm/kernel-pgtable.h> #include <asm/sections.h> #include <asm/setup.h> @@ -44,13 +46,20 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 kimage_voffset __read_mostly; +EXPORT_SYMBOL(kimage_voffset); + /* * Empty_zero_page is a special page that is used for zero-initialized data * and COW. */ -struct page *empty_zero_page; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -62,16 +71,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static void __init *early_alloc(unsigned long sz) +static phys_addr_t __init early_pgtable_alloc(void) { phys_addr_t phys; void *ptr; - phys = memblock_alloc(sz, sz); + phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); BUG_ON(!phys); - ptr = __va(phys); - memset(ptr, 0, sz); - return ptr; + + /* + * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE + * slot will be free, so we can (ab)use the FIX_PTE slot to initialise + * any level of table. 
+ */ + ptr = pte_set_fixmap(phys); + + memset(ptr, 0, PAGE_SIZE); + + /* + * Implicit barriers also ensure the zeroed page is visible to the page + * table walker + */ + pte_clear_fixmap(); + + return phys; } /* @@ -95,24 +118,30 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - void *(*alloc)(unsigned long size)) + phys_addr_t (*pgtable_alloc)(void)) { pte_t *pte; if (pmd_none(*pmd) || pmd_sect(*pmd)) { - pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + phys_addr_t pte_phys; + BUG_ON(!pgtable_alloc); + pte_phys = pgtable_alloc(); + pte = pte_set_fixmap(pte_phys); if (pmd_sect(*pmd)) split_pmd(pmd, pte); - __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); flush_tlb_all(); + pte_clear_fixmap(); } BUG_ON(pmd_bad(*pmd)); - pte = pte_offset_kernel(pmd, addr); + pte = pte_set_fixmap_offset(pmd, addr); do { set_pte(pte, pfn_pte(pfn, prot)); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); + + pte_clear_fixmap(); } static void split_pud(pud_t *old_pud, pmd_t *pmd) @@ -127,10 +156,29 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) } while (pmd++, i++, i < PTRS_PER_PMD); } -static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, +#ifdef CONFIG_DEBUG_PAGEALLOC +static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) +{ + + /* + * If debug_page_alloc is enabled we must map the linear map + * using pages. However, other mappings created by + * create_mapping_noalloc must use sections in some cases. Allow + * sections to be used in those cases, where no pgtable_alloc + * function is provided. + */ + return !pgtable_alloc || !debug_pagealloc_enabled(); +} +#else +static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) +{ + return true; +} +#endif + +static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + phys_addr_t (*pgtable_alloc)(void)) { pmd_t *pmd; unsigned long next; @@ -139,7 +187,10 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. 
*/ if (pud_none(*pud) || pud_sect(*pud)) { - pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + phys_addr_t pmd_phys; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(); + pmd = pmd_set_fixmap(pmd_phys); if (pud_sect(*pud)) { /* * need to have the 1G of mappings continue to be @@ -147,16 +198,18 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, */ split_pud(pud, pmd); } - pud_populate(mm, pud, pmd); + __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); flush_tlb_all(); + pmd_clear_fixmap(); } BUG_ON(pud_bad(*pud)); - pmd = pmd_offset(pud, addr); + pmd = pmd_set_fixmap_offset(pud, addr); do { next = pmd_addr_end(addr, end); /* try section mapping first */ - if (((addr | next | phys) & ~SECTION_MASK) == 0) { + if (((addr | next | phys) & ~SECTION_MASK) == 0 && + block_mappings_allowed(pgtable_alloc)) { pmd_t old_pmd =*pmd; set_pmd(pmd, __pmd(phys | pgprot_val(mk_sect_prot(prot)))); @@ -167,17 +220,19 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, if (!pmd_none(old_pmd)) { flush_tlb_all(); if (pmd_table(old_pmd)) { - phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0)); + phys_addr_t table = pmd_page_paddr(old_pmd); if (!WARN_ON_ONCE(slab_is_available())) memblock_free(table, PAGE_SIZE); } } } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, alloc); + prot, pgtable_alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); + + pmd_clear_fixmap(); } static inline bool use_1G_block(unsigned long addr, unsigned long next, @@ -192,28 +247,30 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, return true; } -static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, +static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + phys_addr_t (*pgtable_alloc)(void)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); - pgd_populate(mm, pgd, pud); + phys_addr_t pud_phys; + BUG_ON(!pgtable_alloc); + pud_phys = pgtable_alloc(); + __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE); } BUG_ON(pgd_bad(*pgd)); - pud = pud_offset(pgd, addr); + pud = pud_set_fixmap_offset(pgd, addr); do { next = pud_addr_end(addr, end); /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys)) { + if (use_1G_block(addr, next, phys) && + block_mappings_allowed(pgtable_alloc)) { pud_t old_pud = *pud; set_pud(pud, __pud(phys | pgprot_val(mk_sect_prot(prot)))); @@ -228,26 +285,28 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, if (!pud_none(old_pud)) { flush_tlb_all(); if (pud_table(old_pud)) { - phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); + phys_addr_t table = pud_page_paddr(old_pud); if (!WARN_ON_ONCE(slab_is_available())) memblock_free(table, PAGE_SIZE); } } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); + alloc_init_pmd(pud, addr, next, phys, prot, + pgtable_alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); + + pud_clear_fixmap(); } /* * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. 
*/ -static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, - phys_addr_t phys, unsigned long virt, +static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - void *(*alloc)(unsigned long size)) + phys_addr_t (*pgtable_alloc)(void)) { unsigned long addr, length, end, next; @@ -265,22 +324,35 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); + alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void *late_alloc(unsigned long size) +static phys_addr_t late_pgtable_alloc(void) { - void *ptr; - - BUG_ON(size > PAGE_SIZE); - ptr = (void *)__get_free_page(PGALLOC_GFP); + void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); - return ptr; + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); + return __pa(ptr); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*alloc)(void)) +{ + init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); +} + +/* + * This function can only be used to modify existing table entries, + * without allocating new levels of table. Note that this permits the + * creation of new section or page entries. + */ +static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { @@ -288,16 +360,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, - size, prot, early_alloc); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, + NULL); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - late_alloc); + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, + late_pgtable_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -309,69 +381,57 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt), - phys, virt, size, prot, late_alloc); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, + late_pgtable_alloc); } -#ifdef CONFIG_DEBUG_RODATA -static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { + unsigned long kernel_start = __pa(_stext); + unsigned long kernel_end = __pa(_etext); + /* - * Set up the executable regions using the existing section mappings - * for now. This will get more fine grained later once all memory - * is mapped + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. 
*/ - unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE); - unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE); - - if (end < kernel_x_start) { - create_mapping(start, __phys_to_virt(start), - end - start, PAGE_KERNEL); - } else if (start >= kernel_x_end) { - create_mapping(start, __phys_to_virt(start), - end - start, PAGE_KERNEL); - } else { - if (start < kernel_x_start) - create_mapping(start, __phys_to_virt(start), - kernel_x_start - start, - PAGE_KERNEL); - create_mapping(kernel_x_start, - __phys_to_virt(kernel_x_start), - kernel_x_end - kernel_x_start, - PAGE_KERNEL_EXEC); - if (kernel_x_end < end) - create_mapping(kernel_x_end, - __phys_to_virt(kernel_x_end), - end - kernel_x_end, - PAGE_KERNEL); + + /* No overlap with the kernel text */ + if (end < kernel_start || start >= kernel_end) { + __create_pgd_mapping(pgd, start, __phys_to_virt(start), + end - start, PAGE_KERNEL, + early_pgtable_alloc); + return; } + /* + * This block overlaps the kernel text mapping. + * Map the portion(s) which don't overlap. + */ + if (start < kernel_start) + __create_pgd_mapping(pgd, start, + __phys_to_virt(start), + kernel_start - start, PAGE_KERNEL, + early_pgtable_alloc); + if (kernel_end < end) + __create_pgd_mapping(pgd, kernel_end, + __phys_to_virt(kernel_end), + end - kernel_end, PAGE_KERNEL, + early_pgtable_alloc); + + /* + * Map the linear alias of the [_stext, _etext) interval as + * read-only/non-executable. This makes the contents of the + * region accessible to subsystems such as hibernate, but + * protects it from inadvertent modification or execution. + */ + __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), + kernel_end - kernel_start, PAGE_KERNEL_RO, + early_pgtable_alloc); } -#else -static void __init __map_memblock(phys_addr_t start, phys_addr_t end) -{ - create_mapping(start, __phys_to_virt(start), end - start, - PAGE_KERNEL_EXEC); -} -#endif -static void __init map_mem(void) +static void __init map_mem(pgd_t *pgd) { struct memblock_region *reg; - phys_addr_t limit; - - /* - * Temporarily limit the memblock range. We need to do this as - * create_mapping requires puds, pmds and ptes to be allocated from - * memory addressable from the initial direct kernel mapping. - * - * The initial direct kernel mapping, located at swapper_pg_dir, gives - * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps, - * memory starting from PHYS_OFFSET (which must be aligned to 2MB as - * per Documentation/arm64/booting.txt). - */ - limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE; - memblock_set_current_limit(limit); /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -383,69 +443,94 @@ static void __init map_mem(void) if (memblock_is_nomap(reg)) continue; - if (ARM64_SWAPPER_USES_SECTION_MAPS) { - /* - * For the first memory bank align the start address and - * current memblock limit to prevent create_mapping() from - * allocating pte page tables from unmapped memory. With - * the section maps, if the first block doesn't end on section - * size boundary, create_mapping() will try to allocate a pte - * page, which may be returned from an unmapped area. - * When section maps are not used, the pte page table for the - * current limit is already present in swapper_pg_dir. 
- */ - if (start < limit) - start = ALIGN(start, SECTION_SIZE); - if (end < limit) { - limit = end & SECTION_MASK; - memblock_set_current_limit(limit); - } - } - __map_memblock(start, end); + __map_memblock(pgd, start, end); } - - /* Limit no longer required. */ - memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } -static void __init fixup_executable(void) +void mark_rodata_ro(void) { -#ifdef CONFIG_DEBUG_RODATA - /* now that we are actually fully mapped, make the start/end more fine grained */ - if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) { - unsigned long aligned_start = round_down(__pa(_stext), - SWAPPER_BLOCK_SIZE); + unsigned long section_size; - create_mapping(aligned_start, __phys_to_virt(aligned_start), - __pa(_stext) - aligned_start, - PAGE_KERNEL); - } + section_size = (unsigned long)__start_rodata - (unsigned long)_stext; + create_mapping_late(__pa(_stext), (unsigned long)_stext, + section_size, PAGE_KERNEL_ROX); + /* + * mark .rodata as read only. Use _etext rather than __end_rodata to + * cover NOTES and EXCEPTION_TABLE. + */ + section_size = (unsigned long)_etext - (unsigned long)__start_rodata; + create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata, + section_size, PAGE_KERNEL_RO); +} - if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) { - unsigned long aligned_end = round_up(__pa(__init_end), - SWAPPER_BLOCK_SIZE); - create_mapping(__pa(__init_end), (unsigned long)__init_end, - aligned_end - __pa(__init_end), - PAGE_KERNEL); - } -#endif +void fixup_init(void) +{ + /* + * Unmap the __init region but leave the VM area in place. This + * prevents the region from being reused for kernel modules, which + * is not supported by kallsyms. + */ + unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void) +static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, + pgprot_t prot, struct vm_struct *vma) { - create_mapping_late(__pa(_stext), (unsigned long)_stext, - (unsigned long)_etext - (unsigned long)_stext, - PAGE_KERNEL_ROX); + phys_addr_t pa_start = __pa(va_start); + unsigned long size = va_end - va_start; + + BUG_ON(!PAGE_ALIGNED(pa_start)); + BUG_ON(!PAGE_ALIGNED(size)); + + __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, + early_pgtable_alloc); + + vma->addr = va_start; + vma->phys_addr = pa_start; + vma->size = size; + vma->flags = VM_MAP; + vma->caller = __builtin_return_address(0); + vm_area_add_early(vma); } -#endif -void fixup_init(void) +/* + * Create fine-grained mappings for the kernel. + */ +static void __init map_kernel(pgd_t *pgd) { - create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, - (unsigned long)__init_end - (unsigned long)__init_begin, - PAGE_KERNEL); + static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; + + map_kernel_chunk(pgd, _stext, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_chunk(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata); + map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, + &vmlinux_init); + map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); + + if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { + /* + * The fixmap falls in a separate pgd to the kernel, and doesn't + * live in the carveout for the swapper_pg_dir. We can simply + * re-use the existing dir for the fixmap. 
+ */ + set_pgd(pgd_offset_raw(pgd, FIXADDR_START), + *pgd_offset_k(FIXADDR_START)); + } else if (CONFIG_PGTABLE_LEVELS > 3) { + /* + * The fixmap shares its top level pgd entry with the kernel + * mapping. This can really only occur when we are running + * with 16k/4 levels, so we can simply reuse the pud level + * entry instead. + */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), + __pud(__pa(bm_pmd) | PUD_TYPE_TABLE)); + pud_clear_fixmap(); + } else { + BUG(); + } + + kasan_copy_shadow(pgd); } /* @@ -454,28 +539,35 @@ void fixup_init(void) */ void __init paging_init(void) { - void *zero_page; + phys_addr_t pgd_phys = early_pgtable_alloc(); + pgd_t *pgd = pgd_set_fixmap(pgd_phys); - map_mem(); - fixup_executable(); + map_kernel(pgd); + map_mem(pgd); - /* allocate the zero page. */ - zero_page = early_alloc(PAGE_SIZE); - - bootmem_init(); - - empty_zero_page = virt_to_page(zero_page); + /* + * We want to reuse the original swapper_pg_dir so we don't have to + * communicate the new address to non-coherent secondaries in + * secondary_entry, and so cpu_switch_mm can generate the address with + * adrp+add rather than a load from some global variable. + * + * To do this we need to go via a temporary pgd. + */ + cpu_replace_ttbr1(__va(pgd_phys)); + memcpy(swapper_pg_dir, pgd, PAGE_SIZE); + cpu_replace_ttbr1(swapper_pg_dir); - /* Ensure the zero page is visible to the page table walker */ - dsb(ishst); + pgd_clear_fixmap(); + memblock_free(pgd_phys, PAGE_SIZE); /* - * TTBR0 is only used for the identity mapping at this stage. Make it - * point to zero page to avoid speculatively fetching new entries. + * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd + * allocated with it. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE, + SWAPPER_DIR_SIZE - PAGE_SIZE); + + bootmem_init(); } /* @@ -562,21 +654,13 @@ void vmemmap_free(unsigned long start, unsigned long end) } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_PGTABLE_LEVELS > 2 -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif -#if CONFIG_PGTABLE_LEVELS > 3 -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; -#endif - static inline pud_t * fixmap_pud(unsigned long addr) { pgd_t *pgd = pgd_offset_k(addr); BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - return pud_offset(pgd, addr); + return pud_offset_kimg(pgd, addr); } static inline pmd_t * fixmap_pmd(unsigned long addr) @@ -585,16 +669,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr) BUG_ON(pud_none(*pud) || pud_bad(*pud)); - return pmd_offset(pud, addr); + return pmd_offset_kimg(pud, addr); } static inline pte_t * fixmap_pte(unsigned long addr) { - pmd_t *pmd = fixmap_pmd(addr); - - BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); - - return pte_offset_kernel(pmd, addr); + return &bm_pte[pte_index(addr)]; } void __init early_fixmap_init(void) @@ -605,15 +685,26 @@ void __init early_fixmap_init(void) unsigned long addr = FIXADDR_START; pgd = pgd_offset_k(addr); - pgd_populate(&init_mm, pgd, bm_pud); - pud = pud_offset(pgd, addr); + if (CONFIG_PGTABLE_LEVELS > 3 && + !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) { + /* + * We only end up here if the kernel mapping and the fixmap + * share the top level pgd entry, which should only happen on + * 16k/4 levels configurations. 
+ */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + pud = pud_offset_kimg(pgd, addr); + } else { + pgd_populate(&init_mm, pgd, bm_pud); + pud = fixmap_pud(addr); + } pud_populate(&init_mm, pud, bm_pmd); - pmd = pmd_offset(pud, addr); + pmd = fixmap_pmd(addr); pmd_populate_kernel(&init_mm, pmd, bm_pte); /* * The boot-ioremap range spans multiple pmds, for which - * we are not preparted: + * we are not prepared: */ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); @@ -652,11 +743,10 @@ void __set_fixmap(enum fixed_addresses idx, } } -void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); - pgprot_t prot = PAGE_KERNEL_RO; - int size, offset; + int offset; void *dt_virt; /* @@ -673,7 +763,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) /* * Make sure that the FDT region can be mapped without the need to * allocate additional translation table pages, so that it is safe - * to call create_mapping() this early. + * to call create_mapping_noalloc() this early. * * On 64k pages, the FDT will be mapped using PTEs, so we need to * be in the same PMD as the rest of the fixmap. @@ -689,21 +779,73 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) dt_virt = (void *)dt_virt_base + offset; /* map the first chunk so we can read the size from the header */ - create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - SWAPPER_BLOCK_SIZE, prot); + create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), + dt_virt_base, SWAPPER_BLOCK_SIZE, prot); if (fdt_check_header(dt_virt) != 0) return NULL; - size = fdt_totalsize(dt_virt); - if (size > MAX_FDT_SIZE) + *size = fdt_totalsize(dt_virt); + if (*size > MAX_FDT_SIZE) return NULL; - if (offset + size > SWAPPER_BLOCK_SIZE) - create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); + if (offset + *size > SWAPPER_BLOCK_SIZE) + create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot); - memblock_reserve(dt_phys, size); + return dt_virt; +} + +void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +{ + void *dt_virt; + int size; + + dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); + if (!dt_virt) + return NULL; + memblock_reserve(dt_phys, size); return dt_virt; } + +int __init arch_ioremap_pud_supported(void) +{ + /* only 4k granule supports level 1 block mappings */ + return IS_ENABLED(CONFIG_ARM64_4K_PAGES); +} + +int __init arch_ioremap_pmd_supported(void) +{ + return 1; +} + +int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +{ + BUG_ON(phys & ~PUD_MASK); + set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +{ + BUG_ON(phys & ~PMD_MASK); + set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (!pud_sect(*pud)) + return 0; + pud_clear(pud); + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (!pmd_sect(*pmd)) + return 0; + pmd_clear(pmd); + return 1; +} diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0795c3a..ca6d268 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -37,14 +37,31 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, 
return 0; } +/* + * This function assumes that the range is mapped with PAGE_SIZE pages. + */ +static int __change_memory_common(unsigned long start, unsigned long size, + pgprot_t set_mask, pgprot_t clear_mask) +{ + struct page_change_data data; + int ret; + + data.set_mask = set_mask; + data.clear_mask = clear_mask; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range, + &data); + + flush_tlb_kernel_range(start, start + size); + return ret; +} + static int change_memory_common(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { unsigned long start = addr; unsigned long size = PAGE_SIZE*numpages; unsigned long end = start + size; - int ret; - struct page_change_data data; struct vm_struct *area; if (!PAGE_ALIGNED(addr)) { @@ -75,14 +92,7 @@ static int change_memory_common(unsigned long addr, int numpages, if (!numpages) return 0; - data.set_mask = set_mask; - data.clear_mask = clear_mask; - - ret = apply_to_page_range(&init_mm, start, size, change_page_range, - &data); - - flush_tlb_kernel_range(start, end); - return ret; + return __change_memory_common(start, size, set_mask, clear_mask); } int set_memory_ro(unsigned long addr, int numpages) @@ -114,3 +124,19 @@ int set_memory_x(unsigned long addr, int numpages) __pgprot(PTE_PXN)); } EXPORT_SYMBOL_GPL(set_memory_x); + +#ifdef CONFIG_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long addr = (unsigned long) page_address(page); + + if (enable) + __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(PTE_VALID), + __pgprot(0)); + else + __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(0), + __pgprot(PTE_VALID)); +} +#endif diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c164d2c..543f519 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -25,6 +25,8 @@ #include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> #include "proc-macros.S" @@ -137,9 +139,47 @@ ENTRY(cpu_do_switch_mm) bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb +alternative_if_not ARM64_WORKAROUND_CAVIUM_27456 ret + nop + nop + nop +alternative_else + ic iallu + dsb nsh + isb + ret +alternative_endif ENDPROC(cpu_do_switch_mm) + .pushsection ".idmap.text", "ax" +/* + * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * + * This is the low-level counterpart to cpu_replace_ttbr1, and should not be + * called by anything else. It can only be executed from a TTBR0 mapping. 
+ */ +ENTRY(idmap_cpu_replace_ttbr1) + mrs x2, daif + msr daifset, #0xf + + adrp x1, empty_zero_page + msr ttbr1_el1, x1 + isb + + tlbi vmalle1 + dsb nsh + isb + + msr ttbr1_el1, x0 + isb + + msr daif, x2 + + ret +ENDPROC(idmap_cpu_replace_ttbr1) + .popsection + /* * __cpu_setup * diff --git a/arch/avr32/boards/merisc/setup.c b/arch/avr32/boards/merisc/setup.c index 83d896c..718a6d7 100644 --- a/arch/avr32/boards/merisc/setup.c +++ b/arch/avr32/boards/merisc/setup.c @@ -27,7 +27,6 @@ #include <asm/io.h> #include <asm/setup.h> -#include <asm/gpio.h> #include <mach/at32ap700x.h> #include <mach/board.h> diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h index 366bbea..572739b 100644 --- a/arch/avr32/include/asm/cmpxchg.h +++ b/arch/avr32/include/asm/cmpxchg.h @@ -57,7 +57,7 @@ static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, " brne 1b\n" "2:\n" : [ret] "=&r"(ret), [m] "=m"(*m) - : "m"(m), [old] "ir"(old), [new] "r"(new) + : "m"(m), [old] "Ks21r"(old), [new] "r"(new) : "memory", "cc"); return ret; } diff --git a/arch/avr32/include/asm/pci.h b/arch/avr32/include/asm/pci.h index a32a023..0f5f134 100644 --- a/arch/avr32/include/asm/pci.h +++ b/arch/avr32/include/asm/pci.h @@ -5,6 +5,4 @@ #define PCI_DMA_BUS_IS_PHYS (1) -#include <asm-generic/pci-dma-compat.h> - #endif /* __ASM_AVR32_PCI_H__ */ diff --git a/arch/avr32/include/uapi/asm/unistd.h b/arch/avr32/include/uapi/asm/unistd.h index b60132b..60c0f3a 100644 --- a/arch/avr32/include/uapi/asm/unistd.h +++ b/arch/avr32/include/uapi/asm/unistd.h @@ -337,5 +337,6 @@ #define __NR_userfaultfd 322 #define __NR_membarrier 323 #define __NR_mlock2 324 +#define __NR_copy_file_range 325 #endif /* _UAPI__ASM_AVR32_UNISTD_H */ diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c index 209ae5a..e692889 100644 --- a/arch/avr32/kernel/setup.c +++ b/arch/avr32/kernel/setup.c @@ -49,13 +49,13 @@ static struct resource __initdata kernel_data = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM, + .flags = IORESOURCE_SYSTEM_RAM, }; static struct resource __initdata kernel_code = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM, + .flags = IORESOURCE_SYSTEM_RAM, .sibling = &kernel_data, }; @@ -134,7 +134,7 @@ add_physical_memory(resource_size_t start, resource_size_t end) new->start = start; new->end = end; new->name = "System RAM"; - new->flags = IORESOURCE_MEM; + new->flags = IORESOURCE_SYSTEM_RAM; *pprev = new; } diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S index f9c68fa..cb39915 100644 --- a/arch/avr32/kernel/syscall-stubs.S +++ b/arch/avr32/kernel/syscall-stubs.S @@ -124,3 +124,12 @@ __sys_process_vm_writev: call sys_process_vm_writev sub sp, -4 popm pc + + .global __sys_copy_file_range + .type __sys_copy_file_range,@function +__sys_copy_file_range: + pushm lr + st.w --sp, ARG6 + call sys_copy_file_range + sub sp, -4 + popm pc diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S index 1915a443..64d71a7 100644 --- a/arch/avr32/kernel/syscall_table.S +++ b/arch/avr32/kernel/syscall_table.S @@ -338,4 +338,5 @@ sys_call_table: .long sys_userfaultfd .long sys_membarrier .long sys_mlock2 + .long __sys_copy_file_range /* 325 */ .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c index 5020057..83c2a00 100644 --- a/arch/avr32/mach-at32ap/pio.c +++ b/arch/avr32/mach-at32ap/pio.c @@ -14,8 +14,8 @@ #include 
<linux/fs.h> #include <linux/platform_device.h> #include <linux/irq.h> +#include <linux/gpio.h> -#include <asm/gpio.h> #include <asm/io.h> #include <mach/portmux.h> diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index af76634..a63c122 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -1233,8 +1233,6 @@ source "drivers/pci/Kconfig" source "drivers/pcmcia/Kconfig" -source "drivers/pci/hotplug/Kconfig" - endmenu menu "Executable file formats" diff --git a/arch/blackfin/include/asm/pci.h b/arch/blackfin/include/asm/pci.h index 14efc0d..11ea1cb 100644 --- a/arch/blackfin/include/asm/pci.h +++ b/arch/blackfin/include/asm/pci.h @@ -4,7 +4,6 @@ #define _ASM_BFIN_PCI_H #include <linux/scatterlist.h> -#include <asm-generic/pci-dma-compat.h> #include <asm-generic/pci.h> #define PCIBIOS_MIN_IO 0x00001000 diff --git a/arch/blackfin/include/asm/pgtable.h b/arch/blackfin/include/asm/pgtable.h index b88a155..c1ee3d6 100644 --- a/arch/blackfin/include/asm/pgtable.h +++ b/arch/blackfin/include/asm/pgtable.h @@ -97,6 +97,8 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long, unsigned long); #define HAVE_ARCH_FB_UNMAPPED_AREA +#define pgprot_writecombine pgprot_noncached + #include <asm-generic/pgtable.h> #endif /* _BLACKFIN_PGTABLE_H */ diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index a017359..c5d3128 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -11,6 +11,8 @@ #include <linux/err.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/gpio/driver.h> +/* FIXME: consumer API required for gpio_set_value() etc, get rid of this */ #include <linux/gpio.h> #include <linux/irq.h> @@ -1159,7 +1161,7 @@ static int bfin_gpiolib_direction_output(struct gpio_chip *chip, unsigned gpio, static int bfin_gpiolib_get_value(struct gpio_chip *chip, unsigned gpio) { - return bfin_gpio_get_value(gpio); + return !!bfin_gpio_get_value(gpio); } static void bfin_gpiolib_set_value(struct gpio_chip *chip, unsigned gpio, int value) @@ -1197,7 +1199,7 @@ static struct gpio_chip bfin_chip = { static int __init bfin_gpiolib_setup(void) { - return gpiochip_add(&bfin_chip); + return gpiochip_add_data(&bfin_chip, NULL); } arch_initcall(bfin_gpiolib_setup); #endif diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c index 86b1cd3..e272bca 100644 --- a/arch/blackfin/kernel/debug-mmrs.c +++ b/arch/blackfin/kernel/debug-mmrs.c @@ -11,9 +11,9 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/i2c/bfin_twi.h> +#include <linux/gpio.h> #include <asm/blackfin.h> -#include <asm/gpio.h> #include <asm/gptimers.h> #include <asm/bfin_can.h> #include <asm/bfin_dma.h> diff --git a/arch/blackfin/mach-bf527/boards/ezbrd.c b/arch/blackfin/mach-bf527/boards/ezbrd.c index a3a5723..80bcfd1 100644 --- a/arch/blackfin/mach-bf527/boards/ezbrd.c +++ b/arch/blackfin/mach-bf527/boards/ezbrd.c @@ -279,7 +279,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = { #endif #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include <linux/spi/ad7879.h> +#include <linux/platform_data/ad7879.h> static const struct ad7879_platform_data bfin_ad7879_ts_info = { .model = 7879, /* Model = AD7879 */ .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c index d4219e8..571edfd 100644 --- a/arch/blackfin/mach-bf527/boards/ezkit.c +++ b/arch/blackfin/mach-bf527/boards/ezkit.c @@ 
-477,7 +477,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = { #endif #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include <linux/spi/ad7879.h> +#include <linux/platform_data/ad7879.h> static const struct ad7879_platform_data bfin_ad7879_ts_info = { .model = 7879, /* Model = AD7879 */ .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ diff --git a/arch/blackfin/mach-bf527/boards/tll6527m.c b/arch/blackfin/mach-bf527/boards/tll6527m.c index a0f5856..c1acce4 100644 --- a/arch/blackfin/mach-bf527/boards/tll6527m.c +++ b/arch/blackfin/mach-bf527/boards/tll6527m.c @@ -29,7 +29,7 @@ #include <asm/dpmc.h> #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include <linux/spi/ad7879.h> +#include <linux/platform_data/ad7879.h> #define LCD_BACKLIGHT_GPIO 0x40 /* TLL6527M uses TLL7UIQ35 / ADI LCD EZ Extender. AD7879 AUX GPIO is used for * LCD Backlight Enable diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index c181543..eaec7b4 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -776,7 +776,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = { #endif #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include <linux/spi/ad7879.h> +#include <linux/platform_data/ad7879.h> static const struct ad7879_platform_data bfin_ad7879_ts_info = { .model = 7879, /* Model = AD7879 */ .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ diff --git a/arch/blackfin/mach-bf538/boards/ezkit.c b/arch/blackfin/mach-bf538/boards/ezkit.c index ae2fcbb..1b6a52a 100644 --- a/arch/blackfin/mach-bf538/boards/ezkit.c +++ b/arch/blackfin/mach-bf538/boards/ezkit.c @@ -15,9 +15,9 @@ #include <linux/spi/flash.h> #include <linux/irq.h> #include <linux/interrupt.h> +#include <linux/gpio.h> #include <asm/bfin5xx_spi.h> #include <asm/dma.h> -#include <asm/gpio.h> #include <asm/nand.h> #include <asm/portmux.h> #include <asm/dpmc.h> @@ -521,7 +521,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = { #endif /* CONFIG_SPI_BFIN5XX */ #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include <linux/spi/ad7879.h> +#include <linux/platform_data/ad7879.h> static const struct ad7879_platform_data bfin_ad7879_ts_info = { .model = 7879, /* Model = AD7879 */ .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ diff --git a/arch/blackfin/mach-bf538/ext-gpio.c b/arch/blackfin/mach-bf538/ext-gpio.c index 471a9b1..48c1002 100644 --- a/arch/blackfin/mach-bf538/ext-gpio.c +++ b/arch/blackfin/mach-bf538/ext-gpio.c @@ -8,8 +8,8 @@ #include <linux/module.h> #include <linux/err.h> +#include <linux/gpio.h> #include <asm/blackfin.h> -#include <asm/gpio.h> #include <asm/portmux.h> #define DEFINE_REG(reg, off) \ @@ -116,9 +116,9 @@ static struct gpio_chip bf538_porte_chip = { static int __init bf538_extgpio_setup(void) { - return gpiochip_add(&bf538_portc_chip) | - gpiochip_add(&bf538_portd_chip) | - gpiochip_add(&bf538_porte_chip); + return gpiochip_add_data(&bf538_portc_chip, NULL) | + gpiochip_add_data(&bf538_portd_chip, NULL) | + gpiochip_add_data(&bf538_porte_chip, NULL); } arch_initcall(bf538_extgpio_setup); diff --git a/arch/blackfin/mach-bf548/boards/cm_bf548.c b/arch/blackfin/mach-bf548/boards/cm_bf548.c index 6d5ffde..120c994 100644 --- a/arch/blackfin/mach-bf548/boards/cm_bf548.c +++ b/arch/blackfin/mach-bf548/boards/cm_bf548.c @@ -17,9 +17,9 @@ #include <linux/irq.h> #include <linux/interrupt.h> #include <linux/usb/musb.h> +#include <linux/gpio.h> #include <asm/bfin5xx_spi.h> #include <asm/dma.h> -#include 
<asm/gpio.h> #include <asm/nand.h> #include <asm/portmux.h> #include <asm/bfin_sdh.h> diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c index 4204b98..3cdd483 100644 --- a/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/arch/blackfin/mach-bf548/boards/ezkit.c @@ -20,9 +20,9 @@ #include <linux/pinctrl/machine.h> #include <linux/pinctrl/pinconf-generic.h> #include <linux/platform_data/pinctrl-adi2.h> +#include <linux/gpio.h> #include <asm/bfin5xx_spi.h> #include <asm/dma.h> -#include <asm/gpio.h> #include <asm/nand.h> #include <asm/dpmc.h> #include <asm/bfin_sport.h> diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index c7928d8..aad5d74 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -21,8 +21,8 @@ #include <linux/pinctrl/pinconf-generic.h> #include <linux/platform_data/pinctrl-adi2.h> #include <linux/spi/adi_spi3.h> +#include <linux/gpio.h> #include <asm/dma.h> -#include <asm/gpio.h> #include <asm/nand.h> #include <asm/dpmc.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index e8d4d74..4986b4fb 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -17,13 +17,13 @@ #include <linux/irq.h> #include <linux/sched.h> #include <linux/syscore_ops.h> +#include <linux/gpio.h> #include <asm/delay.h> #ifdef CONFIG_IPIPE #include <linux/ipipe.h> #endif #include <asm/traps.h> #include <asm/blackfin.h> -#include <asm/gpio.h> #include <asm/irq_handler.h> #include <asm/dpmc.h> #include <asm/traps.h> diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c index a66d979..5ece38a 100644 --- a/arch/blackfin/mach-common/pm.c +++ b/arch/blackfin/mach-common/pm.c @@ -15,9 +15,9 @@ #include <linux/io.h> #include <linux/irq.h> #include <linux/delay.h> +#include <linux/gpio.h> #include <asm/cplb.h> -#include <asm/gpio.h> #include <asm/dma.h> #include <asm/dpmc.h> #include <asm/pm.h> diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 0030e21..23c4ef5 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -333,7 +333,7 @@ void secondary_start_kernel(void) /* We are done with local CPU inits, unblock the boot CPU. */ set_cpu_online(cpu, true); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void __init smp_prepare_boot_cpu(void) diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 79049d4..5aa8ea8 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -36,6 +36,7 @@ config GENERIC_HWEIGHT config GENERIC_BUG def_bool y + depends on BUG config C6X_BIG_KERNEL bool "Build a big kernel" diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c index 72e17f7..786e36e 100644 --- a/arch/c6x/kernel/setup.c +++ b/arch/c6x/kernel/setup.c @@ -281,8 +281,6 @@ notrace void __init machine_init(unsigned long dt_ptr) */ set_ist(_vectors_start); - lockdep_init(); - /* * dtb is passed in from bootloader. * fdt is linked in blob. 
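
[Editor's note] The Blackfin conversions above replace gpiochip_add(&chip) with gpiochip_add_data(&chip, NULL); passing NULL keeps the old behaviour, since those drivers still reach their state through globals. The point of the new call is to let a driver hand gpiolib a private pointer that its callbacks can later recover with gpiochip_get_data(), instead of using container_of() tricks or global state. A minimal sketch of that pattern follows (hypothetical "my_gpio" driver, not part of this series):

	#include <linux/gpio/driver.h>
	#include <linux/io.h>
	#include <linux/bitops.h>

	struct my_gpio {
		struct gpio_chip chip;
		void __iomem *base;	/* per-instance register window */
	};

	static int my_gpio_get(struct gpio_chip *chip, unsigned int offset)
	{
		/* recover the pointer that was registered with gpiochip_add_data() */
		struct my_gpio *g = gpiochip_get_data(chip);

		return !!(readl(g->base) & BIT(offset));
	}

	static int my_gpio_register(struct my_gpio *g)
	{
		g->chip.label = "my-gpio";
		g->chip.get = my_gpio_get;
		g->chip.ngpio = 32;
		g->chip.base = -1;	/* let gpiolib pick the GPIO number range */

		/* the second argument is what gpiochip_get_data() returns later */
		return gpiochip_add_data(&g->chip, g);
	}
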
diff --git a/arch/cris/include/asm/pci.h b/arch/cris/include/asm/pci.h index c15b4b4..b1b289d 100644 --- a/arch/cris/include/asm/pci.h +++ b/arch/cris/include/asm/pci.h @@ -48,9 +48,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, #endif /* __KERNEL__ */ -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include <asm-generic/pci-dma-compat.h> - /* generic pci stuff */ #include <asm-generic/pci.h> diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h index e43d22c..809cfc6 100644 --- a/arch/frv/include/asm/pci.h +++ b/arch/frv/include/asm/pci.h @@ -15,7 +15,6 @@ #include <linux/mm.h> #include <linux/scatterlist.h> -#include <asm-generic/pci-dma-compat.h> #include <asm-generic/pci.h> struct pci_dev; @@ -32,12 +31,6 @@ extern void consistent_sync_page(struct page *page, unsigned long offset, size_t size, int direction); #endif -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle); - -extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); - /* Return the index of the PCI controller for device PDEV. */ #define pci_controller_num(PDEV) (0) diff --git a/arch/frv/include/asm/serial.h b/arch/frv/include/asm/serial.h index dbb8259..bce0d0d 100644 --- a/arch/frv/include/asm/serial.h +++ b/arch/frv/include/asm/serial.h @@ -13,6 +13,6 @@ */ #define BASE_BAUD 0 -#define STD_COM_FLAGS ASYNC_BOOT_AUTOCONF +#define STD_COM_FLAGS UPF_BOOT_AUTOCONF #define SERIAL_PORT_DFNS diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index ff759f2..983bae7d2 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -180,7 +180,7 @@ void start_secondary(void) local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index fb0515e..b534eba 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -574,12 +574,8 @@ config PCI_DOMAINS config PCI_SYSCALL def_bool PCI -source "drivers/pci/pcie/Kconfig" - source "drivers/pci/Kconfig" -source "drivers/pci/hotplug/Kconfig" - source "drivers/pcmcia/Kconfig" endmenu diff --git a/arch/ia64/include/asm/gpio.h b/arch/ia64/include/asm/gpio.h deleted file mode 100644 index b3799d8..0000000 --- a/arch/ia64/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index a865d2a..5de673a 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -433,6 +433,7 @@ static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned lo return ioremap(phys_addr, size); } #define ioremap_cache ioremap_cache +#define ioremap_uc ioremap_nocache /* diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 07039d1..c0835b0 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -50,8 +50,6 @@ struct pci_dev; extern unsigned long ia64_max_iommu_merge_mask; #define PCI_DMA_BUS_IS_PHYS (ia64_max_iommu_merge_mask == ~0UL) -#include <asm-generic/pci-dma-compat.h> - #define HAVE_PCI_MMAP extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 3027e75..ce11247 100644 --- a/arch/ia64/include/asm/rwsem.h +++ 
b/arch/ia64/include/asm/rwsem.h @@ -3,7 +3,7 @@ * * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com> * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com> - * Copyright (C) 2005 Christoph Lameter <clameter@sgi.com> + * Copyright (C) 2005 Christoph Lameter <cl@linux.com> * * Based on asm-i386/rwsem.h and other architecture implementation. * diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index caae3f4..300dac3 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1178,7 +1178,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, efi_memory_desc_t *md; u64 efi_desc_size; char *name; - unsigned long flags; + unsigned long flags, desc; efi_map_start = __va(ia64_boot_param->efi_memmap); efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; @@ -1193,6 +1193,8 @@ efi_initialize_iomem_resources(struct resource *code_resource, continue; flags = IORESOURCE_MEM | IORESOURCE_BUSY; + desc = IORES_DESC_NONE; + switch (md->type) { case EFI_MEMORY_MAPPED_IO: @@ -1207,14 +1209,17 @@ efi_initialize_iomem_resources(struct resource *code_resource, if (md->attribute & EFI_MEMORY_WP) { name = "System ROM"; flags |= IORESOURCE_READONLY; - } else if (md->attribute == EFI_MEMORY_UC) + } else if (md->attribute == EFI_MEMORY_UC) { name = "Uncached RAM"; - else + } else { name = "System RAM"; + flags |= IORESOURCE_SYSRAM; + } break; case EFI_ACPI_MEMORY_NVS: name = "ACPI Non-volatile Storage"; + desc = IORES_DESC_ACPI_NV_STORAGE; break; case EFI_UNUSABLE_MEMORY: @@ -1224,6 +1229,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, case EFI_PERSISTENT_MEMORY: name = "Persistent Memory"; + desc = IORES_DESC_PERSISTENT_MEMORY; break; case EFI_RESERVED_TYPE: @@ -1246,6 +1252,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, res->start = md->phys_addr; res->end = md->phys_addr + efi_md_size(md) - 1; res->flags = flags; + res->desc = desc; if (insert_resource(&iomem_resource, res) < 0) kfree(res); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4f118b0..2029a38 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -80,17 +80,17 @@ unsigned long vga_console_membase; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; unsigned long ia64_max_cacheline_size; diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 0e76fad..74fe317 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -454,7 +454,7 @@ start_secondary (void *unused) preempt_disable(); smp_callin(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); return 0; } diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 622772b..e7ae608 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -1336,8 +1336,11 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) * Don't call tty_write_message() if we're in the kernel; we might * be holding locks... 
*/ - if (user_mode(regs)) - tty_write_message(current->signal->tty, buf); + if (user_mode(regs)) { + struct tty_struct *tty = get_current_tty(); + tty_write_message(tty, buf); + tty_kref_put(tty); + } buf[len-1] = '\0'; /* drop '\r' */ /* watch for command names containing %s */ printk(KERN_WARNING "%s", buf); diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index f50d4b3..85de86d 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -38,7 +38,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) if (pud) { pmd = pmd_alloc(mm, pud, taddr); if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, taddr); + pte = pte_alloc_map(mm, pmd, taddr); } return pte; } diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index fc505d5..41caa99 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -17,14 +17,14 @@ * * The standard boot ROM sequence for an x86 machine uses the BIOS * to select an initial video card for boot display. This boot video - * card will have it's BIOS copied to C0000 in system RAM. + * card will have its BIOS copied to 0xC0000 in system RAM. * IORESOURCE_ROM_SHADOW is used to associate the boot video * card with this copy. On laptops this copy has to be used since * the main ROM may be compressed or combined with another image. * See pci_map_rom() for use of this flag. Before marking the device * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set - * by either arch cde or vga-arbitration, if so only apply the fixup to this - * already determined primary video card. + * by either arch code or vga-arbitration; if so only apply the fixup to this + * already-determined primary video card. */ static void pci_fixup_video(struct pci_dev *pdev) @@ -32,6 +32,7 @@ static void pci_fixup_video(struct pci_dev *pdev) struct pci_dev *bridge; struct pci_bus *bus; u16 config; + struct resource *res; if ((strcmp(ia64_platform_name, "dig") != 0) && (strcmp(ia64_platform_name, "hpzx1") != 0)) @@ -61,8 +62,18 @@ static void pci_fixup_video(struct pci_dev *pdev) if (!vga_default_device() || pdev == vga_default_device()) { pci_read_config_word(pdev, PCI_COMMAND, &config); if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { - pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; - dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n"); + res = &pdev->resource[PCI_ROM_RESOURCE]; + + pci_disable_rom(pdev); + if (res->parent) + release_resource(res); + + res->start = 0xC0000; + res->end = res->start + 0x20000 - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", + res); } } } diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c index 0640739..231234c 100644 --- a/arch/ia64/sn/kernel/io_acpi_init.c +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -429,7 +429,8 @@ sn_acpi_slot_fixup(struct pci_dev *dev) void __iomem *addr; struct pcidev_info *pcidev_info = NULL; struct sn_irq_info *sn_irq_info = NULL; - size_t image_size, size; + struct resource *res; + size_t size; if (sn_acpi_get_pcidev_info(dev, &pcidev_info, &sn_irq_info)) { panic("%s: Failure obtaining pcidev_info for %s\n", @@ -443,17 +444,20 @@ sn_acpi_slot_fixup(struct pci_dev *dev) * of the shadowed copy, and the actual length of the ROM image. 
*/ size = pci_resource_len(dev, PCI_ROM_RESOURCE); - addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], - size); - image_size = pci_get_rom_size(dev, addr, size); - dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr; - dev->resource[PCI_ROM_RESOURCE].end = - (unsigned long) addr + image_size - 1; - dev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_BIOS_COPY; + + res = &dev->resource[PCI_ROM_RESOURCE]; + + pci_disable_rom(dev); + if (res->parent) + release_resource(res); + + res->start = pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]; + res->end = res->start + size - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; } sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); } - EXPORT_SYMBOL(sn_acpi_slot_fixup); diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 1be65eb..c15a41e 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -150,7 +150,8 @@ void sn_io_slot_fixup(struct pci_dev *dev) { int idx; - unsigned long addr, end, size, start; + struct resource *res; + unsigned long addr, size; struct pcidev_info *pcidev_info; struct sn_irq_info *sn_irq_info; int status; @@ -175,55 +176,41 @@ sn_io_slot_fixup(struct pci_dev *dev) /* Copy over PIO Mapped Addresses */ for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { - - if (!pcidev_info->pdi_pio_mapped_addr[idx]) { + if (!pcidev_info->pdi_pio_mapped_addr[idx]) continue; - } - start = dev->resource[idx].start; - end = dev->resource[idx].end; - size = end - start; - if (size == 0) { + res = &dev->resource[idx]; + + size = res->end - res->start; + if (size == 0) continue; - } - addr = pcidev_info->pdi_pio_mapped_addr[idx]; - addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET; - dev->resource[idx].start = addr; - dev->resource[idx].end = addr + size; + + res->start = pcidev_info->pdi_pio_mapped_addr[idx]; + res->end = addr + size; /* * if it's already in the device structure, remove it before * inserting */ - if (dev->resource[idx].parent && dev->resource[idx].parent->child) - release_resource(&dev->resource[idx]); + if (res->parent && res->parent->child) + release_resource(res); - if (dev->resource[idx].flags & IORESOURCE_IO) - insert_resource(&ioport_resource, &dev->resource[idx]); + if (res->flags & IORESOURCE_IO) + insert_resource(&ioport_resource, res); else - insert_resource(&iomem_resource, &dev->resource[idx]); + insert_resource(&iomem_resource, res); /* - * If ROM, set the actual ROM image size, and mark as - * shadowed in PROM. + * If ROM, mark as shadowed in PROM. 
*/ if (idx == PCI_ROM_RESOURCE) { - size_t image_size; - void __iomem *rom; - - rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE), - size + 1); - image_size = pci_get_rom_size(dev, rom, size + 1); - dev->resource[PCI_ROM_RESOURCE].end = - dev->resource[PCI_ROM_RESOURCE].start + - image_size - 1; - dev->resource[PCI_ROM_RESOURCE].flags |= - IORESOURCE_ROM_BIOS_COPY; + pci_disable_rom(dev); + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; } } sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); } - EXPORT_SYMBOL(sn_io_slot_fixup); /* diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 2841c0a..c82b292 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -387,8 +387,6 @@ config ISA source "drivers/pcmcia/Kconfig" -source "drivers/pci/hotplug/Kconfig" - endmenu diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index a5ecef7..136c69f 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -70,14 +70,14 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; unsigned long memory_start; diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index a468467..f98d2f6 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -432,7 +432,7 @@ int __init start_secondary(void *unused) */ local_flush_tlb_all(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); return 0; } diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index 0d4146f..11fa717 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -59,21 +59,24 @@ void free_initrd_mem(unsigned long, unsigned long); void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, }; - unsigned long max_dma; - unsigned long low; unsigned long start_pfn; #ifdef CONFIG_MMU - start_pfn = START_PFN(0); - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = MAX_LOW_PFN(0); - - if (low < max_dma){ - zones_size[ZONE_DMA] = low - start_pfn; - zones_size[ZONE_NORMAL] = 0; - } else { - zones_size[ZONE_DMA] = low - start_pfn; - zones_size[ZONE_NORMAL] = low - max_dma; + { + unsigned long low; + unsigned long max_dma; + + start_pfn = START_PFN(0); + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + low = MAX_LOW_PFN(0); + + if (low < max_dma) { + zones_size[ZONE_DMA] = low - start_pfn; + zones_size[ZONE_NORMAL] = 0; + } else { + zones_size[ZONE_DMA] = low - start_pfn; + zones_size[ZONE_NORMAL] = low - max_dma; + } } #else zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; diff --git a/arch/m68k/68360/Makefile b/arch/m68k/68360/Makefile deleted file mode 100644 index 591ce42..0000000 --- a/arch/m68k/68360/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# -# Makefile for 68360 machines. -# -model-y := ram -model-$(CONFIG_ROMKERNEL) := rom - -obj-y := config.o commproc.o entry.o ints.o - -extra-y := head.o - -$(obj)/head.o: $(obj)/head-$(model-y).o - ln -sf head-$(model-y).o $(obj)/head.o diff --git a/arch/m68k/68360/commproc.c b/arch/m68k/68360/commproc.c deleted file mode 100644 index 14d7f35..0000000 --- a/arch/m68k/68360/commproc.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * General Purpose functions for the global management of the - * Communication Processor Module. 
- * - * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com> - * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) - * - * In addition to the individual control of the communication - * channels, there are a few functions that globally affect the - * communication processor. - * - * Buffer descriptors must be allocated from the dual ported memory - * space. The allocator for that is here. When the communication - * process is reset, we reclaim the memory available. There is - * currently no deallocator for this memory. - * The amount of space available is platform dependent. On the - * MBX, the EPPC software loads additional microcode into the - * communication processor, and uses some of the DP ram for this - * purpose. Current, the first 512 bytes and the last 256 bytes of - * memory are used. Right now I am conservative and only use the - * memory that can never be used for microcode. If there are - * applications that require more DP ram, we can expand the boundaries - * but then we have to be careful of any downloaded microcode. - * - */ - -/* - * Michael Leslie <mleslie@lineo.com> - * adapted Dan Malek's ppc8xx drivers to M68360 - * - */ - -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/param.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <asm/irq.h> -#include <asm/m68360.h> -#include <asm/commproc.h> - -/* #include <asm/page.h> */ -/* #include <asm/pgtable.h> */ -extern void *_quicc_base; -extern unsigned int system_clock; - - -static uint dp_alloc_base; /* Starting offset in DP ram */ -static uint dp_alloc_top; /* Max offset + 1 */ - -#if 0 -static void *host_buffer; /* One page of host buffer */ -static void *host_end; /* end + 1 */ -#endif - -/* struct cpm360_t *cpmp; */ /* Pointer to comm processor space */ - -QUICC *pquicc; -/* QUICC *quicc_dpram; */ /* mleslie - temporary; use extern pquicc elsewhere instead */ - - -/* CPM interrupt vector functions. */ -struct cpm_action { - irq_handler_t handler; - void *dev_id; -}; -static struct cpm_action cpm_vecs[CPMVEC_NR]; -static void cpm_interrupt(int irq, void * dev, struct pt_regs * regs); -static void cpm_error_interrupt(void *); - -/* prototypes: */ -void cpm_install_handler(int vec, irq_handler_t handler, void *dev_id); -void m360_cpm_reset(void); - - - - -void __init m360_cpm_reset() -{ -/* pte_t *pte; */ - - pquicc = (struct quicc *)(_quicc_base); /* initialized in crt0_rXm.S */ - - /* Perform a CPM reset. */ - pquicc->cp_cr = (SOFTWARE_RESET | CMD_FLAG); - - /* Wait for CPM to become ready (should be 2 clocks). */ - while (pquicc->cp_cr & CMD_FLAG); - - /* On the recommendation of the 68360 manual, p. 7-60 - * - Set sdma interrupt service mask to 7 - * - Set sdma arbitration ID to 4 - */ - pquicc->sdma_sdcr = 0x0740; - - - /* Claim the DP memory for our use. - */ - dp_alloc_base = CPM_DATAONLY_BASE; - dp_alloc_top = dp_alloc_base + CPM_DATAONLY_SIZE; - - - /* Set the host page for allocation. - */ - /* host_buffer = host_page_addr; */ - /* host_end = host_page_addr + PAGE_SIZE; */ - - /* pte = find_pte(&init_mm, host_page_addr); */ - /* pte_val(*pte) |= _PAGE_NO_CACHE; */ - /* flush_tlb_page(current->mm->mmap, host_buffer); */ - - /* Tell everyone where the comm processor resides. - */ -/* cpmp = (cpm360_t *)commproc; */ -} - - -/* This is called during init_IRQ. We used to do it above, but this - * was too early since init_IRQ was not yet called. 
- */ -void -cpm_interrupt_init(void) -{ - /* Initialize the CPM interrupt controller. - * NOTE THAT pquicc had better have been initialized! - * reference: MC68360UM p. 7-377 - */ - pquicc->intr_cicr = - (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) | - (CPM_INTERRUPT << 13) | - CICR_HP_MASK | - (CPM_VECTOR_BASE << 5) | - CICR_SPS; - - /* mask all CPM interrupts from reaching the cpu32 core: */ - pquicc->intr_cimr = 0; - - - /* mles - If I understand correctly, the 360 just pops over to the CPM - * specific vector, obviating the necessity to vector through the IRQ - * whose priority the CPM is set to. This needs a closer look, though. - */ - - /* Set our interrupt handler with the core CPU. */ -/* if (request_irq(CPM_INTERRUPT, cpm_interrupt, 0, "cpm", NULL) != 0) */ -/* panic("Could not allocate CPM IRQ!"); */ - - /* Install our own error handler. - */ - /* I think we want to hold off on this one for the moment - mles */ - /* cpm_install_handler(CPMVEC_ERROR, cpm_error_interrupt, NULL); */ - - /* master CPM interrupt enable */ - /* pquicc->intr_cicr |= CICR_IEN; */ /* no such animal for 360 */ -} - - - -/* CPM interrupt controller interrupt. -*/ -static void -cpm_interrupt(int irq, void * dev, struct pt_regs * regs) -{ - /* uint vec; */ - - /* mles: Note that this stuff is currently being performed by - * M68360_do_irq(int vec, struct pt_regs *fp), in ../ints.c */ - - /* figure out the vector */ - /* call that vector's handler */ - /* clear the irq's bit in the service register */ - -#if 0 /* old 860 stuff: */ - /* Get the vector by setting the ACK bit and then reading - * the register. - */ - ((volatile immap_t *)IMAP_ADDR)->im_cpic.cpic_civr = 1; - vec = ((volatile immap_t *)IMAP_ADDR)->im_cpic.cpic_civr; - vec >>= 11; - - - if (cpm_vecs[vec].handler != 0) - (*cpm_vecs[vec].handler)(cpm_vecs[vec].dev_id); - else - ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr &= ~(1 << vec); - - /* After servicing the interrupt, we have to remove the status - * indicator. - */ - ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cisr |= (1 << vec); -#endif - -} - -/* The CPM can generate the error interrupt when there is a race condition - * between generating and masking interrupts. All we have to do is ACK it - * and return. This is a no-op function so we don't need any special - * tests in the interrupt handler. - */ -static void -cpm_error_interrupt(void *dev) -{ -} - -/* Install a CPM interrupt handler. -*/ -void -cpm_install_handler(int vec, irq_handler_t handler, void *dev_id) -{ - - request_irq(vec, handler, 0, "timer", dev_id); - -/* if (cpm_vecs[vec].handler != 0) */ -/* printk(KERN_INFO "CPM interrupt %x replacing %x\n", */ -/* (uint)handler, (uint)cpm_vecs[vec].handler); */ -/* cpm_vecs[vec].handler = handler; */ -/* cpm_vecs[vec].dev_id = dev_id; */ - - /* ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr |= (1 << vec); */ -/* pquicc->intr_cimr |= (1 << vec); */ - -} - -/* Free a CPM interrupt handler. -*/ -void -cpm_free_handler(int vec) -{ - cpm_vecs[vec].handler = NULL; - cpm_vecs[vec].dev_id = NULL; - /* ((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr &= ~(1 << vec); */ - pquicc->intr_cimr &= ~(1 << vec); -} - - - - -/* Allocate some memory from the dual ported ram. We may want to - * enforce alignment restrictions, but right now everyone is a good - * citizen. 
- */ -uint -m360_cpm_dpalloc(uint size) -{ - uint retloc; - - if ((dp_alloc_base + size) >= dp_alloc_top) - return(CPM_DP_NOSPACE); - - retloc = dp_alloc_base; - dp_alloc_base += size; - - return(retloc); -} - - -#if 0 /* mleslie - for now these are simply kmalloc'd */ -/* We also own one page of host buffer space for the allocation of - * UART "fifos" and the like. - */ -uint -m360_cpm_hostalloc(uint size) -{ - uint retloc; - - if ((host_buffer + size) >= host_end) - return(0); - - retloc = host_buffer; - host_buffer += size; - - return(retloc); -} -#endif - - -/* Set a baud rate generator. This needs lots of work. There are - * four BRGs, any of which can be wired to any channel. - * The internal baud rate clock is the system clock divided by 16. - * This assumes the baudrate is 16x oversampled by the uart. - */ -/* #define BRG_INT_CLK (((bd_t *)__res)->bi_intfreq * 1000000) */ -#define BRG_INT_CLK system_clock -#define BRG_UART_CLK (BRG_INT_CLK/16) - -void -m360_cpm_setbrg(uint brg, uint rate) -{ - volatile uint *bp; - - /* This is good enough to get SMCs running..... - */ - /* bp = (uint *)&cpmp->cp_brgc1; */ - bp = (volatile uint *)(&pquicc->brgc[0].l); - bp += brg; - *bp = ((BRG_UART_CLK / rate - 1) << 1) | CPM_BRG_EN; -} - - -/* - * Local variables: - * c-indent-level: 4 - * c-basic-offset: 4 - * tab-width: 4 - * End: - */ diff --git a/arch/m68k/68360/config.c b/arch/m68k/68360/config.c deleted file mode 100644 index b65fe4e..0000000 --- a/arch/m68k/68360/config.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * config.c - non-mmu 68360 platform initialization code - * - * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com> - * Copyright (C) 1993 Hamish Macdonald - * Copyright (C) 1999 D. Jeff Dionne <jeff@uclinux.org> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ - -#include <stdarg.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/irq.h> - -#include <asm/setup.h> -#include <asm/pgtable.h> -#include <asm/machdep.h> -#include <asm/m68360.h> - -#ifdef CONFIG_UCQUICC -#include <asm/bootstd.h> -#endif - -extern void m360_cpm_reset(void); - -// Mask to select if the PLL prescaler is enabled. 
-#define MCU_PREEN ((unsigned short)(0x0001 << 13)) - -#if defined(CONFIG_UCQUICC) -#define OSCILLATOR (unsigned long int)33000000 -#endif - -static irq_handler_t timer_interrupt; -unsigned long int system_clock; - -extern QUICC *pquicc; - -/* TODO DON"T Hard Code this */ -/* calculate properly using the right PLL and prescaller */ -// unsigned int system_clock = 33000000l; -extern unsigned long int system_clock; //In kernel setup.c - - -static irqreturn_t hw_tick(int irq, void *dummy) -{ - /* Reset Timer1 */ - /* TSTAT &= 0; */ - - pquicc->timer_ter1 = 0x0002; /* clear timer event */ - - return timer_interrupt(irq, dummy); -} - -static struct irqaction m68360_timer_irq = { - .name = "timer", - .flags = IRQF_TIMER, - .handler = hw_tick, -}; - -void hw_timer_init(irq_handler_t handler) -{ - unsigned char prescaler; - unsigned short tgcr_save; - -#if 0 - /* Restart mode, Enable int, 32KHz, Enable timer */ - TCTL = TCTL_OM | TCTL_IRQEN | TCTL_CLKSOURCE_32KHZ | TCTL_TEN; - /* Set prescaler (Divide 32KHz by 32)*/ - TPRER = 31; - /* Set compare register 32Khz / 32 / 10 = 100 */ - TCMP = 10; - - request_irq(IRQ_MACHSPEC | 1, timer_routine, 0, "timer", NULL); -#endif - - /* General purpose quicc timers: MC68360UM p7-20 */ - - /* Set up timer 1 (in [1..4]) to do 100Hz */ - tgcr_save = pquicc->timer_tgcr & 0xfff0; - pquicc->timer_tgcr = tgcr_save; /* stop and reset timer 1 */ - /* pquicc->timer_tgcr |= 0x4444; */ /* halt timers when FREEZE (ie bdm freeze) */ - - prescaler = 8; - pquicc->timer_tmr1 = 0x001a | /* or=1, frr=1, iclk=01b */ - (unsigned short)((prescaler - 1) << 8); - - pquicc->timer_tcn1 = 0x0000; /* initial count */ - /* calculate interval for 100Hz based on the _system_clock: */ - pquicc->timer_trr1 = (system_clock/ prescaler) / HZ; /* reference count */ - - pquicc->timer_ter1 = 0x0003; /* clear timer events */ - - timer_interrupt = handler; - - /* enable timer 1 interrupt in CIMR */ - setup_irq(CPMVEC_TIMER1, &m68360_timer_irq); - - /* Start timer 1: */ - tgcr_save = (pquicc->timer_tgcr & 0xfff0) | 0x0001; - pquicc->timer_tgcr = tgcr_save; -} - -void BSP_reset (void) -{ - local_irq_disable(); - asm volatile ( - "moveal #_start, %a0;\n" - "moveb #0, 0xFFFFF300;\n" - "moveal 0(%a0), %sp;\n" - "moveal 4(%a0), %a0;\n" - "jmp (%a0);\n" - ); -} - -unsigned char *scc1_hwaddr; -static int errno; - -#if defined (CONFIG_UCQUICC) -_bsc0(char *, getserialnum) -_bsc1(unsigned char *, gethwaddr, int, a) -_bsc1(char *, getbenv, char *, a) -#endif - - -void __init config_BSP(char *command, int len) -{ - unsigned char *p; - - m360_cpm_reset(); - - /* Calculate the real system clock value. 
*/ - { - unsigned int local_pllcr = (unsigned int)(pquicc->sim_pllcr); - if( local_pllcr & MCU_PREEN ) // If the prescaler is dividing by 128 - { - int mf = (int)(pquicc->sim_pllcr & 0x0fff); - system_clock = (OSCILLATOR / 128) * (mf + 1); - } - else - { - int mf = (int)(pquicc->sim_pllcr & 0x0fff); - system_clock = (OSCILLATOR) * (mf + 1); - } - } - - printk(KERN_INFO "\n68360 QUICC support (C) 2000 Lineo Inc.\n"); - -#if defined(CONFIG_UCQUICC) && 0 - printk(KERN_INFO "uCquicc serial string [%s]\n",getserialnum()); - p = scc1_hwaddr = gethwaddr(0); - printk(KERN_INFO "uCquicc hwaddr %pM\n", p); - - p = getbenv("APPEND"); - if (p) - strcpy(p,command); - else - command[0] = 0; -#else - scc1_hwaddr = "\00\01\02\03\04\05"; -#endif - - mach_reset = BSP_reset; -} diff --git a/arch/m68k/68360/entry.S b/arch/m68k/68360/entry.S deleted file mode 100644 index 22eb302..0000000 --- a/arch/m68k/68360/entry.S +++ /dev/null @@ -1,164 +0,0 @@ -/* - * entry.S - non-mmu 68360 interrupt and exceptions entry points - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2001 SED Systems, a Division of Calian Ltd. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file README.legal in the main directory of this archive - * for more details. - * - * Linux/m68k support by Hamish Macdonald - * M68360 Port by SED Systems, and Lineo. - */ - -#include <linux/linkage.h> -#include <asm/thread_info.h> -#include <asm/unistd.h> -#include <asm/errno.h> -#include <asm/setup.h> -#include <asm/segment.h> -#include <asm/traps.h> -#include <asm/asm-offsets.h> -#include <asm/entry.h> - -.text - -.globl system_call -.globl resume -.globl ret_from_exception -.globl ret_from_signal -.globl sys_call_table -.globl bad_interrupt -.globl inthandler - -badsys: - movel #-ENOSYS,%sp@(PT_OFF_D0) - jra ret_from_exception - -do_trace: - movel #-ENOSYS,%sp@(PT_OFF_D0) /* needed for strace*/ - subql #4,%sp - SAVE_SWITCH_STACK - jbsr syscall_trace_enter - RESTORE_SWITCH_STACK - addql #4,%sp - movel %sp@(PT_OFF_ORIG_D0),%d1 - movel #-ENOSYS,%d0 - cmpl #NR_syscalls,%d1 - jcc 1f - lsl #2,%d1 - lea sys_call_table, %a0 - jbsr %a0@(%d1) - -1: movel %d0,%sp@(PT_OFF_D0) /* save the return value */ - subql #4,%sp /* dummy return address */ - SAVE_SWITCH_STACK - jbsr syscall_trace_leave - -ret_from_signal: - RESTORE_SWITCH_STACK - addql #4,%sp - jra ret_from_exception - -ENTRY(system_call) - SAVE_ALL_SYS - - /* save top of frame*/ - pea %sp@ - jbsr set_esp0 - addql #4,%sp - - movel %sp@(PT_OFF_ORIG_D0),%d0 - - movel %sp,%d1 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d1 - movel %d1,%a2 - btst #(TIF_SYSCALL_TRACE%8),%a2@(TINFO_FLAGS+(31-TIF_SYSCALL_TRACE)/8) - jne do_trace - cmpl #NR_syscalls,%d0 - jcc badsys - lsl #2,%d0 - lea sys_call_table,%a0 - movel %a0@(%d0), %a0 - jbsr %a0@ - movel %d0,%sp@(PT_OFF_D0) /* save the return value*/ - -ret_from_exception: - btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel*/ - jeq Luser_return /* if so, skip resched, signals*/ - -Lkernel_return: - RESTORE_ALL - -Luser_return: - /* only allow interrupts when we are really the last one on the*/ - /* kernel stack, otherwise stack overflow can occur during*/ - /* heavy interrupt load*/ - andw #ALLOWINT,%sr - - movel %sp,%d1 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d1 - movel %d1,%a2 -1: - move %a2@(TINFO_FLAGS),%d1 /* thread_info->flags */ - jne Lwork_to_do - RESTORE_ALL - -Lwork_to_do: - movel %a2@(TINFO_FLAGS),%d1 /* thread_info->flags */ - btst #TIF_NEED_RESCHED,%d1 - jne 
reschedule - -Lsignal_return: - subql #4,%sp /* dummy return address*/ - SAVE_SWITCH_STACK - pea %sp@(SWITCH_STACK_SIZE) - bsrw do_notify_resume - addql #4,%sp - RESTORE_SWITCH_STACK - addql #4,%sp - jra 1b - -/* - * This is the main interrupt handler, responsible for calling do_IRQ() - */ -inthandler: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and.l #0x3ff, %d0 - lsr.l #0x02, %d0 - - movel %sp,%sp@- - movel %d0,%sp@- /* put vector # on stack*/ - jbsr do_IRQ /* process the IRQ */ - addql #8,%sp /* pop parameters off stack*/ - jra ret_from_exception - -/* - * Handler for uninitialized and spurious interrupts. - */ -bad_interrupt: - addql #1,irq_err_count - rte - -/* - * Beware - when entering resume, prev (the current task) is - * in a0, next (the new task) is in a1, so don't change these - * registers until their contents are no longer needed. - */ -ENTRY(resume) - movel %a0,%d1 /* save prev thread in d1 */ - movew %sr,%a0@(TASK_THREAD+THREAD_SR) /* save sr */ - SAVE_SWITCH_STACK - movel %sp,%a0@(TASK_THREAD+THREAD_KSP) /* save kernel stack */ - movel %usp,%a3 /* save usp */ - movel %a3,%a0@(TASK_THREAD+THREAD_USP) - - movel %a1@(TASK_THREAD+THREAD_USP),%a3 /* restore user stack */ - movel %a3,%usp - movel %a1@(TASK_THREAD+THREAD_KSP),%sp /* restore new thread stack */ - RESTORE_SWITCH_STACK - movew %a1@(TASK_THREAD+THREAD_SR),%sr /* restore thread status reg */ - rts - diff --git a/arch/m68k/68360/head-ram.S b/arch/m68k/68360/head-ram.S deleted file mode 100644 index 62bc56f..0000000 --- a/arch/m68k/68360/head-ram.S +++ /dev/null @@ -1,402 +0,0 @@ -/* - * head-ram.S - startup code for Motorola 68360 - * - * Copyright 2001 (C) SED Systems, a Division of Calian Ltd. - * Based on: arch/m68knommu/platform/68328/pilot/crt0_rom.S - * Based on: arch/m68knommu/platform/68360/uCquicc/crt0_rom.S, 2.0.38.1.pre7 - * uClinux Kernel - * Copyright (C) Michael Leslie <mleslie@lineo.com> - * Based on: arch/m68knommu/platform/68EZ328/ucsimm/crt0_rom.S - * Copyright (C) 1998 D. 
Jeff Dionne <jeff@uclinux.org>, - * - */ -#define ASSEMBLY - -.global _stext -.global _start - -.global _rambase -.global _ramvec -.global _ramstart -.global _ramend - -.global _quicc_base -.global _periph_base - -#define RAMEND (CONFIG_RAMBASE + CONFIG_RAMSIZE) -#define ROMEND (CONFIG_ROMBASE + CONFIG_ROMSIZE) - -#define REGB 0x1000 -#define PEPAR (_dprbase + REGB + 0x0016) -#define GMR (_dprbase + REGB + 0x0040) -#define OR0 (_dprbase + REGB + 0x0054) -#define BR0 (_dprbase + REGB + 0x0050) -#define OR1 (_dprbase + REGB + 0x0064) -#define BR1 (_dprbase + REGB + 0x0060) -#define OR4 (_dprbase + REGB + 0x0094) -#define BR4 (_dprbase + REGB + 0x0090) -#define OR6 (_dprbase + REGB + 0x00b4) -#define BR6 (_dprbase + REGB + 0x00b0) -#define OR7 (_dprbase + REGB + 0x00c4) -#define BR7 (_dprbase + REGB + 0x00c0) - -#define MCR (_dprbase + REGB + 0x0000) -#define AVR (_dprbase + REGB + 0x0008) - -#define SYPCR (_dprbase + REGB + 0x0022) - -#define PLLCR (_dprbase + REGB + 0x0010) -#define CLKOCR (_dprbase + REGB + 0x000C) -#define CDVCR (_dprbase + REGB + 0x0014) - -#define BKAR (_dprbase + REGB + 0x0030) -#define BKCR (_dprbase + REGB + 0x0034) -#define SWIV (_dprbase + REGB + 0x0023) -#define PICR (_dprbase + REGB + 0x0026) -#define PITR (_dprbase + REGB + 0x002A) - -/* Define for all memory configuration */ -#define MCU_SIM_GMR 0x00000000 -#define SIM_OR_MASK 0x0fffffff - -/* Defines for chip select zero - the flash */ -#define SIM_OR0_MASK 0x20000002 -#define SIM_BR0_MASK 0x00000001 - - -/* Defines for chip select one - the RAM */ -#define SIM_OR1_MASK 0x10000000 -#define SIM_BR1_MASK 0x00000001 - -#define MCU_SIM_MBAR_ADRS 0x0003ff00 -#define MCU_SIM_MBAR_BA_MASK 0xfffff000 -#define MCU_SIM_MBAR_AS_MASK 0x00000001 - -#define MCU_SIM_PEPAR 0x00B4 - -#define MCU_DISABLE_INTRPTS 0x2700 -#define MCU_SIM_AVR 0x00 - -#define MCU_SIM_MCR 0x00005cff - -#define MCU_SIM_CLKOCR 0x00 -#define MCU_SIM_PLLCR 0x8000 -#define MCU_SIM_CDVCR 0x0000 - -#define MCU_SIM_SYPCR 0x0000 -#define MCU_SIM_SWIV 0x00 -#define MCU_SIM_PICR 0x0000 -#define MCU_SIM_PITR 0x0000 - - -#include <asm/m68360_regs.h> - - -/* - * By the time this RAM specific code begins to execute, DPRAM - * and DRAM should already be mapped and accessible. - */ - - .text -_start: -_stext: - nop - ori.w #MCU_DISABLE_INTRPTS, %sr /* disable interrupts: */ - /* We should not need to setup the boot stack the reset should do it. */ - movea.l #RAMEND, %sp /*set up stack at the end of DRAM:*/ - -set_mbar_register: - moveq.l #0x07, %d1 /* Setup MBAR */ - movec %d1, %dfc - - lea.l MCU_SIM_MBAR_ADRS, %a0 - move.l #_dprbase, %d0 - andi.l #MCU_SIM_MBAR_BA_MASK, %d0 - ori.l #MCU_SIM_MBAR_AS_MASK, %d0 - moves.l %d0, %a0@ - - moveq.l #0x05, %d1 - movec.l %d1, %dfc - - /* Now we can begin to access registers in DPRAM */ - -set_sim_mcr: - /* Set Module Configuration Register */ - move.l #MCU_SIM_MCR, MCR - - /* to do: Determine cause of reset */ - - /* - * configure system clock MC68360 p. 
6-40 - * (value +1)*osc/128 = system clock - */ -set_sim_clock: - move.w #MCU_SIM_PLLCR, PLLCR - move.b #MCU_SIM_CLKOCR, CLKOCR - move.w #MCU_SIM_CDVCR, CDVCR - - /* Wait for the PLL to settle */ - move.w #16384, %d0 -pll_settle_wait: - subi.w #1, %d0 - bne pll_settle_wait - - /* Setup the system protection register, and watchdog timer register */ - move.b #MCU_SIM_SWIV, SWIV - move.w #MCU_SIM_PICR, PICR - move.w #MCU_SIM_PITR, PITR - move.w #MCU_SIM_SYPCR, SYPCR - - /* Clear DPRAM - system + parameter */ - movea.l #_dprbase, %a0 - movea.l #_dprbase+0x2000, %a1 - - /* Copy 0 to %a0 until %a0 == %a1 */ -clear_dpram: - movel #0, %a0@+ - cmpal %a0, %a1 - bhi clear_dpram - -configure_memory_controller: - /* Set up Global Memory Register (GMR) */ - move.l #MCU_SIM_GMR, %d0 - move.l %d0, GMR - -configure_chip_select_0: - move.l #RAMEND, %d0 - subi.l #__ramstart, %d0 - subq.l #0x01, %d0 - eori.l #SIM_OR_MASK, %d0 - ori.l #SIM_OR0_MASK, %d0 - move.l %d0, OR0 - - move.l #__ramstart, %d0 - ori.l #SIM_BR0_MASK, %d0 - move.l %d0, BR0 - -configure_chip_select_1: - move.l #ROMEND, %d0 - subi.l #__rom_start, %d0 - subq.l #0x01, %d0 - eori.l #SIM_OR_MASK, %d0 - ori.l #SIM_OR1_MASK, %d0 - move.l %d0, OR1 - - move.l #__rom_start, %d0 - ori.l #SIM_BR1_MASK, %d0 - move.l %d0, BR1 - - move.w #MCU_SIM_PEPAR, PEPAR - - /* point to vector table: */ - move.l #_romvec, %a0 - move.l #_ramvec, %a1 -copy_vectors: - move.l %a0@, %d0 - move.l %d0, %a1@ - move.l %a0@, %a1@ - addq.l #0x04, %a0 - addq.l #0x04, %a1 - cmp.l #_start, %a0 - blt copy_vectors - - move.l #_ramvec, %a1 - movec %a1, %vbr - - - /* Copy data segment from ROM to RAM */ - moveal #_stext, %a0 - moveal #_sdata, %a1 - moveal #_edata, %a2 - - /* Copy %a0 to %a1 until %a1 == %a2 */ -LD1: - move.l %a0@, %d0 - addq.l #0x04, %a0 - move.l %d0, %a1@ - addq.l #0x04, %a1 - cmp.l #_edata, %a1 - blt LD1 - - moveal #__bss_start, %a0 - moveal #__bss_stop, %a1 - - /* Copy 0 to %a0 until %a0 == %a1 */ -L1: - movel #0, %a0@+ - cmpal %a0, %a1 - bhi L1 - -load_quicc: - move.l #_dprbase, _quicc_base - -store_ram_size: - /* Set ram size information */ - move.l #_sdata, _rambase - move.l #__bss_stop, _ramstart - move.l #RAMEND, %d0 - sub.l #0x1000, %d0 /* Reserve 4K for stack space.*/ - move.l %d0, _ramend /* Different from RAMEND.*/ - - pea 0 - pea env - pea %sp@(4) - pea 0 - - lea init_thread_union, %a2 - lea 0x2000(%a2), %sp - -lp: - jsr start_kernel - -_exit: - jmp _exit - - - .data - .align 4 -env: - .long 0 -_quicc_base: - .long 0 -_periph_base: - .long 0 -_ramvec: - .long 0 -_rambase: - .long 0 -_ramstart: - .long 0 -_ramend: - .long 0 -_dprbase: - .long 0xffffe000 - - .text - - /* - * These are the exception vectors at boot up, they are copied into RAM - * and then overwritten as needed. - */ - -.section ".data..initvect","awx" - .long RAMEND /* Reset: Initial Stack Pointer - 0. */ - .long _start /* Reset: Initial Program Counter - 1. */ - .long buserr /* Bus Error - 2. */ - .long trap /* Address Error - 3. */ - .long trap /* Illegal Instruction - 4. */ - .long trap /* Divide by zero - 5. */ - .long trap /* CHK, CHK2 Instructions - 6. */ - .long trap /* TRAPcc, TRAPV Instructions - 7. */ - .long trap /* Privilege Violation - 8. */ - .long trap /* Trace - 9. */ - .long trap /* Line 1010 Emulator - 10. */ - .long trap /* Line 1111 Emualtor - 11. */ - .long trap /* Harware Breakpoint - 12. */ - .long trap /* (Reserved for Coprocessor Protocol Violation)- 13. */ - .long trap /* Format Error - 14. */ - .long trap /* Uninitialized Interrupt - 15. 
*/ - .long trap /* (Unassigned, Reserver) - 16. */ - .long trap /* (Unassigned, Reserver) - 17. */ - .long trap /* (Unassigned, Reserver) - 18. */ - .long trap /* (Unassigned, Reserver) - 19. */ - .long trap /* (Unassigned, Reserver) - 20. */ - .long trap /* (Unassigned, Reserver) - 21. */ - .long trap /* (Unassigned, Reserver) - 22. */ - .long trap /* (Unassigned, Reserver) - 23. */ - .long trap /* Spurious Interrupt - 24. */ - .long trap /* Level 1 Interrupt Autovector - 25. */ - .long trap /* Level 2 Interrupt Autovector - 26. */ - .long trap /* Level 3 Interrupt Autovector - 27. */ - .long trap /* Level 4 Interrupt Autovector - 28. */ - .long trap /* Level 5 Interrupt Autovector - 29. */ - .long trap /* Level 6 Interrupt Autovector - 30. */ - .long trap /* Level 7 Interrupt Autovector - 31. */ - .long system_call /* Trap Instruction Vectors 0 - 32. */ - .long trap /* Trap Instruction Vectors 1 - 33. */ - .long trap /* Trap Instruction Vectors 2 - 34. */ - .long trap /* Trap Instruction Vectors 3 - 35. */ - .long trap /* Trap Instruction Vectors 4 - 36. */ - .long trap /* Trap Instruction Vectors 5 - 37. */ - .long trap /* Trap Instruction Vectors 6 - 38. */ - .long trap /* Trap Instruction Vectors 7 - 39. */ - .long trap /* Trap Instruction Vectors 8 - 40. */ - .long trap /* Trap Instruction Vectors 9 - 41. */ - .long trap /* Trap Instruction Vectors 10 - 42. */ - .long trap /* Trap Instruction Vectors 11 - 43. */ - .long trap /* Trap Instruction Vectors 12 - 44. */ - .long trap /* Trap Instruction Vectors 13 - 45. */ - .long trap /* Trap Instruction Vectors 14 - 46. */ - .long trap /* Trap Instruction Vectors 15 - 47. */ - .long 0 /* (Reserved for Coprocessor) - 48. */ - .long 0 /* (Reserved for Coprocessor) - 49. */ - .long 0 /* (Reserved for Coprocessor) - 50. */ - .long 0 /* (Reserved for Coprocessor) - 51. */ - .long 0 /* (Reserved for Coprocessor) - 52. */ - .long 0 /* (Reserved for Coprocessor) - 53. */ - .long 0 /* (Reserved for Coprocessor) - 54. */ - .long 0 /* (Reserved for Coprocessor) - 55. */ - .long 0 /* (Reserved for Coprocessor) - 56. */ - .long 0 /* (Reserved for Coprocessor) - 57. */ - .long 0 /* (Reserved for Coprocessor) - 58. */ - .long 0 /* (Unassigned, Reserved) - 59. */ - .long 0 /* (Unassigned, Reserved) - 60. */ - .long 0 /* (Unassigned, Reserved) - 61. */ - .long 0 /* (Unassigned, Reserved) - 62. */ - .long 0 /* (Unassigned, Reserved) - 63. */ - /* The assignment of these vectors to the CPM is */ - /* dependent on the configuration of the CPM vba */ - /* fields. */ - .long 0 /* (User-Defined Vectors 1) CPM Error - 64. */ - .long 0 /* (User-Defined Vectors 2) CPM Parallel IO PC11- 65. */ - .long 0 /* (User-Defined Vectors 3) CPM Parallel IO PC10- 66. */ - .long 0 /* (User-Defined Vectors 4) CPM SMC2 / PIP - 67. */ - .long 0 /* (User-Defined Vectors 5) CPM SMC1 - 68. */ - .long 0 /* (User-Defined Vectors 6) CPM SPI - 69. */ - .long 0 /* (User-Defined Vectors 7) CPM Parallel IO PC9 - 70. */ - .long 0 /* (User-Defined Vectors 8) CPM Timer 4 - 71. */ - .long 0 /* (User-Defined Vectors 9) CPM Reserved - 72. */ - .long 0 /* (User-Defined Vectors 10) CPM Parallel IO PC8- 73. */ - .long 0 /* (User-Defined Vectors 11) CPM Parallel IO PC7- 74. */ - .long 0 /* (User-Defined Vectors 12) CPM Parallel IO PC6- 75. */ - .long 0 /* (User-Defined Vectors 13) CPM Timer 3 - 76. */ - .long 0 /* (User-Defined Vectors 14) CPM Reserved - 77. */ - .long 0 /* (User-Defined Vectors 15) CPM Parallel IO PC5- 78. */ - .long 0 /* (User-Defined Vectors 16) CPM Parallel IO PC4- 79. 
*/ - .long 0 /* (User-Defined Vectors 17) CPM Reserved - 80. */ - .long 0 /* (User-Defined Vectors 18) CPM RISC Timer Tbl - 81. */ - .long 0 /* (User-Defined Vectors 19) CPM Timer 2 - 82. */ - .long 0 /* (User-Defined Vectors 21) CPM Reserved - 83. */ - .long 0 /* (User-Defined Vectors 22) CPM IDMA2 - 84. */ - .long 0 /* (User-Defined Vectors 23) CPM IDMA1 - 85. */ - .long 0 /* (User-Defined Vectors 24) CPM SDMA Bus Err - 86. */ - .long 0 /* (User-Defined Vectors 25) CPM Parallel IO PC3- 87. */ - .long 0 /* (User-Defined Vectors 26) CPM Parallel IO PC2- 88. */ - .long 0 /* (User-Defined Vectors 27) CPM Timer 1 - 89. */ - .long 0 /* (User-Defined Vectors 28) CPM Parallel IO PC1- 90. */ - .long 0 /* (User-Defined Vectors 29) CPM SCC 4 - 91. */ - .long 0 /* (User-Defined Vectors 30) CPM SCC 3 - 92. */ - .long 0 /* (User-Defined Vectors 31) CPM SCC 2 - 93. */ - .long 0 /* (User-Defined Vectors 32) CPM SCC 1 - 94. */ - .long 0 /* (User-Defined Vectors 33) CPM Parallel IO PC0- 95. */ - /* I don't think anything uses the vectors after here. */ - .long 0 /* (User-Defined Vectors 34) - 96. */ - .long 0,0,0,0,0 /* (User-Defined Vectors 35 - 39). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 40 - 49). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 50 - 59). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 60 - 69). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 70 - 79). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 80 - 89). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 90 - 99). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 100 - 109). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 110 - 119). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 120 - 129). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 130 - 139). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 140 - 149). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 150 - 159). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 160 - 169). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 170 - 179). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 180 - 189). */ - .long 0,0,0 /* (User-Defined Vectors 190 - 192). */ -.text -ignore: rte diff --git a/arch/m68k/68360/head-rom.S b/arch/m68k/68360/head-rom.S deleted file mode 100644 index b3a7e40..0000000 --- a/arch/m68k/68360/head-rom.S +++ /dev/null @@ -1,413 +0,0 @@ -/* - * head-rom.S - startup code for Motorola 68360 - * - * Copyright (C) SED Systems, a Division of Calian Ltd. - * Based on: arch/m68knommu/platform/68328/pilot/crt0_rom.S - * Based on: arch/m68knommu/platform/68360/uCquicc/crt0_rom.S, 2.0.38.1.pre7 - * uClinux Kernel - * Copyright (C) Michael Leslie <mleslie@lineo.com> - * Based on: arch/m68knommu/platform/68EZ328/ucsimm/crt0_rom.S - * Copyright (C) 1998 D. 
Jeff Dionne <jeff@uclinux.org>, - * - */ - -.global _stext -.global __bss_start -.global _start - -.global _rambase -.global _ramvec -.global _ramstart -.global _ramend - -.global _quicc_base -.global _periph_base - -#define RAMEND (CONFIG_RAMBASE + CONFIG_RAMSIZE) - -#define REGB 0x1000 -#define PEPAR (_dprbase + REGB + 0x0016) -#define GMR (_dprbase + REGB + 0x0040) -#define OR0 (_dprbase + REGB + 0x0054) -#define BR0 (_dprbase + REGB + 0x0050) - -#define OR1 (_dprbase + REGB + 0x0064) -#define BR1 (_dprbase + REGB + 0x0060) - -#define OR2 (_dprbase + REGB + 0x0074) -#define BR2 (_dprbase + REGB + 0x0070) - -#define OR3 (_dprbase + REGB + 0x0084) -#define BR3 (_dprbase + REGB + 0x0080) - -#define OR4 (_dprbase + REGB + 0x0094) -#define BR4 (_dprbase + REGB + 0x0090) - -#define OR5 (_dprbase + REGB + 0x00A4) -#define BR5 (_dprbase + REGB + 0x00A0) - -#define OR6 (_dprbase + REGB + 0x00b4) -#define BR6 (_dprbase + REGB + 0x00b0) - -#define OR7 (_dprbase + REGB + 0x00c4) -#define BR7 (_dprbase + REGB + 0x00c0) - -#define MCR (_dprbase + REGB + 0x0000) -#define AVR (_dprbase + REGB + 0x0008) - -#define SYPCR (_dprbase + REGB + 0x0022) - -#define PLLCR (_dprbase + REGB + 0x0010) -#define CLKOCR (_dprbase + REGB + 0x000C) -#define CDVCR (_dprbase + REGB + 0x0014) - -#define BKAR (_dprbase + REGB + 0x0030) -#define BKCR (_dprbase + REGB + 0x0034) -#define SWIV (_dprbase + REGB + 0x0023) -#define PICR (_dprbase + REGB + 0x0026) -#define PITR (_dprbase + REGB + 0x002A) - -/* Define for all memory configuration */ -#define MCU_SIM_GMR 0x00000000 -#define SIM_OR_MASK 0x0fffffff - -/* Defines for chip select zero - the flash */ -#define SIM_OR0_MASK 0x20000000 -#define SIM_BR0_MASK 0x00000001 - -/* Defines for chip select one - the RAM */ -#define SIM_OR1_MASK 0x10000000 -#define SIM_BR1_MASK 0x00000001 - -#define MCU_SIM_MBAR_ADRS 0x0003ff00 -#define MCU_SIM_MBAR_BA_MASK 0xfffff000 -#define MCU_SIM_MBAR_AS_MASK 0x00000001 - -#define MCU_SIM_PEPAR 0x00B4 - -#define MCU_DISABLE_INTRPTS 0x2700 -#define MCU_SIM_AVR 0x00 - -#define MCU_SIM_MCR 0x00005cff - -#define MCU_SIM_CLKOCR 0x00 -#define MCU_SIM_PLLCR 0x8000 -#define MCU_SIM_CDVCR 0x0000 - -#define MCU_SIM_SYPCR 0x0000 -#define MCU_SIM_SWIV 0x00 -#define MCU_SIM_PICR 0x0000 -#define MCU_SIM_PITR 0x0000 - - -#include <asm/m68360_regs.h> - - -/* - * By the time this RAM specific code begins to execute, DPRAM - * and DRAM should already be mapped and accessible. - */ - - .text -_start: -_stext: - nop - ori.w #MCU_DISABLE_INTRPTS, %sr /* disable interrupts: */ - /* We should not need to setup the boot stack the reset should do it. */ - movea.l #RAMEND, %sp /* set up stack at the end of DRAM:*/ - - -set_mbar_register: - moveq.l #0x07, %d1 /* Setup MBAR */ - movec %d1, %dfc - - lea.l MCU_SIM_MBAR_ADRS, %a0 - move.l #_dprbase, %d0 - andi.l #MCU_SIM_MBAR_BA_MASK, %d0 - ori.l #MCU_SIM_MBAR_AS_MASK, %d0 - moves.l %d0, %a0@ - - moveq.l #0x05, %d1 - movec.l %d1, %dfc - - /* Now we can begin to access registers in DPRAM */ - -set_sim_mcr: - /* Set Module Configuration Register */ - move.l #MCU_SIM_MCR, MCR - - /* to do: Determine cause of reset */ - - /* - * configure system clock MC68360 p. 6-40 - * (value +1)*osc/128 = system clock - * or - * (value + 1)*osc = system clock - * You do not need to divide the oscillator by 128 unless you want to. 
- */ -set_sim_clock: - move.w #MCU_SIM_PLLCR, PLLCR - move.b #MCU_SIM_CLKOCR, CLKOCR - move.w #MCU_SIM_CDVCR, CDVCR - - /* Wait for the PLL to settle */ - move.w #16384, %d0 -pll_settle_wait: - subi.w #1, %d0 - bne pll_settle_wait - - /* Setup the system protection register, and watchdog timer register */ - move.b #MCU_SIM_SWIV, SWIV - move.w #MCU_SIM_PICR, PICR - move.w #MCU_SIM_PITR, PITR - move.w #MCU_SIM_SYPCR, SYPCR - - /* Clear DPRAM - system + parameter */ - movea.l #_dprbase, %a0 - movea.l #_dprbase+0x2000, %a1 - - /* Copy 0 to %a0 until %a0 == %a1 */ -clear_dpram: - movel #0, %a0@+ - cmpal %a0, %a1 - bhi clear_dpram - -configure_memory_controller: - /* Set up Global Memory Register (GMR) */ - move.l #MCU_SIM_GMR, %d0 - move.l %d0, GMR - -configure_chip_select_0: - move.l #0x00400000, %d0 - subq.l #0x01, %d0 - eori.l #SIM_OR_MASK, %d0 - ori.l #SIM_OR0_MASK, %d0 - move.l %d0, OR0 - - move.l #__rom_start, %d0 - ori.l #SIM_BR0_MASK, %d0 - move.l %d0, BR0 - - move.l #0x0, BR1 - move.l #0x0, BR2 - move.l #0x0, BR3 - move.l #0x0, BR4 - move.l #0x0, BR5 - move.l #0x0, BR6 - move.l #0x0, BR7 - - move.w #MCU_SIM_PEPAR, PEPAR - - /* point to vector table: */ - move.l #_romvec, %a0 - move.l #_ramvec, %a1 -copy_vectors: - move.l %a0@, %d0 - move.l %d0, %a1@ - move.l %a0@, %a1@ - addq.l #0x04, %a0 - addq.l #0x04, %a1 - cmp.l #_start, %a0 - blt copy_vectors - - move.l #_ramvec, %a1 - movec %a1, %vbr - - - /* Copy data segment from ROM to RAM */ - moveal #_etext, %a0 - moveal #_sdata, %a1 - moveal #_edata, %a2 - - /* Copy %a0 to %a1 until %a1 == %a2 */ -LD1: - move.l %a0@, %d0 - addq.l #0x04, %a0 - move.l %d0, %a1@ - addq.l #0x04, %a1 - cmp.l #_edata, %a1 - blt LD1 - - moveal #__bss_start, %a0 - moveal #__bss_stop, %a1 - - /* Copy 0 to %a0 until %a0 == %a1 */ -L1: - movel #0, %a0@+ - cmpal %a0, %a1 - bhi L1 - -load_quicc: - move.l #_dprbase, _quicc_base - -store_ram_size: - /* Set ram size information */ - move.l #_sdata, _rambase - move.l #__bss_stop, _ramstart - move.l #RAMEND, %d0 - sub.l #0x1000, %d0 /* Reserve 4K for stack space.*/ - move.l %d0, _ramend /* Different from RAMEND.*/ - - pea 0 - pea env - pea %sp@(4) - pea 0 - - lea init_thread_union, %a2 - lea 0x2000(%a2), %sp - -lp: - jsr start_kernel - -_exit: - jmp _exit - - - .data - .align 4 -env: - .long 0 -_quicc_base: - .long 0 -_periph_base: - .long 0 -_ramvec: - .long 0 -_rambase: - .long 0 -_ramstart: - .long 0 -_ramend: - .long 0 -_dprbase: - .long 0xffffe000 - - - .text - - /* - * These are the exception vectors at boot up, they are copied into RAM - * and then overwritten as needed. - */ - -.section ".data..initvect","awx" - .long RAMEND /* Reset: Initial Stack Pointer - 0. */ - .long _start /* Reset: Initial Program Counter - 1. */ - .long buserr /* Bus Error - 2. */ - .long trap /* Address Error - 3. */ - .long trap /* Illegal Instruction - 4. */ - .long trap /* Divide by zero - 5. */ - .long trap /* CHK, CHK2 Instructions - 6. */ - .long trap /* TRAPcc, TRAPV Instructions - 7. */ - .long trap /* Privilege Violation - 8. */ - .long trap /* Trace - 9. */ - .long trap /* Line 1010 Emulator - 10. */ - .long trap /* Line 1111 Emualtor - 11. */ - .long trap /* Harware Breakpoint - 12. */ - .long trap /* (Reserved for Coprocessor Protocol Violation)- 13. */ - .long trap /* Format Error - 14. */ - .long trap /* Uninitialized Interrupt - 15. */ - .long trap /* (Unassigned, Reserver) - 16. */ - .long trap /* (Unassigned, Reserver) - 17. */ - .long trap /* (Unassigned, Reserver) - 18. */ - .long trap /* (Unassigned, Reserver) - 19. 
*/ - .long trap /* (Unassigned, Reserver) - 20. */ - .long trap /* (Unassigned, Reserver) - 21. */ - .long trap /* (Unassigned, Reserver) - 22. */ - .long trap /* (Unassigned, Reserver) - 23. */ - .long trap /* Spurious Interrupt - 24. */ - .long trap /* Level 1 Interrupt Autovector - 25. */ - .long trap /* Level 2 Interrupt Autovector - 26. */ - .long trap /* Level 3 Interrupt Autovector - 27. */ - .long trap /* Level 4 Interrupt Autovector - 28. */ - .long trap /* Level 5 Interrupt Autovector - 29. */ - .long trap /* Level 6 Interrupt Autovector - 30. */ - .long trap /* Level 7 Interrupt Autovector - 31. */ - .long system_call /* Trap Instruction Vectors 0 - 32. */ - .long trap /* Trap Instruction Vectors 1 - 33. */ - .long trap /* Trap Instruction Vectors 2 - 34. */ - .long trap /* Trap Instruction Vectors 3 - 35. */ - .long trap /* Trap Instruction Vectors 4 - 36. */ - .long trap /* Trap Instruction Vectors 5 - 37. */ - .long trap /* Trap Instruction Vectors 6 - 38. */ - .long trap /* Trap Instruction Vectors 7 - 39. */ - .long trap /* Trap Instruction Vectors 8 - 40. */ - .long trap /* Trap Instruction Vectors 9 - 41. */ - .long trap /* Trap Instruction Vectors 10 - 42. */ - .long trap /* Trap Instruction Vectors 11 - 43. */ - .long trap /* Trap Instruction Vectors 12 - 44. */ - .long trap /* Trap Instruction Vectors 13 - 45. */ - .long trap /* Trap Instruction Vectors 14 - 46. */ - .long trap /* Trap Instruction Vectors 15 - 47. */ - .long 0 /* (Reserved for Coprocessor) - 48. */ - .long 0 /* (Reserved for Coprocessor) - 49. */ - .long 0 /* (Reserved for Coprocessor) - 50. */ - .long 0 /* (Reserved for Coprocessor) - 51. */ - .long 0 /* (Reserved for Coprocessor) - 52. */ - .long 0 /* (Reserved for Coprocessor) - 53. */ - .long 0 /* (Reserved for Coprocessor) - 54. */ - .long 0 /* (Reserved for Coprocessor) - 55. */ - .long 0 /* (Reserved for Coprocessor) - 56. */ - .long 0 /* (Reserved for Coprocessor) - 57. */ - .long 0 /* (Reserved for Coprocessor) - 58. */ - .long 0 /* (Unassigned, Reserved) - 59. */ - .long 0 /* (Unassigned, Reserved) - 60. */ - .long 0 /* (Unassigned, Reserved) - 61. */ - .long 0 /* (Unassigned, Reserved) - 62. */ - .long 0 /* (Unassigned, Reserved) - 63. */ - /* The assignment of these vectors to the CPM is */ - /* dependent on the configuration of the CPM vba */ - /* fields. */ - .long 0 /* (User-Defined Vectors 1) CPM Error - 64. */ - .long 0 /* (User-Defined Vectors 2) CPM Parallel IO PC11- 65. */ - .long 0 /* (User-Defined Vectors 3) CPM Parallel IO PC10- 66. */ - .long 0 /* (User-Defined Vectors 4) CPM SMC2 / PIP - 67. */ - .long 0 /* (User-Defined Vectors 5) CPM SMC1 - 68. */ - .long 0 /* (User-Defined Vectors 6) CPM SPI - 69. */ - .long 0 /* (User-Defined Vectors 7) CPM Parallel IO PC9 - 70. */ - .long 0 /* (User-Defined Vectors 8) CPM Timer 4 - 71. */ - .long 0 /* (User-Defined Vectors 9) CPM Reserved - 72. */ - .long 0 /* (User-Defined Vectors 10) CPM Parallel IO PC8- 73. */ - .long 0 /* (User-Defined Vectors 11) CPM Parallel IO PC7- 74. */ - .long 0 /* (User-Defined Vectors 12) CPM Parallel IO PC6- 75. */ - .long 0 /* (User-Defined Vectors 13) CPM Timer 3 - 76. */ - .long 0 /* (User-Defined Vectors 14) CPM Reserved - 77. */ - .long 0 /* (User-Defined Vectors 15) CPM Parallel IO PC5- 78. */ - .long 0 /* (User-Defined Vectors 16) CPM Parallel IO PC4- 79. */ - .long 0 /* (User-Defined Vectors 17) CPM Reserved - 80. */ - .long 0 /* (User-Defined Vectors 18) CPM RISC Timer Tbl - 81. */ - .long 0 /* (User-Defined Vectors 19) CPM Timer 2 - 82. 
*/ - .long 0 /* (User-Defined Vectors 21) CPM Reserved - 83. */ - .long 0 /* (User-Defined Vectors 22) CPM IDMA2 - 84. */ - .long 0 /* (User-Defined Vectors 23) CPM IDMA1 - 85. */ - .long 0 /* (User-Defined Vectors 24) CPM SDMA Bus Err - 86. */ - .long 0 /* (User-Defined Vectors 25) CPM Parallel IO PC3- 87. */ - .long 0 /* (User-Defined Vectors 26) CPM Parallel IO PC2- 88. */ - .long 0 /* (User-Defined Vectors 27) CPM Timer 1 - 89. */ - .long 0 /* (User-Defined Vectors 28) CPM Parallel IO PC1- 90. */ - .long 0 /* (User-Defined Vectors 29) CPM SCC 4 - 91. */ - .long 0 /* (User-Defined Vectors 30) CPM SCC 3 - 92. */ - .long 0 /* (User-Defined Vectors 31) CPM SCC 2 - 93. */ - .long 0 /* (User-Defined Vectors 32) CPM SCC 1 - 94. */ - .long 0 /* (User-Defined Vectors 33) CPM Parallel IO PC0- 95. */ - /* I don't think anything uses the vectors after here. */ - .long 0 /* (User-Defined Vectors 34) - 96. */ - .long 0,0,0,0,0 /* (User-Defined Vectors 35 - 39). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 40 - 49). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 50 - 59). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 60 - 69). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 70 - 79). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 80 - 89). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 90 - 99). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 100 - 109). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 110 - 119). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 120 - 129). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 130 - 139). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 140 - 149). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 150 - 159). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 160 - 169). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 170 - 179). */ - .long 0,0,0,0,0,0,0,0,0,0 /* (User-Defined Vectors 180 - 189). */ - .long 0,0,0 /* (User-Defined Vectors 190 - 192). */ -.text -ignore: rte diff --git a/arch/m68k/68360/ints.c b/arch/m68k/68360/ints.c deleted file mode 100644 index 2360fc0..0000000 --- a/arch/m68k/68360/ints.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * ints.c - first level interrupt handlers - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - * - * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com> - * Copyright (c) 1996 Roman Zippel - * Copyright (c) 1999 D. 
Jeff Dionne <jeff@uclinux.org> - */ - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <asm/traps.h> -#include <asm/machdep.h> -#include <asm/m68360.h> - -/* from quicc/commproc.c: */ -extern QUICC *pquicc; -extern void cpm_interrupt_init(void); - -#define INTERNAL_IRQS (96) - -/* assembler routines */ -asmlinkage void system_call(void); -asmlinkage void buserr(void); -asmlinkage void trap(void); -asmlinkage void bad_interrupt(void); -asmlinkage void inthandler(void); - -static void intc_irq_unmask(struct irq_data *d) -{ - pquicc->intr_cimr |= (1 << d->irq); -} - -static void intc_irq_mask(struct irq_data *d) -{ - pquicc->intr_cimr &= ~(1 << d->irq); -} - -static void intc_irq_ack(struct irq_data *d) -{ - pquicc->intr_cisr = (1 << d->irq); -} - -static struct irq_chip intc_irq_chip = { - .name = "M68K-INTC", - .irq_mask = intc_irq_mask, - .irq_unmask = intc_irq_unmask, - .irq_ack = intc_irq_ack, -}; - -/* - * This function should be called during kernel startup to initialize - * the vector table. - */ -void __init trap_init(void) -{ - int vba = (CPM_VECTOR_BASE<<4); - - /* set up the vectors */ - _ramvec[2] = buserr; - _ramvec[3] = trap; - _ramvec[4] = trap; - _ramvec[5] = trap; - _ramvec[6] = trap; - _ramvec[7] = trap; - _ramvec[8] = trap; - _ramvec[9] = trap; - _ramvec[10] = trap; - _ramvec[11] = trap; - _ramvec[12] = trap; - _ramvec[13] = trap; - _ramvec[14] = trap; - _ramvec[15] = trap; - - _ramvec[32] = system_call; - _ramvec[33] = trap; - - cpm_interrupt_init(); - - /* set up CICR for vector base address and irq level */ - /* irl = 4, hp = 1f - see MC68360UM p 7-377 */ - pquicc->intr_cicr = 0x00e49f00 | vba; - - /* CPM interrupt vectors: (p 7-376) */ - _ramvec[vba+CPMVEC_ERROR] = bad_interrupt; /* Error */ - _ramvec[vba+CPMVEC_PIO_PC11] = inthandler; /* pio - pc11 */ - _ramvec[vba+CPMVEC_PIO_PC10] = inthandler; /* pio - pc10 */ - _ramvec[vba+CPMVEC_SMC2] = inthandler; /* smc2/pip */ - _ramvec[vba+CPMVEC_SMC1] = inthandler; /* smc1 */ - _ramvec[vba+CPMVEC_SPI] = inthandler; /* spi */ - _ramvec[vba+CPMVEC_PIO_PC9] = inthandler; /* pio - pc9 */ - _ramvec[vba+CPMVEC_TIMER4] = inthandler; /* timer 4 */ - _ramvec[vba+CPMVEC_RESERVED1] = inthandler; /* reserved */ - _ramvec[vba+CPMVEC_PIO_PC8] = inthandler; /* pio - pc8 */ - _ramvec[vba+CPMVEC_PIO_PC7] = inthandler; /* pio - pc7 */ - _ramvec[vba+CPMVEC_PIO_PC6] = inthandler; /* pio - pc6 */ - _ramvec[vba+CPMVEC_TIMER3] = inthandler; /* timer 3 */ - _ramvec[vba+CPMVEC_PIO_PC5] = inthandler; /* pio - pc5 */ - _ramvec[vba+CPMVEC_PIO_PC4] = inthandler; /* pio - pc4 */ - _ramvec[vba+CPMVEC_RESERVED2] = inthandler; /* reserved */ - _ramvec[vba+CPMVEC_RISCTIMER] = inthandler; /* timer table */ - _ramvec[vba+CPMVEC_TIMER2] = inthandler; /* timer 2 */ - _ramvec[vba+CPMVEC_RESERVED3] = inthandler; /* reserved */ - _ramvec[vba+CPMVEC_IDMA2] = inthandler; /* idma 2 */ - _ramvec[vba+CPMVEC_IDMA1] = inthandler; /* idma 1 */ - _ramvec[vba+CPMVEC_SDMA_CB_ERR] = inthandler; /* sdma channel bus error */ - _ramvec[vba+CPMVEC_PIO_PC3] = inthandler; /* pio - pc3 */ - _ramvec[vba+CPMVEC_PIO_PC2] = inthandler; /* pio - pc2 */ - /* _ramvec[vba+CPMVEC_TIMER1] = cpm_isr_timer1; */ /* timer 1 */ - _ramvec[vba+CPMVEC_TIMER1] = inthandler; /* timer 1 */ - _ramvec[vba+CPMVEC_PIO_PC1] = inthandler; /* pio - pc1 */ - _ramvec[vba+CPMVEC_SCC4] = inthandler; /* scc 4 */ - _ramvec[vba+CPMVEC_SCC3] = inthandler; /* scc 3 */ - _ramvec[vba+CPMVEC_SCC2] = inthandler; /* scc 2 */ - 
_ramvec[vba+CPMVEC_SCC1] = inthandler; /* scc 1 */ - _ramvec[vba+CPMVEC_PIO_PC0] = inthandler; /* pio - pc0 */ - - - /* turn off all CPM interrupts */ - pquicc->intr_cimr = 0x00000000; -} - -void init_IRQ(void) -{ - int i; - - for (i = 0; (i < NR_IRQS); i++) { - irq_set_chip(i, &intc_irq_chip); - irq_set_handler(i, handle_level_irq); - } -} - diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index c496d48..0dfcf12 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -114,13 +114,6 @@ config M68VZ328 help Motorola 68VZ328 processor support. -config M68360 - bool "MC68360" - depends on !MMU - select MCPU32 - help - Motorola 68360 processor support. - endif # M68KCLASSIC if COLDFIRE diff --git a/arch/m68k/Kconfig.debug b/arch/m68k/Kconfig.debug index 64776d7..50a67d0 100644 --- a/arch/m68k/Kconfig.debug +++ b/arch/m68k/Kconfig.debug @@ -12,7 +12,7 @@ config BOOTPARAM_STRING config EARLY_PRINTK bool "Early printk" - depends on !(SUN3 || M68360 || M68000 || COLDFIRE) + depends on !(SUN3 || M68000 || COLDFIRE) help Write kernel log output directly to a serial port. Where implemented, output goes to the framebuffer as well. diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 61dc643..2a5c7ab 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -187,12 +187,6 @@ config MEMORY_RESERVE help Reserve certain memory regions on 68x328 based boards. -config UCQUICC - bool "Lineo uCquicc board support" - depends on M68360 - help - Support for the Lineo uCquicc board. - config ARN5206 bool "Arnewsh 5206 board support" depends on M5206 diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 0b29dcf..f0dd9fc 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -39,7 +39,6 @@ cpuflags-$(CONFIG_M68040) := -m68040 endif cpuflags-$(CONFIG_M68030) := cpuflags-$(CONFIG_M68020) := -cpuflags-$(CONFIG_M68360) := -m68332 cpuflags-$(CONFIG_M68000) := -m68000 cpuflags-$(CONFIG_M5441x) := $(call cc-option,-mcpu=54455,-mcfv4e) cpuflags-$(CONFIG_M54xx) := $(call cc-option,-mcpu=5475,-m5200) @@ -92,7 +91,6 @@ endif # head-y := arch/m68k/kernel/head.o head-$(CONFIG_SUN3) := arch/m68k/kernel/sun3-head.o -head-$(CONFIG_M68360) := arch/m68k/68360/head.o head-$(CONFIG_M68000) := arch/m68k/68000/head.o head-$(CONFIG_COLDFIRE) := arch/m68k/coldfire/head.o @@ -114,7 +112,6 @@ core-$(CONFIG_NATFEAT) += arch/m68k/emu/ core-$(CONFIG_M68040) += arch/m68k/fpsp040/ core-$(CONFIG_M68060) += arch/m68k/ifpsp060/ core-$(CONFIG_M68KFPU_EMU) += arch/m68k/math-emu/ -core-$(CONFIG_M68360) += arch/m68k/68360/ core-$(CONFIG_M68000) += arch/m68k/68000/ core-$(CONFIG_COLDFIRE) += arch/m68k/coldfire/ diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c index 71ea4c0..a0fc0c1 100644 --- a/arch/m68k/coldfire/device.c +++ b/arch/m68k/coldfire/device.c @@ -89,7 +89,7 @@ static struct platform_device mcf_uart = { .dev.platform_data = mcf_uart_platform_data, }; -#ifdef CONFIG_FEC +#if IS_ENABLED(CONFIG_FEC) #ifdef CONFIG_M5441x #define FEC_NAME "enet-fec" @@ -329,7 +329,7 @@ static struct platform_device mcf_qspi = { static struct platform_device *mcf_devices[] __initdata = { &mcf_uart, -#ifdef CONFIG_FEC +#if IS_ENABLED(CONFIG_FEC) &mcf_fec0, #ifdef MCFFEC_BASE1 &mcf_fec1, diff --git a/arch/m68k/coldfire/gpio.c b/arch/m68k/coldfire/gpio.c index 37a83e2..8832083 100644 --- a/arch/m68k/coldfire/gpio.c +++ b/arch/m68k/coldfire/gpio.c @@ -178,7 +178,7 @@ static struct gpio_chip mcfgpio_chip = { static int __init mcfgpio_sysinit(void) { - gpiochip_add(&mcfgpio_chip); + 
gpiochip_add_data(&mcfgpio_chip, NULL); return subsys_system_register(&mcfgpio_subsys, NULL); } diff --git a/arch/m68k/include/asm/MC68328.h b/arch/m68k/include/asm/MC68328.h index 4ebf098..1a8080c 100644 --- a/arch/m68k/include/asm/MC68328.h +++ b/arch/m68k/include/asm/MC68328.h @@ -798,7 +798,7 @@ /********** * - * 0xFFFFF7xx -- Serial Periferial Interface Slave (SPIS) + * 0xFFFFF7xx -- Serial Peripheral Interface Slave (SPIS) * **********/ @@ -824,7 +824,7 @@ /********** * - * 0xFFFFF8xx -- Serial Periferial Interface Master (SPIM) + * 0xFFFFF8xx -- Serial Peripheral Interface Master (SPIM) * **********/ @@ -904,7 +904,7 @@ #define UBAUD_PRESCALER_MASK 0x003f /* Actual divisor is 65 - PRESCALER */ #define UBAUD_PRESCALER_SHIFT 0 -#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divizor */ +#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divisor */ #define UBAUD_DIVIDE_SHIFT 8 #define UBAUD_BAUD_SRC 0x0800 /* Baud Rate Source */ #define UBAUD_GPIOSRC 0x1000 /* GPIO source */ diff --git a/arch/m68k/include/asm/MC68EZ328.h b/arch/m68k/include/asm/MC68EZ328.h index d1bde58..fedac87 100644 --- a/arch/m68k/include/asm/MC68EZ328.h +++ b/arch/m68k/include/asm/MC68EZ328.h @@ -631,7 +631,7 @@ /********** * - * 0xFFFFF8xx -- Serial Periferial Interface Master (SPIM) + * 0xFFFFF8xx -- Serial Peripheral Interface Master (SPIM) * **********/ @@ -712,7 +712,7 @@ #define UBAUD_PRESCALER_MASK 0x003f /* Actual divisor is 65 - PRESCALER */ #define UBAUD_PRESCALER_SHIFT 0 -#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divizor */ +#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divisor */ #define UBAUD_DIVIDE_SHIFT 8 #define UBAUD_BAUD_SRC 0x0800 /* Baud Rate Source */ #define UBAUD_UCLKDIR 0x2000 /* UCLK Direction */ @@ -1160,7 +1160,7 @@ typedef volatile struct { #define DRAMMC_COL10 0x0080 /* Col address bit for MD10 PA11/PA0 */ #define DRAMMC_COL9 0x0040 /* Col address bit for MD9 PA10/PA0 */ #define DRAMMC_COL8 0x0020 /* Col address bit for MD8 PA9/PA0 */ -#define DRAMMC_REF_MASK 0x001f /* Reresh Cycle */ +#define DRAMMC_REF_MASK 0x001f /* Refresh Cycle */ #define DRAMMC_REF_SHIFT 0 /* diff --git a/arch/m68k/include/asm/MC68VZ328.h b/arch/m68k/include/asm/MC68VZ328.h index 6bd1bf1..34a51b2 100644 --- a/arch/m68k/include/asm/MC68VZ328.h +++ b/arch/m68k/include/asm/MC68VZ328.h @@ -724,7 +724,7 @@ /********** * - * 0xFFFFF8xx -- Serial Periferial Interface Master (SPIM) + * 0xFFFFF8xx -- Serial Peripheral Interface Master (SPIM) * **********/ @@ -806,7 +806,7 @@ #define UBAUD_PRESCALER_MASK 0x003f /* Actual divisor is 65 - PRESCALER */ #define UBAUD_PRESCALER_SHIFT 0 -#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divizor */ +#define UBAUD_DIVIDE_MASK 0x0700 /* Baud Rate freq. divisor */ #define UBAUD_DIVIDE_SHIFT 8 #define UBAUD_BAUD_SRC 0x0800 /* Baud Rate Source */ #define UBAUD_UCLKDIR 0x2000 /* UCLK Direction */ @@ -1256,7 +1256,7 @@ typedef struct { #define DRAMMC_COL10 0x0080 /* Col address bit for MD10 PA11/PA0 */ #define DRAMMC_COL9 0x0040 /* Col address bit for MD9 PA10/PA0 */ #define DRAMMC_COL8 0x0020 /* Col address bit for MD8 PA9/PA0 */ -#define DRAMMC_REF_MASK 0x001f /* Reresh Cycle */ +#define DRAMMC_REF_MASK 0x001f /* Refresh Cycle */ #define DRAMMC_REF_SHIFT 0 /* diff --git a/arch/m68k/include/asm/commproc.h b/arch/m68k/include/asm/commproc.h deleted file mode 100644 index f41c968..0000000 --- a/arch/m68k/include/asm/commproc.h +++ /dev/null @@ -1,664 +0,0 @@ - -/* - * 68360 Communication Processor Module. 
- * Copyright (c) 2000 Michael Leslie <mleslie@lineo.com> (mc68360) after: - * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> (mpc8xx) - * - * This file contains structures and information for the communication - * processor channels. Some CPM control and status is available - * through the 68360 internal memory map. See include/asm/360_immap.h for details. - * This file is not a complete map of all of the 360 QUICC's capabilities - * - * On the MBX board, EPPC-Bug loads CPM microcode into the first 512 - * bytes of the DP RAM and relocates the I2C parameter area to the - * IDMA1 space. The remaining DP RAM is available for buffer descriptors - * or other use. - */ -#ifndef __CPM_360__ -#define __CPM_360__ - - -/* CPM Command register masks: */ -#define CPM_CR_RST ((ushort)0x8000) -#define CPM_CR_OPCODE ((ushort)0x0f00) -#define CPM_CR_CHAN ((ushort)0x00f0) -#define CPM_CR_FLG ((ushort)0x0001) - -/* CPM Command set (opcodes): */ -#define CPM_CR_INIT_TRX ((ushort)0x0000) -#define CPM_CR_INIT_RX ((ushort)0x0001) -#define CPM_CR_INIT_TX ((ushort)0x0002) -#define CPM_CR_HUNT_MODE ((ushort)0x0003) -#define CPM_CR_STOP_TX ((ushort)0x0004) -#define CPM_CR_GRSTOP_TX ((ushort)0x0005) -#define CPM_CR_RESTART_TX ((ushort)0x0006) -#define CPM_CR_CLOSE_RXBD ((ushort)0x0007) -#define CPM_CR_SET_GADDR ((ushort)0x0008) -#define CPM_CR_GCI_TIMEOUT ((ushort)0x0009) -#define CPM_CR_GCI_ABORT ((ushort)0x000a) -#define CPM_CR_RESET_BCS ((ushort)0x000a) - -/* CPM Channel numbers. */ -#define CPM_CR_CH_SCC1 ((ushort)0x0000) -#define CPM_CR_CH_SCC2 ((ushort)0x0004) -#define CPM_CR_CH_SPI ((ushort)0x0005) /* SPI / Timers */ -#define CPM_CR_CH_TMR ((ushort)0x0005) -#define CPM_CR_CH_SCC3 ((ushort)0x0008) -#define CPM_CR_CH_SMC1 ((ushort)0x0009) /* SMC1 / IDMA1 */ -#define CPM_CR_CH_IDMA1 ((ushort)0x0009) -#define CPM_CR_CH_SCC4 ((ushort)0x000c) -#define CPM_CR_CH_SMC2 ((ushort)0x000d) /* SMC2 / IDMA2 */ -#define CPM_CR_CH_IDMA2 ((ushort)0x000d) - - -#define mk_cr_cmd(CH, CMD) ((CMD << 8) | (CH << 4)) - -#if 1 /* mleslie: I dinna think we have any such restrictions on - * DP RAM aboard the 360 board - see the MC68360UM p.3-3 */ - -/* The dual ported RAM is multi-functional. Some areas can be (and are - * being) used for microcode. There is an area that can only be used - * as data ram for buffer descriptors, which is all we use right now. - * Currently the first 512 and last 256 bytes are used for microcode. - */ -/* mleslie: The uCquicc board is using no extra microcode in DPRAM */ -#define CPM_DATAONLY_BASE ((uint)0x0000) -#define CPM_DATAONLY_SIZE ((uint)0x0800) -#define CPM_DP_NOSPACE ((uint)0x7fffffff) - -#endif - - -/* Export the base address of the communication processor registers - * and dual port ram. */ -/* extern cpm360_t *cpmp; */ /* Pointer to comm processor */ -extern QUICC *pquicc; -uint m360_cpm_dpalloc(uint size); -/* void *m360_cpm_hostalloc(uint size); */ -void m360_cpm_setbrg(uint brg, uint rate); - -#if 0 /* use QUICC_BD declared in include/asm/m68360_quicc.h */ -/* Buffer descriptors used by many of the CPM protocols. 
*/ -typedef struct cpm_buf_desc { - ushort cbd_sc; /* Status and Control */ - ushort cbd_datlen; /* Data length in buffer */ - uint cbd_bufaddr; /* Buffer address in host memory */ -} cbd_t; -#endif - - -/* rx bd status/control bits */ -#define BD_SC_EMPTY ((ushort)0x8000) /* Receive is empty */ -#define BD_SC_WRAP ((ushort)0x2000) /* Last buffer descriptor in table */ -#define BD_SC_INTRPT ((ushort)0x1000) /* Interrupt on change */ -#define BD_SC_LAST ((ushort)0x0800) /* Last buffer in frame OR control char */ - -#define BD_SC_FIRST ((ushort)0x0400) /* 1st buffer in an HDLC frame */ -#define BD_SC_ADDR ((ushort)0x0400) /* 1st byte is a multidrop address */ - -#define BD_SC_CM ((ushort)0x0200) /* Continuous mode */ -#define BD_SC_ID ((ushort)0x0100) /* Received too many idles */ - -#define BD_SC_AM ((ushort)0x0080) /* Multidrop address match */ -#define BD_SC_DE ((ushort)0x0080) /* DPLL Error (HDLC) */ - -#define BD_SC_BR ((ushort)0x0020) /* Break received */ -#define BD_SC_LG ((ushort)0x0020) /* Frame length violation (HDLC) */ - -#define BD_SC_FR ((ushort)0x0010) /* Framing error */ -#define BD_SC_NO ((ushort)0x0010) /* Nonoctet aligned frame (HDLC) */ - -#define BD_SC_PR ((ushort)0x0008) /* Parity error */ -#define BD_SC_AB ((ushort)0x0008) /* Received abort Sequence (HDLC) */ - -#define BD_SC_OV ((ushort)0x0002) /* Overrun */ -#define BD_SC_CD ((ushort)0x0001) /* Carrier Detect lost */ - -/* tx bd status/control bits (as differ from rx bd) */ -#define BD_SC_READY ((ushort)0x8000) /* Transmit is ready */ -#define BD_SC_TC ((ushort)0x0400) /* Transmit CRC */ -#define BD_SC_P ((ushort)0x0100) /* xmt preamble */ -#define BD_SC_UN ((ushort)0x0002) /* Underrun */ - - - - -/* Parameter RAM offsets. */ - - - -/* In 2.4 ppc, the PROFF_S?C? are used as byte offsets into DPRAM. - * In 2.0, we use a more structured C struct map of DPRAM, and so - * instead, we need only a parameter ram `slot' */ - -#define PRSLOT_SCC1 0 -#define PRSLOT_SCC2 1 -#define PRSLOT_SCC3 2 -#define PRSLOT_SMC1 2 -#define PRSLOT_SCC4 3 -#define PRSLOT_SMC2 3 - - -/* #define PROFF_SCC1 ((uint)0x0000) */ -/* #define PROFF_SCC2 ((uint)0x0100) */ -/* #define PROFF_SCC3 ((uint)0x0200) */ -/* #define PROFF_SMC1 ((uint)0x0280) */ -/* #define PROFF_SCC4 ((uint)0x0300) */ -/* #define PROFF_SMC2 ((uint)0x0380) */ - - -/* Define enough so I can at least use the serial port as a UART. - * The MBX uses SMC1 as the host serial port. - */ -typedef struct smc_uart { - ushort smc_rbase; /* Rx Buffer descriptor base address */ - ushort smc_tbase; /* Tx Buffer descriptor base address */ - u_char smc_rfcr; /* Rx function code */ - u_char smc_tfcr; /* Tx function code */ - ushort smc_mrblr; /* Max receive buffer length */ - uint smc_rstate; /* Internal */ - uint smc_idp; /* Internal */ - ushort smc_rbptr; /* Internal */ - ushort smc_ibc; /* Internal */ - uint smc_rxtmp; /* Internal */ - uint smc_tstate; /* Internal */ - uint smc_tdp; /* Internal */ - ushort smc_tbptr; /* Internal */ - ushort smc_tbc; /* Internal */ - uint smc_txtmp; /* Internal */ - ushort smc_maxidl; /* Maximum idle characters */ - ushort smc_tmpidl; /* Temporary idle counter */ - ushort smc_brklen; /* Last received break length */ - ushort smc_brkec; /* rcv'd break condition counter */ - ushort smc_brkcr; /* xmt break count register */ - ushort smc_rmask; /* Temporary bit mask */ -} smc_uart_t; - -/* Function code bits. -*/ -#define SMC_EB ((u_char)0x10) /* Set big endian byte order */ - -/* SMC uart mode register. 
-*/ -#define SMCMR_REN ((ushort)0x0001) -#define SMCMR_TEN ((ushort)0x0002) -#define SMCMR_DM ((ushort)0x000c) -#define SMCMR_SM_GCI ((ushort)0x0000) -#define SMCMR_SM_UART ((ushort)0x0020) -#define SMCMR_SM_TRANS ((ushort)0x0030) -#define SMCMR_SM_MASK ((ushort)0x0030) -#define SMCMR_PM_EVEN ((ushort)0x0100) /* Even parity, else odd */ -#define SMCMR_REVD SMCMR_PM_EVEN -#define SMCMR_PEN ((ushort)0x0200) /* Parity enable */ -#define SMCMR_BS SMCMR_PEN -#define SMCMR_SL ((ushort)0x0400) /* Two stops, else one */ -#define SMCR_CLEN_MASK ((ushort)0x7800) /* Character length */ -#define smcr_mk_clen(C) (((C) << 11) & SMCR_CLEN_MASK) - -/* SMC2 as Centronics parallel printer. It is half duplex, in that - * it can only receive or transmit. The parameter ram values for - * each direction are either unique or properly overlap, so we can - * include them in one structure. - */ -typedef struct smc_centronics { - ushort scent_rbase; - ushort scent_tbase; - u_char scent_cfcr; - u_char scent_smask; - ushort scent_mrblr; - uint scent_rstate; - uint scent_r_ptr; - ushort scent_rbptr; - ushort scent_r_cnt; - uint scent_rtemp; - uint scent_tstate; - uint scent_t_ptr; - ushort scent_tbptr; - ushort scent_t_cnt; - uint scent_ttemp; - ushort scent_max_sl; - ushort scent_sl_cnt; - ushort scent_character1; - ushort scent_character2; - ushort scent_character3; - ushort scent_character4; - ushort scent_character5; - ushort scent_character6; - ushort scent_character7; - ushort scent_character8; - ushort scent_rccm; - ushort scent_rccr; -} smc_cent_t; - -/* Centronics Status Mask Register. -*/ -#define SMC_CENT_F ((u_char)0x08) -#define SMC_CENT_PE ((u_char)0x04) -#define SMC_CENT_S ((u_char)0x02) - -/* SMC Event and Mask register. -*/ -#define SMCM_BRKE ((unsigned char)0x40) /* When in UART Mode */ -#define SMCM_BRK ((unsigned char)0x10) /* When in UART Mode */ -#define SMCM_TXE ((unsigned char)0x10) /* When in Transparent Mode */ -#define SMCM_BSY ((unsigned char)0x04) -#define SMCM_TX ((unsigned char)0x02) -#define SMCM_RX ((unsigned char)0x01) - -/* Baud rate generators. -*/ -#define CPM_BRG_RST ((uint)0x00020000) -#define CPM_BRG_EN ((uint)0x00010000) -#define CPM_BRG_EXTC_INT ((uint)0x00000000) -#define CPM_BRG_EXTC_CLK2 ((uint)0x00004000) -#define CPM_BRG_EXTC_CLK6 ((uint)0x00008000) -#define CPM_BRG_ATB ((uint)0x00002000) -#define CPM_BRG_CD_MASK ((uint)0x00001ffe) -#define CPM_BRG_DIV16 ((uint)0x00000001) - -/* SCCs. 
-*/ -#define SCC_GSMRH_IRP ((uint)0x00040000) -#define SCC_GSMRH_GDE ((uint)0x00010000) -#define SCC_GSMRH_TCRC_CCITT ((uint)0x00008000) -#define SCC_GSMRH_TCRC_BISYNC ((uint)0x00004000) -#define SCC_GSMRH_TCRC_HDLC ((uint)0x00000000) -#define SCC_GSMRH_REVD ((uint)0x00002000) -#define SCC_GSMRH_TRX ((uint)0x00001000) -#define SCC_GSMRH_TTX ((uint)0x00000800) -#define SCC_GSMRH_CDP ((uint)0x00000400) -#define SCC_GSMRH_CTSP ((uint)0x00000200) -#define SCC_GSMRH_CDS ((uint)0x00000100) -#define SCC_GSMRH_CTSS ((uint)0x00000080) -#define SCC_GSMRH_TFL ((uint)0x00000040) -#define SCC_GSMRH_RFW ((uint)0x00000020) -#define SCC_GSMRH_TXSY ((uint)0x00000010) -#define SCC_GSMRH_SYNL16 ((uint)0x0000000c) -#define SCC_GSMRH_SYNL8 ((uint)0x00000008) -#define SCC_GSMRH_SYNL4 ((uint)0x00000004) -#define SCC_GSMRH_RTSM ((uint)0x00000002) -#define SCC_GSMRH_RSYN ((uint)0x00000001) - -#define SCC_GSMRL_SIR ((uint)0x80000000) /* SCC2 only */ -#define SCC_GSMRL_EDGE_NONE ((uint)0x60000000) -#define SCC_GSMRL_EDGE_NEG ((uint)0x40000000) -#define SCC_GSMRL_EDGE_POS ((uint)0x20000000) -#define SCC_GSMRL_EDGE_BOTH ((uint)0x00000000) -#define SCC_GSMRL_TCI ((uint)0x10000000) -#define SCC_GSMRL_TSNC_3 ((uint)0x0c000000) -#define SCC_GSMRL_TSNC_4 ((uint)0x08000000) -#define SCC_GSMRL_TSNC_14 ((uint)0x04000000) -#define SCC_GSMRL_TSNC_INF ((uint)0x00000000) -#define SCC_GSMRL_RINV ((uint)0x02000000) -#define SCC_GSMRL_TINV ((uint)0x01000000) -#define SCC_GSMRL_TPL_128 ((uint)0x00c00000) -#define SCC_GSMRL_TPL_64 ((uint)0x00a00000) -#define SCC_GSMRL_TPL_48 ((uint)0x00800000) -#define SCC_GSMRL_TPL_32 ((uint)0x00600000) -#define SCC_GSMRL_TPL_16 ((uint)0x00400000) -#define SCC_GSMRL_TPL_8 ((uint)0x00200000) -#define SCC_GSMRL_TPL_NONE ((uint)0x00000000) -#define SCC_GSMRL_TPP_ALL1 ((uint)0x00180000) -#define SCC_GSMRL_TPP_01 ((uint)0x00100000) -#define SCC_GSMRL_TPP_10 ((uint)0x00080000) -#define SCC_GSMRL_TPP_ZEROS ((uint)0x00000000) -#define SCC_GSMRL_TEND ((uint)0x00040000) -#define SCC_GSMRL_TDCR_32 ((uint)0x00030000) -#define SCC_GSMRL_TDCR_16 ((uint)0x00020000) -#define SCC_GSMRL_TDCR_8 ((uint)0x00010000) -#define SCC_GSMRL_TDCR_1 ((uint)0x00000000) -#define SCC_GSMRL_RDCR_32 ((uint)0x0000c000) -#define SCC_GSMRL_RDCR_16 ((uint)0x00008000) -#define SCC_GSMRL_RDCR_8 ((uint)0x00004000) -#define SCC_GSMRL_RDCR_1 ((uint)0x00000000) -#define SCC_GSMRL_RENC_DFMAN ((uint)0x00003000) -#define SCC_GSMRL_RENC_MANCH ((uint)0x00002000) -#define SCC_GSMRL_RENC_FM0 ((uint)0x00001000) -#define SCC_GSMRL_RENC_NRZI ((uint)0x00000800) -#define SCC_GSMRL_RENC_NRZ ((uint)0x00000000) -#define SCC_GSMRL_TENC_DFMAN ((uint)0x00000600) -#define SCC_GSMRL_TENC_MANCH ((uint)0x00000400) -#define SCC_GSMRL_TENC_FM0 ((uint)0x00000200) -#define SCC_GSMRL_TENC_NRZI ((uint)0x00000100) -#define SCC_GSMRL_TENC_NRZ ((uint)0x00000000) -#define SCC_GSMRL_DIAG_LE ((uint)0x000000c0) /* Loop and echo */ -#define SCC_GSMRL_DIAG_ECHO ((uint)0x00000080) -#define SCC_GSMRL_DIAG_LOOP ((uint)0x00000040) -#define SCC_GSMRL_DIAG_NORM ((uint)0x00000000) -#define SCC_GSMRL_ENR ((uint)0x00000020) -#define SCC_GSMRL_ENT ((uint)0x00000010) -#define SCC_GSMRL_MODE_ENET ((uint)0x0000000c) -#define SCC_GSMRL_MODE_DDCMP ((uint)0x00000009) -#define SCC_GSMRL_MODE_BISYNC ((uint)0x00000008) -#define SCC_GSMRL_MODE_V14 ((uint)0x00000007) -#define SCC_GSMRL_MODE_AHDLC ((uint)0x00000006) -#define SCC_GSMRL_MODE_PROFIBUS ((uint)0x00000005) -#define SCC_GSMRL_MODE_UART ((uint)0x00000004) -#define SCC_GSMRL_MODE_SS7 ((uint)0x00000003) -#define SCC_GSMRL_MODE_ATALK 
((uint)0x00000002) -#define SCC_GSMRL_MODE_HDLC ((uint)0x00000000) - -#define SCC_TODR_TOD ((ushort)0x8000) - -/* SCC Event and Mask register. -*/ -#define SCCM_TXE ((unsigned char)0x10) -#define SCCM_BSY ((unsigned char)0x04) -#define SCCM_TX ((unsigned char)0x02) -#define SCCM_RX ((unsigned char)0x01) - -typedef struct scc_param { - ushort scc_rbase; /* Rx Buffer descriptor base address */ - ushort scc_tbase; /* Tx Buffer descriptor base address */ - u_char scc_rfcr; /* Rx function code */ - u_char scc_tfcr; /* Tx function code */ - ushort scc_mrblr; /* Max receive buffer length */ - uint scc_rstate; /* Internal */ - uint scc_idp; /* Internal */ - ushort scc_rbptr; /* Internal */ - ushort scc_ibc; /* Internal */ - uint scc_rxtmp; /* Internal */ - uint scc_tstate; /* Internal */ - uint scc_tdp; /* Internal */ - ushort scc_tbptr; /* Internal */ - ushort scc_tbc; /* Internal */ - uint scc_txtmp; /* Internal */ - uint scc_rcrc; /* Internal */ - uint scc_tcrc; /* Internal */ -} sccp_t; - - -/* Function code bits. - */ -#define SCC_EB ((u_char)0x10) /* Set big endian byte order */ -#define SCC_FC_DMA ((u_char)0x08) /* Set SDMA */ - -/* CPM Ethernet through SCC1. - */ -typedef struct scc_enet { - sccp_t sen_genscc; - uint sen_cpres; /* Preset CRC */ - uint sen_cmask; /* Constant mask for CRC */ - uint sen_crcec; /* CRC Error counter */ - uint sen_alec; /* alignment error counter */ - uint sen_disfc; /* discard frame counter */ - ushort sen_pads; /* Tx short frame pad character */ - ushort sen_retlim; /* Retry limit threshold */ - ushort sen_retcnt; /* Retry limit counter */ - ushort sen_maxflr; /* maximum frame length register */ - ushort sen_minflr; /* minimum frame length register */ - ushort sen_maxd1; /* maximum DMA1 length */ - ushort sen_maxd2; /* maximum DMA2 length */ - ushort sen_maxd; /* Rx max DMA */ - ushort sen_dmacnt; /* Rx DMA counter */ - ushort sen_maxb; /* Max BD byte count */ - ushort sen_gaddr1; /* Group address filter */ - ushort sen_gaddr2; - ushort sen_gaddr3; - ushort sen_gaddr4; - uint sen_tbuf0data0; /* Save area 0 - current frame */ - uint sen_tbuf0data1; /* Save area 1 - current frame */ - uint sen_tbuf0rba; /* Internal */ - uint sen_tbuf0crc; /* Internal */ - ushort sen_tbuf0bcnt; /* Internal */ - ushort sen_paddrh; /* physical address (MSB) */ - ushort sen_paddrm; - ushort sen_paddrl; /* physical address (LSB) */ - ushort sen_pper; /* persistence */ - ushort sen_rfbdptr; /* Rx first BD pointer */ - ushort sen_tfbdptr; /* Tx first BD pointer */ - ushort sen_tlbdptr; /* Tx last BD pointer */ - uint sen_tbuf1data0; /* Save area 0 - current frame */ - uint sen_tbuf1data1; /* Save area 1 - current frame */ - uint sen_tbuf1rba; /* Internal */ - uint sen_tbuf1crc; /* Internal */ - ushort sen_tbuf1bcnt; /* Internal */ - ushort sen_txlen; /* Tx Frame length counter */ - ushort sen_iaddr1; /* Individual address filter */ - ushort sen_iaddr2; - ushort sen_iaddr3; - ushort sen_iaddr4; - ushort sen_boffcnt; /* Backoff counter */ - - /* NOTE: Some versions of the manual have the following items - * incorrectly documented. Below is the proper order. 
- */ - ushort sen_taddrh; /* temp address (MSB) */ - ushort sen_taddrm; - ushort sen_taddrl; /* temp address (LSB) */ -} scc_enet_t; - - - -#if defined (CONFIG_UCQUICC) -/* uCquicc has the following signals connected to Ethernet: - * 68360 - lxt905 - * PA0/RXD1 - rxd - * PA1/TXD1 - txd - * PA8/CLK1 - tclk - * PA9/CLK2 - rclk - * PC0/!RTS1 - t_en - * PC1/!CTS1 - col - * PC5/!CD1 - cd - */ -#define PA_ENET_RXD PA_RXD1 -#define PA_ENET_TXD PA_TXD1 -#define PA_ENET_TCLK PA_CLK1 -#define PA_ENET_RCLK PA_CLK2 -#define PC_ENET_TENA PC_RTS1 -#define PC_ENET_CLSN PC_CTS1 -#define PC_ENET_RENA PC_CD1 - -/* Control bits in the SICR to route TCLK (CLK1) and RCLK (CLK2) to - * SCC1. - */ -#define SICR_ENET_MASK ((uint)0x000000ff) -#define SICR_ENET_CLKRT ((uint)0x0000002c) - -#endif /* config_ucquicc */ - - -#ifdef MBX -/* Bits in parallel I/O port registers that have to be set/cleared - * to configure the pins for SCC1 use. The TCLK and RCLK seem unique - * to the MBX860 board. Any two of the four available clocks could be - * used, and the MPC860 cookbook manual has an example using different - * clock pins. - */ -#define PA_ENET_RXD ((ushort)0x0001) -#define PA_ENET_TXD ((ushort)0x0002) -#define PA_ENET_TCLK ((ushort)0x0200) -#define PA_ENET_RCLK ((ushort)0x0800) -#define PC_ENET_TENA ((ushort)0x0001) -#define PC_ENET_CLSN ((ushort)0x0010) -#define PC_ENET_RENA ((ushort)0x0020) - -/* Control bits in the SICR to route TCLK (CLK2) and RCLK (CLK4) to - * SCC1. Also, make sure GR1 (bit 24) and SC1 (bit 25) are zero. - */ -#define SICR_ENET_MASK ((uint)0x000000ff) -#define SICR_ENET_CLKRT ((uint)0x0000003d) -#endif - -/* SCC Event register as used by Ethernet. -*/ -#define SCCE_ENET_GRA ((ushort)0x0080) /* Graceful stop complete */ -#define SCCE_ENET_TXE ((ushort)0x0010) /* Transmit Error */ -#define SCCE_ENET_RXF ((ushort)0x0008) /* Full frame received */ -#define SCCE_ENET_BSY ((ushort)0x0004) /* All incoming buffers full */ -#define SCCE_ENET_TXB ((ushort)0x0002) /* A buffer was transmitted */ -#define SCCE_ENET_RXB ((ushort)0x0001) /* A buffer was received */ - -/* SCC Mode Register (PMSR) as used by Ethernet. -*/ -#define SCC_PMSR_HBC ((ushort)0x8000) /* Enable heartbeat */ -#define SCC_PMSR_FC ((ushort)0x4000) /* Force collision */ -#define SCC_PMSR_RSH ((ushort)0x2000) /* Receive short frames */ -#define SCC_PMSR_IAM ((ushort)0x1000) /* Check individual hash */ -#define SCC_PMSR_ENCRC ((ushort)0x0800) /* Ethernet CRC mode */ -#define SCC_PMSR_PRO ((ushort)0x0200) /* Promiscuous mode */ -#define SCC_PMSR_BRO ((ushort)0x0100) /* Catch broadcast pkts */ -#define SCC_PMSR_SBT ((ushort)0x0080) /* Special backoff timer */ -#define SCC_PMSR_LPB ((ushort)0x0040) /* Set Loopback mode */ -#define SCC_PMSR_SIP ((ushort)0x0020) /* Sample Input Pins */ -#define SCC_PMSR_LCW ((ushort)0x0010) /* Late collision window */ -#define SCC_PMSR_NIB22 ((ushort)0x000a) /* Start frame search */ -#define SCC_PMSR_FDE ((ushort)0x0001) /* Full duplex enable */ - -/* Buffer descriptor control/status used by Ethernet receive. 
-*/ -#define BD_ENET_RX_EMPTY ((ushort)0x8000) -#define BD_ENET_RX_WRAP ((ushort)0x2000) -#define BD_ENET_RX_INTR ((ushort)0x1000) -#define BD_ENET_RX_LAST ((ushort)0x0800) -#define BD_ENET_RX_FIRST ((ushort)0x0400) -#define BD_ENET_RX_MISS ((ushort)0x0100) -#define BD_ENET_RX_LG ((ushort)0x0020) -#define BD_ENET_RX_NO ((ushort)0x0010) -#define BD_ENET_RX_SH ((ushort)0x0008) -#define BD_ENET_RX_CR ((ushort)0x0004) -#define BD_ENET_RX_OV ((ushort)0x0002) -#define BD_ENET_RX_CL ((ushort)0x0001) -#define BD_ENET_RX_STATS ((ushort)0x013f) /* All status bits */ - -/* Buffer descriptor control/status used by Ethernet transmit. -*/ -#define BD_ENET_TX_READY ((ushort)0x8000) -#define BD_ENET_TX_PAD ((ushort)0x4000) -#define BD_ENET_TX_WRAP ((ushort)0x2000) -#define BD_ENET_TX_INTR ((ushort)0x1000) -#define BD_ENET_TX_LAST ((ushort)0x0800) -#define BD_ENET_TX_TC ((ushort)0x0400) -#define BD_ENET_TX_DEF ((ushort)0x0200) -#define BD_ENET_TX_HB ((ushort)0x0100) -#define BD_ENET_TX_LC ((ushort)0x0080) -#define BD_ENET_TX_RL ((ushort)0x0040) -#define BD_ENET_TX_RCMASK ((ushort)0x003c) -#define BD_ENET_TX_UN ((ushort)0x0002) -#define BD_ENET_TX_CSL ((ushort)0x0001) -#define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */ - -/* SCC as UART -*/ -typedef struct scc_uart { - sccp_t scc_genscc; - uint scc_res1; /* Reserved */ - uint scc_res2; /* Reserved */ - ushort scc_maxidl; /* Maximum idle chars */ - ushort scc_idlc; /* temp idle counter */ - ushort scc_brkcr; /* Break count register */ - ushort scc_parec; /* receive parity error counter */ - ushort scc_frmec; /* receive framing error counter */ - ushort scc_nosec; /* receive noise counter */ - ushort scc_brkec; /* receive break condition counter */ - ushort scc_brkln; /* last received break length */ - ushort scc_uaddr1; /* UART address character 1 */ - ushort scc_uaddr2; /* UART address character 2 */ - ushort scc_rtemp; /* Temp storage */ - ushort scc_toseq; /* Transmit out of sequence char */ - ushort scc_char1; /* control character 1 */ - ushort scc_char2; /* control character 2 */ - ushort scc_char3; /* control character 3 */ - ushort scc_char4; /* control character 4 */ - ushort scc_char5; /* control character 5 */ - ushort scc_char6; /* control character 6 */ - ushort scc_char7; /* control character 7 */ - ushort scc_char8; /* control character 8 */ - ushort scc_rccm; /* receive control character mask */ - ushort scc_rccr; /* receive control character register */ - ushort scc_rlbc; /* receive last break character */ -} scc_uart_t; - -/* SCC Event and Mask registers when it is used as a UART. -*/ -#define UART_SCCM_GLR ((ushort)0x1000) -#define UART_SCCM_GLT ((ushort)0x0800) -#define UART_SCCM_AB ((ushort)0x0200) -#define UART_SCCM_IDL ((ushort)0x0100) -#define UART_SCCM_GRA ((ushort)0x0080) -#define UART_SCCM_BRKE ((ushort)0x0040) -#define UART_SCCM_BRKS ((ushort)0x0020) -#define UART_SCCM_CCR ((ushort)0x0008) -#define UART_SCCM_BSY ((ushort)0x0004) -#define UART_SCCM_TX ((ushort)0x0002) -#define UART_SCCM_RX ((ushort)0x0001) - -/* The SCC PMSR when used as a UART. 
-*/ -#define SCU_PMSR_FLC ((ushort)0x8000) -#define SCU_PMSR_SL ((ushort)0x4000) -#define SCU_PMSR_CL ((ushort)0x3000) -#define SCU_PMSR_UM ((ushort)0x0c00) -#define SCU_PMSR_FRZ ((ushort)0x0200) -#define SCU_PMSR_RZS ((ushort)0x0100) -#define SCU_PMSR_SYN ((ushort)0x0080) -#define SCU_PMSR_DRT ((ushort)0x0040) -#define SCU_PMSR_PEN ((ushort)0x0010) -#define SCU_PMSR_RPM ((ushort)0x000c) -#define SCU_PMSR_REVP ((ushort)0x0008) -#define SCU_PMSR_TPM ((ushort)0x0003) -#define SCU_PMSR_TEVP ((ushort)0x0003) - -/* CPM Transparent mode SCC. - */ -typedef struct scc_trans { - sccp_t st_genscc; - uint st_cpres; /* Preset CRC */ - uint st_cmask; /* Constant mask for CRC */ -} scc_trans_t; - -#define BD_SCC_TX_LAST ((ushort)0x0800) - - - -/* CPM interrupts. There are nearly 32 interrupts generated by CPM - * channels or devices. All of these are presented to the PPC core - * as a single interrupt. The CPM interrupt handler dispatches its - * own handlers, in a similar fashion to the PPC core handler. We - * use the table as defined in the manuals (i.e. no special high - * priority and SCC1 == SCCa, etc...). - */ -/* #define CPMVEC_NR 32 */ -/* #define CPMVEC_PIO_PC15 ((ushort)0x1f) */ -/* #define CPMVEC_SCC1 ((ushort)0x1e) */ -/* #define CPMVEC_SCC2 ((ushort)0x1d) */ -/* #define CPMVEC_SCC3 ((ushort)0x1c) */ -/* #define CPMVEC_SCC4 ((ushort)0x1b) */ -/* #define CPMVEC_PIO_PC14 ((ushort)0x1a) */ -/* #define CPMVEC_TIMER1 ((ushort)0x19) */ -/* #define CPMVEC_PIO_PC13 ((ushort)0x18) */ -/* #define CPMVEC_PIO_PC12 ((ushort)0x17) */ -/* #define CPMVEC_SDMA_CB_ERR ((ushort)0x16) */ -/* #define CPMVEC_IDMA1 ((ushort)0x15) */ -/* #define CPMVEC_IDMA2 ((ushort)0x14) */ -/* #define CPMVEC_TIMER2 ((ushort)0x12) */ -/* #define CPMVEC_RISCTIMER ((ushort)0x11) */ -/* #define CPMVEC_I2C ((ushort)0x10) */ -/* #define CPMVEC_PIO_PC11 ((ushort)0x0f) */ -/* #define CPMVEC_PIO_PC10 ((ushort)0x0e) */ -/* #define CPMVEC_TIMER3 ((ushort)0x0c) */ -/* #define CPMVEC_PIO_PC9 ((ushort)0x0b) */ -/* #define CPMVEC_PIO_PC8 ((ushort)0x0a) */ -/* #define CPMVEC_PIO_PC7 ((ushort)0x09) */ -/* #define CPMVEC_TIMER4 ((ushort)0x07) */ -/* #define CPMVEC_PIO_PC6 ((ushort)0x06) */ -/* #define CPMVEC_SPI ((ushort)0x05) */ -/* #define CPMVEC_SMC1 ((ushort)0x04) */ -/* #define CPMVEC_SMC2 ((ushort)0x03) */ -/* #define CPMVEC_PIO_PC5 ((ushort)0x02) */ -/* #define CPMVEC_PIO_PC4 ((ushort)0x01) */ -/* #define CPMVEC_ERROR ((ushort)0x00) */ - -extern void cpm_install_handler(int vec, irq_handler_t handler, void *dev_id); - -/* CPM interrupt configuration vector. -*/ -#define CICR_SCD_SCC4 ((uint)0x00c00000) /* SCC4 @ SCCd */ -#define CICR_SCC_SCC3 ((uint)0x00200000) /* SCC3 @ SCCc */ -#define CICR_SCB_SCC2 ((uint)0x00040000) /* SCC2 @ SCCb */ -#define CICR_SCA_SCC1 ((uint)0x00000000) /* SCC1 @ SCCa */ -#define CICR_IRL_MASK ((uint)0x0000e000) /* Core interrupt */ -#define CICR_HP_MASK ((uint)0x00001f00) /* Hi-pri int. */ -#define CICR_IEN ((uint)0x00000080) /* Int. 
enable */ -#define CICR_SPS ((uint)0x00000001) /* SCC Spread */ -#endif /* __CPM_360__ */ diff --git a/arch/m68k/include/asm/m54xxacr.h b/arch/m68k/include/asm/m54xxacr.h index 6d13cae..59e1710 100644 --- a/arch/m68k/include/asm/m54xxacr.h +++ b/arch/m68k/include/asm/m54xxacr.h @@ -23,8 +23,8 @@ #define CACR_IEC 0x00008000 /* Enable instruction cache */ #define CACR_DNFB 0x00002000 /* Inhibited fill buffer */ #define CACR_IDPI 0x00001000 /* Disable CPUSHL */ -#define CACR_IHLCK 0x00000800 /* Intruction cache half lock */ -#define CACR_IDCM 0x00000400 /* Intruction cache inhibit */ +#define CACR_IHLCK 0x00000800 /* Instruction cache half lock */ +#define CACR_IDCM 0x00000400 /* Instruction cache inhibit */ #define CACR_ICINVA 0x00000100 /* Invalidate instr cache */ #define CACR_EUSP 0x00000020 /* Enable separate user a7 */ diff --git a/arch/m68k/include/asm/m68360.h b/arch/m68k/include/asm/m68360.h deleted file mode 100644 index 4664180..0000000 --- a/arch/m68k/include/asm/m68360.h +++ /dev/null @@ -1,13 +0,0 @@ -#include <asm/m68360_regs.h> -#include <asm/m68360_pram.h> -#include <asm/m68360_quicc.h> -#include <asm/m68360_enet.h> - -#ifdef CONFIG_M68360 - -#define CPM_INTERRUPT 4 - -/* see MC68360 User's Manual, p. 7-377 */ -#define CPM_VECTOR_BASE 0x04 /* 3 MSbits of CPM vector */ - -#endif /* CONFIG_M68360 */ diff --git a/arch/m68k/include/asm/m68360_enet.h b/arch/m68k/include/asm/m68360_enet.h deleted file mode 100644 index 4d04037..0000000 --- a/arch/m68k/include/asm/m68360_enet.h +++ /dev/null @@ -1,177 +0,0 @@ -/*********************************** - * $Id: m68360_enet.h,v 1.1 2002/03/02 15:01:07 gerg Exp $ - *********************************** - * - *************************************** - * Definitions for the ETHERNET controllers - *************************************** - */ - -#ifndef __ETHER_H -#define __ETHER_H - -#include <asm/quicc_simple.h> - -/* - * transmit BD's - */ -#define T_R 0x8000 /* ready bit */ -#define E_T_PAD 0x4000 /* short frame padding */ -#define T_W 0x2000 /* wrap bit */ -#define T_I 0x1000 /* interrupt on completion */ -#define T_L 0x0800 /* last in frame */ -#define T_TC 0x0400 /* transmit CRC (when last) */ - -#define T_DEF 0x0200 /* defer indication */ -#define T_HB 0x0100 /* heartbeat */ -#define T_LC 0x0080 /* error: late collision */ -#define T_RL 0x0040 /* error: retransmission limit */ -#define T_RC 0x003c /* retry count */ -#define T_UN 0x0002 /* error: underrun */ -#define T_CSL 0x0001 /* carier sense lost */ -#define T_ERROR (T_HB | T_LC | T_RL | T_UN | T_CSL) - -/* - * receive BD's - */ -#define R_E 0x8000 /* buffer empty */ -#define R_W 0x2000 /* wrap bit */ -#define R_I 0x1000 /* interrupt on reception */ -#define R_L 0x0800 /* last BD in frame */ -#define R_F 0x0400 /* first BD in frame */ -#define R_M 0x0100 /* received because of promisc. 
mode */ - -#define R_LG 0x0020 /* frame too long */ -#define R_NO 0x0010 /* non-octet aligned */ -#define R_SH 0x0008 /* short frame */ -#define R_CR 0x0004 /* receive CRC error */ -#define R_OV 0x0002 /* receive overrun */ -#define R_CL 0x0001 /* collision */ -#define ETHER_R_ERROR (R_LG | R_NO | R_SH | R_CR | R_OV | R_CL) - - -/* - * ethernet interrupts - */ -#define ETHERNET_GRA 0x0080 /* graceful stop complete */ -#define ETHERNET_TXE 0x0010 /* transmit error */ -#define ETHERNET_RXF 0x0008 /* receive frame */ -#define ETHERNET_BSY 0x0004 /* busy condition */ -#define ETHERNET_TXB 0x0002 /* transmit buffer */ -#define ETHERNET_RXB 0x0001 /* receive buffer */ - -/* - * ethernet protocol specific mode register (PSMR) - */ -#define ETHER_HBC 0x8000 /* heartbeat checking */ -#define ETHER_FC 0x4000 /* force collision */ -#define ETHER_RSH 0x2000 /* receive short frames */ -#define ETHER_IAM 0x1000 /* individual address mode */ -#define ETHER_CRC_32 (0x2<<10) /* Enable CRC */ -#define ETHER_PRO 0x0200 /* promiscuous */ -#define ETHER_BRO 0x0100 /* broadcast address */ -#define ETHER_SBT 0x0080 /* stop backoff timer */ -#define ETHER_LPB 0x0040 /* Loop Back Mode */ -#define ETHER_SIP 0x0020 /* sample input pins */ -#define ETHER_LCW 0x0010 /* late collision window */ -#define ETHER_NIB_13 (0x0<<1) /* # of ignored bits 13 */ -#define ETHER_NIB_14 (0x1<<1) /* # of ignored bits 14 */ -#define ETHER_NIB_15 (0x2<<1) /* # of ignored bits 15 */ -#define ETHER_NIB_16 (0x3<<1) /* # of ignored bits 16 */ -#define ETHER_NIB_21 (0x4<<1) /* # of ignored bits 21 */ -#define ETHER_NIB_22 (0x5<<1) /* # of ignored bits 22 */ -#define ETHER_NIB_23 (0x6<<1) /* # of ignored bits 23 */ -#define ETHER_NIB_24 (0x7<<1) /* # of ignored bits 24 */ - -/* - * ethernet specific parameters - */ -#define CRC_WORD 4 /* Length in bytes of CRC */ -#define C_PRES 0xffffffff /* preform 32 bit CRC */ -#define C_MASK 0xdebb20e3 /* comply with 32 bit CRC */ -#define CRCEC 0x00000000 -#define ALEC 0x00000000 -#define DISFC 0x00000000 -#define PADS 0x00000000 -#define RET_LIM 0x000f /* retry 15 times to send a frame before interrupt */ -#define ETH_MFLR 0x05ee /* 1518 max frame size */ -#define MINFLR 0x0040 /* Minimum frame size 64 */ -#define MAXD1 0x05ee /* Max dma count 1518 */ -#define MAXD2 0x05ee -#define GADDR1 0x00000000 /* Clear group address */ -#define GADDR2 0x00000000 -#define GADDR3 0x00000000 -#define GADDR4 0x00000000 -#define P_PER 0x00000000 /*not used */ -#define IADDR1 0x00000000 /* Individual hash table not used */ -#define IADDR2 0x00000000 -#define IADDR3 0x00000000 -#define IADDR4 0x00000000 -#define TADDR_H 0x00000000 /* clear this regs */ -#define TADDR_M 0x00000000 -#define TADDR_L 0x00000000 - -/* SCC Parameter Ram */ -#define RFCR 0x18 /* normal operation */ -#define TFCR 0x18 /* normal operation */ -#define E_MRBLR 1518 /* Max ethernet frame length */ - -/* - * ethernet specific structure - */ -typedef union { - unsigned char b[6]; - struct { - unsigned short high; - unsigned short middl; - unsigned short low; - } w; -} ETHER_ADDR; - -typedef struct { - int max_frame_length; - int promisc_mode; - int reject_broadcast; - ETHER_ADDR phys_adr; -} ETHER_SPECIFIC; - -typedef struct { - ETHER_ADDR dst_addr; - ETHER_ADDR src_addr; - unsigned short type_or_len; - unsigned char data[1]; -} ETHER_FRAME; - -#define MAX_DATALEN 1500 -typedef struct { - ETHER_ADDR dst_addr; - ETHER_ADDR src_addr; - unsigned short type_or_len; - unsigned char data[MAX_DATALEN]; - unsigned char fcs[CRC_WORD]; -} ETHER_MAX_FRAME; - - 
-/* - * Internal ethernet function prototypes - */ -void ether_interrupt(int scc_num); -/* mleslie: debug */ -/* static void ethernet_rx_internal(int scc_num); */ -/* static void ethernet_tx_internal(int scc_num); */ - -/* - * User callable routines prototypes (ethernet specific) - */ -void ethernet_init(int scc_number, - alloc_routine *alloc_buffer, - free_routine *free_buffer, - store_rx_buffer_routine *store_rx_buffer, - handle_tx_error_routine *handle_tx_error, - handle_rx_error_routine *handle_rx_error, - handle_lost_error_routine *handle_lost_error, - ETHER_SPECIFIC *ether_spec); -int ethernet_tx(int scc_number, void *buf, int length); - -#endif - diff --git a/arch/m68k/include/asm/m68360_pram.h b/arch/m68k/include/asm/m68360_pram.h deleted file mode 100644 index c0cbd96..0000000 --- a/arch/m68k/include/asm/m68360_pram.h +++ /dev/null @@ -1,431 +0,0 @@ -/*********************************** - * $Id: m68360_pram.h,v 1.1 2002/03/02 15:01:07 gerg Exp $ - *********************************** - * - *************************************** - * Definitions of the parameter area RAM. - * Note that different structures are overlaid - * at the same offsets for the different modes - * of operation. - *************************************** - */ - -#ifndef __PRAM_H -#define __PRAM_H - -/* Time slot assignment table */ -#define VALID_SLOT 0x8000 -#define WRAP_SLOT 0x4000 - -/***************************************************************** - Global Multichannel parameter RAM -*****************************************************************/ -struct global_multi_pram { - /* - * Global Multichannel parameter RAM - */ - unsigned long mcbase; /* Multichannel Base pointer */ - unsigned short qmcstate; /* Multichannel Controller state */ - unsigned short mrblr; /* Maximum Receive Buffer Length */ - unsigned short tx_s_ptr; /* TSTATx Pointer */ - unsigned short rxptr; /* Current Time slot entry in TSATRx */ - unsigned short grfthr; /* Global Receive frame threshold */ - unsigned short grfcnt; /* Global Receive Frame Count */ - unsigned long intbase; /* Multichannel Base address */ - unsigned long iintptr; /* Pointer to interrupt queue */ - unsigned short rx_s_ptr; /* TSTARx Pointer */ - - unsigned short txptr; /* Current Time slot entry in TSATTx */ - unsigned long c_mask32; /* CRC Constant (debb20e3) */ - unsigned short tsatrx[32]; /* Time Slot Assignment Table Rx */ - unsigned short tsattx[32]; /* Time Slot Assignment Table Tx */ - unsigned short c_mask16; /* CRC Constant (f0b8) */ -}; - -/***************************************************************** - Quicc32 HDLC parameter RAM -*****************************************************************/ -struct quicc32_pram { - - unsigned short tbase; /* Tx Buffer Descriptors Base Address */ - unsigned short chamr; /* Channel Mode Register */ - unsigned long tstate; /* Tx Internal State */ - unsigned long txintr; /* Tx Internal Data Pointer */ - unsigned short tbptr; /* Tx Buffer Descriptor Pointer */ - unsigned short txcntr; /* Tx Internal Byte Count */ - unsigned long tupack; /* (Tx Temp) */ - unsigned long zistate; /* Zero Insertion machine state */ - unsigned long tcrc; /* Temp Transmit CRC */ - unsigned short intmask; /* Channel's interrupt mask flags */ - unsigned short bdflags; - unsigned short rbase; /* Rx Buffer Descriptors Base Address */ - unsigned short mflr; /* Max Frame Length Register */ - unsigned long rstate; /* Rx Internal State */ - unsigned long rxintr; /* Rx Internal Data Pointer */ - unsigned short rbptr; /* Rx Buffer Descriptor 
Pointer */ - unsigned short rxbyc; /* Rx Internal Byte Count */ - unsigned long rpack; /* (Rx Temp) */ - unsigned long zdstate; /* Zero Deletion machine state */ - unsigned long rcrc; /* Temp Transmit CRC */ - unsigned short maxc; /* Max_length counter */ - unsigned short tmp_mb; /* Temp */ -}; - - -/***************************************************************** - HDLC parameter RAM -*****************************************************************/ - -struct hdlc_pram { - /* - * SCC parameter RAM - */ - unsigned short rbase; /* RX BD base address */ - unsigned short tbase; /* TX BD base address */ - unsigned char rfcr; /* Rx function code */ - unsigned char tfcr; /* Tx function code */ - unsigned short mrblr; /* Rx buffer length */ - unsigned long rstate; /* Rx internal state */ - unsigned long rptr; /* Rx internal data pointer */ - unsigned short rbptr; /* rb BD Pointer */ - unsigned short rcount; /* Rx internal byte count */ - unsigned long rtemp; /* Rx temp */ - unsigned long tstate; /* Tx internal state */ - unsigned long tptr; /* Tx internal data pointer */ - unsigned short tbptr; /* Tx BD pointer */ - unsigned short tcount; /* Tx byte count */ - unsigned long ttemp; /* Tx temp */ - unsigned long rcrc; /* temp receive CRC */ - unsigned long tcrc; /* temp transmit CRC */ - - /* - * HDLC specific parameter RAM - */ - unsigned char RESERVED1[4]; /* Reserved area */ - unsigned long c_mask; /* CRC constant */ - unsigned long c_pres; /* CRC preset */ - unsigned short disfc; /* discarded frame counter */ - unsigned short crcec; /* CRC error counter */ - unsigned short abtsc; /* abort sequence counter */ - unsigned short nmarc; /* nonmatching address rx cnt */ - unsigned short retrc; /* frame retransmission cnt */ - unsigned short mflr; /* maximum frame length reg */ - unsigned short max_cnt; /* maximum length counter */ - unsigned short rfthr; /* received frames threshold */ - unsigned short rfcnt; /* received frames count */ - unsigned short hmask; /* user defined frm addr mask */ - unsigned short haddr1; /* user defined frm address 1 */ - unsigned short haddr2; /* user defined frm address 2 */ - unsigned short haddr3; /* user defined frm address 3 */ - unsigned short haddr4; /* user defined frm address 4 */ - unsigned short tmp; /* temp */ - unsigned short tmp_mb; /* temp */ -}; - - - -/***************************************************************** - UART parameter RAM -*****************************************************************/ - -/* - * bits in uart control characters table - */ -#define CC_INVALID 0x8000 /* control character is valid */ -#define CC_REJ 0x4000 /* don't store char in buffer */ -#define CC_CHAR 0x00ff /* control character */ - -/* UART */ -struct uart_pram { - /* - * SCC parameter RAM - */ - unsigned short rbase; /* RX BD base address */ - unsigned short tbase; /* TX BD base address */ - unsigned char rfcr; /* Rx function code */ - unsigned char tfcr; /* Tx function code */ - unsigned short mrblr; /* Rx buffer length */ - unsigned long rstate; /* Rx internal state */ - unsigned long rptr; /* Rx internal data pointer */ - unsigned short rbptr; /* rb BD Pointer */ - unsigned short rcount; /* Rx internal byte count */ - unsigned long rx_temp; /* Rx temp */ - unsigned long tstate; /* Tx internal state */ - unsigned long tptr; /* Tx internal data pointer */ - unsigned short tbptr; /* Tx BD pointer */ - unsigned short tcount; /* Tx byte count */ - unsigned long ttemp; /* Tx temp */ - unsigned long rcrc; /* temp receive CRC */ - unsigned long tcrc; /* temp transmit 
CRC */ - - /* - * UART specific parameter RAM - */ - unsigned char RESERVED1[8]; /* Reserved area */ - unsigned short max_idl; /* maximum idle characters */ - unsigned short idlc; /* rx idle counter (internal) */ - unsigned short brkcr; /* break count register */ - - unsigned short parec; /* Rx parity error counter */ - unsigned short frmer; /* Rx framing error counter */ - unsigned short nosec; /* Rx noise counter */ - unsigned short brkec; /* Rx break character counter */ - unsigned short brkln; /* Receive break length */ - - unsigned short uaddr1; /* address character 1 */ - unsigned short uaddr2; /* address character 2 */ - unsigned short rtemp; /* temp storage */ - unsigned short toseq; /* Tx out of sequence char */ - unsigned short cc[8]; /* Rx control characters */ - unsigned short rccm; /* Rx control char mask */ - unsigned short rccr; /* Rx control char register */ - unsigned short rlbc; /* Receive last break char */ -}; - - - -/***************************************************************** - BISYNC parameter RAM -*****************************************************************/ - -struct bisync_pram { - /* - * SCC parameter RAM - */ - unsigned short rbase; /* RX BD base address */ - unsigned short tbase; /* TX BD base address */ - unsigned char rfcr; /* Rx function code */ - unsigned char tfcr; /* Tx function code */ - unsigned short mrblr; /* Rx buffer length */ - unsigned long rstate; /* Rx internal state */ - unsigned long rptr; /* Rx internal data pointer */ - unsigned short rbptr; /* rb BD Pointer */ - unsigned short rcount; /* Rx internal byte count */ - unsigned long rtemp; /* Rx temp */ - unsigned long tstate; /* Tx internal state */ - unsigned long tptr; /* Tx internal data pointer */ - unsigned short tbptr; /* Tx BD pointer */ - unsigned short tcount; /* Tx byte count */ - unsigned long ttemp; /* Tx temp */ - unsigned long rcrc; /* temp receive CRC */ - unsigned long tcrc; /* temp transmit CRC */ - - /* - * BISYNC specific parameter RAM - */ - unsigned char RESERVED1[4]; /* Reserved area */ - unsigned long crcc; /* CRC Constant Temp Value */ - unsigned short prcrc; /* Preset Receiver CRC-16/LRC */ - unsigned short ptcrc; /* Preset Transmitter CRC-16/LRC */ - unsigned short parec; /* Receive Parity Error Counter */ - unsigned short bsync; /* BISYNC SYNC Character */ - unsigned short bdle; /* BISYNC DLE Character */ - unsigned short cc[8]; /* Rx control characters */ - unsigned short rccm; /* Receive Control Character Mask */ -}; - -/***************************************************************** - IOM2 parameter RAM - (overlaid on tx bd[5] of SCC channel[2]) -*****************************************************************/ -struct iom2_pram { - unsigned short ci_data; /* ci data */ - unsigned short monitor_data; /* monitor data */ - unsigned short tstate; /* transmitter state */ - unsigned short rstate; /* receiver state */ -}; - -/***************************************************************** - SPI/SMC parameter RAM - (overlaid on tx bd[6,7] of SCC channel[2]) -*****************************************************************/ - -#define SPI_R 0x8000 /* Ready bit in BD */ - -struct spi_pram { - unsigned short rbase; /* Rx BD Base Address */ - unsigned short tbase; /* Tx BD Base Address */ - unsigned char rfcr; /* Rx function code */ - unsigned char tfcr; /* Tx function code */ - unsigned short mrblr; /* Rx buffer length */ - unsigned long rstate; /* Rx internal state */ - unsigned long rptr; /* Rx internal data pointer */ - unsigned short rbptr; /* rb BD 
Pointer */ - unsigned short rcount; /* Rx internal byte count */ - unsigned long rtemp; /* Rx temp */ - unsigned long tstate; /* Tx internal state */ - unsigned long tptr; /* Tx internal data pointer */ - unsigned short tbptr; /* Tx BD pointer */ - unsigned short tcount; /* Tx byte count */ - unsigned long ttemp; /* Tx temp */ -}; - -struct smc_uart_pram { - unsigned short rbase; /* Rx BD Base Address */ - unsigned short tbase; /* Tx BD Base Address */ - unsigned char rfcr; /* Rx function code */ - unsigned char tfcr; /* Tx function code */ - unsigned short mrblr; /* Rx buffer length */ - unsigned long rstate; /* Rx internal state */ - unsigned long rptr; /* Rx internal data pointer */ - unsigned short rbptr; /* rb BD Pointer */ - unsigned short rcount; /* Rx internal byte count */ - unsigned long rtemp; /* Rx temp */ - unsigned long tstate; /* Tx internal state */ - unsigned long tptr; /* Tx internal data pointer */ - unsigned short tbptr; /* Tx BD pointer */ - unsigned short tcount; /* Tx byte count */ - unsigned long ttemp; /* Tx temp */ - unsigned short max_idl; /* Maximum IDLE Characters */ - unsigned short idlc; /* Temporary IDLE Counter */ - unsigned short brkln; /* Last Rx Break Length */ - unsigned short brkec; /* Rx Break Condition Counter */ - unsigned short brkcr; /* Break Count Register (Tx) */ - unsigned short r_mask; /* Temporary bit mask */ -}; - -struct smc_trnsp_pram { - unsigned short rbase; /* rx BD Base Address */ - unsigned short tbase; /* Tx BD Base Address */ - unsigned char rfcr; /* Rx function code */ - unsigned char tfcr; /* Tx function code */ - unsigned short mrblr; /* Rx buffer length */ - unsigned long rstate; /* Rx internal state */ - unsigned long rptr; /* Rx internal data pointer */ - unsigned short rbptr; /* rb BD Pointer */ - unsigned short rcount; /* Rx internal byte count */ - unsigned long rtemp; /* Rx temp */ - unsigned long tstate; /* Tx internal state */ - unsigned long tptr; /* Tx internal data pointer */ - unsigned short tbptr; /* Tx BD pointer */ - unsigned short tcount; /* Tx byte count */ - unsigned long ttemp; /* Tx temp */ - unsigned short reserved[5]; /* Reserved */ -}; - -struct idma_pram { - unsigned short ibase; /* IDMA BD Base Address */ - unsigned short ibptr; /* IDMA buffer descriptor pointer */ - unsigned long istate; /* IDMA internal state */ - unsigned long itemp; /* IDMA temp */ -}; - -struct ethernet_pram { - /* - * SCC parameter RAM - */ - unsigned short rbase; /* RX BD base address */ - unsigned short tbase; /* TX BD base address */ - unsigned char rfcr; /* Rx function code */ - unsigned char tfcr; /* Tx function code */ - unsigned short mrblr; /* Rx buffer length */ - unsigned long rstate; /* Rx internal state */ - unsigned long rptr; /* Rx internal data pointer */ - unsigned short rbptr; /* rb BD Pointer */ - unsigned short rcount; /* Rx internal byte count */ - unsigned long rtemp; /* Rx temp */ - unsigned long tstate; /* Tx internal state */ - unsigned long tptr; /* Tx internal data pointer */ - unsigned short tbptr; /* Tx BD pointer */ - unsigned short tcount; /* Tx byte count */ - unsigned long ttemp; /* Tx temp */ - unsigned long rcrc; /* temp receive CRC */ - unsigned long tcrc; /* temp transmit CRC */ - - /* - * ETHERNET specific parameter RAM - */ - unsigned long c_pres; /* preset CRC */ - unsigned long c_mask; /* constant mask for CRC */ - unsigned long crcec; /* CRC error counter */ - unsigned long alec; /* alignment error counter */ - unsigned long disfc; /* discard frame counter */ - unsigned short pads; /* short 
frame PAD characters */ - unsigned short ret_lim; /* retry limit threshold */ - unsigned short ret_cnt; /* retry limit counter */ - unsigned short mflr; /* maximum frame length reg */ - unsigned short minflr; /* minimum frame length reg */ - unsigned short maxd1; /* maximum DMA1 length reg */ - unsigned short maxd2; /* maximum DMA2 length reg */ - unsigned short maxd; /* rx max DMA */ - unsigned short dma_cnt; /* rx dma counter */ - unsigned short max_b; /* max bd byte count */ - unsigned short gaddr1; /* group address filter 1 */ - unsigned short gaddr2; /* group address filter 2 */ - unsigned short gaddr3; /* group address filter 3 */ - unsigned short gaddr4; /* group address filter 4 */ - unsigned long tbuf0_data0; /* save area 0 - current frm */ - unsigned long tbuf0_data1; /* save area 1 - current frm */ - unsigned long tbuf0_rba0; - unsigned long tbuf0_crc; - unsigned short tbuf0_bcnt; - union { - unsigned char b[6]; - struct { - unsigned short high; - unsigned short middl; - unsigned short low; - } w; - } paddr; - unsigned short p_per; /* persistence */ - unsigned short rfbd_ptr; /* rx first bd pointer */ - unsigned short tfbd_ptr; /* tx first bd pointer */ - unsigned short tlbd_ptr; /* tx last bd pointer */ - unsigned long tbuf1_data0; /* save area 0 - next frame */ - unsigned long tbuf1_data1; /* save area 1 - next frame */ - unsigned long tbuf1_rba0; - unsigned long tbuf1_crc; - unsigned short tbuf1_bcnt; - unsigned short tx_len; /* tx frame length counter */ - unsigned short iaddr1; /* individual address filter 1*/ - unsigned short iaddr2; /* individual address filter 2*/ - unsigned short iaddr3; /* individual address filter 3*/ - unsigned short iaddr4; /* individual address filter 4*/ - unsigned short boff_cnt; /* back-off counter */ - unsigned short taddr_h; /* temp address (MSB) */ - unsigned short taddr_m; /* temp address */ - unsigned short taddr_l; /* temp address (LSB) */ -}; - -struct transparent_pram { - /* - * SCC parameter RAM - */ - unsigned short rbase; /* RX BD base address */ - unsigned short tbase; /* TX BD base address */ - unsigned char rfcr; /* Rx function code */ - unsigned char tfcr; /* Tx function code */ - unsigned short mrblr; /* Rx buffer length */ - unsigned long rstate; /* Rx internal state */ - unsigned long rptr; /* Rx internal data pointer */ - unsigned short rbptr; /* rb BD Pointer */ - unsigned short rcount; /* Rx internal byte count */ - unsigned long rtemp; /* Rx temp */ - unsigned long tstate; /* Tx internal state */ - unsigned long tptr; /* Tx internal data pointer */ - unsigned short tbptr; /* Tx BD pointer */ - unsigned short tcount; /* Tx byte count */ - unsigned long ttemp; /* Tx temp */ - unsigned long rcrc; /* temp receive CRC */ - unsigned long tcrc; /* temp transmit CRC */ - - /* - * TRANSPARENT specific parameter RAM - */ - unsigned long crc_p; /* CRC Preset */ - unsigned long crc_c; /* CRC constant */ -}; - -struct timer_pram { - /* - * RISC timers parameter RAM - */ - unsigned short tm_base; /* RISC timer table base adr */ - unsigned short tm_ptr; /* RISC timer table pointer */ - unsigned short r_tmr; /* RISC timer mode register */ - unsigned short r_tmv; /* RISC timer valid register */ - unsigned long tm_cmd; /* RISC timer cmd register */ - unsigned long tm_cnt; /* RISC timer internal cnt */ -}; - -#endif diff --git a/arch/m68k/include/asm/m68360_quicc.h b/arch/m68k/include/asm/m68360_quicc.h deleted file mode 100644 index 59414cc..0000000 --- a/arch/m68k/include/asm/m68360_quicc.h +++ /dev/null @@ -1,362 +0,0 @@ 
-/*********************************** - * $Id: m68360_quicc.h,v 1.1 2002/03/02 15:01:07 gerg Exp $ - *********************************** - * - *************************************** - * Definitions of QUICC memory structures - *************************************** - */ - -#ifndef __M68360_QUICC_H -#define __M68360_QUICC_H - -/* - * include registers and - * parameter ram definitions files - */ -#include <asm/m68360_regs.h> -#include <asm/m68360_pram.h> - - - -/* Buffer Descriptors */ -typedef struct quicc_bd { - volatile unsigned short status; - volatile unsigned short length; - volatile unsigned char *buf; /* WARNING: This is only true if *char is 32 bits */ -} QUICC_BD; - - -#ifdef MOTOROLA_ORIGINAL -struct user_data { - /* BASE + 0x000: user data memory */ - volatile unsigned char udata_bd_ucode[0x400]; /*user data bd's Ucode*/ - volatile unsigned char udata_bd[0x200]; /*user data Ucode */ - volatile unsigned char ucode_ext[0x100]; /*Ucode Extension ram */ - volatile unsigned char RESERVED1[0x500]; /* Reserved area */ -}; -#else -struct user_data { - /* BASE + 0x000: user data memory */ - volatile unsigned char udata_bd_ucode[0x400]; /* user data, bds, Ucode*/ - volatile unsigned char udata_bd1[0x200]; /* user, bds */ - volatile unsigned char ucode_bd_scratch[0x100]; /* user, bds, ucode scratch */ - volatile unsigned char udata_bd2[0x100]; /* user, bds */ - volatile unsigned char RESERVED1[0x400]; /* Reserved area */ -}; -#endif - - -/* - * internal ram - */ -typedef struct quicc { - union { - struct quicc32_pram ch_pram_tbl[32]; /* 32*64(bytes) per channel */ - struct user_data u; - }ch_or_u; /* multipul or user space */ - - /* BASE + 0xc00: PARAMETER RAM */ - union { - struct scc_pram { - union { - struct hdlc_pram h; - struct uart_pram u; - struct bisync_pram b; - struct transparent_pram t; - unsigned char RESERVED66[0x70]; - } pscc; /* scc parameter area (protocol dependent) */ - union { - struct { - unsigned char RESERVED70[0x10]; - struct spi_pram spi; - unsigned char RESERVED72[0x8]; - struct timer_pram timer; - } timer_spi; - struct { - struct idma_pram idma; - unsigned char RESERVED67[0x4]; - union { - struct smc_uart_pram u; - struct smc_trnsp_pram t; - } psmc; - } idma_smc; - } pothers; - } scc; - struct ethernet_pram enet_scc; - struct global_multi_pram m; - unsigned char pr[0x100]; - } pram[4]; - - /* reserved */ - - /* BASE + 0x1000: INTERNAL REGISTERS */ - /* SIM */ - volatile unsigned long sim_mcr; /* module configuration reg */ - volatile unsigned short sim_simtr; /* module test register */ - volatile unsigned char RESERVED2[0x2]; /* Reserved area */ - volatile unsigned char sim_avr; /* auto vector reg */ - volatile unsigned char sim_rsr; /* reset status reg */ - volatile unsigned char RESERVED3[0x2]; /* Reserved area */ - volatile unsigned char sim_clkocr; /* CLCO control register */ - volatile unsigned char RESERVED62[0x3]; /* Reserved area */ - volatile unsigned short sim_pllcr; /* PLL control register */ - volatile unsigned char RESERVED63[0x2]; /* Reserved area */ - volatile unsigned short sim_cdvcr; /* Clock devider control register */ - volatile unsigned short sim_pepar; /* Port E pin assignment register */ - volatile unsigned char RESERVED64[0xa]; /* Reserved area */ - volatile unsigned char sim_sypcr; /* system protection control*/ - volatile unsigned char sim_swiv; /* software interrupt vector*/ - volatile unsigned char RESERVED6[0x2]; /* Reserved area */ - volatile unsigned short sim_picr; /* periodic interrupt control reg */ - volatile unsigned char 
RESERVED7[0x2]; /* Reserved area */ - volatile unsigned short sim_pitr; /* periodic interrupt timing reg */ - volatile unsigned char RESERVED8[0x3]; /* Reserved area */ - volatile unsigned char sim_swsr; /* software service */ - volatile unsigned long sim_bkar; /* breakpoint address register*/ - volatile unsigned long sim_bkcr; /* breakpoint control register*/ - volatile unsigned char RESERVED10[0x8]; /* Reserved area */ - /* MEMC */ - volatile unsigned long memc_gmr; /* Global memory register */ - volatile unsigned short memc_mstat; /* MEMC status register */ - volatile unsigned char RESERVED11[0xa]; /* Reserved area */ - volatile unsigned long memc_br0; /* base register 0 */ - volatile unsigned long memc_or0; /* option register 0 */ - volatile unsigned char RESERVED12[0x8]; /* Reserved area */ - volatile unsigned long memc_br1; /* base register 1 */ - volatile unsigned long memc_or1; /* option register 1 */ - volatile unsigned char RESERVED13[0x8]; /* Reserved area */ - volatile unsigned long memc_br2; /* base register 2 */ - volatile unsigned long memc_or2; /* option register 2 */ - volatile unsigned char RESERVED14[0x8]; /* Reserved area */ - volatile unsigned long memc_br3; /* base register 3 */ - volatile unsigned long memc_or3; /* option register 3 */ - volatile unsigned char RESERVED15[0x8]; /* Reserved area */ - volatile unsigned long memc_br4; /* base register 3 */ - volatile unsigned long memc_or4; /* option register 3 */ - volatile unsigned char RESERVED16[0x8]; /* Reserved area */ - volatile unsigned long memc_br5; /* base register 3 */ - volatile unsigned long memc_or5; /* option register 3 */ - volatile unsigned char RESERVED17[0x8]; /* Reserved area */ - volatile unsigned long memc_br6; /* base register 3 */ - volatile unsigned long memc_or6; /* option register 3 */ - volatile unsigned char RESERVED18[0x8]; /* Reserved area */ - volatile unsigned long memc_br7; /* base register 3 */ - volatile unsigned long memc_or7; /* option register 3 */ - volatile unsigned char RESERVED9[0x28]; /* Reserved area */ - /* TEST */ - volatile unsigned short test_tstmra; /* master shift a */ - volatile unsigned short test_tstmrb; /* master shift b */ - volatile unsigned short test_tstsc; /* shift count */ - volatile unsigned short test_tstrc; /* repetition counter */ - volatile unsigned short test_creg; /* control */ - volatile unsigned short test_dreg; /* destributed register */ - volatile unsigned char RESERVED58[0x404]; /* Reserved area */ - /* IDMA1 */ - volatile unsigned short idma_iccr; /* channel configuration reg*/ - volatile unsigned char RESERVED19[0x2]; /* Reserved area */ - volatile unsigned short idma1_cmr; /* dma mode reg */ - volatile unsigned char RESERVED68[0x2]; /* Reserved area */ - volatile unsigned long idma1_sapr; /* dma source addr ptr */ - volatile unsigned long idma1_dapr; /* dma destination addr ptr */ - volatile unsigned long idma1_bcr; /* dma byte count reg */ - volatile unsigned char idma1_fcr; /* function code reg */ - volatile unsigned char RESERVED20; /* Reserved area */ - volatile unsigned char idma1_cmar; /* channel mask reg */ - volatile unsigned char RESERVED21; /* Reserved area */ - volatile unsigned char idma1_csr; /* channel status reg */ - volatile unsigned char RESERVED22[0x3]; /* Reserved area */ - /* SDMA */ - volatile unsigned char sdma_sdsr; /* status reg */ - volatile unsigned char RESERVED23; /* Reserved area */ - volatile unsigned short sdma_sdcr; /* configuration reg */ - volatile unsigned long sdma_sdar; /* address reg */ - /* IDMA2 */ - 
volatile unsigned char RESERVED69[0x2]; /* Reserved area */ - volatile unsigned short idma2_cmr; /* dma mode reg */ - volatile unsigned long idma2_sapr; /* dma source addr ptr */ - volatile unsigned long idma2_dapr; /* dma destination addr ptr */ - volatile unsigned long idma2_bcr; /* dma byte count reg */ - volatile unsigned char idma2_fcr; /* function code reg */ - volatile unsigned char RESERVED24; /* Reserved area */ - volatile unsigned char idma2_cmar; /* channel mask reg */ - volatile unsigned char RESERVED25; /* Reserved area */ - volatile unsigned char idma2_csr; /* channel status reg */ - volatile unsigned char RESERVED26[0x7]; /* Reserved area */ - /* Interrupt Controller */ - volatile unsigned long intr_cicr; /* CP interrupt configuration reg*/ - volatile unsigned long intr_cipr; /* CP interrupt pending reg */ - volatile unsigned long intr_cimr; /* CP interrupt mask reg */ - volatile unsigned long intr_cisr; /* CP interrupt in service reg*/ - /* Parallel I/O */ - volatile unsigned short pio_padir; /* port A data direction reg */ - volatile unsigned short pio_papar; /* port A pin assignment reg */ - volatile unsigned short pio_paodr; /* port A open drain reg */ - volatile unsigned short pio_padat; /* port A data register */ - volatile unsigned char RESERVED28[0x8]; /* Reserved area */ - volatile unsigned short pio_pcdir; /* port C data direction reg*/ - volatile unsigned short pio_pcpar; /* port C pin assignment reg*/ - volatile unsigned short pio_pcso; /* port C special options */ - volatile unsigned short pio_pcdat; /* port C data register */ - volatile unsigned short pio_pcint; /* port C interrupt cntrl reg */ - volatile unsigned char RESERVED29[0x16]; /* Reserved area */ - /* Timer */ - volatile unsigned short timer_tgcr; /* timer global configuration reg */ - volatile unsigned char RESERVED30[0xe]; /* Reserved area */ - volatile unsigned short timer_tmr1; /* timer 1 mode reg */ - volatile unsigned short timer_tmr2; /* timer 2 mode reg */ - volatile unsigned short timer_trr1; /* timer 1 referance reg */ - volatile unsigned short timer_trr2; /* timer 2 referance reg */ - volatile unsigned short timer_tcr1; /* timer 1 capture reg */ - volatile unsigned short timer_tcr2; /* timer 2 capture reg */ - volatile unsigned short timer_tcn1; /* timer 1 counter reg */ - volatile unsigned short timer_tcn2; /* timer 2 counter reg */ - volatile unsigned short timer_tmr3; /* timer 3 mode reg */ - volatile unsigned short timer_tmr4; /* timer 4 mode reg */ - volatile unsigned short timer_trr3; /* timer 3 referance reg */ - volatile unsigned short timer_trr4; /* timer 4 referance reg */ - volatile unsigned short timer_tcr3; /* timer 3 capture reg */ - volatile unsigned short timer_tcr4; /* timer 4 capture reg */ - volatile unsigned short timer_tcn3; /* timer 3 counter reg */ - volatile unsigned short timer_tcn4; /* timer 4 counter reg */ - volatile unsigned short timer_ter1; /* timer 1 event reg */ - volatile unsigned short timer_ter2; /* timer 2 event reg */ - volatile unsigned short timer_ter3; /* timer 3 event reg */ - volatile unsigned short timer_ter4; /* timer 4 event reg */ - volatile unsigned char RESERVED34[0x8]; /* Reserved area */ - /* CP */ - volatile unsigned short cp_cr; /* command register */ - volatile unsigned char RESERVED35[0x2]; /* Reserved area */ - volatile unsigned short cp_rccr; /* main configuration reg */ - volatile unsigned char RESERVED37; /* Reserved area */ - volatile unsigned char cp_rmds; /* development support status reg */ - volatile unsigned long cp_rmdr; /* 
development support control reg */ - volatile unsigned short cp_rctr1; /* ram break register 1 */ - volatile unsigned short cp_rctr2; /* ram break register 2 */ - volatile unsigned short cp_rctr3; /* ram break register 3 */ - volatile unsigned short cp_rctr4; /* ram break register 4 */ - volatile unsigned char RESERVED59[0x2]; /* Reserved area */ - volatile unsigned short cp_rter; /* RISC timers event reg */ - volatile unsigned char RESERVED38[0x2]; /* Reserved area */ - volatile unsigned short cp_rtmr; /* RISC timers mask reg */ - volatile unsigned char RESERVED39[0x14]; /* Reserved area */ - /* BRG */ - union { - volatile unsigned long l; - struct { - volatile unsigned short BRGC_RESERV:14; - volatile unsigned short rst:1; - volatile unsigned short en:1; - volatile unsigned short extc:2; - volatile unsigned short atb:1; - volatile unsigned short cd:12; - volatile unsigned short div16:1; - } b; - } brgc[4]; /* BRG1-BRG4 configuration regs*/ - /* SCC registers */ - struct scc_regs { - union { - struct { - /* Low word. */ - volatile unsigned short GSMR_RESERV2:1; - volatile unsigned short edge:2; - volatile unsigned short tci:1; - volatile unsigned short tsnc:2; - volatile unsigned short rinv:1; - volatile unsigned short tinv:1; - volatile unsigned short tpl:3; - volatile unsigned short tpp:2; - volatile unsigned short tend:1; - volatile unsigned short tdcr:2; - volatile unsigned short rdcr:2; - volatile unsigned short renc:3; - volatile unsigned short tenc:3; - volatile unsigned short diag:2; - volatile unsigned short enr:1; - volatile unsigned short ent:1; - volatile unsigned short mode:4; - /* High word. */ - volatile unsigned short GSMR_RESERV1:14; - volatile unsigned short pri:1; - volatile unsigned short gde:1; - volatile unsigned short tcrc:2; - volatile unsigned short revd:1; - volatile unsigned short trx:1; - volatile unsigned short ttx:1; - volatile unsigned short cdp:1; - volatile unsigned short ctsp:1; - volatile unsigned short cds:1; - volatile unsigned short ctss:1; - volatile unsigned short tfl:1; - volatile unsigned short rfw:1; - volatile unsigned short txsy:1; - volatile unsigned short synl:2; - volatile unsigned short rtsm:1; - volatile unsigned short rsyn:1; - } b; - struct { - volatile unsigned long low; - volatile unsigned long high; - } w; - } scc_gsmr; /* SCC general mode reg */ - volatile unsigned short scc_psmr; /* protocol specific mode reg */ - volatile unsigned char RESERVED42[0x2]; /* Reserved area */ - volatile unsigned short scc_todr; /* SCC transmit on demand */ - volatile unsigned short scc_dsr; /* SCC data sync reg */ - volatile unsigned short scc_scce; /* SCC event reg */ - volatile unsigned char RESERVED43[0x2];/* Reserved area */ - volatile unsigned short scc_sccm; /* SCC mask reg */ - volatile unsigned char RESERVED44[0x1];/* Reserved area */ - volatile unsigned char scc_sccs; /* SCC status reg */ - volatile unsigned char RESERVED45[0x8]; /* Reserved area */ - } scc_regs[4]; - /* SMC */ - struct smc_regs { - volatile unsigned char RESERVED46[0x2]; /* Reserved area */ - volatile unsigned short smc_smcmr; /* SMC mode reg */ - volatile unsigned char RESERVED60[0x2]; /* Reserved area */ - volatile unsigned char smc_smce; /* SMC event reg */ - volatile unsigned char RESERVED47[0x3]; /* Reserved area */ - volatile unsigned char smc_smcm; /* SMC mask reg */ - volatile unsigned char RESERVED48[0x5]; /* Reserved area */ - } smc_regs[2]; - /* SPI */ - volatile unsigned short spi_spmode; /* SPI mode reg */ - volatile unsigned char RESERVED51[0x4]; /* Reserved 
area */ - volatile unsigned char spi_spie; /* SPI event reg */ - volatile unsigned char RESERVED52[0x3]; /* Reserved area */ - volatile unsigned char spi_spim; /* SPI mask reg */ - volatile unsigned char RESERVED53[0x2]; /* Reserved area */ - volatile unsigned char spi_spcom; /* SPI command reg */ - volatile unsigned char RESERVED54[0x4]; /* Reserved area */ - /* PIP */ - volatile unsigned short pip_pipc; /* pip configuration reg */ - volatile unsigned char RESERVED65[0x2]; /* Reserved area */ - volatile unsigned short pip_ptpr; /* pip timing parameters reg */ - volatile unsigned long pip_pbdir; /* port b data direction reg */ - volatile unsigned long pip_pbpar; /* port b pin assignment reg */ - volatile unsigned long pip_pbodr; /* port b open drain reg */ - volatile unsigned long pip_pbdat; /* port b data reg */ - volatile unsigned char RESERVED71[0x18]; /* Reserved area */ - /* Serial Interface */ - volatile unsigned long si_simode; /* SI mode register */ - volatile unsigned char si_sigmr; /* SI global mode register */ - volatile unsigned char RESERVED55; /* Reserved area */ - volatile unsigned char si_sistr; /* SI status register */ - volatile unsigned char si_sicmr; /* SI command register */ - volatile unsigned char RESERVED56[0x4]; /* Reserved area */ - volatile unsigned long si_sicr; /* SI clock routing */ - volatile unsigned long si_sirp; /* SI ram pointers */ - volatile unsigned char RESERVED57[0xc]; /* Reserved area */ - volatile unsigned short si_siram[0x80]; /* SI routing ram */ -} QUICC; - -#endif - -/* - * Local variables: - * c-indent-level: 4 - * c-basic-offset: 4 - * tab-width: 4 - * End: - */ diff --git a/arch/m68k/include/asm/m68360_regs.h b/arch/m68k/include/asm/m68360_regs.h deleted file mode 100644 index d57217c..0000000 --- a/arch/m68k/include/asm/m68360_regs.h +++ /dev/null @@ -1,408 +0,0 @@ -/*********************************** - * $Id: m68360_regs.h,v 1.2 2002/10/26 15:03:55 gerg Exp $ - *********************************** - * - *************************************** - * Definitions of the QUICC registers - *************************************** - */ - -#ifndef __REGISTERS_H -#define __REGISTERS_H - -#define CLEAR_BIT(x, bit) x =bit - -/***************************************************************** - Command Register -*****************************************************************/ - -/* bit fields within command register */ -#define SOFTWARE_RESET 0x8000 -#define CMD_OPCODE 0x0f00 -#define CMD_CHANNEL 0x00f0 -#define CMD_FLAG 0x0001 - -/* general command opcodes */ -#define INIT_RXTX_PARAMS 0x0000 -#define INIT_RX_PARAMS 0x0100 -#define INIT_TX_PARAMS 0x0200 -#define ENTER_HUNT_MODE 0x0300 -#define STOP_TX 0x0400 -#define GR_STOP_TX 0x0500 -#define RESTART_TX 0x0600 -#define CLOSE_RX_BD 0x0700 -#define SET_ENET_GROUP 0x0800 -#define RESET_ENET_GROUP 0x0900 - -/* quicc32 CP commands */ -#define STOP_TX_32 0x0e00 /*add chan# bits 2-6 */ -#define ENTER_HUNT_MODE_32 0x1e00 - -/* quicc32 mask/event SCC register */ -#define GOV 0x01 -#define GUN 0x02 -#define GINT 0x04 -#define IQOV 0x08 - - -/* Timer commands */ -#define SET_TIMER 0x0800 - -/* Multi channel Interrupt structure */ -#define INTR_VALID 0x8000 /* Valid interrupt entry */ -#define INTR_WRAP 0x4000 /* Wrap bit in the interrupt entry table */ -#define INTR_CH_NU 0x07c0 /* Channel Num in interrupt table */ -#define INTR_MASK_BITS 0x383f - -/* - * General SCC mode register (GSMR) - */ - -#define MODE_HDLC 0x0 -#define MODE_APPLE_TALK 0x2 -#define MODE_SS7 0x3 -#define MODE_UART 0x4 -#define MODE_PROFIBUS 
0x5 -#define MODE_ASYNC_HDLC 0x6 -#define MODE_V14 0x7 -#define MODE_BISYNC 0x8 -#define MODE_DDCMP 0x9 -#define MODE_MULTI_CHANNEL 0xa -#define MODE_ETHERNET 0xc - -#define DIAG_NORMAL 0x0 -#define DIAG_LOCAL_LPB 0x1 -#define DIAG_AUTO_ECHO 0x2 -#define DIAG_LBP_ECHO 0x3 - -/* For RENC and TENC fields in GSMR */ -#define ENC_NRZ 0x0 -#define ENC_NRZI 0x1 -#define ENC_FM0 0x2 -#define ENC_MANCH 0x4 -#define ENC_DIFF_MANC 0x6 - -/* For TDCR and RDCR fields in GSMR */ -#define CLOCK_RATE_1 0x0 -#define CLOCK_RATE_8 0x1 -#define CLOCK_RATE_16 0x2 -#define CLOCK_RATE_32 0x3 - -#define TPP_00 0x0 -#define TPP_10 0x1 -#define TPP_01 0x2 -#define TPP_11 0x3 - -#define TPL_NO 0x0 -#define TPL_8 0x1 -#define TPL_16 0x2 -#define TPL_32 0x3 -#define TPL_48 0x4 -#define TPL_64 0x5 -#define TPL_128 0x6 - -#define TSNC_INFINITE 0x0 -#define TSNC_14_65 0x1 -#define TSNC_4_15 0x2 -#define TSNC_3_1 0x3 - -#define EDGE_BOTH 0x0 -#define EDGE_POS 0x1 -#define EDGE_NEG 0x2 -#define EDGE_NO 0x3 - -#define SYNL_NO 0x0 -#define SYNL_4 0x1 -#define SYNL_8 0x2 -#define SYNL_16 0x3 - -#define TCRC_CCITT16 0x0 -#define TCRC_CRC16 0x1 -#define TCRC_CCITT32 0x2 - - -/***************************************************************** - TODR (Transmit on demand) Register -*****************************************************************/ -#define TODR_TOD 0x8000 /* Transmit on demand */ - - -/***************************************************************** - CICR register settings -*****************************************************************/ - -/* note that relative irq priorities of the SCCs can be reordered - * if desired - see p. 7-377 of the MC68360UM */ -#define CICR_SCA_SCC1 ((uint)0x00000000) /* SCC1 @ SCCa */ -#define CICR_SCB_SCC2 ((uint)0x00040000) /* SCC2 @ SCCb */ -#define CICR_SCC_SCC3 ((uint)0x00200000) /* SCC3 @ SCCc */ -#define CICR_SCD_SCC4 ((uint)0x00c00000) /* SCC4 @ SCCd */ - -#define CICR_IRL_MASK ((uint)0x0000e000) /* Core interrupt */ -#define CICR_HP_MASK ((uint)0x00001f00) /* Hi-pri int. */ -#define CICR_VBA_MASK ((uint)0x000000e0) /* Vector Base Address */ -#define CICR_SPS ((uint)0x00000001) /* SCC Spread */ - - -/***************************************************************** - Interrupt bits for CIPR and CIMR (MC68360UM p. 
7-379) -*****************************************************************/ - -#define INTR_PIO_PC0 0x80000000 /* parallel I/O C bit 0 */ -#define INTR_SCC1 0x40000000 /* SCC port 1 */ -#define INTR_SCC2 0x20000000 /* SCC port 2 */ -#define INTR_SCC3 0x10000000 /* SCC port 3 */ -#define INTR_SCC4 0x08000000 /* SCC port 4 */ -#define INTR_PIO_PC1 0x04000000 /* parallel i/o C bit 1 */ -#define INTR_TIMER1 0x02000000 /* timer 1 */ -#define INTR_PIO_PC2 0x01000000 /* parallel i/o C bit 2 */ -#define INTR_PIO_PC3 0x00800000 /* parallel i/o C bit 3 */ -#define INTR_SDMA_BERR 0x00400000 /* SDMA channel bus error */ -#define INTR_DMA1 0x00200000 /* idma 1 */ -#define INTR_DMA2 0x00100000 /* idma 2 */ -#define INTR_TIMER2 0x00040000 /* timer 2 */ -#define INTR_CP_TIMER 0x00020000 /* CP timer */ -#define INTR_PIP_STATUS 0x00010000 /* PIP status */ -#define INTR_PIO_PC4 0x00008000 /* parallel i/o C bit 4 */ -#define INTR_PIO_PC5 0x00004000 /* parallel i/o C bit 5 */ -#define INTR_TIMER3 0x00001000 /* timer 3 */ -#define INTR_PIO_PC6 0x00000800 /* parallel i/o C bit 6 */ -#define INTR_PIO_PC7 0x00000400 /* parallel i/o C bit 7 */ -#define INTR_PIO_PC8 0x00000200 /* parallel i/o C bit 8 */ -#define INTR_TIMER4 0x00000080 /* timer 4 */ -#define INTR_PIO_PC9 0x00000040 /* parallel i/o C bit 9 */ -#define INTR_SCP 0x00000020 /* SCP */ -#define INTR_SMC1 0x00000010 /* SMC 1 */ -#define INTR_SMC2 0x00000008 /* SMC 2 */ -#define INTR_PIO_PC10 0x00000004 /* parallel i/o C bit 10 */ -#define INTR_PIO_PC11 0x00000002 /* parallel i/o C bit 11 */ -#define INTR_ERR 0x00000001 /* error */ - - -/***************************************************************** - CPM Interrupt vector encodings (MC68360UM p. 7-376) -*****************************************************************/ - -#define CPMVEC_NR 32 -#define CPMVEC_PIO_PC0 0x1f -#define CPMVEC_SCC1 0x1e -#define CPMVEC_SCC2 0x1d -#define CPMVEC_SCC3 0x1c -#define CPMVEC_SCC4 0x1b -#define CPMVEC_PIO_PC1 0x1a -#define CPMVEC_TIMER1 0x19 -#define CPMVEC_PIO_PC2 0x18 -#define CPMVEC_PIO_PC3 0x17 -#define CPMVEC_SDMA_CB_ERR 0x16 -#define CPMVEC_IDMA1 0x15 -#define CPMVEC_IDMA2 0x14 -#define CPMVEC_RESERVED3 0x13 -#define CPMVEC_TIMER2 0x12 -#define CPMVEC_RISCTIMER 0x11 -#define CPMVEC_RESERVED2 0x10 -#define CPMVEC_PIO_PC4 0x0f -#define CPMVEC_PIO_PC5 0x0e -#define CPMVEC_TIMER3 0x0c -#define CPMVEC_PIO_PC6 0x0b -#define CPMVEC_PIO_PC7 0x0a -#define CPMVEC_PIO_PC8 0x09 -#define CPMVEC_RESERVED1 0x08 -#define CPMVEC_TIMER4 0x07 -#define CPMVEC_PIO_PC9 0x06 -#define CPMVEC_SPI 0x05 -#define CPMVEC_SMC1 0x04 -#define CPMVEC_SMC2 0x03 -#define CPMVEC_PIO_PC10 0x02 -#define CPMVEC_PIO_PC11 0x01 -#define CPMVEC_ERROR 0x00 - -/* #define CPMVEC_PIO_PC0 ((ushort)0x1f) */ -/* #define CPMVEC_SCC1 ((ushort)0x1e) */ -/* #define CPMVEC_SCC2 ((ushort)0x1d) */ -/* #define CPMVEC_SCC3 ((ushort)0x1c) */ -/* #define CPMVEC_SCC4 ((ushort)0x1b) */ -/* #define CPMVEC_PIO_PC1 ((ushort)0x1a) */ -/* #define CPMVEC_TIMER1 ((ushort)0x19) */ -/* #define CPMVEC_PIO_PC2 ((ushort)0x18) */ -/* #define CPMVEC_PIO_PC3 ((ushort)0x17) */ -/* #define CPMVEC_SDMA_CB_ERR ((ushort)0x16) */ -/* #define CPMVEC_IDMA1 ((ushort)0x15) */ -/* #define CPMVEC_IDMA2 ((ushort)0x14) */ -/* #define CPMVEC_RESERVED3 ((ushort)0x13) */ -/* #define CPMVEC_TIMER2 ((ushort)0x12) */ -/* #define CPMVEC_RISCTIMER ((ushort)0x11) */ -/* #define CPMVEC_RESERVED2 ((ushort)0x10) */ -/* #define CPMVEC_PIO_PC4 ((ushort)0x0f) */ -/* #define CPMVEC_PIO_PC5 ((ushort)0x0e) */ -/* #define CPMVEC_TIMER3 ((ushort)0x0c) */ -/* #define 
CPMVEC_PIO_PC6 ((ushort)0x0b) */ -/* #define CPMVEC_PIO_PC7 ((ushort)0x0a) */ -/* #define CPMVEC_PIO_PC8 ((ushort)0x09) */ -/* #define CPMVEC_RESERVED1 ((ushort)0x08) */ -/* #define CPMVEC_TIMER4 ((ushort)0x07) */ -/* #define CPMVEC_PIO_PC9 ((ushort)0x06) */ -/* #define CPMVEC_SPI ((ushort)0x05) */ -/* #define CPMVEC_SMC1 ((ushort)0x04) */ -/* #define CPMVEC_SMC2 ((ushort)0x03) */ -/* #define CPMVEC_PIO_PC10 ((ushort)0x02) */ -/* #define CPMVEC_PIO_PC11 ((ushort)0x01) */ -/* #define CPMVEC_ERROR ((ushort)0x00) */ - - -/***************************************************************** - * PIO control registers - *****************************************************************/ - -/* Port A - See 360UM p. 7-358 - * - * Note that most of these pins have alternate functions - */ - - -/* The macros are nice, but there are all sorts of references to 1-indexed - * facilities on the 68360... */ -/* #define PA_RXD(n) ((ushort)(0x01<<(2*n))) */ -/* #define PA_TXD(n) ((ushort)(0x02<<(2*n))) */ - -#define PA_RXD1 ((ushort)0x0001) -#define PA_TXD1 ((ushort)0x0002) -#define PA_RXD2 ((ushort)0x0004) -#define PA_TXD2 ((ushort)0x0008) -#define PA_RXD3 ((ushort)0x0010) -#define PA_TXD3 ((ushort)0x0020) -#define PA_RXD4 ((ushort)0x0040) -#define PA_TXD4 ((ushort)0x0080) - -#define PA_CLK1 ((ushort)0x0100) -#define PA_CLK2 ((ushort)0x0200) -#define PA_CLK3 ((ushort)0x0400) -#define PA_CLK4 ((ushort)0x0800) -#define PA_CLK5 ((ushort)0x1000) -#define PA_CLK6 ((ushort)0x2000) -#define PA_CLK7 ((ushort)0x4000) -#define PA_CLK8 ((ushort)0x8000) - - -/* Port B - See 360UM p. 7-362 - */ - - -/* Port C - See 360UM p. 7-365 - */ - -#define PC_RTS1 ((ushort)0x0001) -#define PC_RTS2 ((ushort)0x0002) -#define PC__RTS3 ((ushort)0x0004) /* !RTS3 */ -#define PC__RTS4 ((ushort)0x0008) /* !RTS4 */ - -#define PC_CTS1 ((ushort)0x0010) -#define PC_CD1 ((ushort)0x0020) -#define PC_CTS2 ((ushort)0x0040) -#define PC_CD2 ((ushort)0x0080) -#define PC_CTS3 ((ushort)0x0100) -#define PC_CD3 ((ushort)0x0200) -#define PC_CTS4 ((ushort)0x0400) -#define PC_CD4 ((ushort)0x0800) - - - -/***************************************************************** - chip select option register -*****************************************************************/ -#define DTACK 0xe000 -#define ADR_MASK 0x1ffc -#define RDWR_MASK 0x0002 -#define FC_MASK 0x0001 - -/***************************************************************** - tbase and rbase registers -*****************************************************************/ -#define TBD_ADDR(quicc,pram) ((struct quicc_bd *) \ - (quicc->ch_or_u.u.udata_bd_ucode + pram->tbase)) -#define RBD_ADDR(quicc,pram) ((struct quicc_bd *) \ - (quicc->ch_or_u.u.udata_bd_ucode + pram->rbase)) -#define TBD_CUR_ADDR(quicc,pram) ((struct quicc_bd *) \ - (quicc->ch_or_u.u.udata_bd_ucode + pram->tbptr)) -#define RBD_CUR_ADDR(quicc,pram) ((struct quicc_bd *) \ - (quicc->ch_or_u.u.udata_bd_ucode + pram->rbptr)) -#define TBD_SET_CUR_ADDR(bd,quicc,pram) pram->tbptr = \ - ((unsigned short)((char *)(bd) - (char *)(quicc->ch_or_u.u.udata_bd_ucode))) -#define RBD_SET_CUR_ADDR(bd,quicc,pram) pram->rbptr = \ - ((unsigned short)((char *)(bd) - (char *)(quicc->ch_or_u.u.udata_bd_ucode))) -#define INCREASE_TBD(bd,quicc,pram) { \ - if((bd)->status & T_W) \ - (bd) = TBD_ADDR(quicc,pram); \ - else \ - (bd)++; \ -} -#define DECREASE_TBD(bd,quicc,pram) { \ - if ((bd) == TBD_ADDR(quicc, pram)) \ - while (!((bd)->status & T_W)) \ - (bd)++; \ - else \ - (bd)--; \ -} -#define INCREASE_RBD(bd,quicc,pram) { \ - if((bd)->status & R_W) \ - (bd) = 
RBD_ADDR(quicc,pram); \ - else \ - (bd)++; \ -} -#define DECREASE_RBD(bd,quicc,pram) { \ - if ((bd) == RBD_ADDR(quicc, pram)) \ - while (!((bd)->status & T_W)) \ - (bd)++; \ - else \ - (bd)--; \ -} - -/***************************************************************** - Macros for Multi channel -*****************************************************************/ -#define QMC_BASE(quicc,page) (struct global_multi_pram *)(&quicc->pram[page]) -#define MCBASE(quicc,page) (unsigned long)(quicc->pram[page].m.mcbase) -#define CHANNEL_PRAM_BASE(quicc,channel) ((struct quicc32_pram *) \ - (&(quicc->ch_or_u.ch_pram_tbl[channel]))) -#define TBD_32_ADDR(quicc,page,channel) ((struct quicc_bd *) \ - (MCBASE(quicc,page) + (CHANNEL_PRAM_BASE(quicc,channel)->tbase))) -#define RBD_32_ADDR(quicc,page,channel) ((struct quicc_bd *) \ - (MCBASE(quicc,page) + (CHANNEL_PRAM_BASE(quicc,channel)->rbase))) -#define TBD_32_CUR_ADDR(quicc,page,channel) ((struct quicc_bd *) \ - (MCBASE(quicc,page) + (CHANNEL_PRAM_BASE(quicc,channel)->tbptr))) -#define RBD_32_CUR_ADDR(quicc,page,channel) ((struct quicc_bd *) \ - (MCBASE(quicc,page) + (CHANNEL_PRAM_BASE(quicc,channel)->rbptr))) -#define TBD_32_SET_CUR_ADDR(bd,quicc,page,channel) \ - CHANNEL_PRAM_BASE(quicc,channel)->tbptr = \ - ((unsigned short)((char *)(bd) - (char *)(MCBASE(quicc,page)))) -#define RBD_32_SET_CUR_ADDR(bd,quicc,page,channel) \ - CHANNEL_PRAM_BASE(quicc,channel)->rbptr = \ - ((unsigned short)((char *)(bd) - (char *)(MCBASE(quicc,page)))) - -#define INCREASE_TBD_32(bd,quicc,page,channel) { \ - if((bd)->status & T_W) \ - (bd) = TBD_32_ADDR(quicc,page,channel); \ - else \ - (bd)++; \ -} -#define DECREASE_TBD_32(bd,quicc,page,channel) { \ - if ((bd) == TBD_32_ADDR(quicc, page,channel)) \ - while (!((bd)->status & T_W)) \ - (bd)++; \ - else \ - (bd)--; \ -} -#define INCREASE_RBD_32(bd,quicc,page,channel) { \ - if((bd)->status & R_W) \ - (bd) = RBD_32_ADDR(quicc,page,channel); \ - else \ - (bd)++; \ -} -#define DECREASE_RBD_32(bd,quicc,page,channel) { \ - if ((bd) == RBD_32_ADDR(quicc, page,channel)) \ - while (!((bd)->status & T_W)) \ - (bd)++; \ - else \ - (bd)--; \ -} - -#endif diff --git a/arch/m68k/include/asm/mac_iop.h b/arch/m68k/include/asm/mac_iop.h index fde874a..42566fd 100644 --- a/arch/m68k/include/asm/mac_iop.h +++ b/arch/m68k/include/asm/mac_iop.h @@ -48,7 +48,7 @@ /* IOP message status codes */ -#define IOP_MSGSTATUS_UNUSED 0 /* Unusued message structure */ +#define IOP_MSGSTATUS_UNUSED 0 /* Unused message structure */ #define IOP_MSGSTATUS_WAITING 1 /* waiting for channel */ #define IOP_MSGSTATUS_SENT 2 /* message sent, awaiting reply */ #define IOP_MSGSTATUS_COMPLETE 3 /* message complete and reply rcvd */ diff --git a/arch/m68k/include/asm/mcftimer.h b/arch/m68k/include/asm/mcftimer.h index 089f0f1..1150e42 100644 --- a/arch/m68k/include/asm/mcftimer.h +++ b/arch/m68k/include/asm/mcftimer.h @@ -51,7 +51,7 @@ * Bit definitions for the Timer Event Registers (TER). 
*/ #define MCFTIMER_TER_CAP 0x01 /* Capture event */ -#define MCFTIMER_TER_REF 0x02 /* Refernece event */ +#define MCFTIMER_TER_REF 0x02 /* Reference event */ /****************************************************************************/ #endif /* mcftimer_h */ diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h index 848c3df..3a3dbcf 100644 --- a/arch/m68k/include/asm/pci.h +++ b/arch/m68k/include/asm/pci.h @@ -1,7 +1,6 @@ #ifndef _ASM_M68K_PCI_H #define _ASM_M68K_PCI_H -#include <asm-generic/pci-dma-compat.h> #include <asm-generic/pci.h> /* The PCI address space does equal the physical memory diff --git a/arch/m68k/include/asm/serial.h b/arch/m68k/include/asm/serial.h index 06d0cb1..6d44970 100644 --- a/arch/m68k/include/asm/serial.h +++ b/arch/m68k/include/asm/serial.h @@ -18,11 +18,11 @@ /* Standard COM flags (except for COM4, because of the 8514 problem) */ #ifdef CONFIG_SERIAL_8250_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) -#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) +#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ) #else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) -#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST) +#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF #endif #ifdef CONFIG_ISA diff --git a/arch/m68k/kernel/early_printk.c b/arch/m68k/kernel/early_printk.c index ff9708d..7d3fe08a 100644 --- a/arch/m68k/kernel/early_printk.c +++ b/arch/m68k/kernel/early_printk.c @@ -20,8 +20,8 @@ asmlinkage void __init debug_cons_nputs(const char *s, unsigned n); static void __ref debug_cons_write(struct console *c, const char *s, unsigned n) { -#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68360) || \ - defined(CONFIG_M68000) || defined(CONFIG_COLDFIRE)) +#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68000) || \ + defined(CONFIG_COLDFIRE)) if (MACH_IS_MVME16x) mvme16x_cons_write(c, s, n); else @@ -52,8 +52,8 @@ early_param("earlyprintk", setup_early_printk); * debug_cons_nputs() defined in arch/m68k/kernel/head.S cannot be called * after init sections are discarded (for platforms that use it). 
*/ -#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68360) || \ - defined(CONFIG_M68000) || defined(CONFIG_COLDFIRE)) +#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68000) || \ + defined(CONFIG_COLDFIRE)) static int __init unregister_early_console(void) { diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index b54ac7a..97cd3ea 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -71,13 +71,19 @@ ENTRY(__sys_vfork) ENTRY(sys_sigreturn) SAVE_SWITCH_STACK + movel %sp,%sp@- | switch_stack pointer + pea %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer jbsr do_sigreturn + addql #8,%sp RESTORE_SWITCH_STACK rts ENTRY(sys_rt_sigreturn) SAVE_SWITCH_STACK + movel %sp,%sp@- | switch_stack pointer + pea %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer jbsr do_rt_sigreturn + addql #8,%sp RESTORE_SWITCH_STACK rts diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 76b9113..9309789 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -68,9 +68,6 @@ void (*mach_power_off)(void); #define CPU_NAME "MC68000" #endif #endif /* CONFIG_M68000 */ -#ifdef CONFIG_M68360 -#define CPU_NAME "MC68360" -#endif #ifndef CPU_NAME #define CPU_NAME "UNKNOWN" #endif @@ -209,10 +206,6 @@ void __init setup_arch(char **cmdline_p) #if defined( CONFIG_PILOT ) && defined( CONFIG_M68EZ328 ) printk(KERN_INFO "PalmV support by Lineo Inc. <jeff@uclinux.com>\n"); #endif -#if defined (CONFIG_M68360) - printk(KERN_INFO "QUICC port done by SED Systems <hamilton@sedsystems.ca>,\n"); - printk(KERN_INFO "based on 2.0.38 port by Lineo Inc. <mleslie@lineo.com>.\n"); -#endif #ifdef CONFIG_DRAGEN2 printk(KERN_INFO "DragonEngine II board support by Georges Menie\n"); #endif diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index af1c4f3..2dcee3a 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -737,10 +737,8 @@ badframe: return 1; } -asmlinkage int do_sigreturn(unsigned long __unused) +asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw) { - struct switch_stack *sw = (struct switch_stack *) &__unused; - struct pt_regs *regs = (struct pt_regs *) (sw + 1); unsigned long usp = rdusp(); struct sigframe __user *frame = (struct sigframe __user *)(usp - 4); sigset_t set; @@ -764,10 +762,8 @@ badframe: return 0; } -asmlinkage int do_rt_sigreturn(unsigned long __unused) +asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw) { - struct switch_stack *sw = (struct switch_stack *) &__unused; - struct pt_regs *regs = (struct pt_regs *) (sw + 1); unsigned long usp = rdusp(); struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(usp - 4); sigset_t set; diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index ce56e04..920ff63 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -68,7 +68,7 @@ static int gIER,gIFR,gBufA,gBufB; * interrupt. This limitation also seems to apply to VIA clone logic cores in * Quadra-like ASICs. (RBV and OSS machines don't have this limitation.) * - * We used to fake it by configuring the relevent VIA pin as an output + * We used to fake it by configuring the relevant VIA pin as an output * (to mask the interrupt) or input (to unmask). That scheme did not work on * (at least) the Quadra 700. 
A NuBus card's /NMRQ signal is an open-collector * circuit (see Designing Cards and Drivers for Macintosh II and Macintosh SE, diff --git a/arch/metag/include/asm/gpio.h b/arch/metag/include/asm/gpio.h deleted file mode 100644 index b3799d8..0000000 --- a/arch/metag/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index c3c6f08..bad1323 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void) /* * OK, it's off to the idle thread for us */ - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c index 53f0f6c..b38700ae 100644 --- a/arch/metag/mm/hugetlbpage.c +++ b/arch/metag/mm/hugetlbpage.c @@ -67,7 +67,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pgd = pgd_offset(mm, addr); pud = pud_offset(pgd, addr); pmd = pmd_offset(pud, addr); - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); pgd->pgd &= ~_PAGE_SZ_MASK; pgd->pgd |= _PAGE_SZHUGE; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 53b69de..3d793b5 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -267,6 +267,9 @@ config PCI config PCI_DOMAINS def_bool PCI +config PCI_DOMAINS_GENERIC + def_bool PCI_DOMAINS + config PCI_SYSCALL def_bool PCI diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index e2f6543..dc5dd5b 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -87,5 +87,4 @@ CONFIG_KGDB_KDB=y CONFIG_EARLY_PRINTK=y CONFIG_KEYS=y CONFIG_ENCRYPTED_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig index a29ebd4..4cdaf56 100644 --- a/arch/microblaze/configs/nommu_defconfig +++ b/arch/microblaze/configs/nommu_defconfig @@ -92,7 +92,6 @@ CONFIG_DEBUG_INFO=y CONFIG_EARLY_PRINTK=y CONFIG_KEYS=y CONFIG_ENCRYPTED_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_MD4=y CONFIG_CRYPTO_MD5=y diff --git a/arch/microblaze/include/asm/gpio.h b/arch/microblaze/include/asm/gpio.h deleted file mode 100644 index b3799d8..0000000 --- a/arch/microblaze/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index dc9eb66..fc3ecb5 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -22,8 +22,6 @@ #include <asm/prom.h> #include <asm/pci-bridge.h> -#include <asm-generic/pci-dma-compat.h> - #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 89a2a939..f31ebb5 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -130,8 +130,6 @@ void __init machine_early_init(const char *cmdline, unsigned int ram, memset(__bss_start, 0, __bss_stop-__bss_start); memset(_ssbss, 0, _esbss-_ssbss); - lockdep_init(); - /* initialize device tree for usage in early_printk */ early_init_devtree(_fdt_start); diff --git a/arch/microblaze/pci/pci-common.c 
b/arch/microblaze/pci/pci-common.c index ae838ed..35654be 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -123,17 +123,6 @@ unsigned long pci_address_to_pio(phys_addr_t address) } EXPORT_SYMBOL_GPL(pci_address_to_pio); -/* - * Return the domain number for this bus. - */ -int pci_domain_nr(struct pci_bus *bus) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - - return hose->global_number; -} -EXPORT_SYMBOL(pci_domain_nr); - /* This routine is meant to be used early during boot, when the * PCI bus numbers have not yet been assigned, and you need to * issue PCI config cycles to an OF device. @@ -863,26 +852,10 @@ void pcibios_setup_bus_devices(struct pci_bus *bus) void pcibios_fixup_bus(struct pci_bus *bus) { - /* When called from the generic PCI probe, read PCI<->PCI bridge - * bases. This is -not- called when generating the PCI tree from - * the OF device-tree. - */ - if (bus->self != NULL) - pci_read_bridge_bases(bus); - - /* Now fixup the bus bus */ - pcibios_setup_bus_self(bus); - - /* Now fixup devices on that bus */ - pcibios_setup_bus_devices(bus); + /* nothing to do */ } EXPORT_SYMBOL(pcibios_fixup_bus); -static int skip_isa_ioresource_align(struct pci_dev *dev) -{ - return 0; -} - /* * We need to avoid collisions with `mirrored' VGA ports * and other strange ISA hardware, so we always want the @@ -899,20 +872,18 @@ static int skip_isa_ioresource_align(struct pci_dev *dev) resource_size_t pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { - struct pci_dev *dev = data; - resource_size_t start = res->start; - - if (res->flags & IORESOURCE_IO) { - if (skip_isa_ioresource_align(dev)) - return start; - if (start & 0x300) - start = (start + 0x3ff) & ~0x3ff; - } - - return start; + return res->start; } EXPORT_SYMBOL(pcibios_align_resource); +int pcibios_add_device(struct pci_dev *dev) +{ + dev->irq = of_irq_parse_and_map_pci(dev, 0, 0); + + return 0; +} +EXPORT_SYMBOL(pcibios_add_device); + /* * Reparent resource children of pr that conflict with res * under res, and make res replace those children. 
@@ -1333,13 +1304,6 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, (unsigned long)hose->io_base_virt - _IO_BASE); } -struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) -{ - struct pci_controller *hose = bus->sysdata; - - return of_node_get(hose->dn); -} - static void pcibios_scan_phb(struct pci_controller *hose) { LIST_HEAD(resources); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 74a3db9..7c4a4ce 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -151,6 +151,7 @@ config BMIPS_GENERIC select CSRC_R4K select SYNC_R4K select COMMON_CLK + select BCM6345_L1_IRQ select BCM7038_L1_IRQ select BCM7120_L2_IRQ select BRCMSTB_L2_IRQ @@ -2169,7 +2170,6 @@ config MIPS_MT_SMP select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select SYNC_R4K - select MIPS_GIC_IPI select MIPS_MT select SMP select SMP_UP @@ -2267,7 +2267,6 @@ config MIPS_VPE_APSP_API_MT config MIPS_CMP bool "MIPS CMP framework support (DEPRECATED)" depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6 - select MIPS_GIC_IPI select SMP select SYNC_R4K select SYS_SUPPORTS_SMP @@ -2287,7 +2286,6 @@ config MIPS_CPS select MIPS_CM select MIPS_CPC select MIPS_CPS_PM if HOTPLUG_CPU - select MIPS_GIC_IPI select SMP select SYNC_R4K if (CEVT_R4K || CSRC_R4K) select SYS_SUPPORTS_HOTPLUG_CPU @@ -2305,9 +2303,6 @@ config MIPS_CPS_PM select MIPS_CPC bool -config MIPS_GIC_IPI - bool - config MIPS_CM bool @@ -2876,8 +2871,6 @@ config PCI_DOMAINS source "drivers/pci/Kconfig" -source "drivers/pci/pcie/Kconfig" - # # ISA support is now enabled via select. Too many systems still have the one # or other ISA chip on the board that users don't know about so don't expect @@ -2937,8 +2930,6 @@ config ZONE_DMA32 source "drivers/pcmcia/Kconfig" -source "drivers/pci/hotplug/Kconfig" - config RAPIDIO tristate "RapidIO support" depends on PCI diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c index 84548f7..e6b90e7 100644 --- a/arch/mips/alchemy/common/gpiolib.c +++ b/arch/mips/alchemy/common/gpiolib.c @@ -160,14 +160,14 @@ static int __init alchemy_gpiochip_init(void) switch (alchemy_get_cputype()) { case ALCHEMY_CPU_AU1000: - ret = gpiochip_add(&alchemy_gpio_chip[0]); + ret = gpiochip_add_data(&alchemy_gpio_chip[0], NULL); break; case ALCHEMY_CPU_AU1500...ALCHEMY_CPU_AU1200: - ret = gpiochip_add(&alchemy_gpio_chip[0]); - ret |= gpiochip_add(&alchemy_gpio_chip[1]); + ret = gpiochip_add_data(&alchemy_gpio_chip[0], NULL); + ret |= gpiochip_add_data(&alchemy_gpio_chip[1], NULL); break; case ALCHEMY_CPU_AU1300: - ret = gpiochip_add(&au1300_gpiochip); + ret = gpiochip_add_data(&au1300_gpiochip, NULL); break; } return ret; diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c index f969f58..ed5b3d2 100644 --- a/arch/mips/ar7/gpio.c +++ b/arch/mips/ar7/gpio.c @@ -33,8 +33,7 @@ struct ar7_gpio_chip { static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio) { - struct ar7_gpio_chip *gpch = - container_of(chip, struct ar7_gpio_chip, chip); + struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT; return !!(readl(gpio_in) & (1 << gpio)); @@ -42,8 +41,7 @@ static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio) static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio) { - struct ar7_gpio_chip *gpch = - container_of(chip, struct ar7_gpio_chip, chip); + struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); void __iomem *gpio_in0 = gpch->regs + TITAN_GPIO_INPUT_0; void __iomem *gpio_in1 = gpch->regs + 
TITAN_GPIO_INPUT_1; @@ -53,8 +51,7 @@ static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio) static void ar7_gpio_set_value(struct gpio_chip *chip, unsigned gpio, int value) { - struct ar7_gpio_chip *gpch = - container_of(chip, struct ar7_gpio_chip, chip); + struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); void __iomem *gpio_out = gpch->regs + AR7_GPIO_OUTPUT; unsigned tmp; @@ -67,8 +64,7 @@ static void ar7_gpio_set_value(struct gpio_chip *chip, static void titan_gpio_set_value(struct gpio_chip *chip, unsigned gpio, int value) { - struct ar7_gpio_chip *gpch = - container_of(chip, struct ar7_gpio_chip, chip); + struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); void __iomem *gpio_out0 = gpch->regs + TITAN_GPIO_OUTPUT_0; void __iomem *gpio_out1 = gpch->regs + TITAN_GPIO_OUTPUT_1; unsigned tmp; @@ -81,8 +77,7 @@ static void titan_gpio_set_value(struct gpio_chip *chip, static int ar7_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) { - struct ar7_gpio_chip *gpch = - container_of(chip, struct ar7_gpio_chip, chip); + struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; writel(readl(gpio_dir) | (1 << gpio), gpio_dir); @@ -92,8 +87,7 @@ static int ar7_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) static int titan_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) { - struct ar7_gpio_chip *gpch = - container_of(chip, struct ar7_gpio_chip, chip); + struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; @@ -108,8 +102,7 @@ static int titan_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) static int ar7_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, int value) { - struct ar7_gpio_chip *gpch = - container_of(chip, struct ar7_gpio_chip, chip); + struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; ar7_gpio_set_value(chip, gpio, value); @@ -121,8 +114,7 @@ static int ar7_gpio_direction_output(struct gpio_chip *chip, static int titan_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, int value) { - struct ar7_gpio_chip *gpch = - container_of(chip, struct ar7_gpio_chip, chip); + struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; @@ -335,7 +327,7 @@ int __init ar7_gpio_init(void) return -ENOMEM; } - ret = gpiochip_add(&gpch->chip); + ret = gpiochip_add_data(&gpch->chip, gpch); if (ret) { printk(KERN_ERR "%s: failed to add gpiochip\n", gpch->chip.label); diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 511c065..2dfff1f 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -26,90 +26,6 @@ #include "common.h" #include "machtypes.h" -static void __init ath79_misc_intc_domain_init( - struct device_node *node, int irq); - -static void ath79_misc_irq_handler(struct irq_desc *desc) -{ - struct irq_domain *domain = irq_desc_get_handler_data(desc); - void __iomem *base = domain->host_data; - u32 pending; - - pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) & - __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); - - if (!pending) { - spurious_interrupt(); - return; - } - - while (pending) { - int bit = __ffs(pending); - - generic_handle_irq(irq_linear_revmap(domain, bit)); - pending &= ~BIT(bit); - } -} - -static void ar71xx_misc_irq_unmask(struct 
irq_data *d) -{ - void __iomem *base = irq_data_get_irq_chip_data(d); - unsigned int irq = d->hwirq; - u32 t; - - t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); - __raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE); - - /* flush write */ - __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); -} - -static void ar71xx_misc_irq_mask(struct irq_data *d) -{ - void __iomem *base = irq_data_get_irq_chip_data(d); - unsigned int irq = d->hwirq; - u32 t; - - t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); - __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE); - - /* flush write */ - __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); -} - -static void ar724x_misc_irq_ack(struct irq_data *d) -{ - void __iomem *base = irq_data_get_irq_chip_data(d); - unsigned int irq = d->hwirq; - u32 t; - - t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS); - __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS); - - /* flush write */ - __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS); -} - -static struct irq_chip ath79_misc_irq_chip = { - .name = "MISC", - .irq_unmask = ar71xx_misc_irq_unmask, - .irq_mask = ar71xx_misc_irq_mask, -}; - -static void __init ath79_misc_irq_init(void) -{ - if (soc_is_ar71xx() || soc_is_ar913x()) - ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; - else if (soc_is_ar724x() || - soc_is_ar933x() || - soc_is_ar934x() || - soc_is_qca955x()) - ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; - else - BUG(); - - ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6)); -} static void ar934x_ip2_irq_dispatch(struct irq_desc *desc) { @@ -212,142 +128,12 @@ static void qca955x_irq_init(void) irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch); } -/* - * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for - * these devices typically allocate coherent DMA memory, however the - * DMA controller may still have some unsynchronized data in the FIFO. - * Issue a flush in the handlers to ensure that the driver sees - * the update. - * - * This array map the interrupt lines to the DDR write buffer channels. 
- */ - -static unsigned irq_wb_chan[8] = { - -1, -1, -1, -1, -1, -1, -1, -1, -}; - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned long pending; - int irq; - - pending = read_c0_status() & read_c0_cause() & ST0_IM; - - if (!pending) { - spurious_interrupt(); - return; - } - - pending >>= CAUSEB_IP; - while (pending) { - irq = fls(pending) - 1; - if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1) - ath79_ddr_wb_flush(irq_wb_chan[irq]); - do_IRQ(MIPS_CPU_IRQ_BASE + irq); - pending &= ~BIT(irq); - } -} - -static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) -{ - irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq); - irq_set_chip_data(irq, d->host_data); - return 0; -} - -static const struct irq_domain_ops misc_irq_domain_ops = { - .xlate = irq_domain_xlate_onecell, - .map = misc_map, -}; - -static void __init ath79_misc_intc_domain_init( - struct device_node *node, int irq) -{ - void __iomem *base = ath79_reset_base; - struct irq_domain *domain; - - domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT, - ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base); - if (!domain) - panic("Failed to add MISC irqdomain"); - - /* Disable and clear all interrupts */ - __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); - __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS); - - irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain); -} - -static int __init ath79_misc_intc_of_init( - struct device_node *node, struct device_node *parent) -{ - int irq; - - irq = irq_of_parse_and_map(node, 0); - if (!irq) - panic("Failed to get MISC IRQ"); - - ath79_misc_intc_domain_init(node, irq); - return 0; -} - -static int __init ar7100_misc_intc_of_init( - struct device_node *node, struct device_node *parent) -{ - ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; - return ath79_misc_intc_of_init(node, parent); -} - -IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc", - ar7100_misc_intc_of_init); - -static int __init ar7240_misc_intc_of_init( - struct device_node *node, struct device_node *parent) -{ - ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; - return ath79_misc_intc_of_init(node, parent); -} - -IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", - ar7240_misc_intc_of_init); - -static int __init ar79_cpu_intc_of_init( - struct device_node *node, struct device_node *parent) -{ - int err, i, count; - - /* Fill the irq_wb_chan table */ - count = of_count_phandle_with_args( - node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells"); - - for (i = 0; i < count; i++) { - struct of_phandle_args args; - u32 irq = i; - - of_property_read_u32_index( - node, "qca,ddr-wb-channel-interrupts", i, &irq); - if (irq >= ARRAY_SIZE(irq_wb_chan)) - continue; - - err = of_parse_phandle_with_args( - node, "qca,ddr-wb-channels", - "#qca,ddr-wb-channel-cells", - i, &args); - if (err) - return err; - - irq_wb_chan[irq] = args.args[0]; - pr_info("IRQ: Set flush channel of IRQ%d to %d\n", - irq, args.args[0]); - } - - return mips_cpu_irq_of_init(node, parent); -} -IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc", - ar79_cpu_intc_of_init); - void __init arch_init_irq(void) { + unsigned irq_wb_chan2 = -1; + unsigned irq_wb_chan3 = -1; + bool misc_is_ar71xx; + if (mips_machtype == ATH79_MACH_GENERIC_OF) { irqchip_init(); return; @@ -355,14 +141,26 @@ void __init arch_init_irq(void) if (soc_is_ar71xx() || soc_is_ar724x() || soc_is_ar913x() || soc_is_ar933x()) { - irq_wb_chan[2] = 3; - irq_wb_chan[3] = 2; + irq_wb_chan2 = 3; + 
irq_wb_chan3 = 2; } else if (soc_is_ar934x()) { - irq_wb_chan[3] = 2; + irq_wb_chan3 = 2; } - mips_cpu_irq_init(); - ath79_misc_irq_init(); + ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3); + + if (soc_is_ar71xx() || soc_is_ar913x()) + misc_is_ar71xx = true; + else if (soc_is_ar724x() || + soc_is_ar933x() || + soc_is_ar934x() || + soc_is_qca955x()) + misc_is_ar71xx = false; + else + BUG(); + ath79_misc_irq_init( + ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS, + ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx); if (soc_is_ar934x()) ar934x_ip2_irq_init(); diff --git a/arch/mips/bcm63xx/gpio.c b/arch/mips/bcm63xx/gpio.c index 468bc7b..7c256da 100644 --- a/arch/mips/bcm63xx/gpio.c +++ b/arch/mips/bcm63xx/gpio.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/platform_device.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <bcm63xx_cpu.h> #include <bcm63xx_gpio.h> @@ -147,5 +147,5 @@ int __init bcm63xx_gpio_init(void) bcm63xx_gpio_chip.ngpio = bcm63xx_gpio_count(); pr_info("registering %d GPIOs\n", bcm63xx_gpio_chip.ngpio); - return gpiochip_add(&bcm63xx_gpio_chip); + return gpiochip_add_data(&bcm63xx_gpio_chip, NULL); } diff --git a/arch/mips/bmips/irq.c b/arch/mips/bmips/irq.c index e7fc6f934..7efefcf 100644 --- a/arch/mips/bmips/irq.c +++ b/arch/mips/bmips/irq.c @@ -15,6 +15,12 @@ #include <asm/irq_cpu.h> #include <asm/time.h> +static const struct of_device_id smp_intc_dt_match[] = { + { .compatible = "brcm,bcm7038-l1-intc" }, + { .compatible = "brcm,bcm6345-l1-intc" }, + {} +}; + unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; @@ -24,8 +30,8 @@ void __init arch_init_irq(void) { struct device_node *dn; - /* Only the STB (bcm7038) controller supports SMP IRQ affinity */ - dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc"); + /* Only these controllers support SMP IRQ affinity */ + dn = of_find_matching_node(NULL, smp_intc_dt_match); if (dn) of_node_put(dn); else diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index 408799a..f752114 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -17,7 +17,7 @@ #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) #endif -#ifdef CONFIG_MACH_JZ4740 +#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780) #include <asm/mach-jz4740/base.h> #define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset)) #endif diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi index d61b161..9d19236 100644 --- a/arch/mips/boot/dts/brcm/bcm6328.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi @@ -74,7 +74,7 @@ timer: timer@10000040 { compatible = "syscon"; reg = <0x10000040 0x2c>; - little-endian; + native-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm6368.dtsi b/arch/mips/boot/dts/brcm/bcm6368.dtsi index 9c8d3fe2..1f6b9b5 100644 --- a/arch/mips/boot/dts/brcm/bcm6368.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6368.dtsi @@ -54,7 +54,7 @@ periph_cntl: syscon@10000000 { compatible = "syscon"; reg = <0x10000000 0x14>; - little-endian; + native-endian; }; reboot: syscon-reboot@10000008 { diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi index 1a7efa8..3ae1605 100644 --- a/arch/mips/boot/dts/brcm/bcm7125.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi @@ -98,7 +98,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7125-sun-top-ctrl", "syscon"; reg = <0x404000 0x60c>; - 
little-endian; + native-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi index d4bf52c..be79919 100644 --- a/arch/mips/boot/dts/brcm/bcm7346.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi @@ -118,7 +118,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7346-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; + native-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi index 8e25016..060805b 100644 --- a/arch/mips/boot/dts/brcm/bcm7358.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi @@ -112,7 +112,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7358-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; + native-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi index 7e5f760..bcdb09b 100644 --- a/arch/mips/boot/dts/brcm/bcm7360.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi @@ -112,7 +112,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7360-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; + native-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi index c739ea7..d3b1b76 100644 --- a/arch/mips/boot/dts/brcm/bcm7362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi @@ -118,7 +118,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7362-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; + native-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi index 5f55d0a..3302a1b 100644 --- a/arch/mips/boot/dts/brcm/bcm7420.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi @@ -99,7 +99,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7420-sun-top-ctrl", "syscon"; reg = <0x404000 0x60c>; - little-endian; + native-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi index e24d41a..15b27aa 100644 --- a/arch/mips/boot/dts/brcm/bcm7425.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi @@ -100,7 +100,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; + native-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi index 8b9432c..adb33e3 100644 --- a/arch/mips/boot/dts/brcm/bcm7435.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi @@ -114,7 +114,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; - little-endian; + native-endian; }; reboot { diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index b3e7a1b..e070dac 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -247,7 +247,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_LIST=y CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK_XFRM=y diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 57ed466..6ba9ce9 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -358,7 +358,6 @@ CONFIG_DLM=m CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_CRYPTD=m diff --git a/arch/mips/configs/ip27_defconfig 
b/arch/mips/configs/ip27_defconfig index 48e16d9..77e9f50 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -346,7 +346,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_DLM=m # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITYFS=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_NULL=m diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index fe48220..f9af98f 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -181,7 +181,6 @@ CONFIG_MAGIC_SYSRQ=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 4f37a59..a5e85e1 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -362,7 +362,6 @@ CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m CONFIG_DLM=m -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_LRW=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 004cf52..d1f198b 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -412,7 +412,6 @@ CONFIG_DEBUG_FS=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_CRYPTD=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index db029f4..82db4e3 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -453,7 +453,6 @@ CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m CONFIG_DLM=m -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_LRW=m diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig index 51bab13..7fca09fe 100644 --- a/arch/mips/configs/sb1250_swarm_defconfig +++ b/arch/mips/configs/sb1250_swarm_defconfig @@ -87,7 +87,6 @@ CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_DLM=m CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h index 2b34872..441faa9 100644 --- a/arch/mips/include/asm/mach-ath79/ath79.h +++ b/arch/mips/include/asm/mach-ath79/ath79.h @@ -144,4 +144,8 @@ static inline u32 ath79_reset_rr(unsigned reg) void ath79_device_reset_set(u32 mask); void ath79_device_reset_clear(u32 mask); +void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3); +void ath79_misc_irq_init(void __iomem *regs, int irq, + int irq_base, bool is_ar71xx); + #endif /* __ASM_MACH_ATH79_H */ diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h index 774bb45..19e139c 100644 --- a/arch/mips/include/asm/octeon/cvmx.h +++ b/arch/mips/include/asm/octeon/cvmx.h @@ -275,6 +275,11 @@ static inline void cvmx_write_csr(uint64_t csr_addr, uint64_t val) cvmx_read64(CVMX_MIO_BOOT_BIST_STAT); } +static inline void cvmx_writeq_csr(void __iomem *csr_addr, uint64_t val) +{ + cvmx_write_csr((__force uint64_t)csr_addr, val); +} + static inline void cvmx_write_io(uint64_t io_addr, uint64_t val) { cvmx_write64(io_addr, val); @@ -287,6 +292,10 @@ static inline uint64_t cvmx_read_csr(uint64_t csr_addr) return val; } +static 
inline uint64_t cvmx_readq_csr(void __iomem *csr_addr) +{ + return cvmx_read_csr((__force uint64_t) csr_addr); +} static inline void cvmx_send_single(uint64_t data) { diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 98c31e5..8c16fb7 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -102,7 +102,6 @@ static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, #include <linux/scatterlist.h> #include <linux/string.h> #include <asm/io.h> -#include <asm-generic/pci-bridge.h> struct pci_dev; @@ -125,9 +124,6 @@ static inline int pci_proc_domain(struct pci_bus *bus) #endif /* __KERNEL__ */ -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include <asm-generic/pci-dma-compat.h> - /* Do platform specific device initialization at pci_enable_device() time */ extern int pcibios_plat_dev_init(struct pci_dev *dev); diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 6ba1fb8..db7c322 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -44,8 +44,9 @@ static inline void plat_smp_setup(void) mp_ops->smp_setup(); } -extern void gic_send_ipi_single(int cpu, unsigned int action); -extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action); +extern void mips_smp_send_ipi_single(int cpu, unsigned int action); +extern void mips_smp_send_ipi_mask(const struct cpumask *mask, + unsigned int action); #else /* !CONFIG_SMP */ diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c index d9907e5..b765773 100644 --- a/arch/mips/jz4740/gpio.c +++ b/arch/mips/jz4740/gpio.c @@ -18,6 +18,8 @@ #include <linux/init.h> #include <linux/io.h> +#include <linux/gpio/driver.h> +/* FIXME: needed for gpio_request(), try to remove consumer API from driver */ #include <linux/gpio.h> #include <linux/delay.h> #include <linux/interrupt.h> @@ -91,9 +93,9 @@ static inline struct jz_gpio_chip *gpio_to_jz_gpio_chip(unsigned int gpio) return &jz4740_gpio_chips[gpio >> 5]; } -static inline struct jz_gpio_chip *gpio_chip_to_jz_gpio_chip(struct gpio_chip *gpio_chip) +static inline struct jz_gpio_chip *gpio_chip_to_jz_gpio_chip(struct gpio_chip *gc) { - return container_of(gpio_chip, struct jz_gpio_chip, gpio_chip); + return gpiochip_get_data(gc); } static inline struct jz_gpio_chip *irq_to_jz_gpio_chip(struct irq_data *data) @@ -234,7 +236,7 @@ static int jz_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) static int jz_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) { - struct jz_gpio_chip *jz_gpio = gpio_chip_to_jz_gpio_chip(chip); + struct jz_gpio_chip *jz_gpio = gpiochip_get_data(chip); return jz_gpio->irq_base + gpio; } @@ -449,7 +451,7 @@ static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id) irq_setup_generic_chip(gc, IRQ_MSK(chip->gpio_chip.ngpio), IRQ_GC_INIT_NESTED_LOCK, 0, IRQ_NOPROBE | IRQ_LEVEL); - gpiochip_add(&chip->gpio_chip); + gpiochip_add_data(&chip->gpio_chip, chip); } static int __init jz4740_gpio_init(void) diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 68e2b7d..b0988fd 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -52,7 +52,6 @@ obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o obj-$(CONFIG_MIPS_CMP) += smp-cmp.o obj-$(CONFIG_MIPS_CPS) += smp-cps.o cps-vec.o obj-$(CONFIG_MIPS_CPS_NS16550) += cps-vec-ns16550.o -obj-$(CONFIG_MIPS_GIC_IPI) += smp-gic.o obj-$(CONFIG_MIPS_SPRAM) += spram.o obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o diff --git 
a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c index 705be43c..cbd47f3 100644 --- a/arch/mips/kernel/gpio_txx9.c +++ b/arch/mips/kernel/gpio_txx9.c @@ -10,7 +10,7 @@ #include <linux/init.h> #include <linux/spinlock.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <linux/errno.h> #include <linux/io.h> #include <asm/txx9pio.h> @@ -85,5 +85,5 @@ int __init txx9_gpio_init(unsigned long baseaddr, return -ENODEV; txx9_gpio_chip.base = base; txx9_gpio_chip.ngpio = num; - return gpiochip_add(&txx9_gpio_chip); + return gpiochip_add_data(&txx9_gpio_chip, NULL); } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 5fdaf8b..4f60734 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -732,21 +732,23 @@ static void __init resource_init(void) end = HIGHMEM_START - 1; res = alloc_bootmem(sizeof(struct resource)); + + res->start = start; + res->end = end; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + switch (boot_mem_map.map[i].type) { case BOOT_MEM_RAM: case BOOT_MEM_INIT_RAM: case BOOT_MEM_ROM_DATA: res->name = "System RAM"; + res->flags |= IORESOURCE_SYSRAM; break; case BOOT_MEM_RESERVED: default: res->name = "reserved"; } - res->start = start; - res->end = end; - - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); /* diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index d5e0f94..7692334 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -149,8 +149,8 @@ void __init cmp_prepare_cpus(unsigned int max_cpus) } struct plat_smp_ops cmp_smp_ops = { - .send_ipi_single = gic_send_ipi_single, - .send_ipi_mask = gic_send_ipi_mask, + .send_ipi_single = mips_smp_send_ipi_single, + .send_ipi_mask = mips_smp_send_ipi_mask, .init_secondary = cmp_init_secondary, .smp_finish = cmp_smp_finish, .boot_secondary = cmp_boot_secondary, diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 2ad4e4c..253e140 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -472,8 +472,8 @@ static struct plat_smp_ops cps_smp_ops = { .boot_secondary = cps_boot_secondary, .init_secondary = cps_init_secondary, .smp_finish = cps_smp_finish, - .send_ipi_single = gic_send_ipi_single, - .send_ipi_mask = gic_send_ipi_mask, + .send_ipi_single = mips_smp_send_ipi_single, + .send_ipi_mask = mips_smp_send_ipi_mask, #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cps_cpu_disable, .cpu_die = cps_cpu_die, diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 86311a1..4f9570a 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -121,7 +121,7 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action) #ifdef CONFIG_MIPS_GIC if (gic_present) { - gic_send_ipi_single(cpu, action); + mips_smp_send_ipi_single(cpu, action); return; } #endif diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index bd4385a..37708d9 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -33,12 +33,16 @@ #include <linux/cpu.h> #include <linux/err.h> #include <linux/ftrace.h> +#include <linux/irqdomain.h> +#include <linux/of.h> +#include <linux/of_irq.h> #include <linux/atomic.h> #include <asm/cpu.h> #include <asm/processor.h> #include <asm/idle.h> #include <asm/r4k-timer.h> +#include <asm/mips-cpc.h> #include <asm/mmu_context.h> #include <asm/time.h> #include <asm/setup.h> @@ -79,6 +83,11 @@ static cpumask_t cpu_core_setup_map; cpumask_t cpu_coherent_mask; +#ifdef CONFIG_GENERIC_IRQ_IPI +static struct irq_desc *call_desc; 
+static struct irq_desc *sched_desc; +#endif + static inline void set_cpu_sibling_map(int cpu) { int i; @@ -121,6 +130,7 @@ static inline void calculate_cpu_foreign_map(void) cpumask_t temp_foreign_map; /* Re-calculate the mask */ + cpumask_clear(&temp_foreign_map); for_each_online_cpu(i) { core_present = 0; for_each_cpu(k, &temp_foreign_map) @@ -145,6 +155,133 @@ void register_smp_ops(struct plat_smp_ops *ops) mp_ops = ops; } +#ifdef CONFIG_GENERIC_IRQ_IPI +void mips_smp_send_ipi_single(int cpu, unsigned int action) +{ + mips_smp_send_ipi_mask(cpumask_of(cpu), action); +} + +void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + unsigned long flags; + unsigned int core; + int cpu; + + local_irq_save(flags); + + switch (action) { + case SMP_CALL_FUNCTION: + __ipi_send_mask(call_desc, mask); + break; + + case SMP_RESCHEDULE_YOURSELF: + __ipi_send_mask(sched_desc, mask); + break; + + default: + BUG(); + } + + if (mips_cpc_present()) { + for_each_cpu(cpu, mask) { + core = cpu_data[cpu].core; + + if (core == current_cpu_data.core) + continue; + + while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) { + mips_cpc_lock_other(core); + write_cpc_co_cmd(CPC_Cx_CMD_PWRUP); + mips_cpc_unlock_other(); + } + } + } + + local_irq_restore(flags); +} + + +static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) +{ + scheduler_ipi(); + + return IRQ_HANDLED; +} + +static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) +{ + generic_smp_call_function_interrupt(); + + return IRQ_HANDLED; +} + +static struct irqaction irq_resched = { + .handler = ipi_resched_interrupt, + .flags = IRQF_PERCPU, + .name = "IPI resched" +}; + +static struct irqaction irq_call = { + .handler = ipi_call_interrupt, + .flags = IRQF_PERCPU, + .name = "IPI call" +}; + +static __init void smp_ipi_init_one(unsigned int virq, + struct irqaction *action) +{ + int ret; + + irq_set_handler(virq, handle_percpu_irq); + ret = setup_irq(virq, action); + BUG_ON(ret); +} + +static int __init mips_smp_ipi_init(void) +{ + unsigned int call_virq, sched_virq; + struct irq_domain *ipidomain; + struct device_node *node; + + node = of_irq_find_parent(of_root); + ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI); + + /* + * Some platforms have half DT setup. So if we found irq node but + * didn't find an ipidomain, try to search for one that is not in the + * DT. + */ + if (node && !ipidomain) + ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI); + + BUG_ON(!ipidomain); + + call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask); + BUG_ON(!call_virq); + + sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask); + BUG_ON(!sched_virq); + + if (irq_domain_is_ipi_per_cpu(ipidomain)) { + int cpu; + + for_each_cpu(cpu, cpu_possible_mask) { + smp_ipi_init_one(call_virq + cpu, &irq_call); + smp_ipi_init_one(sched_virq + cpu, &irq_resched); + } + } else { + smp_ipi_init_one(call_virq, &irq_call); + smp_ipi_init_one(sched_virq, &irq_resched); + } + + call_desc = irq_to_desc(call_virq); + sched_desc = irq_to_desc(sched_virq); + + return 0; +} +early_initcall(mips_smp_ipi_init); +#endif + /* * First C code run on the secondary CPUs after being started up by * the master. 
@@ -191,7 +328,7 @@ asmlinkage void start_secondary(void) WARN_ON_ONCE(!irqs_disabled()); mp_ops->smp_finish(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } static void stop_this_cpu(void *dummy) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 3110447..70ef1a4 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -445,8 +445,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, dvcpu->arch.wait = 0; - if (waitqueue_active(&dvcpu->wq)) - wake_up_interruptible(&dvcpu->wq); + if (swait_active(&dvcpu->wq)) + swake_up(&dvcpu->wq); return 0; } @@ -1174,8 +1174,8 @@ static void kvm_mips_comparecount_func(unsigned long data) kvm_mips_callbacks->queue_timer_int(vcpu); vcpu->arch.wait = 0; - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); + if (swait_active(&vcpu->wq)) + swake_up(&vcpu->wq); } /* low level hrtimer wake routine */ diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 1afd87c..6cdffc7 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -64,7 +64,7 @@ static inline void get_head_page_multiple(struct page *page, int nr) { VM_BUG_ON(page != compound_head(page)); VM_BUG_ON(page_count(page) == 0); - atomic_add(nr, &page->_count); + page_ref_add(page, nr); SetPageReferenced(page); } diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c index d708ae4..2b6d5e1 100644 --- a/arch/mips/pci/fixup-loongson3.c +++ b/arch/mips/pci/fixup-loongson3.c @@ -40,20 +40,25 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) static void pci_fixup_radeon(struct pci_dev *pdev) { - if (pdev->resource[PCI_ROM_RESOURCE].start) + struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; + + if (res->start) return; if (!loongson_sysconf.vgabios_addr) return; - pdev->resource[PCI_ROM_RESOURCE].start = - loongson_sysconf.vgabios_addr; - pdev->resource[PCI_ROM_RESOURCE].end = - loongson_sysconf.vgabios_addr + 256*1024 - 1; - pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_COPY; + pci_disable_rom(pdev); + if (res->parent) + release_resource(res); + + res->start = virt_to_phys((void *) loongson_sysconf.vgabios_addr); + res->end = res->start + 256*1024 - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n", - PCI_ROM_RESOURCE, &pdev->resource[PCI_ROM_RESOURCE]); + PCI_ROM_RESOURCE, res); } DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, diff --git a/arch/mips/pmcs-msp71xx/msp_serial.c b/arch/mips/pmcs-msp71xx/msp_serial.c index d304be2..8e6e8db 100644 --- a/arch/mips/pmcs-msp71xx/msp_serial.c +++ b/arch/mips/pmcs-msp71xx/msp_serial.c @@ -110,7 +110,7 @@ void __init msp_serial_setup(void) up.uartclk = uartclk; up.regshift = 2; up.iotype = UPIO_MEM; - up.flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST; + up.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; up.type = PORT_16550A; up.line = 0; up.serial_out = msp_serial_out; diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index fd11085..fdc704a 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -32,7 +32,7 @@ #include <linux/export.h> #include <linux/spinlock.h> #include <linux/platform_device.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <asm/mach-rc32434/rb.h> #include <asm/mach-rc32434/gpio.h> @@ -88,7 +88,7 @@ static int rb532_gpio_get(struct gpio_chip *chip, unsigned offset) { struct rb532_gpio_chip *gpch; - gpch = container_of(chip, struct rb532_gpio_chip, chip); + gpch = 
gpiochip_get_data(chip); return !!rb532_get_bit(offset, gpch->regbase + GPIOD); } @@ -100,7 +100,7 @@ static void rb532_gpio_set(struct gpio_chip *chip, { struct rb532_gpio_chip *gpch; - gpch = container_of(chip, struct rb532_gpio_chip, chip); + gpch = gpiochip_get_data(chip); rb532_set_bit(value, offset, gpch->regbase + GPIOD); } @@ -111,7 +111,7 @@ static int rb532_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { struct rb532_gpio_chip *gpch; - gpch = container_of(chip, struct rb532_gpio_chip, chip); + gpch = gpiochip_get_data(chip); /* disable alternate function in case it's set */ rb532_set_bit(0, offset, gpch->regbase + GPIOFUNC); @@ -128,7 +128,7 @@ static int rb532_gpio_direction_output(struct gpio_chip *chip, { struct rb532_gpio_chip *gpch; - gpch = container_of(chip, struct rb532_gpio_chip, chip); + gpch = gpiochip_get_data(chip); /* disable alternate function in case it's set */ rb532_set_bit(0, offset, gpch->regbase + GPIOFUNC); @@ -200,7 +200,7 @@ int __init rb532_gpio_init(void) } /* Register our GPIO chip */ - gpiochip_add(&rb532_gpio_chip->chip); + gpiochip_add_data(&rb532_gpio_chip->chip, rb532_gpio_chip); return 0; } diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 2fd350f..108f8a8 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -17,7 +17,7 @@ #include <linux/module.h> #include <linux/clk.h> #include <linux/err.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <linux/platform_device.h> #include <linux/serial_core.h> #include <linux/mtd/physmap.h> @@ -687,16 +687,14 @@ struct txx9_iocled_data { static int txx9_iocled_get(struct gpio_chip *chip, unsigned int offset) { - struct txx9_iocled_data *data = - container_of(chip, struct txx9_iocled_data, chip); + struct txx9_iocled_data *data = gpiochip_get_data(chip); return !!(data->cur_val & (1 << offset)); } static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset, int value) { - struct txx9_iocled_data *data = - container_of(chip, struct txx9_iocled_data, chip); + struct txx9_iocled_data *data = gpiochip_get_data(chip); unsigned long flags; spin_lock_irqsave(&txx9_iocled_lock, flags); if (value) @@ -749,7 +747,7 @@ void __init txx9_iocled_init(unsigned long baseaddr, iocled->chip.label = "iocled"; iocled->chip.base = basenum; iocled->chip.ngpio = num; - if (gpiochip_add(&iocled->chip)) + if (gpiochip_add_data(&iocled->chip, iocled)) goto out_unmap; if (basenum < 0) basenum = iocled->chip.base; diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c index c9afd05..54de668 100644 --- a/arch/mips/txx9/rbtx4938/setup.c +++ b/arch/mips/txx9/rbtx4938/setup.c @@ -14,6 +14,7 @@ #include <linux/ioport.h> #include <linux/delay.h> #include <linux/platform_device.h> +#include <linux/gpio/driver.h> #include <linux/gpio.h> #include <linux/mtd/physmap.h> @@ -335,7 +336,7 @@ static void __init rbtx4938_mtd_init(void) static void __init rbtx4938_arch_init(void) { - gpiochip_add(&rbtx4938_spi_gpio_chip); + gpiochip_add_data(&rbtx4938_spi_gpio_chip, NULL); rbtx4938_pci_setup(); rbtx4938_spi_init(); } diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 10607f0..06ddb55 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -53,6 +53,7 @@ config GENERIC_HWEIGHT config GENERIC_BUG def_bool y + depends on BUG config QUICKLIST def_bool y diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h index be3debb..51159ff 100644 --- a/arch/mn10300/include/asm/pci.h +++ 
b/arch/mn10300/include/asm/pci.h @@ -80,9 +80,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, #endif /* __KERNEL__ */ -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include <asm-generic/pci-dma-compat.h> - static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { return channel ? 15 : 14; diff --git a/arch/mn10300/include/asm/serial.h b/arch/mn10300/include/asm/serial.h index c199021..594ebff 100644 --- a/arch/mn10300/include/asm/serial.h +++ b/arch/mn10300/include/asm/serial.h @@ -14,15 +14,15 @@ /* Standard COM flags (except for COM4, because of the 8514 problem) */ #ifdef CONFIG_SERIAL_8250_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) -#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) +#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ) #else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) -#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST) +#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF #endif #ifdef CONFIG_SERIAL_8250_MANY_PORTS -#define FOURPORT_FLAGS ASYNC_FOURPORT +#define FOURPORT_FLAGS UPF_FOURPORT #define ACCENT_FLAGS 0 #define BOCA_FLAGS 0 #define HUB6_FLAGS 0 diff --git a/arch/mn10300/kernel/fpu-nofpu.c b/arch/mn10300/kernel/fpu-nofpu.c index 31c765b..8d0e041 100644 --- a/arch/mn10300/kernel/fpu-nofpu.c +++ b/arch/mn10300/kernel/fpu-nofpu.c @@ -9,6 +9,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include <asm/fpu.h> +#include <asm/elf.h> /* * handle an FPU operational exception diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index f984193..426173c 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -675,7 +675,7 @@ int __init start_secondary(void *unused) #ifdef CONFIG_GENERIC_CLOCKEVENTS init_clockevents(); #endif - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); return 0; } diff --git a/arch/openrisc/include/asm/gpio.h b/arch/openrisc/include/asm/gpio.h deleted file mode 100644 index b3799d8..0000000 --- a/arch/openrisc/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig index 9387cc2..db8f56b 100644 --- a/arch/parisc/configs/712_defconfig +++ b/arch/parisc/configs/712_defconfig @@ -183,7 +183,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_DEBUG_RODATA=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y diff --git a/arch/parisc/configs/a500_defconfig b/arch/parisc/configs/a500_defconfig index 0490199..1a4f776 100644 --- a/arch/parisc/configs/a500_defconfig +++ b/arch/parisc/configs/a500_defconfig @@ -193,7 +193,6 @@ CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y diff --git a/arch/parisc/configs/default_defconfig b/arch/parisc/configs/default_defconfig index 4d8127e..310b665 100644 --- a/arch/parisc/configs/default_defconfig +++ b/arch/parisc/configs/default_defconfig @@ -211,7 +211,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y # 
CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_MD4=m diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 0ffb08f..5b04d70 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -301,7 +301,6 @@ CONFIG_RCU_CPU_STALL_INFO=y CONFIG_LATENCYTOP=y CONFIG_LKDTM=m CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 3d0e17b..df0f52b 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -22,6 +22,9 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +/* Read-only memory is marked before mark_rodata_ro() is called. */ +#define __ro_after_init __read_mostly + void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ void disable_sr_hashing(void); /* turns off space register hashing */ diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 845272c..7bd69bd 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma } } -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #include <asm/kmap_types.h> #define ARCH_HAS_KMAP diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h index 89c53bf..defebd9 100644 --- a/arch/parisc/include/asm/pci.h +++ b/arch/parisc/include/asm/pci.h @@ -194,9 +194,6 @@ extern void pcibios_init_bridge(struct pci_dev *); #define PCIBIOS_MIN_IO 0x10 #define PCIBIOS_MIN_MEM 0x1000 /* NBPG - but pci/setup-res.c dies */ -/* export the pci_ DMA API in terms of the dma_ one */ -#include <asm-generic/pci-dma-compat.h> - static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { return channel ? 
15 : 14; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 52e8597..c2a9cc5 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -305,7 +305,7 @@ void __init smp_callin(void) local_irq_enable(); /* Interrupts have been off until now */ - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); /* NOTREACHED */ panic("smp_callin() AAAAaaaaahhhh....\n"); diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index 54ba392..5d6eea9 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -63,7 +63,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); } return pte; } diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 1b366c4..3c07d6b 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -55,12 +55,12 @@ signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource pdcdata_resource = { @@ -201,7 +201,7 @@ static void __init setup_bootmem(void) res->name = "System RAM"; res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT; res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9faa18c..a030e5e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -158,6 +158,7 @@ config PPC select ARCH_HAS_DEVMEM_IS_ALLOWED select HAVE_ARCH_SECCOMP_FILTER select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -828,14 +829,10 @@ config PCI_8260 select PPC_INDIRECT_PCI default y -source "drivers/pci/pcie/Kconfig" - source "drivers/pci/Kconfig" source "drivers/pcmcia/Kconfig" -source "drivers/pci/hotplug/Kconfig" - config HAS_RAPIDIO bool default n diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 9186229..340685c 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -387,7 +387,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_SPINLOCK=y CONFIG_BOOTX_TEXT=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index e5d2c3d..99ccbeba 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1175,7 +1175,6 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_XMON=y CONFIG_BOOTX_TEXT=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK_XFRM=y diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index 93ee046..ab11319 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -22,6 +22,7 @@ #include <asm/byteorder.h> #include <asm/switch_to.h> #include <crypto/algapi.h> +#include <crypto/xts.h> /* * MAX_BYTES defines the number of bytes 
that are allowed to be processed @@ -126,6 +127,11 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = xts_check_key(tfm, in_key, key_len); + if (err) + return err; key_len >>= 1; diff --git a/arch/powerpc/include/asm/gpio.h b/arch/powerpc/include/asm/gpio.h deleted file mode 100644 index b3799d8..0000000 --- a/arch/powerpc/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2aa79c8..7529aab 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) } #endif -#define SPAPR_TCE_SHIFT 12 - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #endif diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9d08d8c..d7b3431 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table { struct list_head list; struct kvm *kvm; u64 liobn; - u32 window_size; + struct rcu_head rcu; + u32 page_shift; + u64 offset; /* in pages */ + u64 size; /* window size in pages */ struct page *pages[0]; }; @@ -289,7 +292,7 @@ struct kvmppc_vcore { struct list_head runnable_threads; struct list_head preempt_list; spinlock_t lock; - wait_queue_head_t wq; + struct swait_queue_head wq; spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; @@ -629,7 +632,7 @@ struct kvm_vcpu_arch { u8 prodded; u32 last_inst; - wait_queue_head_t *wqp; + struct swait_queue_head *wqp; struct kvmppc_vcore *vcore; int ret; int trap; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2241d53..2544eda 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, - struct kvm_create_spapr_tce *args); + struct kvm_create_spapr_tce_64 *args); +extern struct kvmppc_spapr_tce_table *kvmppc_find_table( + struct kvm_vcpu *vcpu, unsigned long liobn); +extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, + unsigned long ioba, unsigned long npages); +extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, + unsigned long tce); +extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, + unsigned long *ua, unsigned long **prmap); +extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt, + unsigned long idx, unsigned long tce); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); +extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_list, unsigned long npages); +extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_value, unsigned long npages); extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba); extern struct page *kvm_alloc_hpt(unsigned long nr_pages); @@ -437,6 +453,8 @@ static inline int 
kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; } +extern void kvmppc_alloc_host_rm_ops(void); +extern void kvmppc_free_host_rm_ops(void); extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); @@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu); +extern void kvmppc_xics_ipi_action(void); +extern int h_ipi_redirect; #else +static inline void kvmppc_alloc_host_rm_ops(void) {}; +static inline void kvmppc_free_host_rm_ops(void) {}; static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return 0; } static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } @@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } #endif +/* + * Host-side operations we want to set up while running in real + * mode in the guest operating on the xics. + * Currently only VCPU wakeup is supported. + */ + +union kvmppc_rm_state { + unsigned long raw; + struct { + u32 in_host; + u32 rm_action; + }; +}; + +struct kvmppc_host_rm_core { + union kvmppc_rm_state rm_state; + void *rm_data; + char pad[112]; +}; + +struct kvmppc_host_rm_ops { + struct kvmppc_host_rm_core *rm_core; + void (*vcpu_kick)(struct kvm_vcpu *vcpu); +}; + +extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; + static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_BOOKE_HV diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 54843ca..78968c1 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -10,7 +10,6 @@ #include <linux/pci.h> #include <linux/list.h> #include <linux/ioport.h> -#include <asm-generic/pci-bridge.h> struct device_node; diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 6f8065a..a6f3ac0 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -20,8 +20,6 @@ #include <asm/prom.h> #include <asm/pci-bridge.h> -#include <asm-generic/pci-dma-compat.h> - /* Return values for pci_controller_ops.probe_mode function */ #define PCI_PROBE_NONE -1 /* Don't look at this bus at all */ #define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index ac9fb11..47897a3 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, } return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); } + +unsigned long vmalloc_to_phys(void *vmalloc_addr); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 825663c..78083ed 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu); #define PPC_MSG_TICK_BROADCAST 2 #define PPC_MSG_DEBUGGER_BREAK 3 +/* This is only used by the powernv kernel */ +#define PPC_MSG_RM_HOST_ACTION 4 + /* for irq controllers that have dedicated ipis per message (4) */ extern int smp_request_message_ipi(int virq, int message); extern const char 
*smp_ipi_name[]; @@ -121,6 +124,7 @@ extern const char *smp_ipi_name[]; /* for irq controllers with only a single ipi */ extern void smp_muxed_ipi_set_data(int cpu, unsigned long data); extern void smp_muxed_ipi_message_pass(int cpu, int msg); +extern void smp_muxed_ipi_set_message(int cpu, int msg); extern irqreturn_t smp_ipi_demux(void); void smp_init_pSeries(void); diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0e25bdb..2546048 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -30,6 +30,7 @@ #ifdef CONFIG_PPC_ICP_NATIVE extern int icp_native_init(void); extern void icp_native_flush_interrupt(void); +extern void icp_native_cause_ipi_rm(int cpu); #else static inline int icp_native_init(void) { return -ENODEV; } #endif diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index ab4d473..c93cf35 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -333,6 +333,15 @@ struct kvm_create_spapr_tce { __u32 window_size; }; +/* for KVM_CAP_SPAPR_TCE_64 */ +struct kvm_create_spapr_tce_64 { + __u64 liobn; + __u32 page_shift; + __u32 flags; + __u64 offset; /* in pages */ + __u64 size; /* in pages */ +}; + /* for KVM_ALLOCATE_RMA */ struct kvm_allocate_rma { __u64 rma_size; diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 5a2c049..aa610ce 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -49,7 +49,7 @@ static unsigned int rtas_error_log_buffer_max; static unsigned int event_scan; static unsigned int rtas_event_scan_rate; -static int full_rtas_msgs = 0; +static bool full_rtas_msgs; /* Stop logging to nvram after first fatal error */ static int logging_enabled; /* Until we initialize everything, @@ -592,11 +592,6 @@ __setup("surveillance=", surveillance_setup); static int __init rtasmsgs_setup(char *str) { - if (strcmp(str, "on") == 0) - full_rtas_msgs = 1; - else if (strcmp(str, "off") == 0) - full_rtas_msgs = 0; - - return 1; + return (kstrtobool(str, &full_rtas_msgs) == 0); } __setup("rtasmsgs=", rtasmsgs_setup); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ad8c9db..d544fa3 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -114,8 +114,6 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ notrace void __init machine_init(u64 dt_ptr) { - lockdep_init(); - /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5c03a6a..f98be83 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -255,9 +255,6 @@ void __init early_setup(unsigned long dt_ptr) setup_paca(&boot_paca); fixup_boot_paca(); - /* Initialize lockdep early or else spinlocks will blow */ - lockdep_init(); - /* -------- printk is now safe to use ------- */ /* Enable early debugging if any specified (see udbg.h) */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ec9ec20..b7dea05f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg) #ifdef CONFIG_PPC_SMP_MUXED_IPI struct cpu_messages { - int messages; /* current messages */ + long messages; /* current messages */ unsigned long data; /* data for cause ipi */ }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); @@ -218,7 +218,7 @@ void 
smp_muxed_ipi_set_data(int cpu, unsigned long data) info->data = data; } -void smp_muxed_ipi_message_pass(int cpu, int msg) +void smp_muxed_ipi_set_message(int cpu, int msg) { struct cpu_messages *info = &per_cpu(ipi_message, cpu); char *message = (char *)&info->messages; @@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) */ smp_mb(); message[msg] = 1; +} + +void smp_muxed_ipi_message_pass(int cpu, int msg) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + + smp_muxed_ipi_set_message(cpu, msg); /* * cause_ipi functions are required to include a full barrier * before doing whatever causes the IPI. @@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) } #ifdef __BIG_ENDIAN__ -#define IPI_MESSAGE(A) (1 << (24 - 8 * (A))) +#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A))) #else -#define IPI_MESSAGE(A) (1 << (8 * (A))) +#define IPI_MESSAGE(A) (1uL << (8 * (A))) #endif irqreturn_t smp_ipi_demux(void) { struct cpu_messages *info = this_cpu_ptr(&ipi_message); - unsigned int all; + unsigned long all; mb(); /* order any irq clear */ do { all = xchg(&info->messages, 0); +#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) + /* + * Must check for PPC_MSG_RM_HOST_ACTION messages + * before PPC_MSG_CALL_FUNCTION messages because when + * a VM is destroyed, we call kick_all_cpus_sync() + * to ensure that any pending PPC_MSG_RM_HOST_ACTION + * messages have completed before we free any VCPUs. + */ + if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION)) + kvmppc_xics_ipi_action(); +#endif if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION)) generic_smp_call_function_interrupt(); if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) @@ -727,7 +745,7 @@ void start_secondary(void *unused) local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); BUG(); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b6becc7..33c47fc 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -203,9 +203,8 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err) #ifdef CONFIG_SMP printk("SMP NR_CPUS=%d ", NR_CPUS); #endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif + if (debug_pagealloc_enabled()) + printk("DEBUG_PAGEALLOC "); #ifdef CONFIG_NUMA printk("NUMA "); #endif diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 0570eef..7f7b6d8 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm KVM := ../../../virt/kvm common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ - $(KVM)/eventfd.o + $(KVM)/eventfd.o $(KVM)/vfio.o CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 638c6d9..b34220d 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) { #ifdef CONFIG_PPC64 - INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables); INIT_LIST_HEAD(&kvm->arch.rtas_tokens); #endif diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 54cf9bc..2c2d103 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -14,6 +14,7 @@ * * Copyright 2010 Paul Mackerras, IBM Corp. 
<paulus@au1.ibm.com> * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> + * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> */ #include <linux/types.h> @@ -36,28 +37,69 @@ #include <asm/ppc-opcode.h> #include <asm/kvm_host.h> #include <asm/udbg.h> +#include <asm/iommu.h> +#include <asm/tce.h> -#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) +static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) +{ + return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; +} -static long kvmppc_stt_npages(unsigned long window_size) +static unsigned long kvmppc_stt_pages(unsigned long tce_pages) { - return ALIGN((window_size >> SPAPR_TCE_SHIFT) - * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; + unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + + (tce_pages * sizeof(struct page *)); + + return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; } -static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) +static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) { - struct kvm *kvm = stt->kvm; - int i; + long ret = 0; - mutex_lock(&kvm->lock); - list_del(&stt->list); - for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) + if (!current || !current->mm) + return ret; /* process exited */ + + down_write(¤t->mm->mmap_sem); + + if (inc) { + unsigned long locked, lock_limit; + + locked = current->mm->locked_vm + stt_pages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + ret = -ENOMEM; + else + current->mm->locked_vm += stt_pages; + } else { + if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) + stt_pages = current->mm->locked_vm; + + current->mm->locked_vm -= stt_pages; + } + + pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, + inc ? '+' : '-', + stt_pages << PAGE_SHIFT, + current->mm->locked_vm << PAGE_SHIFT, + rlimit(RLIMIT_MEMLOCK), + ret ? 
" - exceeded" : ""); + + up_write(¤t->mm->mmap_sem); + + return ret; +} + +static void release_spapr_tce_table(struct rcu_head *head) +{ + struct kvmppc_spapr_tce_table *stt = container_of(head, + struct kvmppc_spapr_tce_table, rcu); + unsigned long i, npages = kvmppc_tce_pages(stt->size); + + for (i = 0; i < npages; i++) __free_page(stt->pages[i]); - kfree(stt); - mutex_unlock(&kvm->lock); - kvm_put_kvm(kvm); + kfree(stt); } static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; struct page *page; - if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) + if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) return VM_FAULT_SIGBUS; page = stt->pages[vmf->pgoff]; @@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) { struct kvmppc_spapr_tce_table *stt = filp->private_data; - release_spapr_tce_table(stt); + list_del_rcu(&stt->list); + + kvm_put_kvm(stt->kvm); + + kvmppc_account_memlimit( + kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); + call_rcu(&stt->rcu, release_spapr_tce_table); + return 0; } @@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = { }; long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, - struct kvm_create_spapr_tce *args) + struct kvm_create_spapr_tce_64 *args) { struct kvmppc_spapr_tce_table *stt = NULL; - long npages; + unsigned long npages, size; int ret = -ENOMEM; int i; + if (!args->size) + return -EINVAL; + /* Check this LIOBN hasn't been previously allocated */ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { if (stt->liobn == args->liobn) return -EBUSY; } - npages = kvmppc_stt_npages(args->window_size); + size = args->size; + npages = kvmppc_tce_pages(size); + ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); + if (ret) { + stt = NULL; + goto fail; + } stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), GFP_KERNEL); @@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; stt->liobn = args->liobn; - stt->window_size = args->window_size; + stt->page_shift = args->page_shift; + stt->offset = args->offset; + stt->size = size; stt->kvm = kvm; for (i = 0; i < npages; i++) { @@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, kvm_get_kvm(kvm); mutex_lock(&kvm->lock); - list_add(&stt->list, &kvm->arch.spapr_tce_tables); + list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); mutex_unlock(&kvm->lock); @@ -148,3 +208,59 @@ fail: } return ret; } + +long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_list, unsigned long npages) +{ + struct kvmppc_spapr_tce_table *stt; + long i, ret = H_SUCCESS, idx; + unsigned long entry, ua = 0; + u64 __user *tces, tce; + + stt = kvmppc_find_table(vcpu, liobn); + if (!stt) + return H_TOO_HARD; + + entry = ioba >> stt->page_shift; + /* + * SPAPR spec says that the maximum size of the list is 512 TCEs + * so the whole table fits in 4K page + */ + if (npages > 512) + return H_PARAMETER; + + if (tce_list & (SZ_4K - 1)) + return H_PARAMETER; + + ret = kvmppc_ioba_validate(stt, ioba, npages); + if (ret != H_SUCCESS) + return ret; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { + ret = H_TOO_HARD; + goto unlock_exit; + } + tces = (u64 __user *) ua; + + for (i = 0; i < npages; ++i) { + if 
(get_user(tce, tces + i)) { + ret = H_TOO_HARD; + goto unlock_exit; + } + tce = be64_to_cpu(tce); + + ret = kvmppc_tce_validate(stt, tce); + if (ret != H_SUCCESS) + goto unlock_exit; + + kvmppc_tce_put(stt, entry + i, tce); + } + +unlock_exit: + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return ret; +} +EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 89e96b3..44be73e 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -14,6 +14,7 @@ * * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> + * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> */ #include <linux/types.h> @@ -30,76 +31,321 @@ #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> #include <asm/mmu-hash64.h> +#include <asm/mmu_context.h> #include <asm/hvcall.h> #include <asm/synch.h> #include <asm/ppc-opcode.h> #include <asm/kvm_host.h> #include <asm/udbg.h> +#include <asm/iommu.h> +#include <asm/tce.h> +#include <asm/iommu.h> #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) -/* WARNING: This will be called in real-mode on HV KVM and virtual +/* + * Finds a TCE table descriptor by LIOBN. + * + * WARNING: This will be called in real or virtual mode on HV KVM and virtual * mode on PR KVM */ -long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, - unsigned long ioba, unsigned long tce) +struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, + unsigned long liobn) { struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; + list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list) + if (stt->liobn == liobn) + return stt; + + return NULL; +} +EXPORT_SYMBOL_GPL(kvmppc_find_table); + +/* + * Validates IO address. + * + * WARNING: This will be called in real-mode on HV KVM and virtual + * mode on PR KVM + */ +long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, + unsigned long ioba, unsigned long npages) +{ + unsigned long mask = (1ULL << stt->page_shift) - 1; + unsigned long idx = ioba >> stt->page_shift; + + if ((ioba & mask) || (idx < stt->offset) || + (idx - stt->offset + npages > stt->size) || + (idx + npages < idx)) + return H_PARAMETER; + + return H_SUCCESS; +} +EXPORT_SYMBOL_GPL(kvmppc_ioba_validate); + +/* + * Validates TCE address. + * At the moment flags and page mask are validated. + * As the host kernel does not access those addresses (just puts them + * to the table and user space is supposed to process them), we can skip + * checking other things (such as TCE is a guest RAM address or the page + * was actually allocated). + * + * WARNING: This will be called in real-mode on HV KVM and virtual + * mode on PR KVM + */ +long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) +{ + unsigned long page_mask = ~((1ULL << stt->page_shift) - 1); + unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ); + + if (tce & mask) + return H_PARAMETER; + + return H_SUCCESS; +} +EXPORT_SYMBOL_GPL(kvmppc_tce_validate); + +/* Note on the use of page_address() in real mode, + * + * It is safe to use page_address() in real mode on ppc64 because + * page_address() is always defined as lowmem_page_address() + * which returns __va(PFN_PHYS(page_to_pfn(page))) which is arithmetic + * operation and does not access page struct. 
+ * + * Theoretically page_address() could be defined different + * but either WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL + * would have to be enabled. + * WANT_PAGE_VIRTUAL is never enabled on ppc32/ppc64, + * HASHED_PAGE_VIRTUAL could be enabled for ppc32 only and only + * if CONFIG_HIGHMEM is defined. As CONFIG_SPARSEMEM_VMEMMAP + * is not expected to be enabled on ppc32, page_address() + * is safe for ppc32 as well. + * + * WARNING: This will be called in real-mode on HV KVM and virtual + * mode on PR KVM + */ +static u64 *kvmppc_page_address(struct page *page) +{ +#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL) +#error TODO: fix to avoid page_address() here +#endif + return (u64 *) page_address(page); +} + +/* + * Handles TCE requests for emulated devices. + * Puts guest TCE values to the table and expects user space to convert them. + * Called in both real and virtual modes. + * Cannot fail so kvmppc_tce_validate must be called before it. + * + * WARNING: This will be called in real-mode on HV KVM and virtual + * mode on PR KVM + */ +void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, + unsigned long idx, unsigned long tce) +{ + struct page *page; + u64 *tbl; + + idx -= stt->offset; + page = stt->pages[idx / TCES_PER_PAGE]; + tbl = kvmppc_page_address(page); + + tbl[idx % TCES_PER_PAGE] = tce; +} +EXPORT_SYMBOL_GPL(kvmppc_tce_put); + +long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, + unsigned long *ua, unsigned long **prmap) +{ + unsigned long gfn = gpa >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + memslot = search_memslots(kvm_memslots(kvm), gfn); + if (!memslot) + return -EINVAL; + + *ua = __gfn_to_hva_memslot(memslot, gfn) | + (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + if (prmap) + *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; +#endif + + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce) +{ + struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); + long ret; + /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ /* liobn, ioba, tce); */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == liobn) { - unsigned long idx = ioba >> SPAPR_TCE_SHIFT; - struct page *page; - u64 *tbl; - - /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */ - /* liobn, stt, stt->window_size); */ - if (ioba >= stt->window_size) - return H_PARAMETER; - - page = stt->pages[idx / TCES_PER_PAGE]; - tbl = (u64 *)page_address(page); - - /* FIXME: Need to validate the TCE itself */ - /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */ - tbl[idx % TCES_PER_PAGE] = tce; - return H_SUCCESS; - } - } + if (!stt) + return H_TOO_HARD; + + ret = kvmppc_ioba_validate(stt, ioba, 1); + if (ret != H_SUCCESS) + return ret; - /* Didn't find the liobn, punt it to userspace */ - return H_TOO_HARD; + ret = kvmppc_tce_validate(stt, tce); + if (ret != H_SUCCESS) + return ret; + + kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); + + return H_SUCCESS; } EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); -long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, - unsigned long ioba) +static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, + unsigned long ua, unsigned long *phpa) +{ + pte_t *ptep, pte; + unsigned shift = 0; + + ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift); + if (!ptep 
|| !pte_present(*ptep)) + return -ENXIO; + pte = *ptep; + + if (!shift) + shift = PAGE_SHIFT; + + /* Avoid handling anything potentially complicated in realmode */ + if (shift > PAGE_SHIFT) + return -EAGAIN; + + if (!pte_young(pte)) + return -EAGAIN; + + *phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) | + (ua & ~PAGE_MASK); + + return 0; +} + +long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_list, unsigned long npages) { - struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; + long i, ret = H_SUCCESS; + unsigned long tces, entry, ua = 0; + unsigned long *rmap = NULL; + + stt = kvmppc_find_table(vcpu, liobn); + if (!stt) + return H_TOO_HARD; + + entry = ioba >> stt->page_shift; + /* + * The spec says that the maximum size of the list is 512 TCEs + * so the whole table addressed resides in 4K page + */ + if (npages > 512) + return H_PARAMETER; + + if (tce_list & (SZ_4K - 1)) + return H_PARAMETER; + + ret = kvmppc_ioba_validate(stt, ioba, npages); + if (ret != H_SUCCESS) + return ret; - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == liobn) { - unsigned long idx = ioba >> SPAPR_TCE_SHIFT; - struct page *page; - u64 *tbl; + if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) + return H_TOO_HARD; - if (ioba >= stt->window_size) - return H_PARAMETER; + rmap = (void *) vmalloc_to_phys(rmap); - page = stt->pages[idx / TCES_PER_PAGE]; - tbl = (u64 *)page_address(page); + /* + * Synchronize with the MMU notifier callbacks in + * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). + * While we have the rmap lock, code running on other CPUs + * cannot finish unmapping the host real page that backs + * this guest real page, so we are OK to access the host + * real page. 
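The parameter checks in kvmppc_h_put_tce_indirect() and its real-mode twin above are easy to model outside the kernel: a list of at most 512 TCEs, a 4K-aligned tce_list, and an ioba that falls inside the table window. A minimal standalone sketch of those checks follows; the window geometry and return-code values are invented stand-ins for illustration, not copied from hvcall.h.

#include <stdint.h>
#include <stdio.h>

#define H_SUCCESS    0
#define H_PARAMETER  (-4)   /* simplified stand-ins for the hvcall.h codes */
#define SZ_4K        4096

/* Hypothetical TCE window: 4K IOMMU pages, no offset, 2048 entries. */
static const unsigned int page_shift = 12;
static const uint64_t win_offset = 0;      /* first valid entry index */
static const uint64_t win_size = 2048;     /* number of entries */

/* Same shape as kvmppc_ioba_validate(): alignment + range + overflow. */
static long ioba_validate(uint64_t ioba, uint64_t npages)
{
	uint64_t mask = (1ULL << page_shift) - 1;
	uint64_t idx = ioba >> page_shift;

	if ((ioba & mask) || idx < win_offset ||
	    idx - win_offset + npages > win_size ||
	    idx + npages < idx)                /* wrap-around */
		return H_PARAMETER;
	return H_SUCCESS;
}

/* The extra checks H_PUT_TCE_INDIRECT applies to the TCE list itself. */
static long indirect_args_ok(uint64_t ioba, uint64_t tce_list, uint64_t npages)
{
	if (npages > 512)            /* list must fit in one 4K page */
		return H_PARAMETER;
	if (tce_list & (SZ_4K - 1))  /* list page must be 4K aligned */
		return H_PARAMETER;
	return ioba_validate(ioba, npages);
}

int main(void)
{
	printf("%ld\n", indirect_args_ok(0x1000, 0x20000, 8));     /* H_SUCCESS */
	printf("%ld\n", indirect_args_ok(0x1000, 0x20010, 8));     /* unaligned list */
	printf("%ld\n", indirect_args_ok(0x7ff000, 0x20000, 600)); /* more than 512 TCEs */
	return 0;
}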
+ */ + lock_rmap(rmap); + if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { + ret = H_TOO_HARD; + goto unlock_exit; + } + + for (i = 0; i < npages; ++i) { + unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); + + ret = kvmppc_tce_validate(stt, tce); + if (ret != H_SUCCESS) + goto unlock_exit; - vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; - return H_SUCCESS; - } + kvmppc_tce_put(stt, entry + i, tce); } - /* Didn't find the liobn, punt it to userspace */ - return H_TOO_HARD; +unlock_exit: + unlock_rmap(rmap); + + return ret; +} + +long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_value, unsigned long npages) +{ + struct kvmppc_spapr_tce_table *stt; + long i, ret; + + stt = kvmppc_find_table(vcpu, liobn); + if (!stt) + return H_TOO_HARD; + + ret = kvmppc_ioba_validate(stt, ioba, npages); + if (ret != H_SUCCESS) + return ret; + + /* Check permission bits only to allow userspace poison TCE for debug */ + if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) + return H_PARAMETER; + + for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) + kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); + + return H_SUCCESS; +} +EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); + +long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba) +{ + struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); + long ret; + unsigned long idx; + struct page *page; + u64 *tbl; + + if (!stt) + return H_TOO_HARD; + + ret = kvmppc_ioba_validate(stt, ioba, 1); + if (ret != H_SUCCESS) + return ret; + + idx = (ioba >> stt->page_shift) - stt->offset; + page = stt->pages[idx / TCES_PER_PAGE]; + tbl = (u64 *)page_address(page); + + vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; + + return H_SUCCESS; } EXPORT_SYMBOL_GPL(kvmppc_h_get_tce); + +#endif /* KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index baeddb0..84fb4fc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -81,6 +81,17 @@ static int target_smt_mode; module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); +#ifdef CONFIG_KVM_XICS +static struct kernel_param_ops module_param_ops = { + .set = param_set_int, + .get = param_get_int, +}; + +module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); +#endif + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -114,11 +125,11 @@ static bool kvmppc_ipi_thread(int cpu) static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) { int cpu; - wait_queue_head_t *wqp; + struct swait_queue_head *wqp; wqp = kvm_arch_vcpu_wq(vcpu); - if (waitqueue_active(wqp)) { - wake_up_interruptible(wqp); + if (swait_active(wqp)) { + swake_up(wqp); ++vcpu->stat.halt_wakeup; } @@ -701,8 +712,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) tvcpu->arch.prodded = 1; smp_mb(); if (vcpu->arch.ceded) { - if (waitqueue_active(&vcpu->wq)) { - wake_up_interruptible(&vcpu->wq); + if (swait_active(&vcpu->wq)) { + swake_up(&vcpu->wq); vcpu->stat.halt_wakeup++; } } @@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (kvmppc_xics_enabled(vcpu)) { ret = kvmppc_xics_hcall(vcpu, req); break; - } /* fallthrough */ + } + return RESUME_HOST; + case H_PUT_TCE: + ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4), + 
kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_PUT_TCE_INDIRECT: + ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; + case H_STUFF_TCE: + ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; default: return RESUME_HOST; } @@ -1459,7 +1494,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); - init_waitqueue_head(&vcore->wq); + init_swait_queue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; vcore->first_vcpuid = core * threads_per_subcore; @@ -2279,6 +2314,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) } /* + * Clear core from the list of active host cores as we are about to + * enter the guest. Only do this if it is the primary thread of the + * core (not if a subcore) that is entering the guest. + */ +static inline void kvmppc_clear_host_core(int cpu) +{ + int core; + + if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu)) + return; + /* + * Memory barrier can be omitted here as we will do a smp_wmb() + * later in kvmppc_start_thread and we need ensure that state is + * visible to other CPUs only after we enter guest. + */ + core = cpu >> threads_shift; + kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0; +} + +/* + * Advertise this core as an active host core since we exited the guest + * Only need to do this if it is the primary thread of the core that is + * exiting. + */ +static inline void kvmppc_set_host_core(int cpu) +{ + int core; + + if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu)) + return; + + /* + * Memory barrier can be omitted here because we do a spin_unlock + * immediately after this which provides the memory barrier. + */ + core = cpu >> threads_shift; + kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1; +} + +/* * Run a set of guest threads on a physical core. * Called with vc->lock held. 
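kvmppc_clear_host_core() and kvmppc_set_host_core() above only touch the in_host flag from the primary thread of a core and index rm_core[] by cpu >> threads_shift. A small standalone illustration of that mapping; the SMT8 layout (threads_shift == 3) is an assumption made for the example, not something stated in the hunk.

#include <stdio.h>

int main(void)
{
	/* Assume 8 threads per core, i.e. threads_shift == 3. */
	const int threads_shift = 3;
	const int threads_per_core = 1 << threads_shift;
	int cpu;

	for (cpu = 0; cpu < 24; cpu++) {
		int core = cpu >> threads_shift;           /* rm_core[] index */
		int thread = cpu & (threads_per_core - 1); /* cpu_thread_in_core() */

		if (thread == 0)  /* only the primary thread updates in_host */
			printf("cpu %2d -> core %d (primary thread)\n", cpu, core);
	}
	return 0;
}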
*/ @@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } } + kvmppc_clear_host_core(pcpu); + /* Start all the threads */ active = 0; for (sub = 0; sub < core_info.n_subcores; ++sub) { @@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_ipi_thread(pcpu + i); } + kvmppc_set_host_core(pcpu); + spin_unlock(&vc->lock); /* make sure updates to secondary vcpu structs are visible now */ @@ -2531,10 +2610,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { struct kvm_vcpu *vcpu; int do_sleep = 1; + DECLARE_SWAITQUEUE(wait); - DEFINE_WAIT(wait); - - prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); /* * Check one last time for pending exceptions and ceded state after @@ -2548,7 +2626,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) } if (!do_sleep) { - finish_wait(&vc->wq, &wait); + finish_swait(&vc->wq, &wait); return; } @@ -2556,7 +2634,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); - finish_wait(&vc->wq, &wait); + finish_swait(&vc->wq, &wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); @@ -2612,7 +2690,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { - wake_up(&vc->wq); + swake_up(&vc->wq); } } @@ -2984,6 +3062,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) goto out_srcu; } +#ifdef CONFIG_KVM_XICS +static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + unsigned long cpu = (long)hcpu; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + kvmppc_set_host_core(cpu); + break; + +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + case CPU_DEAD_FROZEN: + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + kvmppc_clear_host_core(cpu); + break; +#endif + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block kvmppc_cpu_notifier = { + .notifier_call = kvmppc_cpu_notify, +}; + +/* + * Allocate a per-core structure for managing state about which cores are + * running in the host versus the guest and for exchanging data between + * real mode KVM and CPU running in the host. + * This is only done for the first VM. + * The allocated structure stays even if all VMs have stopped. + * It is only freed when the kvm-hv module is unloaded. + * It's OK for this routine to fail, we just don't support host + * core operations like redirecting H_IPI wakeups. + */ +void kvmppc_alloc_host_rm_ops(void) +{ + struct kvmppc_host_rm_ops *ops; + unsigned long l_ops; + int cpu, core; + int size; + + /* Not the first time here ? */ + if (kvmppc_host_rm_ops_hv != NULL) + return; + + ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL); + if (!ops) + return; + + size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core); + ops->rm_core = kzalloc(size, GFP_KERNEL); + + if (!ops->rm_core) { + kfree(ops); + return; + } + + get_online_cpus(); + + for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) { + if (!cpu_online(cpu)) + continue; + + core = cpu >> threads_shift; + ops->rm_core[core].rm_state.in_host = 1; + } + + ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv; + + /* + * Make the contents of the kvmppc_host_rm_ops structure visible + * to other CPUs before we assign it to the global variable. 
+ * Do an atomic assignment (no locks used here), but if someone + * beats us to it, just free our copy and return. + */ + smp_wmb(); + l_ops = (unsigned long) ops; + + if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) { + put_online_cpus(); + kfree(ops->rm_core); + kfree(ops); + return; + } + + register_cpu_notifier(&kvmppc_cpu_notifier); + + put_online_cpus(); +} + +void kvmppc_free_host_rm_ops(void) +{ + if (kvmppc_host_rm_ops_hv) { + unregister_cpu_notifier(&kvmppc_cpu_notifier); + kfree(kvmppc_host_rm_ops_hv->rm_core); + kfree(kvmppc_host_rm_ops_hv); + kvmppc_host_rm_ops_hv = NULL; + } +} +#endif + static int kvmppc_core_init_vm_hv(struct kvm *kvm) { unsigned long lpcr, lpid; @@ -2996,6 +3182,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) return -ENOMEM; kvm->arch.lpid = lpid; + kvmppc_alloc_host_rm_ops(); + /* * Since we don't flush the TLB when tearing down a VM, * and this lpid might have previously been used, @@ -3229,6 +3417,7 @@ static int kvmppc_book3s_init_hv(void) static void kvmppc_book3s_exit_hv(void) { + kvmppc_free_host_rm_ops(); kvmppc_hv_ops = NULL; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index fd7006b..5f0380d 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap) kvmhv_interrupt_vcore(vc, ee); } } + +struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; +EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv); diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 24f5807..980d8a6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -17,12 +17,16 @@ #include <asm/xics.h> #include <asm/debug.h> #include <asm/synch.h> +#include <asm/cputhreads.h> #include <asm/ppc-opcode.h> #include "book3s_xics.h" #define DEBUG_PASSUP +int h_ipi_redirect = 1; +EXPORT_SYMBOL(h_ipi_redirect); + static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); @@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics, /* -- ICP routines -- */ +#ifdef CONFIG_SMP +static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) +{ + int hcpu; + + hcpu = hcore << threads_shift; + kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; + smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); + icp_native_cause_ipi_rm(hcpu); +} +#else +static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } +#endif + +/* + * We start the search from our current CPU Id in the core map + * and go in a circle until we get back to our ID looking for a + * core that is running in host context and that hasn't already + * been targeted for another rm_host_ops. + * + * In the future, could consider using a fairer algorithm (one + * that distributes the IPIs better) + * + * Returns -1, if no CPU could be found in the host + * Else, returns a CPU Id which has been reserved for use + */ +static inline int grab_next_hostcore(int start, + struct kvmppc_host_rm_core *rm_core, int max, int action) +{ + bool success; + int core; + union kvmppc_rm_state old, new; + + for (core = start + 1; core < max; core++) { + old = new = READ_ONCE(rm_core[core].rm_state); + + if (!old.in_host || old.rm_action) + continue; + + /* Try to grab this host core if not taken already. 
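find_available_hostcore() above walks the core map in a circle, from my_core + 1 to the last core and then from core 0 back up to my_core, claiming the first core still running in host context with a cmpxchg. A rough userspace model of that search is sketched below; it uses C11 atomics in place of cmpxchg64 and collapses the in_host/rm_action pair into a single invented flag, so it only mirrors the search order and the claim-by-compare-and-swap idea.

#include <stdatomic.h>
#include <stdio.h>

#define NR_CORES 8

/* 0 = core is idle in the host, nonzero = already claimed for an action. */
static atomic_int core_state[NR_CORES];

/* Scan (start, max) and try to claim the first free core, like grab_next_hostcore(). */
static int grab_next(int start, int max, int action)
{
	int core;

	for (core = start + 1; core < max; core++) {
		int expected = 0;

		if (atomic_compare_exchange_strong(&core_state[core],
						   &expected, action))
			return core;   /* claimed this core */
	}
	return -1;
}

/* Circular search: first the cores after mine, then wrap to the ones before. */
static int find_available(int my_core, int action)
{
	int core = grab_next(my_core, NR_CORES, action);

	if (core == -1)
		core = grab_next(-1, my_core, action);
	return core;
}

int main(void)
{
	atomic_store(&core_state[3], 1);   /* pretend core 3 is already taken */
	printf("claimed core %d\n", find_available(2, 1));   /* skips 3, claims 4 */
	printf("claimed core %d\n", find_available(7, 1));   /* wraps around, claims 0 */
	return 0;
}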
*/ + new.rm_action = action; + + success = cmpxchg64(&rm_core[core].rm_state.raw, + old.raw, new.raw) == old.raw; + if (success) { + /* + * Make sure that the store to the rm_action is made + * visible before we return to caller (and the + * subsequent store to rm_data) to synchronize with + * the IPI handler. + */ + smp_wmb(); + return core; + } + } + + return -1; +} + +static inline int find_available_hostcore(int action) +{ + int core; + int my_core = smp_processor_id() >> threads_shift; + struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core; + + core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action); + if (core == -1) + core = grab_next_hostcore(core, rm_core, my_core, action); + + return core; +} + static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, struct kvm_vcpu *this_vcpu) { struct kvmppc_icp *this_icp = this_vcpu->arch.icp; int cpu; + int hcore; /* Mark the target VCPU as having an interrupt pending */ vcpu->stat.queue_intr++; @@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, return; } - /* Check if the core is loaded, if not, too hard */ + /* + * Check if the core is loaded, + * if not, find an available host core to post to wake the VCPU, + * if we can't find one, set up state to eventually return too hard. + */ cpu = vcpu->arch.thread_cpu; if (cpu < 0 || cpu >= nr_cpu_ids) { - this_icp->rm_action |= XICS_RM_KICK_VCPU; - this_icp->rm_kick_target = vcpu; + hcore = -1; + if (kvmppc_host_rm_ops_hv && h_ipi_redirect) + hcore = find_available_hostcore(XICS_RM_KICK_VCPU); + if (hcore != -1) { + icp_send_hcore_msg(hcore, vcpu); + } else { + this_icp->rm_action |= XICS_RM_KICK_VCPU; + this_icp->rm_kick_target = vcpu; + } return; } @@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) bail: return check_too_hard(xics, icp); } + +/* --- Non-real mode XICS-related built-in routines --- */ + +/** + * Host Operations poked by RM KVM + */ +static void rm_host_ipi_action(int action, void *data) +{ + switch (action) { + case XICS_RM_KICK_VCPU: + kvmppc_host_rm_ops_hv->vcpu_kick(data); + break; + default: + WARN(1, "Unexpected rm_action=%d data=%p\n", action, data); + break; + } + +} + +void kvmppc_xics_ipi_action(void) +{ + int core; + unsigned int cpu = smp_processor_id(); + struct kvmppc_host_rm_core *rm_corep; + + core = cpu >> threads_shift; + rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core]; + + if (rm_corep->rm_data) { + rm_host_ipi_action(rm_corep->rm_state.rm_action, + rm_corep->rm_data); + /* Order these stores against the real mode KVM */ + rm_corep->rm_data = NULL; + smp_wmb(); + rm_corep->rm_state.rm_action = 0; + } +} diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 6ee26de..85b32f1 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1370,6 +1370,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r6, VCPU_ACOP(r9) stw r7, VCPU_GUEST_PID(r9) std r8, VCPU_WORT(r9) + /* + * Restore various registers to 0, where non-zero values + * set by the guest could disrupt the host. 
+ */ + li r0, 0 + mtspr SPRN_IAMR, r0 + mtspr SPRN_CIABR, r0 + mtspr SPRN_DAWRX, r0 + mtspr SPRN_TCSCR, r0 + mtspr SPRN_WORT, r0 + /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ + li r0, 1 + sldi r0, r0, 31 + mtspr SPRN_MMCRS, r0 8: /* Save and reset AMR and UAMOR before turning on the MMU */ @@ -2006,8 +2020,8 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table - .long 0 /* 0x138 */ - .long 0 /* 0x13c */ + .long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table + .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table .long 0 /* 0x140 */ .long 0 /* 0x144 */ .long 0 /* 0x148 */ diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f2c75a1..02176fd 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu) +{ + unsigned long liobn = kvmppc_get_gpr(vcpu, 4); + unsigned long ioba = kvmppc_get_gpr(vcpu, 5); + unsigned long tce = kvmppc_get_gpr(vcpu, 6); + unsigned long npages = kvmppc_get_gpr(vcpu, 7); + long rc; + + rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba, + tce, npages); + if (rc == H_TOO_HARD) + return EMULATE_FAIL; + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + +static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu) +{ + unsigned long liobn = kvmppc_get_gpr(vcpu, 4); + unsigned long ioba = kvmppc_get_gpr(vcpu, 5); + unsigned long tce_value = kvmppc_get_gpr(vcpu, 6); + unsigned long npages = kvmppc_get_gpr(vcpu, 7); + long rc; + + rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages); + if (rc == H_TOO_HARD) + return EMULATE_FAIL; + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { long rc = kvmppc_xics_hcall(vcpu, cmd); @@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return kvmppc_h_pr_bulk_remove(vcpu); case H_PUT_TCE: return kvmppc_h_pr_put_tce(vcpu); + case H_PUT_TCE_INDIRECT: + return kvmppc_h_pr_put_tce_indirect(vcpu); + case H_STUFF_TCE: + return kvmppc_h_pr_stuff_tce(vcpu); case H_CEDE: kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE); kvm_vcpu_block(vcpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a3b182d..19aa59b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -33,6 +33,7 @@ #include <asm/tlbflush.h> #include <asm/cputhreads.h> #include <asm/irqflags.h> +#include <asm/iommu.h> #include "timing.h" #include "irq.h" #include "../mm/mmu_decl.h" @@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm) unsigned int i; struct kvm_vcpu *vcpu; +#ifdef CONFIG_KVM_XICS + /* + * We call kick_all_cpus_sync() to ensure that all + * CPUs have executed any pending IPIs before we + * continue and free VCPUs structures below. 
+ */ + if (is_kvmppc_hv_enabled(kvm)) + kick_all_cpus_sync(); +#endif + kvm_for_each_vcpu(i, vcpu, kvm) kvm_arch_vcpu_free(vcpu); @@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: + case KVM_CAP_SPAPR_TCE_64: case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: @@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PPC_GET_SMMU_INFO: r = 1; break; + case KVM_CAP_SPAPR_MULTITCE: + r = 1; + break; #endif default: r = 0; @@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } #ifdef CONFIG_PPC_BOOK3S_64 + case KVM_CREATE_SPAPR_TCE_64: { + struct kvm_create_spapr_tce_64 create_tce_64; + + r = -EFAULT; + if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64))) + goto out; + if (create_tce_64.flags) { + r = -EINVAL; + goto out; + } + r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64); + goto out; + } case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; + struct kvm_create_spapr_tce_64 create_tce_64; r = -EFAULT; if (copy_from_user(&create_tce, argp, sizeof(create_tce))) goto out; - r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); + + create_tce_64.liobn = create_tce.liobn; + create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K; + create_tce_64.offset = 0; + create_tce_64.size = create_tce.window_size >> + IOMMU_PAGE_SHIFT_4K; + create_tce_64.flags = 0; + r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64); goto out; } case KVM_PPC_GET_SMMU_INFO: { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ba59d59..1005281 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -255,8 +255,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, if (ret < 0) break; + #ifdef CONFIG_DEBUG_PAGEALLOC - if ((paddr >> PAGE_SHIFT) < linear_map_hash_count) + if (debug_pagealloc_enabled() && + (paddr >> PAGE_SHIFT) < linear_map_hash_count) linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80; #endif /* CONFIG_DEBUG_PAGEALLOC */ } @@ -512,17 +514,17 @@ static void __init htab_init_page_sizes(void) if (mmu_has_feature(MMU_FTR_16M_PAGE)) memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); - found: -#ifndef CONFIG_DEBUG_PAGEALLOC - /* - * Pick a size for the linear mapping. Currently, we only support - * 16M, 1M and 4K which is the default - */ - if (mmu_psize_defs[MMU_PAGE_16M].shift) - mmu_linear_psize = MMU_PAGE_16M; - else if (mmu_psize_defs[MMU_PAGE_1M].shift) - mmu_linear_psize = MMU_PAGE_1M; -#endif /* CONFIG_DEBUG_PAGEALLOC */ +found: + if (!debug_pagealloc_enabled()) { + /* + * Pick a size for the linear mapping. 
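The KVM_CREATE_SPAPR_TCE_64 plumbing above also shows how the legacy ioctl is mapped onto the new one (size counted in 4K IOMMU pages, offset and page_shift made explicit). Below is a hedged userspace sketch of calling the new ioctl; it assumes a post-patch <linux/kvm.h> that exposes struct kvm_create_spapr_tce_64 with the field names used in the hunk, and the liobn and window geometry are invented for illustration.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_create_spapr_tce_64 args;
	int kvm_fd, vm_fd, tablefd;

	kvm_fd = open("/dev/kvm", O_RDWR);
	if (kvm_fd < 0) {
		perror("/dev/kvm");
		return 1;
	}
	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
	if (vm_fd < 0) {
		perror("KVM_CREATE_VM");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.liobn = 0x80000001;	/* invented logical I/O bus number */
	args.page_shift = 12;		/* 4K IOMMU pages, as the old ioctl assumed */
	args.offset = 0;		/* window starts at DMA address 0 */
	args.size = 1 << 16;		/* 64K TCEs -> a 256MB DMA window */
	args.flags = 0;			/* must be zero, see the handler above */

	/* On success this returns a new fd backing the guest-visible TCE table. */
	tablefd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);
	if (tablefd < 0)
		perror("KVM_CREATE_SPAPR_TCE_64");
	else
		printf("TCE table fd: %d\n", tablefd);
	return 0;
}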
Currently, we only + * support 16M, 1M and 4K which is the default + */ + if (mmu_psize_defs[MMU_PAGE_16M].shift) + mmu_linear_psize = MMU_PAGE_16M; + else if (mmu_psize_defs[MMU_PAGE_1M].shift) + mmu_linear_psize = MMU_PAGE_1M; + } #ifdef CONFIG_PPC_64K_PAGES /* @@ -721,10 +723,12 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); #ifdef CONFIG_DEBUG_PAGEALLOC - linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, - 1, ppc64_rma_size)); - memset(linear_map_hash_slots, 0, linear_map_hash_count); + if (debug_pagealloc_enabled()) { + linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + linear_map_hash_slots = __va(memblock_alloc_base( + linear_map_hash_count, 1, ppc64_rma_size)); + memset(linear_map_hash_slots, 0, linear_map_hash_count); + } #endif /* CONFIG_DEBUG_PAGEALLOC */ /* On U3 based machines, we need to reserve the DART area and diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index a10be66..c2b7716 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -112,10 +112,10 @@ void __init MMU_setup(void) if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } -#ifdef CONFIG_DEBUG_PAGEALLOC - __map_without_bats = 1; - __map_without_ltlbs = 1; -#endif + if (debug_pagealloc_enabled()) { + __map_without_bats = 1; + __map_without_ltlbs = 1; + } } /* diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d0f0a51..f078a1f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -541,7 +541,7 @@ static int __init add_system_ram_resources(void) res->name = "System RAM"; res->start = base; res->end = base + size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; WARN_ON(request_resource(&iomem_resource, res) < 0); } } diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 4e4efbc..9ca6fe1 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -118,8 +118,7 @@ static void destroy_pagetable_page(struct mm_struct *mm) /* drop all the pending references */ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; /* We allow PTE_FRAG_NR fragments from a PTE page */ - count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count); - if (!count) { + if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) { pgtable_page_dtor(page); free_hot_cold_page(page, 0); } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 83dfd79..de37ff4 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) } #endif /* CONFIG_DEBUG_VM */ +unsigned long vmalloc_to_phys(void *va) +{ + unsigned long pfn = vmalloc_to_pfn(va); + + BUG_ON(!pfn); + return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va); +} +EXPORT_SYMBOL_GPL(vmalloc_to_phys); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdf2123..d9cc66c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -403,7 +403,7 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) * count. 
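The vmalloc_to_phys() helper added to pgtable.c above simply combines the backing page's frame number with the offset of the address inside its page. A trivial standalone model of that arithmetic, with an invented pfn and 4K pages:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	/* Pretend vmalloc_to_pfn() reported that this vmalloc page sits on pfn 0x12345. */
	uint64_t pfn = 0x12345;
	uint64_t va  = 0xc000000000a01123ULL;     /* some vmalloc address */
	uint64_t offset_in_page = va & ~PAGE_MASK;
	uint64_t phys = (pfn << PAGE_SHIFT) + offset_in_page;

	/* 0x12345 << 12 = 0x12345000, plus the in-page offset 0x123. */
	printf("phys = 0x%llx\n", (unsigned long long)phys);   /* prints 0x12345123 */
	return 0;
}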
*/ if (likely(!mm->context.pte_frag)) { - atomic_set(&page->_count, PTE_FRAG_NR); + set_page_count(page, PTE_FRAG_NR); mm->context.pte_frag = ret + PTE_FRAG_SIZE; } spin_unlock(&mm->page_table_lock); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 9f9dfda..3b09ecf 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event) } } -static unsigned long vmalloc_to_phys(void *v) -{ - struct page *p = vmalloc_to_page(v); - - BUG_ON(!p); - return page_to_phys(p) + offset_in_page(v); -} - /* */ struct event_uniq { struct rb_node node; diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 711f3d3..452da23 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -188,7 +188,7 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb; static inline void mpc512x_free_bootmem(struct page *page) { BUG_ON(PageTail(page)); - BUG_ON(atomic_read(&page->_count) > 1); + BUG_ON(page_ref_count(page) > 1); free_reserved_page(page); } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 32274f7..282837a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -47,20 +47,14 @@ static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; -static int cede_offline_enabled __read_mostly = 1; +static bool cede_offline_enabled __read_mostly = true; /* * Enable/disable cede_offline when available. */ static int __init setup_cede_offline(char *str) { - if (!strcmp(str, "off")) - cede_offline_enabled = 0; - else if (!strcmp(str, "on")) - cede_offline_enabled = 1; - else - return 0; - return 1; + return (kstrtobool(str, &cede_offline_enabled) == 0); } __setup("cede_offline=", setup_cede_offline); diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/sysdev/ppc4xx_gpio.c index fc65ad1..d7a7ef13 100644 --- a/arch/powerpc/sysdev/ppc4xx_gpio.c +++ b/arch/powerpc/sysdev/ppc4xx_gpio.c @@ -78,7 +78,7 @@ static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio) struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); struct ppc4xx_gpio __iomem *regs = mm_gc->regs; - return in_be32(&regs->ir) & GPIO_MASK(gpio); + return !!(in_be32(&regs->ir) & GPIO_MASK(gpio)); } static inline void diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c index ff5e732..56ce8ca 100644 --- a/arch/powerpc/sysdev/simple_gpio.c +++ b/arch/powerpc/sysdev/simple_gpio.c @@ -46,7 +46,7 @@ static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio) { struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - return in_8(mm_gc->regs) & u8_pin2mask(gpio); + return !!(in_8(mm_gc->regs) & u8_pin2mask(gpio)); } static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index eae3265..afdf62f 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) icp_native_set_qirr(cpu, IPI_PRIORITY); } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void icp_native_cause_ipi_rm(int cpu) +{ + /* + * Currently not used to send IPIs to another CPU + * on the same core. 
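The ppc4xx_gpio and simple_gpio hunks above wrap the masked register read in !! because a gpio_chip .get callback should report 0 or 1, while the raw mask for the first line of a 32-bit MSB-first register does not survive truncation to int. A standalone illustration follows; GPIO_MASK() here is only an approximation of the ppc4xx convention, not copied from the driver.

#include <stdint.h>
#include <stdio.h>

/* Approximation of the ppc4xx convention: GPIO line 0 is the register MSB. */
#define GPIO_MASK(gpio)  (0x80000000U >> (gpio))

int main(void)
{
	uint32_t ir = 0x80000001;   /* pretend input register: lines 0 and 31 high */

	int raw0  = (int)(ir & GPIO_MASK(0));  /* 0x80000000 does not fit in an int */
	int norm0 = !!(ir & GPIO_MASK(0));     /* normalized to 0 or 1 */

	printf("raw:  %d\n", raw0);    /* not 1 (typically a huge negative value) */
	printf("norm: %d\n", norm0);   /* 1 */
	printf("line 5 raw=%d norm=%d\n",
	       (int)(ir & GPIO_MASK(5)), !!(ir & GPIO_MASK(5)));   /* 0 and 0 */
	return 0;
}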
Only caller is KVM real mode. + * Need the physical address of the XICS to be + * previously saved in kvm_hstate in the paca. + */ + unsigned long xics_phys; + + /* + * Just like the cause_ipi functions, it is required to + * include a full barrier (out8 includes a sync) before + * causing the IPI. + */ + xics_phys = paca[cpu].kvm_hstate.xics_phys; + out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY); +} +#endif + /* * Called when an interrupt is received on an off-line CPU to * clear the interrupt, so that the CPU can go back to nap mode. diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3be9c83..7e3e8a8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -254,12 +254,12 @@ config MARCH_ZEC12 older machines. config MARCH_Z13 - bool "IBM z13" + bool "IBM z13s and z13" select HAVE_MARCH_Z13_FEATURES help - Select this to enable optimizations for IBM z13 (2964 series). - The kernel will be slightly faster but will not work on older - machines. + Select this to enable optimizations for IBM z13s and z13 (2965 and + 2964 series). The kernel will be slightly faster but will not work on + older machines. endchoice @@ -605,8 +605,6 @@ config PCI_NR_MSI PCI devices. source "drivers/pci/Kconfig" -source "drivers/pci/pcie/Kconfig" -source "drivers/pci/hotplug/Kconfig" endif # PCI diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 0b9b95f..48e1a2d 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -27,6 +27,7 @@ #include <linux/cpufeature.h> #include <linux/init.h> #include <linux/spinlock.h> +#include <crypto/xts.h> #include "crypt_s390.h" #define AES_KEYLEN_128 1 @@ -587,6 +588,11 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, { struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; + int err; + + err = xts_check_key(tfm, in_key, key_len); + if (err) + return err; switch (key_len) { case 32: diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h index a0e71a5..5687d62 100644 --- a/arch/s390/include/asm/clp.h +++ b/arch/s390/include/asm/clp.h @@ -4,14 +4,23 @@ /* CLP common request & response block size */ #define CLP_BLK_SIZE PAGE_SIZE +#define CLP_LPS_BASE 0 +#define CLP_LPS_PCI 2 + struct clp_req_hdr { u16 len; u16 cmd; + u32 fmt : 4; + u32 reserved1 : 28; + u64 reserved2; } __packed; struct clp_rsp_hdr { u16 len; u16 rsp; + u32 fmt : 4; + u32 reserved1 : 28; + u64 reserved2; } __packed; /* CLP Response Codes */ @@ -25,4 +34,22 @@ struct clp_rsp_hdr { #define CLP_RC_NODATA 0x0080 /* No data available */ #define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ +/* Store logical-processor characteristics request */ +struct clp_req_slpc { + struct clp_req_hdr hdr; +} __packed; + +struct clp_rsp_slpc { + struct clp_rsp_hdr hdr; + u32 reserved2[4]; + u32 lpif[8]; + u32 reserved3[8]; + u32 lpic[8]; +} __packed; + +struct clp_req_rsp_slpc { + struct clp_req_slpc request; + struct clp_rsp_slpc response; +} __packed; + #endif diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h new file mode 100644 index 0000000..d054c1b --- /dev/null +++ b/arch/s390/include/asm/gmap.h @@ -0,0 +1,64 @@ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 
2007, 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _ASM_S390_GMAP_H +#define _ASM_S390_GMAP_H + +/** + * struct gmap_struct - guest address space + * @crst_list: list of all crst tables used in the guest address space + * @mm: pointer to the parent mm_struct + * @guest_to_host: radix tree with guest to host address translation + * @host_to_guest: radix tree with pointer to segment table entries + * @guest_table_lock: spinlock to protect all entries in the guest page table + * @table: pointer to the page directory + * @asce: address space control element for gmap page table + * @pfault_enabled: defines if pfaults are applicable for the guest + */ +struct gmap { + struct list_head list; + struct list_head crst_list; + struct mm_struct *mm; + struct radix_tree_root guest_to_host; + struct radix_tree_root host_to_guest; + spinlock_t guest_table_lock; + unsigned long *table; + unsigned long asce; + unsigned long asce_end; + void *private; + bool pfault_enabled; +}; + +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); +}; + +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); +void gmap_free(struct gmap *gmap); +void gmap_enable(struct gmap *gmap); +void gmap_disable(struct gmap *gmap); +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len); +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); +unsigned long gmap_translate(struct gmap *, unsigned long gaddr); +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); +int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); +void gmap_discard(struct gmap *, unsigned long from, unsigned long to); +void __gmap_zap(struct gmap *, unsigned long gaddr); +void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); + +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); + +#endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8959ebb..6da41fa 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,6 +20,7 @@ #include <linux/kvm_types.h> #include <linux/kvm_host.h> #include <linux/kvm.h> +#include <linux/seqlock.h> #include <asm/debug.h> #include <asm/cpu.h> #include <asm/fpu/api.h> @@ -229,17 +230,11 @@ struct kvm_s390_itdb { __u8 data[256]; } __packed; -struct kvm_s390_vregs { - __vector128 vrs[32]; - __u8 reserved200[512]; /* for future vector expansion */ -} __packed; - struct sie_page { struct kvm_s390_sie_block sie_block; __u8 reserved200[1024]; /* 0x0200 */ struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[1280]; /* 0x0700 */ - struct kvm_s390_vregs vregs; /* 0x0c00 */ + __u8 reserved700[2304]; /* 0x0700 */ } __packed; struct kvm_vcpu_stat { @@ -467,7 +462,7 @@ struct kvm_s390_irq_payload { struct kvm_s390_local_interrupt { spinlock_t lock; struct kvm_s390_float_interrupt *float_int; - wait_queue_head_t *wq; + struct swait_queue_head *wq; atomic_t *cpuflags; DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); struct kvm_s390_irq_payload irq; @@ -558,6 +553,15 
@@ struct kvm_vcpu_arch { unsigned long pfault_token; unsigned long pfault_select; unsigned long pfault_compare; + bool cputm_enabled; + /* + * The seqcount protects updates to cputm_start and sie_block.cputm, + * this way we can have non-blocking reads with consistent values. + * Only the owning VCPU thread (vcpu->cpu) is allowed to change these + * values and to start/stop/enable/disable cpu timer accounting. + */ + seqcount_t cputm_seqcount; + __u64 cputm_start; }; struct kvm_vm_stat { @@ -596,15 +600,11 @@ struct s390_io_adapter { #define S390_ARCH_FAC_MASK_SIZE_U64 \ (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) -struct kvm_s390_fac { - /* facility list requested by guest */ - __u64 list[S390_ARCH_FAC_LIST_SIZE_U64]; - /* facility mask supported by kvm & hosting machine */ - __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64]; -}; - struct kvm_s390_cpu_model { - struct kvm_s390_fac *fac; + /* facility mask supported by kvm & hosting machine */ + __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; + /* facility list requested by guest (in dma page) */ + __u64 *fac_list; struct cpuid cpu_id; unsigned short ibc; }; @@ -623,6 +623,16 @@ struct kvm_s390_crypto_cb { __u8 reserved80[128]; /* 0x0080 */ }; +/* + * sie_page2 has to be allocated as DMA because fac_list and crycb need + * 31bit addresses in the sie control block. + */ +struct sie_page2 { + __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ + struct kvm_s390_crypto_cb crycb; /* 0x0800 */ + u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ +} __packed; + struct kvm_arch{ void *sca; int use_esca; @@ -643,6 +653,7 @@ struct kvm_arch{ int ipte_lock_count; struct mutex ipte_mutex; spinlock_t start_stop_lock; + struct sie_page2 *sie_page2; struct kvm_s390_cpu_model model; struct kvm_s390_crypto crypto; u64 epoch; diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index a52b6cc..d5427c7 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -19,7 +19,6 @@ #include <linux/module.h> -#ifdef CONFIG_LIVEPATCH static inline int klp_check_compiler_support(void) { return 0; @@ -36,8 +35,5 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->psw.addr = ip; } -#else -#error Include linux/livepatch.h, not asm/livepatch.h -#endif #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index fb1b93e..e485817 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -15,17 +15,25 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + spin_lock_init(&mm->context.list_lock); + INIT_LIST_HEAD(&mm->context.pgtable_list); + INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.attach_count, 0); mm->context.flush_mm = 0; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; - mm->context.asce_bits |= _ASCE_TYPE_REGION3; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; #endif - mm->context.asce_limit = STACK_TOP_MAX; + if (mm->context.asce_limit == 0) { + /* context created by exec, set asce limit to 4TB */ + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION3; + mm->context.asce_limit = STACK_TOP_MAX; + } else if (mm->context.asce_limit == (1UL << 31)) { + mm_inc_nr_pmds(mm); + } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; } @@ -111,8 +119,6 @@ static inline void 
activate_mm(struct mm_struct *prev, static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - if (oldmm->context.asce_limit < mm->context.asce_limit) - crst_table_downgrade(mm, oldmm->context.asce_limit); } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c873e68..b6bfa16 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -9,7 +9,6 @@ #include <linux/pci.h> #include <linux/mutex.h> #include <asm-generic/pci.h> -#include <asm-generic/pci-dma-compat.h> #include <asm/pci_clp.h> #include <asm/pci_debug.h> @@ -45,7 +44,7 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; -} __packed __aligned(16); +} __packed __aligned(64); enum zpci_state { ZPCI_FN_STATE_RESERVED, @@ -66,7 +65,6 @@ struct s390_domain; /* Private data per function */ struct zpci_dev { - struct pci_dev *pdev; struct pci_bus *bus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ @@ -192,7 +190,7 @@ int zpci_fmb_disable_device(struct zpci_dev *); /* Debug */ int zpci_debug_init(void); void zpci_debug_exit(void); -void zpci_debug_init_device(struct zpci_dev *); +void zpci_debug_init_device(struct zpci_dev *, const char *); void zpci_debug_exit_device(struct zpci_dev *); void zpci_debug_info(struct zpci_dev *, struct seq_file *); diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index dd78f92..e75c64cb 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -49,9 +49,6 @@ struct clp_fh_list_entry { /* List PCI functions request */ struct clp_req_list_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u64 resume_token; u64 reserved2; } __packed; @@ -59,9 +56,6 @@ struct clp_req_list_pci { /* List PCI functions response */ struct clp_rsp_list_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u64 resume_token; u32 reserved2; u16 max_fn; @@ -73,9 +67,6 @@ struct clp_rsp_list_pci { /* Query PCI function request */ struct clp_req_query_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u32 reserved2; u64 reserved3; @@ -84,9 +75,6 @@ struct clp_req_query_pci { /* Query PCI function response */ struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 : 64; u16 vfn; /* virtual fn number */ u16 : 7; u16 util_str_avail : 1; /* utility string available? 
*/ @@ -108,21 +96,15 @@ struct clp_rsp_query_pci { /* Query PCI function group request */ struct clp_req_query_pci_grp { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; - u32 : 24; + u32 reserved2 : 24; u32 pfgid : 8; /* function group id */ - u32 reserved2; - u64 reserved3; + u32 reserved3; + u64 reserved4; } __packed; /* Query PCI function group response */ struct clp_rsp_query_pci_grp { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u16 : 4; u16 noi : 12; /* number of interrupts */ u8 version; @@ -141,9 +123,6 @@ struct clp_rsp_query_pci_grp { /* Set PCI function request */ struct clp_req_set_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u16 reserved2; u8 oc; /* operation controls */ @@ -154,9 +133,6 @@ struct clp_req_set_pci { /* Set PCI function response */ struct clp_rsp_set_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u32 reserved3; u64 reserved4; diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 6d6556c..90240df 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -178,7 +178,6 @@ ret__; \ }) -#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double #define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double #include <asm-generic/percpu.h> diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index f897ec7..1f7ff85 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -21,7 +21,7 @@ #define PMU_F_ERR_LSDA 0x0200 #define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA) -/* Perf defintions for PMU event attributes in sysfs */ +/* Perf definitions for PMU event attributes in sysfs */ extern __init const struct attribute_group **cpumf_cf_event_group(void); extern ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 7b7858f..9b3d9b6 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -23,10 +23,6 @@ void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); extern int page_table_allocate_pgste; -int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, - unsigned long key, bool nq); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); - static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { typedef struct { char _[n]; } addrtype; @@ -100,12 +96,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.pgtable_list); - INIT_LIST_HEAD(&mm->context.gmap_list); - return (pgd_t *) crst_table_alloc(mm); + unsigned long *table = crst_table_alloc(mm); + + if (!table) + return NULL; + if (mm->context.asce_limit == (1UL << 31)) { + /* Forking a compat process with 2 page table levels */ + if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + crst_table_free(mm, table); + return NULL; + } + } + return (pgd_t *) table; +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + if (mm->context.asce_limit == (1UL << 31)) + 
pgtable_pmd_page_dtor(virt_to_page(pgd)); + crst_table_free(mm, (unsigned long *) pgd); } -#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 64ead80..2f66645 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -298,15 +298,15 @@ static inline int is_module_addr(void *addr) /* * Segment table entry encoding (R = read-only, I = invalid, y = young bit): - * dy..R...I...wr + * dy..R...I...rw * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 * prot-none, dirty, old 10..1...1...00 * prot-none, dirty, young 11..1...1...00 - * read-only, clean, old 00..1...1...01 - * read-only, clean, young 01..1...0...01 - * read-only, dirty, old 10..1...1...01 - * read-only, dirty, young 11..1...0...01 + * read-only, clean, old 00..1...1...10 + * read-only, clean, young 01..1...0...10 + * read-only, dirty, old 10..1...1...10 + * read-only, dirty, young 11..1...0...10 * read-write, clean, old 00..1...1...11 * read-write, clean, young 01..1...0...11 * read-write, dirty, old 10..0...1...11 @@ -520,15 +520,6 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } -#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS -extern int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty); - -#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -extern int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -631,208 +622,6 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) return pmd; } -static inline pgste_t pgste_get_lock(pte_t *ptep) -{ - unsigned long new = 0; -#ifdef CONFIG_PGSTE - unsigned long old; - - preempt_disable(); - asm( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " nihh %0,0xff7f\n" /* clear PCL bit in old */ - " oihh %1,0x0080\n" /* set PCL bit in new */ - " csg %0,%1,%2\n" - " jl 0b\n" - : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) - : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); -#endif - return __pgste(new); -} - -static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - asm( - " nihh %1,0xff7f\n" /* clear PCL bit */ - " stg %1,%0\n" - : "=Q" (ptep[PTRS_PER_PTE]) - : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) - : "cc", "memory"); - preempt_enable(); -#endif -} - -static inline pgste_t pgste_get(pte_t *ptep) -{ - unsigned long pgste = 0; -#ifdef CONFIG_PGSTE - pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); -#endif - return __pgste(pgste); -} - -static inline void pgste_set(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; -#endif -} - -static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste, - struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - unsigned long address, bits, skey; - - if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID) - return pgste; - address = pte_val(*ptep) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); - bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ - /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & 
(_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; -#endif - return pgste; - -} - -static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, - struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - unsigned long address; - unsigned long nkey; - - if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) - return; - VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); - address = pte_val(entry) & PAGE_MASK; - /* - * Set page access key and fetch protection bit from pgste. - * The guest C/R information is still in the PGSTE, set real - * key C/R to 0. - */ - nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; - nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; - page_set_storage_key(address, nkey, 0); -#endif -} - -static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) -{ - if ((pte_val(entry) & _PAGE_PRESENT) && - (pte_val(entry) & _PAGE_WRITE) && - !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { - /* - * Without enhanced suppression-on-protection force - * the dirty bit on for all writable ptes. - */ - pte_val(entry) |= _PAGE_DIRTY; - pte_val(entry) &= ~_PAGE_PROTECT; - } - if (!(pte_val(entry) & _PAGE_PROTECT)) - /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; - } - *ptep = entry; - return pgste; -} - -/** - * struct gmap_struct - guest address space - * @crst_list: list of all crst tables used in the guest address space - * @mm: pointer to the parent mm_struct - * @guest_to_host: radix tree with guest to host address translation - * @host_to_guest: radix tree with pointer to segment table entries - * @guest_table_lock: spinlock to protect all entries in the guest page table - * @table: pointer to the page directory - * @asce: address space control element for gmap page table - * @pfault_enabled: defines if pfaults are applicable for the guest - */ -struct gmap { - struct list_head list; - struct list_head crst_list; - struct mm_struct *mm; - struct radix_tree_root guest_to_host; - struct radix_tree_root host_to_guest; - spinlock_t guest_table_lock; - unsigned long *table; - unsigned long asce; - unsigned long asce_end; - void *private; - bool pfault_enabled; -}; - -/** - * struct gmap_notifier - notify function block for page invalidation - * @notifier_call: address of callback function - */ -struct gmap_notifier { - struct list_head list; - void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); -}; - -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); -void gmap_free(struct gmap *gmap); -void gmap_enable(struct gmap *gmap); -void gmap_disable(struct gmap *gmap); -int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long len); -int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); -unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); -int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); -void gmap_discard(struct gmap *, unsigned long from, unsigned long to); -void __gmap_zap(struct gmap *, unsigned long gaddr); -bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); - - -void gmap_register_ipte_notifier(struct gmap_notifier *); -void gmap_unregister_ipte_notifier(struct gmap_notifier *); -int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); -void gmap_do_ipte_notify(struct mm_struct *, 
unsigned long addr, pte_t *); - -static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - if (pgste_val(pgste) & PGSTE_IN_BIT) { - pgste_val(pgste) &= ~PGSTE_IN_BIT; - gmap_do_ipte_notify(mm, addr, ptep); - } -#endif - return pgste; -} - -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) -{ - pgste_t pgste; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; - pgste_set_key(ptep, pgste, entry, mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); - } else { - *ptep = entry; - } -} - /* * query functions pte_write/pte_dirty/pte_young only work if * pte_present() is true. Undefined behaviour if not.. @@ -998,96 +787,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) } while (nr != 255); } -static inline void ptep_flush_direct(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - int active, count; - - if (pte_val(*ptep) & _PAGE_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte_local(address, ptep); - else - __ptep_ipte(address, ptep); - atomic_sub(0x10000, &mm->context.attach_count); -} - -static inline void ptep_flush_lazy(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - int active, count; - - if (pte_val(*ptep) & _PAGE_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { - pte_val(*ptep) |= _PAGE_INVALID; - mm->context.flush_mm = 1; - } else - __ptep_ipte(address, ptep); - atomic_sub(0x10000, &mm->context.attach_count); -} - /* - * Get (and clear) the user dirty bit for a pte. + * This is hard to understand. ptep_get_and_clear and ptep_clear_flush + * both clear the TLB for the unmapped pte. The reason is that + * ptep_get_and_clear is used in common code (e.g. change_pte_range) + * to modify an active pte. The sequence is + * 1) ptep_get_and_clear + * 2) set_pte_at + * 3) flush_tlb_range + * On s390 the tlb needs to get flushed with the modification of the pte + * if the pte is active. The only way how this can be implemented is to + * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range + * is a nop. 
*/ -static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - int dirty; - - if (!mm_has_pgste(mm)) - return 0; - pgste = pgste_get_lock(ptep); - dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; - pte = *ptep; - if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { - pgste = pgste_ipte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) |= _PAGE_PROTECT; - else - pte_val(pte) |= _PAGE_INVALID; - *ptep = pte; - } - pgste_set_unlock(ptep, pgste); - return dirty; -} +pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t); +pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte, oldpte; - int young; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); - } - - oldpte = pte = *ptep; - ptep_flush_direct(vma->vm_mm, addr, ptep); - young = pte_young(pte); - pte = pte_mkold(pte); - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; + pte_t pte = *ptep; - return young; + pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte)); + return pte_young(pte); } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -1097,104 +820,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } -/* - * This is hard to understand. ptep_get_and_clear and ptep_clear_flush - * both clear the TLB for the unmapped pte. The reason is that - * ptep_get_and_clear is used in common code (e.g. change_pte_range) - * to modify an active pte. The sequence is - * 1) ptep_get_and_clear - * 2) set_pte_at - * 3) flush_tlb_range - * On s390 the tlb needs to get flushed with the modification of the pte - * if the pte is active. The only way how this can be implemented is to - * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range - * is a nop. 
- */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long address, pte_t *ptep) + unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set_unlock(ptep, pgste); - } - return pte; + return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION -static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, - unsigned long address, - pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_lazy(mm, address, ptep); - - if (mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set(ptep, pgste); - } - return pte; -} - -static inline void ptep_modify_prot_commit(struct mm_struct *mm, - unsigned long address, - pte_t *ptep, pte_t pte) -{ - pgste_t pgste; - - if (mm_has_pgste(mm)) { - pgste = pgste_get(ptep); - pgste_set_key(ptep, pgste, pte, mm); - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; -} +pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *); +void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t); #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) + unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_direct(vma->vm_mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (mm_has_pgste(vma->vm_mm)) { - if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == - _PGSTE_GPS_USAGE_UNUSED) - pte_val(pte) |= _PAGE_UNUSED; - pgste = pgste_update_all(&pte, pgste, vma->vm_mm); - pgste_set_unlock(ptep, pgste); - } - return pte; + return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); } /* @@ -1206,80 +847,66 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long address, + unsigned long addr, pte_t *ptep, int full) { - pgste_t pgste; - pte_t pte; - - if (!full && mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - if (!full) - ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (!full && mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set_unlock(ptep, pgste); + if (full) { + pte_t pte = *ptep; + *ptep = __pte(_PAGE_INVALID); + return pte; } - return pte; + return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, - unsigned long address, pte_t *ptep) +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - pgste_t pgste; pte_t pte = *ptep; - if (pte_write(pte)) { - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, 
ptep, pgste); - } - - ptep_flush_lazy(mm, address, ptep); - pte = pte_wrprotect(pte); - - if (mm_has_pgste(mm)) { - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; - } - return pte; + if (pte_write(pte)) + ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte)); } #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, + unsigned long addr, pte_t *ptep, pte_t entry, int dirty) { - pgste_t pgste; - pte_t oldpte; - - oldpte = *ptep; - if (pte_same(oldpte, entry)) + if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); - } + ptep_xchg_direct(vma->vm_mm, addr, ptep, entry); + return 1; +} - ptep_flush_direct(vma->vm_mm, address, ptep); +/* + * Additional functions to handle KVM guest page tables + */ +void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry); +void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , int reset); +void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + +bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, bool nq); +unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr); - if (mm_has_pgste(vma->vm_mm)) { - if (pte_val(oldpte) & _PAGE_INVALID) - pgste_set_key(ptep, pgste, entry, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); - } else +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + if (mm_has_pgste(mm)) + ptep_set_pte_at(mm, addr, ptep, entry); + else *ptep = entry; - return 1; } /* @@ -1476,54 +1103,51 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) : "cc" ); } -static inline void pmdp_flush_direct(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) -{ - int active, count; +pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); +pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); - if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) - return; - if (!MACHINE_HAS_IDTE) { - __pmdp_csp(pmdp); - return; - } - active = (mm == current->active_mm) ? 
1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte_local(address, pmdp); - else - __pmdp_idte(address, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); -} +#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void pmdp_flush_lazy(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PGTABLE_DEPOSIT +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +static inline int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp, + pmd_t entry, int dirty) { - int active, count; + VM_BUG_ON(addr & ~HPAGE_MASK); - if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { - pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; - mm->context.flush_mm = 1; - } else if (MACHINE_HAS_IDTE) - __pmdp_idte(address, pmdp); - else - __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + entry = pmd_mkyoung(entry); + if (dirty) + entry = pmd_mkdirty(entry); + if (pmd_val(*pmdp) == pmd_val(entry)) + return 0; + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry); + return 1; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable); + pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd)); + return pmd_young(pmd); +} -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + VM_BUG_ON(addr & ~HPAGE_MASK); + return pmdp_test_and_clear_young(vma, addr, pmdp); +} static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) @@ -1539,66 +1163,48 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) return pmd; } -#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - pmd_t pmd; - - pmd = *pmdp; - pmdp_flush_direct(vma->vm_mm, address, pmdp); - *pmdp = pmd_mkold(pmd); - return pmd_young(pmd); -} - #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = *pmdp; - - pmdp_flush_direct(mm, address, pmdp); - pmd_clear(pmdp); - return pmd; + return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, - unsigned long address, + unsigned long addr, pmd_t *pmdp, int full) { - pmd_t pmd = *pmdp; - - if (!full) - pmdp_flush_lazy(mm, address, pmdp); - pmd_clear(pmdp); - return pmd; + if (full) { + pmd_t pmd = *pmdp; + *pmdp = __pmd(_SEGMENT_ENTRY_INVALID); + return pmd; 
+ } + return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); + return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); } #define __HAVE_ARCH_PMDP_INVALIDATE static inline void pmdp_invalidate(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - pmdp_flush_direct(vma->vm_mm, address, pmdp); + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { pmd_t pmd = *pmdp; - if (pmd_write(pmd)) { - pmdp_flush_direct(mm, address, pmdp); - set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); - } + if (pmd_write(pmd)) + pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd)); } static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 1c4fe12..d6fd22e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -184,6 +184,10 @@ struct task_struct; struct mm_struct; struct seq_file; +typedef int (*dump_trace_func_t)(void *data, unsigned long address); +void dump_trace(dump_trace_func_t func, void *data, + struct task_struct *task, unsigned long sp); + void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ @@ -203,6 +207,14 @@ unsigned long get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) +static inline unsigned long current_stack_pointer(void) +{ + unsigned long sp; + + asm volatile("la %0,0(15)" : "=a" (sp)); + return sp; +} + static inline unsigned short stap(void) { unsigned short cpu_address; diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 4b43ee7..fead491 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -31,7 +31,7 @@ * This should be totally fair - if anything is waiting, a process that wants a * lock will go to the back of the queue. When the currently active lock is * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consequtive readers at the + * that will be woken up; if there's a bunch of consecutive readers at the * front, then they'll all be woken up, but no other readers will be. */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 6983722..c0f0efb 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -101,6 +101,8 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ +void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); + extern void cmma_init(void); extern void (*_machine_restart)(char *command); diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h index c82eb12..c988df7 100644 --- a/arch/s390/include/asm/xor.h +++ b/arch/s390/include/asm/xor.h @@ -1 +1,20 @@ -#include <asm-generic/xor.h> +/* + * Optimited xor routines + * + * Copyright IBM Corp. 
2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#ifndef _ASM_S390_XOR_H +#define _ASM_S390_XOR_H + +extern struct xor_block_template xor_block_xc; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_xc); \ +} while (0) + +#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc) + +#endif /* _ASM_S390_XOR_H */ diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h new file mode 100644 index 0000000..ab72d9d --- /dev/null +++ b/arch/s390/include/uapi/asm/clp.h @@ -0,0 +1,28 @@ +/* + * ioctl interface for /dev/clp + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _ASM_CLP_H +#define _ASM_CLP_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +struct clp_req { + unsigned int c : 1; + unsigned int r : 1; + unsigned int lps : 6; + unsigned int cmd : 8; + unsigned int : 16; + unsigned int reserved; + __u64 data_p; +}; + +#define CLP_IOCTL_MAGIC 'c' + +#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req) + +#endif diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index fe84bd5..347fe5a 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -154,6 +154,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_PFAULT (1UL << 5) #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) +#define KVM_SYNC_FPRS (1UL << 8) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -168,9 +169,12 @@ struct kvm_sync_regs { __u64 pft; /* pfault token [PFAULT] */ __u64 pfs; /* pfault select [PFAULT] */ __u64 pfc; /* pfault compare [PFAULT] */ - __u64 vrs[32][2]; /* vector registers */ + union { + __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */ + __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */ + }; __u8 reserved[512]; /* for future vector expansion */ - __u32 fpc; /* only valid with vector registers */ + __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ __u8 padding[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ }; diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index ee69c08..5dbaa72 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -7,6 +7,7 @@ { 0x9c, "DIAG (0x9c) time slice end directed" }, \ { 0x204, "DIAG (0x204) logical-cpu utilization" }, \ { 0x258, "DIAG (0x258) page-reference services" }, \ + { 0x288, "DIAG (0x288) watchdog functions" }, \ { 0x308, "DIAG (0x308) ipl functions" }, \ { 0x500, "DIAG (0x500) KVM virtio functions" }, \ { 0x501, "DIAG (0x501) KVM breakpoint" } diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 53bbc9e..1f95cc1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include <asm/idle.h> #include <asm/vdso.h> #include <asm/pgtable.h> +#include <asm/gmap.h> /* * Make sure that the compiler is new enough. 
We want a compiler that diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 7f76891..7f48e56 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -96,8 +96,7 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) (((unsigned long)response + rlen) >> 31)) { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { - pr_warning("The cpcmd kernel function failed to " - "allocate a response buffer\n"); + pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); return -ENOMEM; } spin_lock_irqsave(&cpcmd_lock, flags); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index c890a55..aa12de7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -699,8 +699,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, /* Since debugfs currently does not support uid/gid other than root, */ /* we do not allow gid/uid != 0 until we get support for that. */ if ((uid != 0) || (gid != 0)) - pr_warning("Root becomes the owner of all s390dbf files " - "in sysfs\n"); + pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); mutex_lock(&debug_mutex); @@ -1307,8 +1306,7 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view, new_level = debug_get_uint(str); } if(new_level < 0) { - pr_warning("%s is not a valid level for a debug " - "feature\n", str); + pr_warn("%s is not a valid level for a debug feature\n", str); rc = -EINVAL; } else { debug_set_level(id, new_level); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 62973ef..8cb9bfd 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1920,23 +1920,16 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) } if (separator) ptr += sprintf(ptr, "%c", separator); - /* - * Use four '%' characters below because of the - * following two conversions: - * - * 1) sprintf: %%%%r -> %%r - * 2) printk : %%r -> %r - */ if (operand->flags & OPERAND_GPR) - ptr += sprintf(ptr, "%%%%r%i", value); + ptr += sprintf(ptr, "%%r%i", value); else if (operand->flags & OPERAND_FPR) - ptr += sprintf(ptr, "%%%%f%i", value); + ptr += sprintf(ptr, "%%f%i", value); else if (operand->flags & OPERAND_AR) - ptr += sprintf(ptr, "%%%%a%i", value); + ptr += sprintf(ptr, "%%a%i", value); else if (operand->flags & OPERAND_CR) - ptr += sprintf(ptr, "%%%%c%i", value); + ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) - ptr += sprintf(ptr, "%%%%v%i", value); + ptr += sprintf(ptr, "%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 02bd02f..1b6081c 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/kdebug.h> #include <linux/ptrace.h> +#include <linux/mm.h> #include <linux/module.h> #include <linux/sched.h> #include <asm/processor.h> @@ -19,28 +20,28 @@ #include <asm/ipl.h> /* - * For show_trace we have tree different stack to consider: + * For dump_trace we have tree different stack to consider: * - the panic stack which is used if the kernel stack has overflown * - the asynchronous interrupt stack (cpu related) * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially + * The stack trace can start at any of the three stacks and can potentially * touch all 
of them. The order is: panic stack, async stack, sync stack. */ static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) +__dump_trace(dump_trace_func_t func, void *data, unsigned long sp, + unsigned long low, unsigned long high) { struct stack_frame *sf; struct pt_regs *regs; - unsigned long addr; while (1) { if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8]; - printk("([<%016lx>] %pSR)\n", addr, (void *)addr); /* Follow the backchain. */ while (1) { + if (func(data, sf->gprs[8])) + return sp; low = sp; sp = sf->back_chain; if (!sp) @@ -48,46 +49,58 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8]; - printk(" [<%016lx>] %pSR\n", addr, (void *)addr); } /* Zero backchain detected, check for interrupt frame. */ sp = (unsigned long) (sf + 1); if (sp <= low || sp > high - sizeof(*regs)) return sp; regs = (struct pt_regs *) sp; - addr = regs->psw.addr; - printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + if (!user_mode(regs)) { + if (func(data, regs->psw.addr)) + return sp; + } low = sp; sp = regs->gprs[15]; } } -static void show_trace(struct task_struct *task, unsigned long *stack) +void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, + unsigned long sp) { - const unsigned long frame_size = - STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - register unsigned long __r15 asm ("15"); - unsigned long sp; + unsigned long frame_size; - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : __r15; - printk("Call Trace:\n"); + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); #ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, + sp = __dump_trace(func, data, sp, S390_lowcore.panic_stack + frame_size - 4096, S390_lowcore.panic_stack + frame_size); #endif - sp = __show_trace(sp, + sp = __dump_trace(func, data, sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, S390_lowcore.async_stack + frame_size); if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); + __dump_trace(func, data, sp, + (unsigned long)task_stack_page(task), + (unsigned long)task_stack_page(task) + THREAD_SIZE); else - __show_trace(sp, S390_lowcore.thread_info, + __dump_trace(func, data, sp, + S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); +} +EXPORT_SYMBOL_GPL(dump_trace); + +static int show_address(void *data, unsigned long address) +{ + printk("([<%016lx>] %pSR)\n", address, (void *)address); + return 0; +} + +static void show_trace(struct task_struct *task, unsigned long sp) +{ + if (!sp) + sp = task ? task->thread.ksp : current_stack_pointer(); + printk("Call Trace:\n"); + dump_trace(show_address, NULL, task, sp); if (!task) task = current; debug_show_held_locks(task); @@ -95,15 +108,16 @@ static void show_trace(struct task_struct *task, unsigned long *stack) void show_stack(struct task_struct *task, unsigned long *sp) { - register unsigned long *__r15 asm ("15"); unsigned long *stack; int i; - if (!sp) - stack = task ? 
(unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - + stack = sp; + if (!stack) { + if (!task) + stack = (unsigned long *)current_stack_pointer(); + else + stack = (unsigned long *)task->thread.ksp; + } for (i = 0; i < 20; i++) { if (((addr_t) stack & (THREAD_SIZE-1)) == 0) break; @@ -112,7 +126,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) printk("%016lx ", *stack++); } printk("\n"); - show_trace(task, sp); + show_trace(task, (unsigned long)sp); } static void show_last_breaking_event(struct pt_regs *regs) @@ -121,13 +135,9 @@ static void show_last_breaking_event(struct pt_regs *regs) printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); } -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) -{ - return (regs->psw.mask & bits) / ((~bits + 1) & bits); -} - void show_registers(struct pt_regs *regs) { + struct psw_bits *psw = &psw_bits(regs->psw); char *mode; mode = user_mode(regs) ? "User" : "Krnl"; @@ -136,13 +146,9 @@ void show_registers(struct pt_regs *regs) printk(" (%pSR)", (void *)regs->psw.addr); printk("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); - printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); + "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, + psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); + printk(" RI:%x EA:%x", psw->ri, psw->eaba); printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); printk(" %016lx %016lx %016lx %016lx\n", @@ -160,7 +166,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_trace(NULL, regs->gprs[15]); show_last_breaking_event(regs); } @@ -184,9 +190,8 @@ void die(struct pt_regs *regs, const char *str) #ifdef CONFIG_SMP printk("SMP "); #endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif + if (debug_pagealloc_enabled()) + printk("DEBUG_PAGEALLOC"); printk("\n"); notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); print_modules(); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index c55576b..a0684de 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -448,7 +448,6 @@ void __init startup_init(void) rescue_initrd(); clear_bss_section(); init_kernel_storage_key(); - lockdep_init(); lockdep_off(); setup_lowcore_early(); setup_facility_list(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index cd5a191..2d47f9c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -186,6 +186,7 @@ ENTRY(__switch_to) stg %r5,__LC_THREAD_INFO # store thread info of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next + /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */ lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 
4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task @@ -1199,114 +1200,12 @@ cleanup_critical: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bor %r14 - clg %r9,BASED(.Lcleanup_save_fpu_regs_done) - jhe 5f - clg %r9,BASED(.Lcleanup_save_fpu_regs_fp) - jhe 4f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low) - jhe 2f - clg %r9,BASED(.Lcleanup_save_fpu_fpc_end) - jhe 1f - lg %r2,__LC_CURRENT - aghi %r2,__TASK_thread -0: # Store floating-point controls - stfpc __THREAD_FPU_fpc(%r2) -1: # Load register save area and check if VX is active - lg %r3,__THREAD_FPU_regs(%r2) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jz 4f # no VX -> store FP regs -2: # Store vector registers (V0-V15) - VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) -3: # Store vector registers (V16-V31) - VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) - j 5f # -> done, set CIF_FPU flag -4: # Store floating-point registers - std 0,0(%r3) - std 1,8(%r3) - std 2,16(%r3) - std 3,24(%r3) - std 4,32(%r3) - std 5,40(%r3) - std 6,48(%r3) - std 7,56(%r3) - std 8,64(%r3) - std 9,72(%r3) - std 10,80(%r3) - std 11,88(%r3) - std 12,96(%r3) - std 13,104(%r3) - std 14,112(%r3) - std 15,120(%r3) -5: # Set CIF_FPU flag - oi __LC_CPU_FLAGS+7,_CIF_FPU - lg %r9,48(%r11) # return from save_fpu_regs + larl %r9,save_fpu_regs br %r14 -.Lcleanup_save_fpu_fpc_end: - .quad .Lsave_fpu_regs_fpc_end -.Lcleanup_save_fpu_regs_vx_low: - .quad .Lsave_fpu_regs_vx_low -.Lcleanup_save_fpu_regs_vx_high: - .quad .Lsave_fpu_regs_vx_high -.Lcleanup_save_fpu_regs_fp: - .quad .Lsave_fpu_regs_fp -.Lcleanup_save_fpu_regs_done: - .quad .Lsave_fpu_regs_done .Lcleanup_load_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bnor %r14 - clg %r9,BASED(.Lcleanup_load_fpu_regs_done) - jhe 1f - clg %r9,BASED(.Lcleanup_load_fpu_regs_fp) - jhe 2f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx) - jhe 4f - lg %r4,__LC_CURRENT - aghi %r4,__TASK_thread - lfpc __THREAD_FPU_fpc(%r4) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz 2f # -> no VX, load FP regs -4: # Load V0 ..V15 registers - VLM %v0,%v15,0,%r4 -3: # Load V16..V31 registers - VLM %v16,%v31,256,%r4 - j 1f -2: # Load floating-point registers - ld 0,0(%r4) - ld 1,8(%r4) - ld 2,16(%r4) - ld 3,24(%r4) - ld 4,32(%r4) - ld 5,40(%r4) - ld 6,48(%r4) - ld 7,56(%r4) - ld 8,64(%r4) - ld 9,72(%r4) - ld 10,80(%r4) - ld 11,88(%r4) - ld 12,96(%r4) - ld 13,104(%r4) - ld 14,112(%r4) - ld 15,120(%r4) -1: # Clear CIF_FPU bit - ni __LC_CPU_FLAGS+7,255-_CIF_FPU - lg %r9,48(%r11) # return from load_fpu_regs + larl %r9,load_fpu_regs br %r14 -.Lcleanup_load_fpu_regs_vx: - .quad .Lload_fpu_regs_vx -.Lcleanup_load_fpu_regs_vx_high: - .quad .Lload_fpu_regs_vx_high -.Lcleanup_load_fpu_regs_fp: - .quad .Lload_fpu_regs_fp -.Lcleanup_load_fpu_regs_done: - .quad .Lload_fpu_regs_done /* * Integer constants diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index c5febe8..03c2b46 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,7 @@ __HEAD ENTRY(startup_continue) - tm __LC_STFLE_FAC_LIST+6,0x80 # LPP available ? + tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? 
jz 0f xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid mvi __LC_LPP,0x80 # and set LPP_MAGIC diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f41d520..c373a1d 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -164,8 +164,7 @@ void do_softirq_own_stack(void) { unsigned long old, new; - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (old)); + old = current_stack_pointer(); /* Check against async. stack address range. */ new = S390_lowcore.async_stack; if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 929c147..58bf457 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -383,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event) /* Validate the counter that is assigned to this event. * Because the counter facility can use numerous counters at the - * same time without constraints, it is not necessary to explicity + * same time without constraints, it is not necessary to explicitly * validate event groups (event->group_leader != event). */ err = validate_event(hwc); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3d8da1e..1a43474 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1022,10 +1022,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) /* * A non-zero guest program parameter indicates a guest * sample. - * Note that some early samples might be misaccounted to - * the host. + * Note that some early samples or samples from guests without + * lpp usage would be misaccounted to the host. We use the asn + * value as a heuristic to detect most of these guest samples. + * If the value differs from the host hpp value, we assume + * it to be a KVM guest. */ - if (sfr->basic.gpp) + if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp) sde_regs->in_guest = 1; overflow = 0; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 0943b11..c3e4099 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -222,67 +222,23 @@ static int __init service_level_perf_register(void) } arch_initcall(service_level_perf_register); -/* See also arch/s390/kernel/traps.c */ -static unsigned long __store_trace(struct perf_callchain_entry *entry, - unsigned long sp, - unsigned long low, unsigned long high) +static int __perf_callchain_kernel(void *data, unsigned long address) { - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8]); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8]); - } - /* Zero backchain detected, check for interrupt frame. 
*/ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - perf_callchain_store(entry, sf->gprs[8]); - low = sp; - sp = regs->gprs[15]; - } + struct perf_callchain_entry *entry = data; + + perf_callchain_store(entry, address); + return 0; } void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - unsigned long head, frame_size; - struct stack_frame *head_sf; - if (user_mode(regs)) return; - - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - head = regs->gprs[15]; - head_sf = (struct stack_frame *) head; - - if (!head_sf || !head_sf->back_chain) - return; - - head = head_sf->back_chain; - head = __store_trace(entry, head, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - - __store_trace(entry, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]); } -/* Perf defintions for PMU event attributes in sysfs */ +/* Perf definitions for PMU event attributes in sysfs */ ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9220db5..d3f9688 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -327,6 +327,7 @@ static void __init setup_lowcore(void) + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; + lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, @@ -374,17 +375,17 @@ static void __init setup_lowcore(void) static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource __initdata *standard_resources[] = { @@ -408,7 +409,7 @@ static void __init setup_resources(void) for_each_memblock(memory, reg) { res = alloc_bootmem_low(sizeof(*res)); - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; res->start = reg->base; @@ -779,6 +780,7 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "zEC12"); break; case 0x2964: + case 0x2965: strcpy(elf_platform, "z13"); break; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3c65a8e..40a6b4f 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); inc_irq_stat(CPU_RST); local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* Upping and downing of CPUs */ diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 8f64ebd..44f84b2 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -10,78 +10,39 @@ #include <linux/kallsyms.h> #include <linux/module.h> -static unsigned long 
save_context_stack(struct stack_trace *trace, - unsigned long sp, - unsigned long low, - unsigned long high, - int savesched) +static int __save_address(void *data, unsigned long address, int nosched) { - struct stack_frame *sf; - struct pt_regs *regs; - unsigned long addr; + struct stack_trace *trace = data; - while(1) { - if (sp < low || sp > high) - return sp; - sf = (struct stack_frame *)sp; - while(1) { - addr = sf->gprs[8]; - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *)sp; - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long)(sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *)sp; - addr = regs->psw.addr; - if (savesched || !in_sched_functions(addr)) { - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - } - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = regs->gprs[15]; + if (nosched && in_sched_functions(address)) + return 0; + if (trace->skip > 0) { + trace->skip--; + return 0; } + if (trace->nr_entries < trace->max_entries) { + trace->entries[trace->nr_entries++] = address; + return 0; + } + return 1; } -static void __save_stack_trace(struct stack_trace *trace, unsigned long sp) +static int save_address(void *data, unsigned long address) { - unsigned long new_sp, frame_size; + return __save_address(data, address, 0); +} - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - new_sp = save_context_stack(trace, sp, - S390_lowcore.panic_stack + frame_size - PAGE_SIZE, - S390_lowcore.panic_stack + frame_size, 1); - new_sp = save_context_stack(trace, new_sp, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size, 1); - save_context_stack(trace, new_sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE, 1); +static int save_address_nosched(void *data, unsigned long address) +{ + return __save_address(data, address, 1); } void save_stack_trace(struct stack_trace *trace) { - register unsigned long r15 asm ("15"); unsigned long sp; - sp = r15; - __save_stack_trace(trace, sp); + sp = current_stack_pointer(); + dump_trace(save_address, trace, NULL, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -89,16 +50,12 @@ EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - unsigned long sp, low, high; + unsigned long sp; sp = tsk->thread.ksp; - if (tsk == current) { - /* Get current stack pointer. 
*/ - asm volatile("la %0,0(15)" : "=a" (sp)); - } - low = (unsigned long) task_stack_page(tsk); - high = (unsigned long) task_pt_regs(tsk); - save_context_stack(trace, sp, low, high, 0); + if (tsk == current) + sp = current_stack_pointer(); + dump_trace(save_address_nosched, trace, tsk, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -109,7 +66,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) unsigned long sp; sp = kernel_stack_pointer(regs); - __save_stack_trace(trace, sp); + dump_trace(save_address, trace, NULL, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 99f84ac31..9409d32 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -499,8 +499,7 @@ static void etr_reset(void) if (etr_port0_online && etr_port1_online) set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); } else if (etr_port0_online || etr_port1_online) { - pr_warning("The real or virtual hardware system does " - "not provide an ETR interface\n"); + pr_warn("The real or virtual hardware system does not provide an ETR interface\n"); etr_port0_online = etr_port1_online = 0; } } @@ -1433,7 +1432,7 @@ device_initcall(etr_init_sysfs); /* * Server Time Protocol (STP) code. */ -static int stp_online; +static bool stp_online; static struct stp_sstpi stp_info; static void *stp_page; @@ -1444,11 +1443,7 @@ static struct timer_list stp_timer; static int __init early_parse_stp(char *p) { - if (strncmp(p, "off", 3) == 0) - stp_online = 0; - else if (strncmp(p, "on", 2) == 0) - stp_online = 1; - return 0; + return kstrtobool(p, &stp_online); } early_param("stp", early_parse_stp); @@ -1464,8 +1459,7 @@ static void __init stp_reset(void) if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { - pr_warning("The real or virtual hardware system does " - "not provide an STP interface\n"); + pr_warn("The real or virtual hardware system does not provide an STP interface\n"); free_page((unsigned long) stp_page); stp_page = NULL; stp_online = 0; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 40b8102..64298a8 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -37,7 +37,7 @@ static void set_topology_timer(void); static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; -static int topology_enabled = 1; +static bool topology_enabled = true; static DECLARE_WORK(topology_work, topology_work_fn); /* @@ -444,10 +444,7 @@ static const struct cpumask *cpu_book_mask(int cpu) static int __init early_parse_topology(char *p) { - if (strncmp(p, "off", 3)) - return 0; - topology_enabled = 0; - return 0; + return kstrtobool(p, &topology_enabled); } early_param("topology", early_parse_topology); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 017eb03d..dd97a3e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -22,8 +22,6 @@ #include <asm/fpu/api.h> #include "entry.h" -int show_unhandled_signals = 1; - static inline void __user *get_trap_ip(struct pt_regs *regs) { unsigned long address; @@ -35,21 +33,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) return (void __user *) (address - (regs->int_code >> 16)); } -static inline void report_user_fault(struct pt_regs *regs, int signr) -{ - if ((task_pid_nr(current) > 1) && !show_unhandled_signals) - return; - if 
(!unhandled_signal(current, signr)) - return; - if (!printk_ratelimit()) - return; - printk("User process fault: interruption code %04x ilc:%d ", - regs->int_code & 0xffff, regs->int_code >> 17); - print_vma_addr("in ", regs->psw.addr); - printk("\n"); - show_regs(regs); -} - int is_valid_bugaddr(unsigned long addr) { return 1; @@ -65,7 +48,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) info.si_code = si_code; info.si_addr = get_trap_ip(regs); force_sig_info(si_signo, &info, current); - report_user_fault(regs, si_signo); + report_user_fault(regs, si_signo, 0); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr); @@ -111,7 +94,7 @@ NOKPROBE_SYMBOL(do_per_trap); void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { - report_user_fault(regs, SIGSEGV); + report_user_fault(regs, SIGSEGV, 0); do_exit(SIGSEGV); } else die(regs, "Unknown program exception"); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 05f7de9..1ea4095 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -14,6 +14,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/pgalloc.h> +#include <asm/gmap.h> #include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index d30db40..66938d2 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu) } static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, - int write) + enum gacc_mode mode) { union alet alet; struct ale ale; @@ -454,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, } } - if (ale.fo == 1 && write) + if (ale.fo == 1 && mode == GACC_STORE) return PGM_PROTECTION; asce->val = aste.asce; @@ -477,25 +477,28 @@ enum { }; static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - ar_t ar, int write) + ar_t ar, enum gacc_mode mode) { int rc; - psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; struct trans_exc_code_bits *tec_bits; memset(pgm, 0, sizeof(*pgm)); tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw_bits(*psw).as; + tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec_bits->as = psw.as; - if (!psw_bits(*psw).t) { + if (!psw.t) { asce->val = 0; asce->r = 1; return 0; } - switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { + if (mode == GACC_IFETCH) + psw.as = psw.as == PSW_AS_HOME ? 
PSW_AS_HOME : PSW_AS_PRIMARY; + + switch (psw.as) { case PSW_AS_PRIMARY: asce->val = vcpu->arch.sie_block->gcr[1]; return 0; @@ -506,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, asce->val = vcpu->arch.sie_block->gcr[13]; return 0; case PSW_AS_ACCREG: - rc = ar_translation(vcpu, asce, ar, write); + rc = ar_translation(vcpu, asce, ar, mode); switch (rc) { case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -538,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * @gva: guest virtual address * @gpa: points to where guest physical (absolute) address should be stored * @asce: effective asce - * @write: indicates if access is a write access + * @mode: indicates the access mode to be used * * Translate a guest virtual address into a guest absolute address by means * of dynamic address translation as specified by the architecture. @@ -554,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) */ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, unsigned long *gpa, const union asce asce, - int write) + enum gacc_mode mode) { union vaddress vaddr = {.addr = gva}; union raddress raddr = {.addr = gva}; @@ -699,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, real_address: raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); absolute_address: - if (write && dat_protection) + if (mode == GACC_STORE && dat_protection) return PGM_PROTECTION; if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) return PGM_ADDRESSING; @@ -728,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, unsigned long *pages, unsigned long nr_pages, - const union asce asce, int write) + const union asce asce, enum gacc_mode mode) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -740,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); tec_bits->addr = ga >> PAGE_SHIFT; - if (write && lap_enabled && is_low_address(ga)) { + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { pgm->code = PGM_PROTECTION; return pgm->code; } ga &= PAGE_MASK; if (psw_bits(*psw).t) { - rc = guest_translate(vcpu, ga, pages, asce, write); + rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; if (rc == PGM_PROTECTION) @@ -768,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, } int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, - unsigned long len, int write) + unsigned long len, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned long _len, nr_pages, gpa, idx; @@ -780,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - rc = get_vcpu_asce(vcpu, &asce, ar, write); + rc = get_vcpu_asce(vcpu, &asce, ar, mode); if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; @@ -792,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write); + rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = 
min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (write) + if (mode == GACC_STORE) rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); else rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); @@ -812,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, } int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, - void *data, unsigned long len, int write) + void *data, unsigned long len, enum gacc_mode mode) { unsigned long _len, gpa; int rc = 0; @@ -820,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, while (len && !rc) { gpa = kvm_s390_real_to_abs(vcpu, gra); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (write) + if (mode) rc = write_guest_abs(vcpu, gpa, data, _len); else rc = read_guest_abs(vcpu, gpa, data, _len); @@ -841,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * has to take care of this. */ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long *gpa, int write) + unsigned long *gpa, enum gacc_mode mode) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -851,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, gva = kvm_s390_logical_to_effective(vcpu, gva); tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - rc = get_vcpu_asce(vcpu, &asce, ar, write); + rc = get_vcpu_asce(vcpu, &asce, ar, mode); tec->addr = gva >> PAGE_SHIFT; if (rc) return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (write) { + if (mode == GACC_STORE) { rc = pgm->code = PGM_PROTECTION; return rc; } } if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ - rc = guest_translate(vcpu, gva, gpa, asce, write); + rc = guest_translate(vcpu, gva, gpa, asce, mode); if (rc > 0) { if (rc == PGM_PROTECTION) tec->b61 = 1; @@ -883,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, * check_gva_range - test a range of guest virtual addresses for accessibility */ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long length, int is_write) + unsigned long length, enum gacc_mode mode) { unsigned long gpa; unsigned long currlen; @@ -892,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, ipte_lock(vcpu); while (length > 0 && !rc) { currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); - rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write); + rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); gva += currlen; length -= currlen; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index ef03726..df0a79d 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, return kvm_read_guest(vcpu->kvm, gpa, data, len); } +enum gacc_mode { + GACC_FETCH, + GACC_STORE, + GACC_IFETCH, +}; + int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - ar_t ar, unsigned long *gpa, int write); + ar_t ar, unsigned long *gpa, enum gacc_mode mode); int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long length, int is_write); + unsigned long length, enum gacc_mode mode); int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, - unsigned long len, int write); + unsigned long len, enum gacc_mode mode); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, - void *data, unsigned long len, int 
write); + void *data, unsigned long len, enum gacc_mode mode); /** * write_guest - copy data from kernel space to guest space @@ -215,7 +221,7 @@ static inline __must_check int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, 1); + return access_guest(vcpu, ga, ar, data, len, GACC_STORE); } /** @@ -235,7 +241,27 @@ static inline __must_check int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, 0); + return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); +} + +/** + * read_guest_instr - copy instruction data from guest space to kernel space + * @vcpu: virtual cpu + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from the current psw address (guest space) to @data (kernel + * space). + * + * The behaviour of read_guest_instr is identical to read_guest, except that + * instruction data will be read from primary space when in home-space or + * address-space mode. + */ +static inline __must_check +int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len) +{ + return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len, + GACC_IFETCH); } /** diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index d697312..e8c6843 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -17,7 +17,7 @@ /* * Extends the address range given by *start and *stop to include the address * range starting with estart and the length len. Takes care of overflowing - * intervals and tries to minimize the overall intervall size. + * intervals and tries to minimize the overall interval size. */ static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len) { @@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu) return; /* - * If the guest is not interrested in branching events, we can savely + * If the guest is not interested in branching events, we can safely * limit them to the PER address range. 
*/ if (!(*cr9 & PER_EVENT_BRANCH)) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index d53c107..2e6b54e 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -38,17 +38,32 @@ static const intercept_handler_t instruction_handlers[256] = { [0xeb] = kvm_s390_handle_eb, }; -void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) +u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + u8 ilen = 0; - /* Use the length of the EXECUTE instruction if necessary */ - if (sie_block->icptstatus & 1) { - ilc = (sie_block->icptstatus >> 4) & 0x6; - if (!ilc) - ilc = 4; + switch (vcpu->arch.sie_block->icptcode) { + case ICPT_INST: + case ICPT_INSTPROGI: + case ICPT_OPEREXC: + case ICPT_PARTEXEC: + case ICPT_IOINST: + /* instruction only stored for these icptcodes */ + ilen = insn_length(vcpu->arch.sie_block->ipa >> 8); + /* Use the length of the EXECUTE instruction if necessary */ + if (sie_block->icptstatus & 1) { + ilen = (sie_block->icptstatus >> 4) & 0x6; + if (!ilen) + ilen = 4; + } + break; + case ICPT_PROGI: + /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */ + ilen = vcpu->arch.sie_block->pgmilc & 0x6; + break; } - sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); + return ilen; } static int handle_noop(struct kvm_vcpu *vcpu) @@ -121,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } -static void __extract_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) +static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) { - memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info)); - pgm_info->code = vcpu->arch.sie_block->iprcc; + struct kvm_s390_pgm_info pgm_info = { + .code = vcpu->arch.sie_block->iprcc, + /* the PSW has already been rewound */ + .flags = KVM_S390_PGM_FLAGS_NO_REWIND, + }; switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { case PGM_AFX_TRANSLATION: @@ -138,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: case PGM_SPACE_SWITCH: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; break; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -146,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY: - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; break; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: @@ -154,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; - pgm_info->op_access_id = vcpu->arch.sie_block->oai; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.op_access_id = vcpu->arch.sie_block->oai; break; case PGM_MONITOR: - pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; - pgm_info->mon_code = vcpu->arch.sie_block->tecmc; + pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn; + pgm_info.mon_code = vcpu->arch.sie_block->tecmc; break; case PGM_VECTOR_PROCESSING: case PGM_DATA: - pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; + pgm_info.data_exc_code = vcpu->arch.sie_block->dxc; break; case PGM_PROTECTION: - pgm_info->trans_exc_code = 
vcpu->arch.sie_block->tecmc; - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; break; default: break; } if (vcpu->arch.sie_block->iprcc & PGM_PER) { - pgm_info->per_code = vcpu->arch.sie_block->perc; - pgm_info->per_atmid = vcpu->arch.sie_block->peratmid; - pgm_info->per_address = vcpu->arch.sie_block->peraddr; - pgm_info->per_access_id = vcpu->arch.sie_block->peraid; + pgm_info.per_code = vcpu->arch.sie_block->perc; + pgm_info.per_atmid = vcpu->arch.sie_block->peratmid; + pgm_info.per_address = vcpu->arch.sie_block->peraddr; + pgm_info.per_access_id = vcpu->arch.sie_block->peraid; } + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } /* @@ -208,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu) static int handle_prog(struct kvm_vcpu *vcpu) { - struct kvm_s390_pgm_info pgm_info; psw_t psw; int rc; @@ -234,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu) if (rc) return rc; - __extract_prog_irq(vcpu, &pgm_info); - return kvm_s390_inject_prog_irq(vcpu, &pgm_info); + return inject_prog_on_prog_intercept(vcpu); } /** @@ -302,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the source is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], - reg2, &srcaddr, 0); + reg2, &srcaddr, GACC_FETCH); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); @@ -311,14 +327,14 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the destination is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], - reg1, &dstaddr, 1); + reg1, &dstaddr, GACC_STORE); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); if (rc != 0) return rc; - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); return 0; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f88ca72..84efc2b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -23,6 +23,7 @@ #include <asm/uaccess.h> #include <asm/sclp.h> #include <asm/isc.h> +#include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" @@ -182,8 +183,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu) static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) { - return (vcpu->arch.sie_block->cputm >> 63) && - cpu_timer_interrupts_enabled(vcpu); + if (!cpu_timer_interrupts_enabled(vcpu)) + return 0; + return kvm_s390_get_cpu_timer(vcpu) >> 63; } static inline int is_ioirq(unsigned long irq_type) @@ -335,23 +337,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu) set_intercept_indicators_stop(vcpu); } -static u16 get_ilc(struct kvm_vcpu *vcpu) -{ - switch (vcpu->arch.sie_block->icptcode) { - case ICPT_INST: - case ICPT_INSTPROGI: - case ICPT_OPEREXC: - case ICPT_PARTEXEC: - case ICPT_IOINST: - /* last instruction only stored for these icptcodes */ - return insn_length(vcpu->arch.sie_block->ipa >> 8); - case ICPT_PROGI: - return vcpu->arch.sie_block->pgmilc; - default: - return 0; - } -} - static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -588,7 +573,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_pgm_info pgm_info; int rc = 0, nullifying = false; - u16 ilc = get_ilc(vcpu); + u16 ilen; 
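	/*
	 * Annotation, not part of the patch: with this change the program
	 * interrupt's instruction length is carried in bits 1+2 of
	 * pgm_info.flags instead of being recomputed at delivery time.  A
	 * hedged sketch of how an injection site could pass an explicit
	 * length of 4 bytes, using only the flag bits introduced in this
	 * series (the program code is chosen purely for illustration):
	 *
	 *	struct kvm_s390_pgm_info pgm = {
	 *		.code  = PGM_SPECIFICATION,
	 *		.flags = KVM_S390_PGM_FLAGS_ILC_VALID | 4,
	 *	};
	 *	kvm_s390_inject_prog_irq(vcpu, &pgm);
	 */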
spin_lock(&li->lock); pgm_info = li->irq.pgm; @@ -596,8 +581,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) memset(&li->irq.pgm, 0, sizeof(pgm_info)); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d", - pgm_info.code, ilc); + ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK; + VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d", + pgm_info.code, ilen); vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, pgm_info.code, 0); @@ -681,10 +667,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) (u8 *) __LC_PER_ACCESS_ID); } - if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST) - kvm_s390_rewind_psw(vcpu, ilc); + if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND)) + kvm_s390_rewind_psw(vcpu, ilen); - rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); + /* bit 1+2 of the target are the ilc, so we can directly use ilen */ + rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, (u64 *) __LC_LAST_BREAK); rc |= put_guest_lc(vcpu, pgm_info.code, @@ -923,9 +910,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); } +static u64 __calculate_sltime(struct kvm_vcpu *vcpu) +{ + u64 now, cputm, sltime = 0; + + if (ckc_interrupts_enabled(vcpu)) { + now = kvm_s390_get_tod_clock_fast(vcpu->kvm); + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + /* already expired or overflow? */ + if (!sltime || vcpu->arch.sie_block->ckc <= now) + return 0; + if (cpu_timer_interrupts_enabled(vcpu)) { + cputm = kvm_s390_get_cpu_timer(vcpu); + /* already expired? */ + if (cputm >> 63) + return 0; + return min(sltime, tod_to_ns(cputm)); + } + } else if (cpu_timer_interrupts_enabled(vcpu)) { + sltime = kvm_s390_get_cpu_timer(vcpu); + /* already expired? */ + if (sltime >> 63) + return 0; + } + return sltime; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) { - u64 now, sltime; + u64 sltime; vcpu->stat.exit_wait_state++; @@ -938,22 +951,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; /* disabled wait */ } - if (!ckc_interrupts_enabled(vcpu)) { + if (!ckc_interrupts_enabled(vcpu) && + !cpu_timer_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); __set_cpu_idle(vcpu); goto no_timer; } - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - - /* underflow */ - if (vcpu->arch.sie_block->ckc < now) + sltime = __calculate_sltime(vcpu); + if (!sltime) return 0; __set_cpu_idle(vcpu); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); - VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime); + VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); @@ -966,13 +977,13 @@ no_timer: void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { - if (waitqueue_active(&vcpu->wq)) { + if (swait_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, mark it as a good * yield-candidate. 
*/ vcpu->preempted = true; - wake_up_interruptible(&vcpu->wq); + swake_up(&vcpu->wq); vcpu->stat.halt_wakeup++; } } @@ -980,18 +991,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; - u64 now, sltime; + u64 sltime; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + sltime = __calculate_sltime(vcpu); /* * If the monotonic clock runs faster than the tod clock we might be * woken up too early and have to go back to sleep to avoid deadlocks. */ - if (vcpu->arch.sie_block->ckc > now && - hrtimer_forward_now(timer, ns_to_ktime(sltime))) + if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime))) return HRTIMER_RESTART; kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART; @@ -1059,8 +1068,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, irq->u.pgm.code, 0); + if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) { + /* auto detection if no valid ILC was given */ + irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK; + irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu); + irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID; + } + if (irq->u.pgm.code == PGM_PER) { li->irq.pgm.code |= PGM_PER; + li->irq.pgm.flags = irq->u.pgm.flags; /* only modify PER related information */ li->irq.pgm.per_address = irq->u.pgm.per_address; li->irq.pgm.per_code = irq->u.pgm.per_code; @@ -1069,6 +1086,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) } else if (!(irq->u.pgm.code & PGM_PER)) { li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | irq->u.pgm.code; + li->irq.pgm.flags = irq->u.pgm.flags; /* only modify non-PER information */ li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; li->irq.pgm.mon_code = irq->u.pgm.mon_code; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4af21c7..668c087 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -30,6 +30,7 @@ #include <asm/lowcore.h> #include <asm/etr.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include <asm/nmi.h> #include <asm/switch_to.h> #include <asm/isc.h> @@ -158,6 +159,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, kvm->arch.epoch -= *delta; kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.sie_block->epoch -= *delta; + if (vcpu->arch.cputm_enabled) + vcpu->arch.cputm_start += *delta; } } return NOTIFY_OK; @@ -274,16 +277,17 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, unsigned long address; struct gmap *gmap = kvm->arch.gmap; - down_read(&gmap->mm->mmap_sem); /* Loop over all guest pages */ last_gfn = memslot->base_gfn + memslot->npages; for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { address = gfn_to_hva_memslot(memslot, cur_gfn); - if (gmap_test_and_clear_dirty(address, gmap)) + if (test_and_clear_guest_dirty(gmap->mm, address)) mark_page_dirty(kvm, cur_gfn); + if (fatal_signal_pending(current)) + return; + cond_resched(); } - up_read(&gmap->mm->mmap_sem); } /* Section: vm related */ @@ -352,8 +356,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) if (atomic_read(&kvm->online_vcpus)) { r = -EBUSY; } else if (MACHINE_HAS_VX) { - set_kvm_facility(kvm->arch.model.fac->mask, 129); - set_kvm_facility(kvm->arch.model.fac->list, 129); + set_kvm_facility(kvm->arch.model.fac_mask, 129); + 
set_kvm_facility(kvm->arch.model.fac_list, 129); r = 0; } else r = -EINVAL; @@ -367,8 +371,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) if (atomic_read(&kvm->online_vcpus)) { r = -EBUSY; } else if (test_facility(64)) { - set_kvm_facility(kvm->arch.model.fac->mask, 64); - set_kvm_facility(kvm->arch.model.fac->list, 64); + set_kvm_facility(kvm->arch.model.fac_mask, 64); + set_kvm_facility(kvm->arch.model.fac_list, 64); r = 0; } mutex_unlock(&kvm->lock); @@ -651,7 +655,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, sizeof(struct cpuid)); kvm->arch.model.ibc = proc->ibc; - memcpy(kvm->arch.model.fac->list, proc->fac_list, + memcpy(kvm->arch.model.fac_list, proc->fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); } else ret = -EFAULT; @@ -685,7 +689,8 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) } memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); proc->ibc = kvm->arch.model.ibc; - memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE); + memcpy(&proc->fac_list, kvm->arch.model.fac_list, + S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) ret = -EFAULT; kfree(proc); @@ -705,7 +710,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) } get_cpu_id((struct cpuid *) &mach->cpuid); mach->ibc = sclp.ibc; - memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, + memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); @@ -1082,16 +1087,12 @@ static void kvm_s390_get_cpu_id(struct cpuid *cpu_id) cpu_id->version = 0xff; } -static int kvm_s390_crypto_init(struct kvm *kvm) +static void kvm_s390_crypto_init(struct kvm *kvm) { if (!test_kvm_facility(kvm, 76)) - return 0; - - kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), - GFP_KERNEL | GFP_DMA); - if (!kvm->arch.crypto.crycb) - return -ENOMEM; + return; + kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; kvm_s390_set_crycb_format(kvm); /* Enable AES/DEA protected key functions by default */ @@ -1101,8 +1102,6 @@ static int kvm_s390_crypto_init(struct kvm *kvm) sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); - - return 0; } static void sca_dispose(struct kvm *kvm) @@ -1156,37 +1155,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.dbf) goto out_err; - /* - * The architectural maximum amount of facilities is 16 kbit. To store - * this amount, 2 kbyte of memory is required. Thus we need a full - * page to hold the guest facility list (arch.model.fac->list) and the - * facility mask (arch.model.fac->mask). Its address size has to be - * 31 bits and word aligned. - */ - kvm->arch.model.fac = - (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!kvm->arch.model.fac) + kvm->arch.sie_page2 = + (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!kvm->arch.sie_page2) goto out_err; /* Populate the facility mask initially. 
*/ - memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, + memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) - kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; + kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; else - kvm->arch.model.fac->mask[i] = 0UL; + kvm->arch.model.fac_mask[i] = 0UL; } /* Populate the facility list initially. */ - memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask, + kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; + memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); kvm->arch.model.ibc = sclp.ibc & 0x0fff; - if (kvm_s390_crypto_init(kvm) < 0) - goto out_err; + kvm_s390_crypto_init(kvm); spin_lock_init(&kvm->arch.float_int.lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) @@ -1222,8 +1214,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; out_err: - kfree(kvm->arch.crypto.crycb); - free_page((unsigned long)kvm->arch.model.fac); + free_page((unsigned long)kvm->arch.sie_page2); debug_unregister(kvm->arch.dbf); sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); @@ -1269,10 +1260,9 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); - free_page((unsigned long)kvm->arch.model.fac); sca_dispose(kvm); debug_unregister(kvm->arch.dbf); - kfree(kvm->arch.crypto.crycb); + free_page((unsigned long)kvm->arch.sie_page2); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); @@ -1414,8 +1404,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_PFAULT; if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; - if (test_kvm_facility(vcpu->kvm, 129)) + /* fprs can be synchronized via vrs, even if the guest has no vx. With + * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
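	 *
	 * Annotation, not part of the patch: this works because on machines
	 * with the vector facility, floating point register n occupies the
	 * leftmost 64 bits of vector register n, so a vrs-format save area
	 * already contains valid fprs.  Later in this patch the same
	 * relation is used when storing guest status, roughly:
	 *
	 *	convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);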
+ */ + if (MACHINE_HAS_VX) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; + else + vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; if (kvm_is_ucontrol(vcpu->kvm)) return __kvm_ucontrol_vcpu_init(vcpu); @@ -1423,6 +1418,93 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_start != 0); + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + vcpu->arch.cputm_start = get_tod_clock_fast(); + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_start == 0); + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; + vcpu->arch.cputm_start = 0; + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_enabled); + vcpu->arch.cputm_enabled = true; + __start_cpu_timer_accounting(vcpu); +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(!vcpu->arch.cputm_enabled); + __stop_cpu_timer_accounting(vcpu); + vcpu->arch.cputm_enabled = false; +} + +static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + __enable_cpu_timer_accounting(vcpu); + preempt_enable(); +} + +static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + __disable_cpu_timer_accounting(vcpu); + preempt_enable(); +} + +/* set the cpu timer - may only be called from the VCPU thread itself */ +void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + if (vcpu->arch.cputm_enabled) + vcpu->arch.cputm_start = get_tod_clock_fast(); + vcpu->arch.sie_block->cputm = cputm; + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); + preempt_enable(); +} + +/* update and get the cpu timer - can also be called from other VCPU threads */ +__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) +{ + unsigned int seq; + __u64 value; + + if (unlikely(!vcpu->arch.cputm_enabled)) + return vcpu->arch.sie_block->cputm; + + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + do { + seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); + /* + * If the writer would ever execute a read in the critical + * section, e.g. in irq context, we have a deadlock. 
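	 *
	 * Annotation, not part of the patch: this is the usual seqcount
	 * reader pattern.  The low bit of the sequence is set while a write
	 * is in flight, so masking it off below forces a retry until an
	 * even, unchanged count is observed and (cputm, cputm_start) have
	 * been read consistently.  The generic shape is roughly:
	 *
	 *	do {
	 *		seq = raw_read_seqcount(&seqcount);
	 *		snapshot = shared_data;
	 *	} while (read_seqcount_retry(&seqcount, seq & ~1));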
+ */ + WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); + value = vcpu->arch.sie_block->cputm; + /* if cputm_start is 0, accounting is being started/stopped */ + if (likely(vcpu->arch.cputm_start)) + value -= get_tod_clock_fast() - vcpu->arch.cputm_start; + } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); + preempt_enable(); + return value; +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Save host register state */ @@ -1430,10 +1512,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; - /* Depending on MACHINE_HAS_VX, data stored to vrs either - * has vector register or floating point register format. - */ - current->thread.fpu.regs = vcpu->run->s.regs.vrs; + if (MACHINE_HAS_VX) + current->thread.fpu.regs = vcpu->run->s.regs.vrs; + else + current->thread.fpu.regs = vcpu->run->s.regs.fprs; current->thread.fpu.fpc = vcpu->run->s.regs.fpc; if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ @@ -1443,10 +1525,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) + __start_cpu_timer_accounting(vcpu); + vcpu->cpu = cpu; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + vcpu->cpu = -1; + if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) + __stop_cpu_timer_accounting(vcpu); atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); @@ -1468,7 +1556,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gpsw.mask = 0UL; vcpu->arch.sie_block->gpsw.addr = 0UL; kvm_s390_set_prefix(vcpu, 0); - vcpu->arch.sie_block->cputm = 0UL; + kvm_s390_set_cpu_timer(vcpu, 0); vcpu->arch.sie_block->ckc = 0UL; vcpu->arch.sie_block->todpr = 0; memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); @@ -1538,7 +1626,8 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.cpu_id = model->cpu_id; vcpu->arch.sie_block->ibc = model->ibc; - vcpu->arch.sie_block->fac = (int) (long) model->fac->list; + if (test_kvm_facility(vcpu->kvm, 7)) + vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) @@ -1616,6 +1705,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + seqcount_init(&vcpu->arch.cputm_seqcount); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) @@ -1715,7 +1805,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, (u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER: - r = put_user(vcpu->arch.sie_block->cputm, + r = put_user(kvm_s390_get_cpu_timer(vcpu), (u64 __user *)reg->addr); break; case KVM_REG_S390_CLOCK_COMP: @@ -1753,6 +1843,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { int r = -EINVAL; + __u64 val; switch (reg->id) { case KVM_REG_S390_TODPR: @@ -1764,8 +1855,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, (u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER: - r = get_user(vcpu->arch.sie_block->cputm, - (u64 __user *)reg->addr); + r = get_user(val, (u64 __user *)reg->addr); + if (!r) + 
kvm_s390_set_cpu_timer(vcpu, val); break; case KVM_REG_S390_CLOCK_COMP: r = get_user(vcpu->arch.sie_block->ckc, @@ -2158,8 +2250,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - u8 opcode; + struct kvm_s390_pgm_info pgm_info = { + .code = PGM_ADDRESSING, + }; + u8 opcode, ilen; int rc; VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); @@ -2173,12 +2267,21 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) * to look up the current opcode to get the length of the instruction * to be able to forward the PSW. */ - rc = read_guest(vcpu, psw->addr, 0, &opcode, 1); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - psw->addr = __rewind_psw(*psw, -insn_length(opcode)); - - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = read_guest_instr(vcpu, &opcode, 1); + ilen = insn_length(opcode); + if (rc < 0) { + return rc; + } else if (rc) { + /* Instruction-Fetching Exceptions - we can't detect the ilen. + * Forward by arbitrary ilc, injection will take care of + * nullification if necessary. + */ + pgm_info = vcpu->arch.pgm; + ilen = 4; + } + pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; + kvm_s390_forward_psw(vcpu, ilen); + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) @@ -2244,10 +2347,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) */ local_irq_disable(); __kvm_guest_enter(); + __disable_cpu_timer_accounting(vcpu); local_irq_enable(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); local_irq_disable(); + __enable_cpu_timer_accounting(vcpu); __kvm_guest_exit(); local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2271,7 +2376,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { - vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; + kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; @@ -2293,7 +2398,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); - kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; + kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; @@ -2325,6 +2430,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } sync_regs(vcpu, kvm_run); + enable_cpu_timer_accounting(vcpu); might_fault(); rc = __vcpu_run(vcpu); @@ -2344,6 +2450,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } + disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); if (vcpu->sigset_active) @@ -2364,7 +2471,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) unsigned char archmode = 1; freg_t fprs[NUM_FPRS]; unsigned int px; - u64 clkcomp; + u64 clkcomp, cputm; int rc; px = kvm_s390_get_prefix(vcpu); @@ -2381,12 +2488,12 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) /* manually convert vector registers if 
necessary */ if (MACHINE_HAS_VX) { - convert_vx_to_fp(fprs, current->thread.fpu.vxrs); + convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, fprs, 128); } else { rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, - vcpu->run->s.regs.vrs, 128); + vcpu->run->s.regs.fprs, 128); } rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); @@ -2398,8 +2505,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) &vcpu->run->s.regs.fpc, 4); rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); + cputm = kvm_s390_get_cpu_timer(vcpu); rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, - &vcpu->arch.sie_block->cputm, 8); + &cputm, 8); clkcomp = vcpu->arch.sie_block->ckc >> 8; rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, &clkcomp, 8); @@ -2605,7 +2713,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, + mop->size, GACC_FETCH); break; } r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); @@ -2616,7 +2725,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, break; case KVM_S390_MEMOP_LOGICAL_WRITE: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, + mop->size, GACC_STORE); break; } if (copy_from_user(tmpbuf, uaddr, mop->size)) { diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index df1abad..8621ab0 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,6 +19,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/facility.h> +#include <asm/processor.h> typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); @@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; } +static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT; +} + static inline int kvm_is_ucontrol(struct kvm *kvm) { #ifdef CONFIG_KVM_S390_UCONTROL @@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) /* test availability of facility in a kvm instance */ static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) { - return __test_facility(nr, kvm->arch.model.fac->mask) && - __test_facility(nr, kvm->arch.model.fac->list); + return __test_facility(nr, kvm->arch.model.fac_mask) && + __test_facility(nr, kvm->arch.model.fac_list); } static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) @@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm, int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in intercept.c */ -void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); +u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu); int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); +static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen) +{ + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + + sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen); +} +static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen) +{ + kvm_s390_rewind_psw(vcpu, 
-ilen); +} +static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) +{ + kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu)); +} /* implemented in priv.c */ int is_valid_psw(psw_t *psw); @@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); unsigned long kvm_s390_fac_list_mask_size(void); extern unsigned long kvm_s390_fac_list_mask[]; +void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); +__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ed74e86..0a1591d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -23,6 +23,7 @@ #include <asm/sysinfo.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/gmap.h> #include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> @@ -173,7 +174,7 @@ static int handle_skey(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } @@ -184,7 +185,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu) if (psw_bits(vcpu->arch.sie_block->gpsw).p) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); return 0; } @@ -354,7 +355,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) * We need to shift the lower 32 facility bits (bit 0-31) from a u64 * into a u32 memory representation. They will remain bits 0-31. */ - fac = *vcpu->kvm->arch.model.fac->list >> 32; + fac = *vcpu->kvm->arch.model.fac_list >> 32; rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), &fac, sizeof(fac)); if (rc) @@ -759,8 +760,8 @@ static int handle_essa(struct kvm_vcpu *vcpu) if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* Rewind PSW to repeat the ESSA instruction */ - kvm_s390_rewind_psw(vcpu, 4); + /* Retry the ESSA instruction */ + kvm_s390_retry_instr(vcpu); vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); down_read(&gmap->mm->mmap_sem); @@ -981,11 +982,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_lock(vcpu); - ret = guest_translate_address(vcpu, address1, ar, &gpa, 1); + ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE); if (ret == PGM_PROTECTION) { /* Write protected? Try again with read-only... 
*/ cc = 1; - ret = guest_translate_address(vcpu, address1, ar, &gpa, 0); + ret = guest_translate_address(vcpu, address1, ar, &gpa, + GACC_FETCH); } if (ret) { if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 0e8fefe..1d1af31 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,7 @@ # lib-y += delay.o string.o uaccess.o find.o -obj-y += mem.o +obj-y += mem.o xor.o lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c new file mode 100644 index 0000000..7d94e3e --- /dev/null +++ b/arch/s390/lib/xor.c @@ -0,0 +1,134 @@ +/* + * Optimized xor_block operation for RAID4/5 + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/raid/xor.h> + +static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + "3:\n" + : : "d" (bytes), "a" (p1), "a" (p2) + : "0", "1", "cc", "memory"); +} + +static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) + : : "0", "1", "cc", "memory"); +} + +static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " xc 0(256,%1),0(%4)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " la %4,256(%4)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " ex %0,12(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + " xc 0(1,%1),0(%4)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) + : : "0", "1", "cc", "memory"); +} + +static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + /* Get around a gcc oddity */ + register unsigned long *reg7 asm ("7") = p5; + + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " xc 0(256,%1),0(%4)\n" + " xc 0(256,%1),0(%5)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " la %4,256(%4)\n" + " la %5,256(%5)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " ex %0,12(1)\n" + " ex %0,18(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + " xc 0(1,%1),0(%4)\n" + " xc 0(1,%1),0(%5)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), + "+a" (reg7) + : : "0", "1", "cc", "memory"); +} + +struct xor_block_template xor_block_xc = { + .name = "xc", + .do_2 = xor_xc_2, 
+ .do_3 = xor_xc_3, + .do_4 = xor_xc_4, + .do_5 = xor_xc_5, +}; +EXPORT_SYMBOL(xor_block_xc); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 839592c..2ae54ca 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,9 +2,11 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o +obj-y += pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o +obj-$(CONFIG_PGSTE) += gmap.o diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index a1bf4ad..02042b6 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -265,7 +265,7 @@ query_segment_type (struct dcss_segment *seg) goto out_free; } if (diag_cc > 1) { - pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc); + pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc); rc = dcss_diag_translate_rc (vmrc); goto out_free; } @@ -457,8 +457,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long goto out_resource; } if (diag_cc > 1) { - pr_warning("Loading DCSS %s failed with rc=%ld\n", name, - end_addr); + pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr); rc = dcss_diag_translate_rc(end_addr); dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); @@ -574,8 +573,7 @@ segment_modify_shared (char *name, int do_nonshared) goto out_unlock; } if (atomic_read (&seg->ref_count) != 1) { - pr_warning("DCSS %s is in use and cannot be reloaded\n", - name); + pr_warn("DCSS %s is in use and cannot be reloaded\n", name); rc = -EAGAIN; goto out_unlock; } @@ -588,8 +586,8 @@ segment_modify_shared (char *name, int do_nonshared) seg->res->flags |= IORESOURCE_READONLY; if (request_resource(&iomem_resource, seg->res)) { - pr_warning("DCSS %s overlaps with used memory resources " - "and cannot be reloaded\n", name); + pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n", + name); rc = -EBUSY; kfree(seg->res); goto out_del_mem; @@ -607,8 +605,8 @@ segment_modify_shared (char *name, int do_nonshared) goto out_del_res; } if (diag_cc > 1) { - pr_warning("Reloading DCSS %s failed with rc=%ld\n", name, - end_addr); + pr_warn("Reloading DCSS %s failed with rc=%ld\n", + name, end_addr); rc = dcss_diag_translate_rc(end_addr); goto out_del_res; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 791a414..cce577f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -32,6 +32,7 @@ #include <asm/asm-offsets.h> #include <asm/diag.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/facility.h> @@ -183,6 +184,8 @@ static void dump_fault_info(struct pt_regs *regs) { unsigned long asce; + pr_alert("Failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); pr_alert("Fault in "); switch (regs->int_parm_long & 3) { case 3: @@ -218,7 +221,9 @@ static void dump_fault_info(struct pt_regs *regs) dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); } -static inline void report_user_fault(struct pt_regs *regs, long signr) +int show_unhandled_signals = 1; + +void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -230,9 +235,8 @@ static inline void 
report_user_fault(struct pt_regs *regs, long signr) regs->int_code & 0xffff, regs->int_code >> 17); print_vma_addr(KERN_CONT "in ", regs->psw.addr); printk(KERN_CONT "\n"); - printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); - dump_fault_info(regs); + if (is_mm_fault) + dump_fault_info(regs); show_regs(regs); } @@ -244,7 +248,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { struct siginfo si; - report_user_fault(regs, SIGSEGV); + report_user_fault(regs, SIGSEGV, 1); si.si_signo = SIGSEGV; si.si_code = si_code; si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); @@ -272,8 +276,6 @@ static noinline void do_no_context(struct pt_regs *regs) else printk(KERN_ALERT "Unable to handle kernel paging request" " in virtual user address space\n"); - printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); dump_fault_info(regs); die(regs, "Oops"); do_exit(SIGKILL); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c new file mode 100644 index 0000000..69247b4 --- /dev/null +++ b/arch/s390/mm/gmap.c @@ -0,0 +1,774 @@ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/swapops.h> +#include <linux/ksm.h> +#include <linux/mman.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/gmap.h> +#include <asm/tlb.h> + +/** + * gmap_alloc - allocate a guest address space + * @mm: pointer to the parent mm_struct + * @limit: maximum size of the gmap address space + * + * Returns a guest address space structure. 
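 *
 * Illustrative usage sketch (annotation, not part of the patch); the
 * limit value is arbitrary and only functions from this file are used:
 *
 *	gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
 *	if (!gmap)
 *		return -ENOMEM;
 *	gmap_map_segment(gmap, from, to, size);
 *	gmap_enable(gmap);
 *	...
 *	gmap_disable(gmap);
 *	gmap_free(gmap);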
+ */ +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) +{ + struct gmap *gmap; + struct page *page; + unsigned long *table; + unsigned long etype, atype; + + if (limit < (1UL << 31)) { + limit = (1UL << 31) - 1; + atype = _ASCE_TYPE_SEGMENT; + etype = _SEGMENT_ENTRY_EMPTY; + } else if (limit < (1UL << 42)) { + limit = (1UL << 42) - 1; + atype = _ASCE_TYPE_REGION3; + etype = _REGION3_ENTRY_EMPTY; + } else if (limit < (1UL << 53)) { + limit = (1UL << 53) - 1; + atype = _ASCE_TYPE_REGION2; + etype = _REGION2_ENTRY_EMPTY; + } else { + limit = -1UL; + atype = _ASCE_TYPE_REGION1; + etype = _REGION1_ENTRY_EMPTY; + } + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + if (!gmap) + goto out; + INIT_LIST_HEAD(&gmap->crst_list); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); + spin_lock_init(&gmap->guest_table_lock); + gmap->mm = mm; + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + goto out_free; + page->index = 0; + list_add(&page->lru, &gmap->crst_list); + table = (unsigned long *) page_to_phys(page); + crst_table_init(table, etype); + gmap->table = table; + gmap->asce = atype | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + gmap->asce_end = limit; + down_write(&mm->mmap_sem); + list_add(&gmap->list, &mm->context.gmap_list); + up_write(&mm->mmap_sem); + return gmap; + +out_free: + kfree(gmap); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(gmap_alloc); + +static void gmap_flush_tlb(struct gmap *gmap) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, gmap->asce); + else + __tlb_flush_global(); +} + +static void gmap_radix_tree_free(struct radix_tree_root *root) +{ + struct radix_tree_iter iter; + unsigned long indices[16]; + unsigned long index; + void **slot; + int i, nr; + + /* A radix tree is freed by deleting all of its entries */ + index = 0; + do { + nr = 0; + radix_tree_for_each_slot(slot, root, &iter, index) { + indices[nr] = iter.index; + if (++nr == 16) + break; + } + for (i = 0; i < nr; i++) { + index = indices[i]; + radix_tree_delete(root, index); + } + } while (nr > 0); +} + +/** + * gmap_free - free a guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_free(struct gmap *gmap) +{ + struct page *page, *next; + + /* Flush tlb. */ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, gmap->asce); + else + __tlb_flush_global(); + + /* Free all segment & region tables. 
*/ + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) + __free_pages(page, 2); + gmap_radix_tree_free(&gmap->guest_to_host); + gmap_radix_tree_free(&gmap->host_to_guest); + down_write(&gmap->mm->mmap_sem); + list_del(&gmap->list); + up_write(&gmap->mm->mmap_sem); + kfree(gmap); +} +EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_enable - switch primary space to the guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_enable(struct gmap *gmap) +{ + S390_lowcore.gmap = (unsigned long) gmap; +} +EXPORT_SYMBOL_GPL(gmap_enable); + +/** + * gmap_disable - switch back to the standard primary address space + * @gmap: pointer to the guest address space structure + */ +void gmap_disable(struct gmap *gmap) +{ + S390_lowcore.gmap = 0UL; +} +EXPORT_SYMBOL_GPL(gmap_disable); + +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ +static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, + unsigned long init, unsigned long gaddr) +{ + struct page *page; + unsigned long *new; + + /* since we dont free the gmap table until gmap_free we can unlock */ + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + return -ENOMEM; + new = (unsigned long *) page_to_phys(page); + crst_table_init(new, init); + spin_lock(&gmap->mm->page_table_lock); + if (*table & _REGION_ENTRY_INVALID) { + list_add(&page->lru, &gmap->crst_list); + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + (*table & _REGION_ENTRY_TYPE_MASK); + page->index = gaddr; + page = NULL; + } + spin_unlock(&gmap->mm->page_table_lock); + if (page) + __free_pages(page, 2); + return 0; +} + +/** + * __gmap_segment_gaddr - find virtual address from segment pointer + * @entry: pointer to a segment table entry in the guest address space + * + * Returns the virtual address in the guest address space for the segment + */ +static unsigned long __gmap_segment_gaddr(unsigned long *entry) +{ + struct page *page; + unsigned long offset, mask; + + offset = (unsigned long) entry / sizeof(unsigned long); + offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + page = virt_to_page((void *)((unsigned long) entry & mask)); + return page->index + offset; +} + +/** + * __gmap_unlink_by_vmaddr - unlink a single segment via a host address + * @gmap: pointer to the guest address space structure + * @vmaddr: address in the host process address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +{ + unsigned long *entry; + int flush = 0; + + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); + if (entry) { + flush = (*entry != _SEGMENT_ENTRY_INVALID); + *entry = _SEGMENT_ENTRY_INVALID; + } + spin_unlock(&gmap->guest_table_lock); + return flush; +} + +/** + * __gmap_unmap_by_gaddr - unmap a single segment via a guest address + * @gmap: pointer to the guest address space structure + * @gaddr: address in the guest address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + return vmaddr ? 
__gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; +} + +/** + * gmap_unmap_segment - unmap segment from the guest address space + * @gmap: pointer to the guest address space structure + * @to: address in the guest address space + * @len: length of the memory area to unmap + * + * Returns 0 if the unmap succeeded, -EINVAL if not. + */ +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +{ + unsigned long off; + int flush; + + if ((to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || to + len < to) + return -EINVAL; + + flush = 0; + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + up_write(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; +} +EXPORT_SYMBOL_GPL(gmap_unmap_segment); + +/** + * gmap_map_segment - map a segment to the guest address space + * @gmap: pointer to the guest address space structure + * @from: source address in the parent address space + * @to: target address in the guest address space + * @len: length of the memory area to map + * + * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. + */ +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len) +{ + unsigned long off; + int flush; + + if ((from | to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || from + len < from || to + len < to || + from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + return -EINVAL; + + flush = 0; + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) { + /* Remove old translation */ + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + /* Store new translation */ + if (radix_tree_insert(&gmap->guest_to_host, + (to + off) >> PMD_SHIFT, + (void *) from + off)) + break; + } + up_write(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + if (off >= len) + return 0; + gmap_unmap_segment(gmap, to, len); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gmap_map_segment); + +/** + * __gmap_translate - translate a guest address to a user space address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); + return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. 
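 *
 * Illustrative sketch (annotation, not part of the patch) of how a
 * caller is expected to distinguish the two cases, mirroring the check
 * used by gmap_fault() further down in this file:
 *
 *	vmaddr = gmap_translate(gmap, gaddr);
 *	if (IS_ERR_VALUE(vmaddr))
 *		return vmaddr;		/* no mapping: -EFAULT */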
+ */ +unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(gmap, gaddr); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + +/** + * gmap_unlink - disconnect a page table from the gmap shadow tables + * @gmap: pointer to guest mapping meta data structure + * @table: pointer to the host page table + * @vmaddr: vm address associated with the host page table + */ +void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) +{ + struct gmap *gmap; + int flush; + + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); + if (flush) + gmap_flush_tlb(gmap); + } +} + +/** + * gmap_link - set up shadow page tables to connect a host to a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @vmaddr: vm address + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) +{ + struct mm_struct *mm; + unsigned long *table; + spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + int rc; + + /* Create higher level tables in the gmap page table */ + table = gmap->table; + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { + table += (gaddr >> 53) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, + gaddr & 0xffe0000000000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { + table += (gaddr >> 42) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, + gaddr & 0xfffffc0000000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { + table += (gaddr >> 31) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, + gaddr & 0xffffffff80000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + table += (gaddr >> 20) & 0x7ff; + /* Walk the parent mm page table */ + mm = gmap->mm; + pgd = pgd_offset(mm, vmaddr); + VM_BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, vmaddr); + VM_BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); + /* large pmds cannot yet be handled */ + if (pmd_large(*pmd)) + return -EFAULT; + /* Link gmap segment table entry location to page table. 
*/ + rc = radix_tree_preload(GFP_KERNEL); + if (rc) + return rc; + ptl = pmd_lock(mm, pmd); + spin_lock(&gmap->guest_table_lock); + if (*table == _SEGMENT_ENTRY_INVALID) { + rc = radix_tree_insert(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT, table); + if (!rc) + *table = pmd_val(*pmd); + } else + rc = 0; + spin_unlock(&gmap->guest_table_lock); + spin_unlock(ptl); + radix_tree_preload_end(); + return rc; +} + +/** + * gmap_fault - resolve a fault on a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @fault_flags: flags to pass down to handle_mm_fault() + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + */ +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) +{ + unsigned long vmaddr; + int rc; + bool unlocked; + + down_read(&gmap->mm->mmap_sem); + +retry: + unlocked = false; + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; + goto out_up; + } + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + &unlocked)) { + rc = -EFAULT; + goto out_up; + } + /* + * In the case that fixup_user_fault unlocked the mmap_sem during + * faultin redo __gmap_translate to not race with a map/unmap_segment. + */ + if (unlocked) + goto retry; + + rc = __gmap_link(gmap, gaddr, vmaddr); +out_up: + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_fault); + +/* + * this function is assumed to be called with mmap_sem held + */ +void __gmap_zap(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + spinlock_t *ptl; + pte_t *ptep; + + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (vmaddr) { + vmaddr |= gaddr & ~PMD_MASK; + /* Get pointer to the page table entry */ + ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); + if (likely(ptep)) + ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); + pte_unmap_unlock(ptep, ptl); + } +} +EXPORT_SYMBOL_GPL(__gmap_zap); + +void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) +{ + unsigned long gaddr, vmaddr, size; + struct vm_area_struct *vma; + + down_read(&gmap->mm->mmap_sem); + for (gaddr = from; gaddr < to; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) + continue; + vmaddr |= gaddr & ~PMD_MASK; + /* Find vma in the parent mm */ + vma = find_vma(gmap->mm, vmaddr); + size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); + zap_page_range(vma, vmaddr, size, NULL); + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} 
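[ Illustrative aside, not part of this commit: a rough sketch of how a consumer
  such as KVM might wire these exports together. The my_-prefixed names are
  hypothetical and error handling is omitted. ]

    static void my_invalidate_cb(struct gmap *gmap, unsigned long gaddr)
    {
        /* a guest page lost its mapping; drop any state cached for gaddr */
    }

    static struct gmap_notifier my_nb = {
        .notifier_call = my_invalidate_cb,
    };

    static int my_map_and_touch(struct gmap *gmap, unsigned long host_base,
                                unsigned long guest_base)
    {
        gmap_register_ipte_notifier(&my_nb);
        /* back one guest segment with host memory (both PMD aligned) ... */
        if (gmap_map_segment(gmap, host_base, guest_base, PMD_SIZE))
            return -EINVAL;
        /* ... and resolve the first access up front */
        return gmap_fault(gmap, guest_base, FAULT_FLAG_WRITE);
    }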
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep; + bool unlocked; + int rc = 0; + + if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + unlocked = false; + /* Convert gmap address and connect the page tables */ + addr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, + &unlocked)) { + rc = -EFAULT; + break; + } + /* While trying to map mmap_sem got unlocked. Let us retry */ + if (unlocked) + continue; + rc = __gmap_link(gmap, gaddr, addr); + if (rc) + break; + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + VM_BUG_ON(!ptep); + /* Set notification bit in the pgste of the pte */ + if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { + ptep_set_notify(gmap->mm, addr, ptep); + gaddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + pte_unmap_unlock(ptep, ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * ptep_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) +{ + unsigned long offset, gaddr; + unsigned long *table; + struct gmap_notifier *nb; + struct gmap *gmap; + + offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + offset = offset * (4096 / sizeof(pte_t)); + spin_lock(&gmap_notifier_lock); + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + table = radix_tree_lookup(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (!table) + continue; + gaddr = __gmap_segment_gaddr(table) + offset; + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(gmap, gaddr); + } + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(ptep_notify); + +static inline void thp_split_mm(struct mm_struct *mm) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct vm_area_struct *vma; + unsigned long addr; + + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + for (addr = vma->vm_start; + addr < vma->vm_end; + addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); + vma->vm_flags &= ~VM_HUGEPAGE; + vma->vm_flags |= VM_NOHUGEPAGE; + } + mm->def_flags |= VM_NOHUGEPAGE; +#endif +} + +/* + * switch on pgstes for its userspace process (for kvm) + */ +int s390_enable_sie(void) +{ + struct mm_struct *mm = current->mm; + + /* Do we have pgstes? 
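[ Illustrative aside, not part of this commit: ptep_notify() above turns the
  pte pointer's position inside its 2 KiB page table into the byte offset of
  the affected page within the 1 MiB guest segment. The same arithmetic in
  isolation, assuming 8-byte ptes as on s390: ]

    #include <stdio.h>
    #include <stdint.h>

    #define PTE_BYTES 8UL                              /* sizeof(pte_t) */

    int main(void)
    {
        uintptr_t ptep = 0x12340000UL + 5 * PTE_BYTES; /* 6th pte of the table */
        unsigned long offset;

        offset = ptep & (255 * PTE_BYTES);             /* byte offset in table */
        offset = offset * (4096 / PTE_BYTES);          /* pte index * 4 KiB    */
        printf("offset into segment: 0x%lx\n", offset); /* prints 0x5000 */
        return 0;
    }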
if yes, we are done */ + if (mm_has_pgste(mm)) + return 0; + /* Fail if the page tables are 2K */ + if (!mm_alloc_pgste(mm)) + return -EINVAL; + down_write(&mm->mmap_sem); + mm->context.has_pgste = 1; + /* split thp mappings and disable thp for future mappings */ + thp_split_mm(mm); + up_write(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); + +/* + * Enable storage key handling from now on and initialize the storage + * keys with the default key. + */ +static int __s390_enable_skey(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + /* + * Remove all zero page mappings, + * after establishing a policy to forbid zero page mappings + * following faults for that page will get fresh anonymous pages + */ + if (is_zero_pfn(pte_pfn(*pte))) + ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); + /* Clear storage key */ + ptep_zap_key(walk->mm, addr, pte); + return 0; +} + +int s390_enable_skey(void) +{ + struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc = 0; + + down_write(&mm->mmap_sem); + if (mm_use_skey(mm)) + goto out_up; + + mm->context.use_skey = 1; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (ksm_madvise(vma, vma->vm_start, vma->vm_end, + MADV_UNMERGEABLE, &vma->vm_flags)) { + mm->context.use_skey = 0; + rc = -ENOMEM; + goto out_up; + } + } + mm->def_flags &= ~VM_MERGEABLE; + + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + +out_up: + up_write(&mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(s390_enable_skey); + +/* + * Reset CMMA state, make all pages stable again. + */ +static int __s390_reset_cmma(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + ptep_zap_unused(walk->mm, addr, pte, 1); + return 0; +} + +void s390_reset_cmma(struct mm_struct *mm) +{ + struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; + + down_write(&mm->mmap_sem); + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + up_write(&mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(s390_reset_cmma); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index f81096b..1b5e898 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -105,11 +105,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pmd_t *pmdp = (pmd_t *) ptep; - pte_t pte = huge_ptep_get(ptep); + pmd_t old; - pmdp_flush_direct(mm, addr, pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; - return pte; + old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return __pmd_to_pte(old); } pte_t *huge_pte_alloc(struct mm_struct *mm, diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 749c984..f2a5c29 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -65,19 +65,17 @@ static pte_t *walk_page_table(unsigned long addr) static void change_page_attr(unsigned long addr, int numpages, pte_t (*set) (pte_t)) { - pte_t *ptep, pte; + pte_t *ptep; int i; for (i = 0; i < numpages; i++) { ptep = walk_page_table(addr); if (WARN_ON_ONCE(!ptep)) break; - pte = *ptep; - pte = set(pte); - __ptep_ipte(addr, ptep); - *ptep = pte; + *ptep = set(*ptep); addr += PAGE_SIZE; } + __tlb_flush_kernel(); } int set_memory_ro(unsigned long addr, int numpages) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c new file mode 100644 index 0000000..f6c3de2 --- /dev/null +++ b/arch/s390/mm/pgalloc.c @@ -0,0 +1,360 @@ +/* + * Page table allocation functions + * + * Copyright IBM 
Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/gmap.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +#ifdef CONFIG_PGSTE + +static int page_table_allocate_pgste_min = 0; +static int page_table_allocate_pgste_max = 1; +int page_table_allocate_pgste = 0; +EXPORT_SYMBOL(page_table_allocate_pgste); + +static struct ctl_table page_table_sysctl[] = { + { + .procname = "allocate_pgste", + .data = &page_table_allocate_pgste, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + .extra1 = &page_table_allocate_pgste_min, + .extra2 = &page_table_allocate_pgste_max, + }, + { } +}; + +static struct ctl_table page_table_sysctl_dir[] = { + { + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = page_table_sysctl, + }, + { } +}; + +static int __init page_table_register_sysctl(void) +{ + return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; +} +__initcall(page_table_register_sysctl); + +#endif /* CONFIG_PGSTE */ + +unsigned long *crst_table_alloc(struct mm_struct *mm) +{ + struct page *page = alloc_pages(GFP_KERNEL, 2); + + if (!page) + return NULL; + return (unsigned long *) page_to_phys(page); +} + +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + free_pages((unsigned long) table, 2); +} + +static void __crst_table_upgrade(void *arg) +{ + struct mm_struct *mm = arg; + + if (current->active_mm == mm) { + clear_user_asce(); + set_user_asce(mm); + } + __tlb_flush_local(); +} + +int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +{ + unsigned long *table, *pgd; + unsigned long entry; + int flush; + + BUG_ON(limit > TASK_MAX_SIZE); + flush = 0; +repeat: + table = crst_table_alloc(mm); + if (!table) + return -ENOMEM; + spin_lock_bh(&mm->page_table_lock); + if (mm->context.asce_limit < limit) { + pgd = (unsigned long *) mm->pgd; + if (mm->context.asce_limit <= (1UL << 31)) { + entry = _REGION3_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + } else { + entry = _REGION2_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 53; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION2; + } + crst_table_init(table, entry); + pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->task_size = mm->context.asce_limit; + table = NULL; + flush = 1; + } + spin_unlock_bh(&mm->page_table_lock); + if (table) + crst_table_free(mm, table); + if (mm->context.asce_limit < limit) + goto repeat; + if (flush) + on_each_cpu(__crst_table_upgrade, mm, 0); + return 0; +} + +void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +{ + pgd_t *pgd; + + if (current->active_mm == mm) { + clear_user_asce(); + __tlb_flush_mm(mm); + } + while (mm->context.asce_limit > limit) { + pgd = mm->pgd; + switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { + case _REGION_ENTRY_TYPE_R2: + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + break; + case _REGION_ENTRY_TYPE_R3: + mm->context.asce_limit = 1UL << 31; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_SEGMENT; + break; + default: + BUG(); + } + mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); 
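[ Illustrative aside, not part of this commit: the vm.allocate_pgste sysctl
  registered above can be flipped from user space before starting a process
  that needs PGSTEs (the usual reason being KVM), so its page tables are
  allocated as full 4 KiB tables. A minimal sketch: ]

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/sys/vm/allocate_pgste", "w");

        if (!f)
            return 1;       /* no CONFIG_PGSTE, or insufficient permissions */
        fputs("1", f);      /* 0: 2 KiB tables (default), 1: 4 KiB + PGSTEs */
        fclose(f);
        return 0;
    }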
+ } + if (current->active_mm == mm) + set_user_asce(mm); +} + +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + +/* + * page table entry allocation/free routines. + */ +unsigned long *page_table_alloc(struct mm_struct *mm) +{ + unsigned long *table; + struct page *page; + unsigned int mask, bit; + + /* Try to get a fragment of a 4K page as a 2K page table */ + if (!mm_alloc_pgste(mm)) { + table = NULL; + spin_lock_bh(&mm->context.list_lock); + if (!list_empty(&mm->context.pgtable_list)) { + page = list_first_entry(&mm->context.pgtable_list, + struct page, lru); + mask = atomic_read(&page->_mapcount); + mask = (mask | (mask >> 4)) & 3; + if (mask != 3) { + table = (unsigned long *) page_to_phys(page); + bit = mask & 1; /* =1 -> second 2K */ + if (bit) + table += PTRS_PER_PTE; + atomic_xor_bits(&page->_mapcount, 1U << bit); + list_del(&page->lru); + } + } + spin_unlock_bh(&mm->context.list_lock); + if (table) + return table; + } + /* Allocate a fresh page */ + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + /* Initialize page table */ + table = (unsigned long *) page_to_phys(page); + if (mm_alloc_pgste(mm)) { + /* Return 4K page table with PGSTEs */ + atomic_set(&page->_mapcount, 3); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + } else { + /* Return the first 2K fragment of the page */ + atomic_set(&page->_mapcount, 1); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); + spin_lock_bh(&mm->context.list_lock); + list_add(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + } + return table; +} + +void page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned int bit, mask; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (!mm_alloc_pgste(mm)) { + /* Free 2K page table fragment of a 4K page */ + bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); + spin_lock_bh(&mm->context.list_lock); + mask = atomic_xor_bits(&page->_mapcount, 1U << bit); + if (mask & 3) + list_add(&page->lru, &mm->context.pgtable_list); + else + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + if (mask != 0) + return; + } + + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); +} + +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, + unsigned long vmaddr) +{ + struct mm_struct *mm; + struct page *page; + unsigned int bit, mask; + + mm = tlb->mm; + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (mm_alloc_pgste(mm)) { + gmap_unlink(mm, table, vmaddr); + table = (unsigned long *) (__pa(table) | 3); + tlb_remove_table(tlb, table); + return; + } + bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); + spin_lock_bh(&mm->context.list_lock); + mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); + if (mask & 3) + list_add_tail(&page->lru, &mm->context.pgtable_list); + else + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *) (__pa(table) | (1U << bit)); + tlb_remove_table(tlb, table); +} + +static void __tlb_remove_table(void *_table) +{ + unsigned int mask = (unsigned long) _table & 3; + void *table = (void *)((unsigned long) _table ^ mask); + struct page *page = pfn_to_page(__pa(table) >> 
PAGE_SHIFT); + + switch (mask) { + case 0: /* pmd or pud */ + free_pages((unsigned long) table, 2); + break; + case 1: /* lower 2K of a 4K page table */ + case 2: /* higher 2K of a 4K page table */ + if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) + break; + /* fallthrough */ + case 3: /* 4K page table with pgstes */ + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); + break; + } +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + tlb->mm->context.flush_mm = 1; + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm_lazy(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_flush_mmu(tlb); +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5109827..4324b87 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,591 +24,397 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> -unsigned long *crst_table_alloc(struct mm_struct *mm) -{ - struct page *page = alloc_pages(GFP_KERNEL, 2); - - if (!page) - return NULL; - return (unsigned long *) page_to_phys(page); +static inline pte_t ptep_flush_direct(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + int active, count; + pte_t old; + + old = *ptep; + if (unlikely(pte_val(old) & _PAGE_INVALID)) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __ptep_ipte_local(addr, ptep); + else + __ptep_ipte(addr, ptep); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -void crst_table_free(struct mm_struct *mm, unsigned long *table) +static inline pte_t ptep_flush_lazy(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - free_pages((unsigned long) table, 2); + int active, count; + pte_t old; + + old = *ptep; + if (unlikely(pte_val(old) & _PAGE_INVALID)) + return old; + active = (mm == current->active_mm) ? 
1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pte_val(*ptep) |= _PAGE_INVALID; + mm->context.flush_mm = 1; + } else + __ptep_ipte(addr, ptep); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -static void __crst_table_upgrade(void *arg) +static inline pgste_t pgste_get_lock(pte_t *ptep) { - struct mm_struct *mm = arg; + unsigned long new = 0; +#ifdef CONFIG_PGSTE + unsigned long old; - if (current->active_mm == mm) { - clear_user_asce(); - set_user_asce(mm); - } - __tlb_flush_local(); + preempt_disable(); + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); +#endif + return __pgste(new); } -int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { - unsigned long *table, *pgd; - unsigned long entry; - int flush; - - BUG_ON(limit > TASK_MAX_SIZE); - flush = 0; -repeat: - table = crst_table_alloc(mm); - if (!table) - return -ENOMEM; - spin_lock_bh(&mm->page_table_lock); - if (mm->context.asce_limit < limit) { - pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit <= (1UL << 31)) { - entry = _REGION3_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - } else { - entry = _REGION2_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 53; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION2; - } - crst_table_init(table, entry); - pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->task_size = mm->context.asce_limit; - table = NULL; - flush = 1; - } - spin_unlock_bh(&mm->page_table_lock); - if (table) - crst_table_free(mm, table); - if (mm->context.asce_limit < limit) - goto repeat; - if (flush) - on_each_cpu(__crst_table_upgrade, mm, 0); - return 0; +#ifdef CONFIG_PGSTE + asm( + " nihh %1,0xff7f\n" /* clear PCL bit */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) + : "cc", "memory"); + preempt_enable(); +#endif } -void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +static inline pgste_t pgste_get(pte_t *ptep) { - pgd_t *pgd; - - if (current->active_mm == mm) { - clear_user_asce(); - __tlb_flush_mm(mm); - } - while (mm->context.asce_limit > limit) { - pgd = mm->pgd; - switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { - case _REGION_ENTRY_TYPE_R2: - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - break; - case _REGION_ENTRY_TYPE_R3: - mm->context.asce_limit = 1UL << 31; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_SEGMENT; - break; - default: - BUG(); - } - mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->task_size = mm->context.asce_limit; - crst_table_free(mm, (unsigned long *) pgd); - } - if (current->active_mm == mm) - set_user_asce(mm); + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); } +static inline void pgste_set(pte_t *ptep, pgste_t pgste) +{ #ifdef CONFIG_PGSTE + *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; +#endif +} -/** - * gmap_alloc - allocate a guest address space - * @mm: 
pointer to the parent mm_struct - * @limit: maximum address of the gmap address space - * - * Returns a guest address space structure. - */ -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) +static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, + struct mm_struct *mm) { - struct gmap *gmap; - struct page *page; - unsigned long *table; - unsigned long etype, atype; - - if (limit < (1UL << 31)) { - limit = (1UL << 31) - 1; - atype = _ASCE_TYPE_SEGMENT; - etype = _SEGMENT_ENTRY_EMPTY; - } else if (limit < (1UL << 42)) { - limit = (1UL << 42) - 1; - atype = _ASCE_TYPE_REGION3; - etype = _REGION3_ENTRY_EMPTY; - } else if (limit < (1UL << 53)) { - limit = (1UL << 53) - 1; - atype = _ASCE_TYPE_REGION2; - etype = _REGION2_ENTRY_EMPTY; - } else { - limit = -1UL; - atype = _ASCE_TYPE_REGION1; - etype = _REGION1_ENTRY_EMPTY; - } - gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); - if (!gmap) - goto out; - INIT_LIST_HEAD(&gmap->crst_list); - INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); - INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); - spin_lock_init(&gmap->guest_table_lock); - gmap->mm = mm; - page = alloc_pages(GFP_KERNEL, 2); - if (!page) - goto out_free; - page->index = 0; - list_add(&page->lru, &gmap->crst_list); - table = (unsigned long *) page_to_phys(page); - crst_table_init(table, etype); - gmap->table = table; - gmap->asce = atype | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(table); - gmap->asce_end = limit; - down_write(&mm->mmap_sem); - list_add(&gmap->list, &mm->context.gmap_list); - up_write(&mm->mmap_sem); - return gmap; - -out_free: - kfree(gmap); -out: - return NULL; +#ifdef CONFIG_PGSTE + unsigned long address, bits, skey; + + if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) + return pgste; + address = pte_val(pte) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; +#endif + return pgste; + } -EXPORT_SYMBOL_GPL(gmap_alloc); -static void gmap_flush_tlb(struct gmap *gmap) +static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, + struct mm_struct *mm) { - if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); - else - __tlb_flush_global(); +#ifdef CONFIG_PGSTE + unsigned long address; + unsigned long nkey; + + if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) + return; + VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); + address = pte_val(entry) & PAGE_MASK; + /* + * Set page access key and fetch protection bit from pgste. + * The guest C/R information is still in the PGSTE, set real + * key C/R to 0. 
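[ Illustrative aside, not part of this commit: the pgste_get_lock() /
  pgste_set_unlock() pair above implements a tiny spinlock inside the pgste
  itself, spinning on the PCL bit (0x0080 of the high halfword) with
  compare-and-swap. An equivalent user-space sketch using C11 atomics: ]

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PGSTE_PCL_BIT 0x0080000000000000ULL    /* set by "oihh 0x0080" */

    static uint64_t pgste_lock(_Atomic uint64_t *pgste)
    {
        uint64_t old, locked;

        do {
            old = atomic_load(pgste) & ~PGSTE_PCL_BIT;  /* expect unlocked */
            locked = old | PGSTE_PCL_BIT;
        } while (!atomic_compare_exchange_weak(pgste, &old, locked));
        return old;                                     /* value minus PCL */
    }

    static void pgste_unlock(_Atomic uint64_t *pgste, uint64_t val)
    {
        atomic_store(pgste, val & ~PGSTE_PCL_BIT);
    }

    int main(void)
    {
        _Atomic uint64_t pgste = 0;
        uint64_t v = pgste_lock(&pgste);

        pgste_unlock(&pgste, v);
        printf("pgste after unlock: 0x%llx\n", (unsigned long long)pgste);
        return 0;
    }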
+ */ + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; + page_set_storage_key(address, nkey, 0); +#endif } -static void gmap_radix_tree_free(struct radix_tree_root *root) +static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) { - struct radix_tree_iter iter; - unsigned long indices[16]; - unsigned long index; - void **slot; - int i, nr; - - /* A radix tree is freed by deleting all of its entries */ - index = 0; - do { - nr = 0; - radix_tree_for_each_slot(slot, root, &iter, index) { - indices[nr] = iter.index; - if (++nr == 16) - break; - } - for (i = 0; i < nr; i++) { - index = indices[i]; - radix_tree_delete(root, index); +#ifdef CONFIG_PGSTE + if ((pte_val(entry) & _PAGE_PRESENT) && + (pte_val(entry) & _PAGE_WRITE) && + !(pte_val(entry) & _PAGE_INVALID)) { + if (!MACHINE_HAS_ESOP) { + /* + * Without enhanced suppression-on-protection force + * the dirty bit on for all writable ptes. + */ + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; } - } while (nr > 0); + if (!(pte_val(entry) & _PAGE_PROTECT)) + /* This pte allows write access, set user-dirty */ + pgste_val(pgste) |= PGSTE_UC_BIT; + } +#endif + *ptep = entry; + return pgste; } -/** - * gmap_free - free a guest address space - * @gmap: pointer to the guest address space structure - */ -void gmap_free(struct gmap *gmap) +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) { - struct page *page, *next; - - /* Flush tlb. */ - if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); - else - __tlb_flush_global(); - - /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, 2); - gmap_radix_tree_free(&gmap->guest_to_host); - gmap_radix_tree_free(&gmap->host_to_guest); - down_write(&gmap->mm->mmap_sem); - list_del(&gmap->list); - up_write(&gmap->mm->mmap_sem); - kfree(gmap); +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; + ptep_notify(mm, addr, ptep); + } +#endif + return pgste; } -EXPORT_SYMBOL_GPL(gmap_free); -/** - * gmap_enable - switch primary space to the guest address space - * @gmap: pointer to the guest address space structure - */ -void gmap_enable(struct gmap *gmap) +static inline pgste_t ptep_xchg_start(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - S390_lowcore.gmap = (unsigned long) gmap; + pgste_t pgste = __pgste(0); + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + } + return pgste; } -EXPORT_SYMBOL_GPL(gmap_enable); -/** - * gmap_disable - switch back to the standard primary address space - * @gmap: pointer to the guest address space structure - */ -void gmap_disable(struct gmap *gmap) +static inline void ptep_xchg_commit(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pgste_t pgste, pte_t old, pte_t new) { - S390_lowcore.gmap = 0UL; + if (mm_has_pgste(mm)) { + if (pte_val(old) & _PAGE_INVALID) + pgste_set_key(ptep, pgste, new, mm); + if (pte_val(new) & _PAGE_INVALID) { + pgste = pgste_update_all(old, pgste, mm); + if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == + _PGSTE_GPS_USAGE_UNUSED) + pte_val(old) |= _PAGE_UNUSED; + } + pgste = pgste_set_pte(ptep, pgste, new); + pgste_set_unlock(ptep, pgste); + } else { + *ptep = new; + } } -EXPORT_SYMBOL_GPL(gmap_disable); -/* - * gmap_alloc_table is assumed to be called with 
mmap_sem held - */ -static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, - unsigned long init, unsigned long gaddr) +pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t new) { - struct page *page; - unsigned long *new; - - /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, 2); - if (!page) - return -ENOMEM; - new = (unsigned long *) page_to_phys(page); - crst_table_init(new, init); - spin_lock(&gmap->mm->page_table_lock); - if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); - *table = (unsigned long) new | _REGION_ENTRY_LENGTH | - (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; - page = NULL; - } - spin_unlock(&gmap->mm->page_table_lock); - if (page) - __free_pages(page, 2); - return 0; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_direct(mm, addr, ptep); + ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + return old; } +EXPORT_SYMBOL(ptep_xchg_direct); -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) +pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t new) { - struct page *page; - unsigned long offset, mask; - - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); - page = virt_to_page((void *)((unsigned long) entry & mask)); - return page->index + offset; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_lazy(mm, addr, ptep); + ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + return old; } +EXPORT_SYMBOL(ptep_xchg_lazy); -/** - * __gmap_unlink_by_vmaddr - unlink a single segment via a host address - * @gmap: pointer to the guest address space structure - * @vmaddr: address in the host process address space - * - * Returns 1 if a TLB flush is required - */ -static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - unsigned long *entry; - int flush = 0; - - spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_INVALID); - *entry = _SEGMENT_ENTRY_INVALID; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_lazy(mm, addr, ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(old, pgste, mm); + pgste_set(ptep, pgste); } - spin_unlock(&gmap->guest_table_lock); - return flush; + return old; } +EXPORT_SYMBOL(ptep_modify_prot_start); -/** - * __gmap_unmap_by_gaddr - unmap a single segment via a guest address - * @gmap: pointer to the guest address space structure - * @gaddr: address in the guest address space - * - * Returns 1 if a TLB flush is required - */ -static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { - unsigned long vmaddr; + pgste_t pgste; - vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - return vmaddr ? 
__gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; + if (mm_has_pgste(mm)) { + pgste = pgste_get(ptep); + pgste_set_key(ptep, pgste, pte, mm); + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else { + *ptep = pte; + } } +EXPORT_SYMBOL(ptep_modify_prot_commit); -/** - * gmap_unmap_segment - unmap segment from the guest address space - * @gmap: pointer to the guest address space structure - * @to: address in the guest address space - * @len: length of the memory area to unmap - * - * Returns 0 if the unmap succeeded, -EINVAL if not. - */ -int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) { - unsigned long off; - int flush; - - if ((to | len) & (PMD_SIZE - 1)) - return -EINVAL; - if (len == 0 || to + len < to) - return -EINVAL; - - flush = 0; - down_write(&gmap->mm->mmap_sem); - for (off = 0; off < len; off += PMD_SIZE) - flush |= __gmap_unmap_by_gaddr(gmap, to + off); - up_write(&gmap->mm->mmap_sem); - if (flush) - gmap_flush_tlb(gmap); - return 0; + int active, count; + pmd_t old; + + old = *pmdp; + if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) + return old; + if (!MACHINE_HAS_IDTE) { + __pmdp_csp(pmdp); + return old; + } + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pmdp_idte_local(addr, pmdp); + else + __pmdp_idte(addr, pmdp); + atomic_sub(0x10000, &mm->context.attach_count); + return old; +} + +static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + int active, count; + pmd_t old; + + old = *pmdp; + if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + mm->context.flush_mm = 1; + } else if (MACHINE_HAS_IDTE) + __pmdp_idte(addr, pmdp); + else + __pmdp_csp(pmdp); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -EXPORT_SYMBOL_GPL(gmap_unmap_segment); - -/** - * gmap_mmap_segment - map a segment to the guest address space - * @gmap: pointer to the guest address space structure - * @from: source address in the parent address space - * @to: target address in the guest address space - * @len: length of the memory area to map - * - * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. 
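[ Illustrative aside, not part of this commit: the *_flush_lazy() helpers above
  skip the immediate TLB flush when nobody else has the address space attached;
  the low halfword of mm->context.attach_count roughly counts attached users,
  and adding 0x10000 announces an in-flight flusher in the high halfword. The
  decision in isolation: ]

    #include <stdio.h>

    int main(void)
    {
        unsigned int attach_count = 1;   /* only the caller uses this mm */
        int active = 1;                  /* caller is running on this mm */
        unsigned int count = attach_count + 0x10000;

        if ((count & 0xffff) <= active)
            printf("lazy: mark the entry invalid, flush the TLB later\n");
        else
            printf("direct: issue ipte/idte right away\n");
        return 0;
    }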
- */ -int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long len) + +pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t new) { - unsigned long off; - int flush; - - if ((from | to | len) & (PMD_SIZE - 1)) - return -EINVAL; - if (len == 0 || from + len < from || to + len < to || - from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) - return -EINVAL; - - flush = 0; - down_write(&gmap->mm->mmap_sem); - for (off = 0; off < len; off += PMD_SIZE) { - /* Remove old translation */ - flush |= __gmap_unmap_by_gaddr(gmap, to + off); - /* Store new translation */ - if (radix_tree_insert(&gmap->guest_to_host, - (to + off) >> PMD_SHIFT, - (void *) from + off)) - break; - } - up_write(&gmap->mm->mmap_sem); - if (flush) - gmap_flush_tlb(gmap); - if (off >= len) - return 0; - gmap_unmap_segment(gmap, to, len); - return -ENOMEM; + pmd_t old; + + old = pmdp_flush_direct(mm, addr, pmdp); + *pmdp = new; + return old; } -EXPORT_SYMBOL_GPL(gmap_map_segment); - -/** - * __gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - * The mmap_sem of the mm that belongs to the address space must be held - * when this function gets called. - */ -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +EXPORT_SYMBOL(pmdp_xchg_direct); + +pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t new) { - unsigned long vmaddr; + pmd_t old; - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); - return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; + old = pmdp_flush_lazy(mm, addr, pmdp); + *pmdp = new; + return old; } -EXPORT_SYMBOL_GPL(__gmap_translate); - -/** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. 
- */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) +EXPORT_SYMBOL(pmdp_xchg_lazy); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { - unsigned long rc; + struct list_head *lh = (struct list_head *) pgtable; - down_read(&gmap->mm->mmap_sem); - rc = __gmap_translate(gmap, gaddr); - up_read(&gmap->mm->mmap_sem); - return rc; + assert_spin_locked(pmd_lockptr(mm, pmdp)); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } -EXPORT_SYMBOL_GPL(gmap_translate); -/** - * gmap_unlink - disconnect a page table from the gmap shadow tables - * @gmap: pointer to guest mapping meta data structure - * @table: pointer to the host page table - * @vmaddr: vm address associated with the host page table - */ -static void gmap_unlink(struct mm_struct *mm, unsigned long *table, - unsigned long vmaddr) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { - struct gmap *gmap; - int flush; + struct list_head *lh; + pgtable_t pgtable; + pte_t *ptep; + + assert_spin_locked(pmd_lockptr(mm, pmdp)); - list_for_each_entry(gmap, &mm->context.gmap_list, list) { - flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); - if (flush) - gmap_flush_tlb(gmap); + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); } + ptep = (pte_t *) pgtable; + pte_val(*ptep) = _PAGE_INVALID; + ptep++; + pte_val(*ptep) = _PAGE_INVALID; + return pgtable; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -/** - * gmap_link - set up shadow page tables to connect a host to a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @vmaddr: vm address - * - * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT - * if the vm address is already mapped to a different guest segment. - * The mmap_sem of the mm that belongs to the address space must be held - * when this function gets called. 
- */ -int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) +#ifdef CONFIG_PGSTE +void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) { - struct mm_struct *mm; - unsigned long *table; - spinlock_t *ptl; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - int rc; - - /* Create higher level tables in the gmap page table */ - table = gmap->table; - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { - table += (gaddr >> 53) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, - gaddr & 0xffe0000000000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { - table += (gaddr >> 42) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, - gaddr & 0xfffffc0000000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { - table += (gaddr >> 31) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, - gaddr & 0xffffffff80000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - table += (gaddr >> 20) & 0x7ff; - /* Walk the parent mm page table */ - mm = gmap->mm; - pgd = pgd_offset(mm, vmaddr); - VM_BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, vmaddr); - VM_BUG_ON(pud_none(*pud)); - pmd = pmd_offset(pud, vmaddr); - VM_BUG_ON(pmd_none(*pmd)); - /* large pmds cannot yet be handled */ - if (pmd_large(*pmd)) - return -EFAULT; - /* Link gmap segment table entry location to page table. */ - rc = radix_tree_preload(GFP_KERNEL); - if (rc) - return rc; - ptl = pmd_lock(mm, pmd); - spin_lock(&gmap->guest_table_lock); - if (*table == _SEGMENT_ENTRY_INVALID) { - rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); - if (!rc) - *table = pmd_val(*pmd); - } else - rc = 0; - spin_unlock(&gmap->guest_table_lock); - spin_unlock(ptl); - radix_tree_preload_end(); - return rc; + pgste_t pgste; + + /* the mm_has_pgste() check is done in set_pte_at() */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste_set_key(ptep, pgste, entry, mm); + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); } -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT - * if the vm address is already mapped to a different guest segment. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) +void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long vmaddr; - int rc; - bool unlocked; - - down_read(&gmap->mm->mmap_sem); - -retry: - unlocked = false; - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) { - rc = vmaddr; - goto out_up; - } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, - &unlocked)) { - rc = -EFAULT; - goto out_up; - } - /* - * In the case that fixup_user_fault unlocked the mmap_sem during - * faultin redo __gmap_translate to not race with a map/unmap_segment. 
- */ - if (unlocked) - goto retry; + pgste_t pgste; - rc = __gmap_link(gmap, gaddr, vmaddr); -out_up: - up_read(&gmap->mm->mmap_sem); - return rc; + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= PGSTE_IN_BIT; + pgste_set_unlock(ptep, pgste); } -EXPORT_SYMBOL_GPL(gmap_fault); -static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) { if (!non_swap_entry(entry)) dec_mm_counter(mm, MM_SWAPENTS); @@ -620,225 +426,99 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) free_swap_and_cache(entry); } -/* - * this function is assumed to be called with mmap_sem held - */ -void __gmap_zap(struct gmap *gmap, unsigned long gaddr) +void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int reset) { - unsigned long vmaddr, ptev, pgstev; - pte_t *ptep, pte; - spinlock_t *ptl; + unsigned long pgstev; pgste_t pgste; + pte_t pte; - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - return; - vmaddr |= gaddr & ~PMD_MASK; - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (unlikely(!ptep)) - return; - pte = *ptep; - if (!pte_swap(pte)) - goto out_pte; /* Zap unused and logically-zero pages */ pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); - ptev = pte_val(pte); - if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || - ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { - gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); - pte_clear(gmap->mm, vmaddr, ptep); - } + pte = *ptep; + if (pte_swap(pte) && + ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || + (pgstev & _PGSTE_GPS_ZERO))) { + ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); + pte_clear(mm, addr, ptep); + } + if (reset) + pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; pgste_set_unlock(ptep, pgste); -out_pte: - pte_unmap_unlock(ptep, ptl); } -EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) +void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - down_read(&gmap->mm->mmap_sem); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - /* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, vmaddr, size, NULL); - } - up_read(&gmap->mm->mmap_sem); -} -EXPORT_SYMBOL_GPL(gmap_discard); - -static LIST_HEAD(gmap_notifier_list); -static DEFINE_SPINLOCK(gmap_notifier_lock); + unsigned long ptev; + pgste_t pgste; -/** - * gmap_register_ipte_notifier - register a pte invalidation callback - * @nb: pointer to the gmap notifier block - */ -void gmap_register_ipte_notifier(struct gmap_notifier *nb) -{ - spin_lock(&gmap_notifier_lock); - list_add(&nb->list, &gmap_notifier_list); - spin_unlock(&gmap_notifier_lock); + /* Clear storage key */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | + PGSTE_GR_BIT | PGSTE_GC_BIT); + ptev = pte_val(*ptep); + if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) + page_set_storage_key(ptev & PAGE_MASK, 
PAGE_DEFAULT_KEY, 1); + pgste_set_unlock(ptep, pgste); } -EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); -/** - * gmap_unregister_ipte_notifier - remove a pte invalidation callback - * @nb: pointer to the gmap notifier block - */ -void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) -{ - spin_lock(&gmap_notifier_lock); - list_del_init(&nb->list); - spin_unlock(&gmap_notifier_lock); -} -EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); - -/** - * gmap_ipte_notify - mark a range of ptes for invalidation notification - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * - * Returns 0 if for each page in the given range a gmap mapping exists and - * the invalidation notification could be set. If the gmap mapping is missing - * for one or more pages -EFAULT is returned. If no memory could be allocated - * -ENOMEM is returned. This function establishes missing page table entries. +/* + * Test and reset if a guest page is dirty */ -int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) +bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { - unsigned long addr; spinlock_t *ptl; - pte_t *ptep, entry; pgste_t pgste; - bool unlocked; - int rc = 0; - - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) - return -EINVAL; - down_read(&gmap->mm->mmap_sem); - while (len) { - unlocked = false; - /* Convert gmap address and connect the page tables */ - addr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(addr)) { - rc = addr; - break; - } - /* Get the page mapped */ - if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - rc = -EFAULT; - break; - } - /* While trying to map mmap_sem got unlocked. Let us retry */ - if (unlocked) - continue; - rc = __gmap_link(gmap, gaddr, addr); - if (rc) - break; - /* Walk the process page table, lock and get pte pointer */ - ptep = get_locked_pte(gmap->mm, addr, &ptl); - VM_BUG_ON(!ptep); - /* Set notification bit in the pgste of the pte */ - entry = *ptep; - if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { - pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; - pgste_set_unlock(ptep, pgste); - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; - } - pte_unmap_unlock(ptep, ptl); - } - up_read(&gmap->mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_ipte_notify); - -/** - * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. - * @mm: pointer to the process mm_struct - * @addr: virtual address in the process address space - * @pte: pointer to the page table entry - * - * This function is assumed to be called with the page table lock held - * for the pte to notify. 
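[ Illustrative aside, not part of this commit: a hypothetical dirty-logging
  pass (e.g. for guest migration) built on the new test_and_clear_guest_dirty()
  helper; collect_dirty() and its bitmap convention are made up for
  illustration. ]

    static void collect_dirty(struct gmap *gmap, unsigned long gaddr,
                              unsigned long npages, unsigned long *bitmap)
    {
        unsigned long i, vmaddr;

        for (i = 0; i < npages; i++) {
            vmaddr = gmap_translate(gmap, gaddr + i * PAGE_SIZE);
            if (IS_ERR_VALUE(vmaddr))
                continue;               /* guest page not mapped, skip it */
            if (test_and_clear_guest_dirty(gmap->mm, vmaddr))
                set_bit(i, bitmap);     /* written since the last pass */
        }
    }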
- */ -void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) -{ - unsigned long offset, gaddr; - unsigned long *table; - struct gmap_notifier *nb; - struct gmap *gmap; - - offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - offset = offset * (4096 / sizeof(pte_t)); - spin_lock(&gmap_notifier_lock); - list_for_each_entry(gmap, &mm->context.gmap_list, list) { - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (!table) - continue; - gaddr = __gmap_segment_gaddr(table) + offset; - list_for_each_entry(nb, &gmap_notifier_list, list) - nb->notifier_call(gmap, gaddr); + pte_t *ptep; + pte_t pte; + bool dirty; + + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) + return false; + + pgste = pgste_get_lock(ptep); + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; + pte = *ptep; + if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + __ptep_ipte(addr, ptep); + if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_INVALID; + *ptep = pte; } - spin_unlock(&gmap_notifier_lock); + pgste_set_unlock(ptep, pgste); + + spin_unlock(ptl); + return dirty; } -EXPORT_SYMBOL_GPL(gmap_do_ipte_notify); +EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, - unsigned long key, bool nq) + unsigned char key, bool nq) { + unsigned long keyul; spinlock_t *ptl; pgste_t old, new; pte_t *ptep; - bool unlocked; down_read(&mm->mmap_sem); -retry: - unlocked = false; ptep = get_locked_pte(mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; } - if (!(pte_val(*ptep) & _PAGE_INVALID) && - (pte_val(*ptep) & _PAGE_PROTECT)) { - pte_unmap_unlock(ptep, ptl); - /* - * We do not really care about unlocked. We will retry either - * way. But this allows fixup_user_fault to enable userfaultfd. 
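[ Illustrative aside, not part of this commit: set_guest_storage_key(), whose
  key argument is narrowed to unsigned char here, splits the 8-bit storage key
  into two pgste fields, shifting the guest R/C bits by 48 and the access-key
  and fetch-protection bits by 56. The masks below (0xf0 ACC, 0x08 FP, 0x04
  referenced, 0x02 changed) are quoted from the s390 headers of this era and
  appear only to make the shifts concrete: ]

    #include <stdio.h>
    #include <stdint.h>

    #define KEY_ACC_FP 0xf8UL          /* _PAGE_ACC_BITS | _PAGE_FP_BIT    */
    #define KEY_R_C    0x06UL          /* _PAGE_REFERENCED | _PAGE_CHANGED */

    int main(void)
    {
        unsigned long key = 0x96;      /* ACC=9, FP=0, referenced=1, changed=1 */
        uint64_t pgste = 0;

        pgste |= (key & KEY_R_C) << 48;     /* guest R/C bits                */
        pgste |= (key & KEY_ACC_FP) << 56;  /* access key + fetch protection */
        printf("pgste: 0x%016llx\n", (unsigned long long)pgste);
        return 0;
    }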
- */ - if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - up_read(&mm->mmap_sem); - return -EFAULT; - } - goto retry; - } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + keyul = (unsigned long) key; + pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { unsigned long address, bits, skey; @@ -863,13 +543,12 @@ retry: } EXPORT_SYMBOL(set_guest_storage_key); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr) { + unsigned char key; spinlock_t *ptl; pgste_t pgste; pte_t *ptep; - uint64_t physaddr; - unsigned long key = 0; down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); @@ -880,13 +559,12 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) pgste = pgste_get_lock(ptep); if (pte_val(*ptep) & _PAGE_INVALID) { - key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; + key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; } else { - physaddr = pte_val(*ptep) & PAGE_MASK; - key = page_get_storage_key(physaddr); + key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); /* Reflect guest's logical view, not physical */ if (pgste_val(pgste) & PGSTE_GR_BIT) @@ -901,471 +579,4 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) return key; } EXPORT_SYMBOL(get_guest_storage_key); - -static int page_table_allocate_pgste_min = 0; -static int page_table_allocate_pgste_max = 1; -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, - .extra1 = &page_table_allocate_pgste_min, - .extra2 = &page_table_allocate_pgste_max, - }, - { } -}; - -static struct ctl_table page_table_sysctl_dir[] = { - { - .procname = "vm", - .maxlen = 0, - .mode = 0555, - .child = page_table_sysctl, - }, - { } -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#else /* CONFIG_PGSTE */ - -static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, - unsigned long vmaddr) -{ -} - -#endif /* CONFIG_PGSTE */ - -static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) -{ - unsigned int old, new; - - do { - old = atomic_read(v); - new = old ^ bits; - } while (atomic_cmpxchg(v, old, new) != old); - return new; -} - -/* - * page table entry allocation/free routines. 
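[ Illustrative aside, not part of this commit: the page table allocation/free
  routines referred to here (their new home is pgalloc.c above) track the two
  2 KiB halves of a 4 KiB page in page->_mapcount: bits 0/1 mark a half as in
  use, bits 4/5 mark it as waiting for an RCU grace period, and a half is only
  handed out again once both of its bits are clear. The bookkeeping in plain
  C: ]

    #include <stdio.h>

    static unsigned int xor_bits(unsigned int *v, unsigned int bits)
    {
        *v ^= bits;      /* the kernel does this with an atomic_cmpxchg() loop */
        return *v;
    }

    int main(void)
    {
        unsigned int mapcount = 0;                  /* fresh page             */
        unsigned int busy;

        xor_bits(&mapcount, 1U << 0);               /* lower 2 KiB handed out */
        xor_bits(&mapcount, 1U << 1);               /* upper 2 KiB handed out */
        busy = (mapcount | (mapcount >> 4)) & 3;
        printf("busy mask: %u (3 = no free fragment)\n", busy);

        xor_bits(&mapcount, 0x11U << 0);            /* RCU-free the lower half */
        printf("mapcount: 0x%x (bit 4 = awaiting grace period)\n", mapcount);
        return 0;
    }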
- */ -unsigned long *page_table_alloc(struct mm_struct *mm) -{ - unsigned long *table; - struct page *page; - unsigned int mask, bit; - - /* Try to get a fragment of a 4K page as a 2K page table */ - if (!mm_alloc_pgste(mm)) { - table = NULL; - spin_lock_bh(&mm->context.list_lock); - if (!list_empty(&mm->context.pgtable_list)) { - page = list_first_entry(&mm->context.pgtable_list, - struct page, lru); - mask = atomic_read(&page->_mapcount); - mask = (mask | (mask >> 4)) & 3; - if (mask != 3) { - table = (unsigned long *) page_to_phys(page); - bit = mask & 1; /* =1 -> second 2K */ - if (bit) - table += PTRS_PER_PTE; - atomic_xor_bits(&page->_mapcount, 1U << bit); - list_del(&page->lru); - } - } - spin_unlock_bh(&mm->context.list_lock); - if (table) - return table; - } - /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - /* Initialize page table */ - table = (unsigned long *) page_to_phys(page); - if (mm_alloc_pgste(mm)) { - /* Return 4K page table with PGSTEs */ - atomic_set(&page->_mapcount, 3); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); - } else { - /* Return the first 2K fragment of the page */ - atomic_set(&page->_mapcount, 1); - clear_table(table, _PAGE_INVALID, PAGE_SIZE); - spin_lock_bh(&mm->context.list_lock); - list_add(&page->lru, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.list_lock); - } - return table; -} - -void page_table_free(struct mm_struct *mm, unsigned long *table) -{ - struct page *page; - unsigned int bit, mask; - - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (!mm_alloc_pgste(mm)) { - /* Free 2K page table fragment of a 4K page */ - bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); - mask = atomic_xor_bits(&page->_mapcount, 1U << bit); - if (mask & 3) - list_add(&page->lru, &mm->context.pgtable_list); - else - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - if (mask != 0) - return; - } - - pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); - __free_page(page); -} - -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, - unsigned long vmaddr) -{ - struct mm_struct *mm; - struct page *page; - unsigned int bit, mask; - - mm = tlb->mm; - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (mm_alloc_pgste(mm)) { - gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) (__pa(table) | 3); - tlb_remove_table(tlb, table); - return; - } - bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); - mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); - if (mask & 3) - list_add_tail(&page->lru, &mm->context.pgtable_list); - else - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - table = (unsigned long *) (__pa(table) | (1U << bit)); - tlb_remove_table(tlb, table); -} - -static void __tlb_remove_table(void *_table) -{ - unsigned int mask = (unsigned long) _table & 3; - void *table = (void *)((unsigned long) _table ^ mask); - struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - - switch (mask) { - case 0: /* pmd or pud */ - free_pages((unsigned long) table, 2); - break; - case 1: /* lower 2K of a 4K page table */ - case 2: /* higher 2K of a 4K page table */ - if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) - break; - /* fallthrough */ - case 3: /* 4K page table with pgstes */ - 
pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); - __free_page(page); - break; - } -} - -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely - * on IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - tlb->mm->context.flush_mm = 1; - if (*batch == NULL) { - *batch = (struct mmu_table_batch *) - __get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - __tlb_flush_mm_lazy(tlb->mm); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_flush_mmu(tlb); -} - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void thp_split_vma(struct vm_area_struct *vma) -{ - unsigned long addr; - - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); -} - -static inline void thp_split_mm(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - - for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - thp_split_vma(vma); - vma->vm_flags &= ~VM_HUGEPAGE; - vma->vm_flags |= VM_NOHUGEPAGE; - } - mm->def_flags |= VM_NOHUGEPAGE; -} -#else -static inline void thp_split_mm(struct mm_struct *mm) -{ -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - -/* - * switch on pgstes for its userspace process (for kvm) - */ -int s390_enable_sie(void) -{ - struct mm_struct *mm = current->mm; - - /* Do we have pgstes? if yes, we are done */ - if (mm_has_pgste(mm)) - return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; - down_write(&mm->mmap_sem); - mm->context.has_pgste = 1; - /* split thp mappings and disable thp for future mappings */ - thp_split_mm(mm); - up_write(&mm->mmap_sem); - return 0; -} -EXPORT_SYMBOL_GPL(s390_enable_sie); - -/* - * Enable storage key handling from now on and initialize the storage - * keys with the default key. 
- */ -static int __s390_enable_skey(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - unsigned long ptev; - pgste_t pgste; - - pgste = pgste_get_lock(pte); - /* - * Remove all zero page mappings, - * after establishing a policy to forbid zero page mappings - * following faults for that page will get fresh anonymous pages - */ - if (is_zero_pfn(pte_pfn(*pte))) { - ptep_flush_direct(walk->mm, addr, pte); - pte_val(*pte) = _PAGE_INVALID; - } - /* Clear storage key */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | - PGSTE_GR_BIT | PGSTE_GC_BIT); - ptev = pte_val(*pte); - if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) - page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); - pgste_set_unlock(pte, pgste); - return 0; -} - -int s390_enable_skey(void) -{ - struct mm_walk walk = { .pte_entry = __s390_enable_skey }; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int rc = 0; - - down_write(&mm->mmap_sem); - if (mm_use_skey(mm)) - goto out_up; - - mm->context.use_skey = 1; - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags)) { - mm->context.use_skey = 0; - rc = -ENOMEM; - goto out_up; - } - } - mm->def_flags &= ~VM_MERGEABLE; - - walk.mm = mm; - walk_page_range(0, TASK_SIZE, &walk); - -out_up: - up_write(&mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(s390_enable_skey); - -/* - * Reset CMMA state, make all pages stable again. - */ -static int __s390_reset_cmma(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - pgste_t pgste; - - pgste = pgste_get_lock(pte); - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; - pgste_set_unlock(pte, pgste); - return 0; -} - -void s390_reset_cmma(struct mm_struct *mm) -{ - struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; - - down_write(&mm->mmap_sem); - walk.mm = mm; - walk_page_range(0, TASK_SIZE, &walk); - up_write(&mm->mmap_sem); -} -EXPORT_SYMBOL_GPL(s390_reset_cmma); - -/* - * Test and reset if a guest page is dirty - */ -bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) -{ - pte_t *pte; - spinlock_t *ptl; - bool dirty = false; - - pte = get_locked_pte(gmap->mm, address, &ptl); - if (unlikely(!pte)) - return false; - - if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte)) - dirty = true; - - spin_unlock(ptl); - return dirty; -} -EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty); - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB */ - return pmdp_test_and_clear_young(vma, address, pmdp); -} - -int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - - entry = pmd_mkyoung(entry); - if (dirty) - entry = pmd_mkdirty(entry); - if (pmd_same(*pmdp, entry)) - return 0; - pmdp_invalidate(vma, address, pmdp); - set_pmd_at(vma->vm_mm, address, pmdp, entry); - return 1; -} - -void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable) -{ - struct list_head *lh = (struct list_head *) pgtable; - - assert_spin_locked(pmd_lockptr(mm, pmdp)); - - /* FIFO */ - if (!pmd_huge_pte(mm, pmdp)) - INIT_LIST_HEAD(lh); - else - list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); - pmd_huge_pte(mm, pmdp) = pgtable; -} - 
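The pgtable_trans_huge_deposit() just above (and the matching withdraw helper that follows) queue spare page tables on a list whose links live inside the deposited table itself, so no separate bookkeeping allocation is needed. Below is a minimal userspace sketch of that intrusive-queue idea; the struct and function names are hypothetical and the list discipline is simplified to a plain FIFO, it is not the kernel's pgtable_t handling.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for a preallocated page table. */
struct pgtable {
	struct pgtable *next;      /* link stored inside the object itself */
	unsigned long entries[4];  /* payload, unused while queued */
};

struct deposit_queue {
	struct pgtable *head;      /* oldest deposit, withdrawn first */
	struct pgtable *tail;      /* newest deposit */
};

static void deposit(struct deposit_queue *q, struct pgtable *pt)
{
	pt->next = NULL;
	if (!q->head)
		q->head = pt;
	else
		q->tail->next = pt;
	q->tail = pt;
}

static struct pgtable *withdraw(struct deposit_queue *q)
{
	struct pgtable *pt = q->head;

	if (!pt)
		return NULL;
	q->head = pt->next;
	if (!q->head)
		q->tail = NULL;
	return pt;
}

int main(void)
{
	struct deposit_queue q = { NULL, NULL };
	struct pgtable a = { 0 }, b = { 0 };

	deposit(&q, &a);
	deposit(&q, &b);
	printf("first withdraw is a: %d\n", withdraw(&q) == &a);
	printf("second withdraw is b: %d\n", withdraw(&q) == &b);
	printf("queue now empty: %d\n", withdraw(&q) == NULL);
	return 0;
}

Because the links reuse storage inside the deposited table, the withdraw side in the hunk that follows re-marks the first two entries _PAGE_INVALID before handing the table back.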
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) -{ - struct list_head *lh; - pgtable_t pgtable; - pte_t *ptep; - - assert_spin_locked(pmd_lockptr(mm, pmdp)); - - /* FIFO */ - pgtable = pmd_huge_pte(mm, pmdp); - lh = (struct list_head *) pgtable; - if (list_empty(lh)) - pmd_huge_pte(mm, pmdp) = NULL; - else { - pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; - list_del(lh); - } - ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_INVALID; - ptep++; - pte_val(*ptep) = _PAGE_INVALID; - return pgtable; -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index ef7d6c8..d27fccba 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -94,16 +94,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); -#ifndef CONFIG_DEBUG_PAGEALLOC if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && - !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { + !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && + !debug_pagealloc_enabled()) { pud_val(*pu_dir) = __pa(address) | _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | (ro ? _REGION_ENTRY_PROTECT : 0); address += PUD_SIZE; continue; } -#endif if (pud_none(*pu_dir)) { pm_dir = vmem_pmd_alloc(); if (!pm_dir) @@ -111,9 +110,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pud_populate(&init_mm, pu_dir, pm_dir); } pm_dir = pmd_offset(pu_dir, address); -#ifndef CONFIG_DEBUG_PAGEALLOC if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && - !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { + !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && + !debug_pagealloc_enabled()) { pmd_val(*pm_dir) = __pa(address) | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_YOUNG | @@ -121,7 +120,6 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) address += PMD_SIZE; continue; } -#endif if (pmd_none(*pm_dir)) { pt_dir = vmem_pte_alloc(address); if (!pt_dir) diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 1bd2301..496e4a7 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,5 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o oprofile-y += hwsampler.o diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c deleted file mode 100644 index 1884e17..0000000 --- a/arch/s390/oprofile/backtrace.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * S390 Version - * Copyright IBM Corp. 2005 - * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com> - */ - -#include <linux/oprofile.h> - -#include <asm/processor.h> /* for struct stack_frame */ - -static unsigned long -__show_trace(unsigned int *depth, unsigned long sp, - unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (*depth) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - - /* Follow the backchain. */ - while (*depth) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - - } - - if (*depth == 0) - break; - - /* Zero backchain detected, check for interrupt frame. 
*/ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - low = sp; - sp = regs->gprs[15]; - } - return sp; -} - -void s390_backtrace(struct pt_regs * const regs, unsigned int depth) -{ - unsigned long head, frame_size; - struct stack_frame* head_sf; - - if (user_mode(regs)) - return; - - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - head = regs->gprs[15]; - head_sf = (struct stack_frame*)head; - - if (!head_sf->back_chain) - return; - - head = head_sf->back_chain; - - head = __show_trace(&depth, head, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - - __show_trace(&depth, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); -} diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 9cfa2ff..791935a 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -20,8 +20,6 @@ #include "../../../drivers/oprofile/oprof.h" -extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); - #include "hwsampler.h" #include "op_counter.h" @@ -456,6 +454,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; + case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break; default: return -ENODEV; } } @@ -494,6 +493,24 @@ static void oprofile_hwsampler_exit(void) hwsampler_shutdown(); } +static int __s390_backtrace(void *data, unsigned long address) +{ + unsigned int *depth = data; + + if (*depth == 0) + return 1; + (*depth)--; + oprofile_add_trace(address); + return 0; +} + +static void s390_backtrace(struct pt_regs *regs, unsigned int depth) +{ + if (user_mode(regs)) + return; + dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]); +} + int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8f19c8f..9fd59a7 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -637,11 +637,9 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) int pcibios_add_device(struct pci_dev *pdev) { - struct zpci_dev *zdev = to_zpci(pdev); struct resource *res; int i; - zdev->pdev = pdev; pdev->dev.groups = zpci_attr_groups; zpci_map_resources(pdev); @@ -664,8 +662,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) { struct zpci_dev *zdev = to_zpci(pdev); - zdev->pdev = pdev; - zpci_debug_init_device(zdev); + zpci_debug_init_device(zdev, dev_name(&pdev->dev)); zpci_fmb_enable_device(zdev); return pci_enable_resources(pdev, mask); @@ -677,7 +674,6 @@ void pcibios_disable_device(struct pci_dev *pdev) zpci_fmb_disable_device(zdev); zpci_debug_exit_device(zdev); - zdev->pdev = NULL; } #ifdef CONFIG_HIBERNATE_CALLBACKS @@ -864,8 +860,11 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { + BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || + __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb)); + zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), - 16, 0, NULL); + __alignof__(struct zpci_fmb), 0, NULL); if (!zdev_fmb_cache) goto error_fmb; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d6e411e..21591dd 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c 
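The rewritten s390_backtrace above no longer follows stack frames by hand; it hands a small callback to dump_trace() and lets the common unwinder drive the walk. The sketch below models only the callback contract (return nonzero to stop early, decrement a caller-owned depth counter); the frame data is faked and walk_addresses() is a hypothetical stand-in for dump_trace().

#include <stdio.h>

/* Callback contract: return nonzero to stop the walk early. */
typedef int (*trace_fn)(void *data, unsigned long address);

/* Hypothetical stand-in for dump_trace(): feeds addresses to the callback. */
static void walk_addresses(trace_fn fn, void *data,
			   const unsigned long *addrs, int n)
{
	for (int i = 0; i < n; i++)
		if (fn(data, addrs[i]))
			return;
}

/* Mirrors the shape of __s390_backtrace: record until depth is exhausted. */
static int record_address(void *data, unsigned long address)
{
	unsigned int *depth = data;

	if (*depth == 0)
		return 1;          /* stop the walk */
	(*depth)--;
	printf("trace entry: %#lx\n", address);
	return 0;                  /* keep walking */
}

int main(void)
{
	const unsigned long fake_frames[] = { 0x1000, 0x1040, 0x1080, 0x10c0 };
	unsigned int depth = 2;    /* only the first two frames are reported */

	walk_addresses(record_address, &depth, fake_frames, 4);
	return 0;
}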
@@ -8,13 +8,19 @@ #define KMSG_COMPONENT "zpci" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/compat.h> #include <linux/kernel.h> +#include <linux/miscdevice.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/delay.h> #include <linux/pci.h> +#include <linux/uaccess.h> #include <asm/pci_debug.h> #include <asm/pci_clp.h> +#include <asm/compat.h> +#include <asm/clp.h> +#include <uapi/asm/clp.h> static inline void zpci_err_clp(unsigned int rsp, int rc) { @@ -27,21 +33,43 @@ static inline void zpci_err_clp(unsigned int rsp, int rc) } /* - * Call Logical Processor - * Retry logic is handled by the caller. + * Call Logical Processor with c=1, lps=0 and command 1 + * to get the bit mask of installed logical processors */ -static inline u8 clp_instr(void *data) +static inline int clp_get_ilp(unsigned long *ilp) +{ + unsigned long mask; + int cc = 3; + + asm volatile ( + " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1) + : "cc"); + *ilp = mask; + return cc; +} + +/* + * Call Logical Processor with c=0, the give constant lps and an lpcb request. + */ +static inline int clp_req(void *data, unsigned int lps) { struct { u8 _[CLP_BLK_SIZE]; } *req = data; u64 ignored; - u8 cc; + int cc = 3; asm volatile ( - " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n" - " ipm %[cc]\n" + " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n" + "0: ipm %[cc]\n" " srl %[cc],28\n" - : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req) - : [req] "a" (req) + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req) + : [req] "a" (req), [lps] "i" (lps) : "cc"); return cc; } @@ -90,7 +118,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) rrb->response.hdr.len = sizeof(rrb->response); rrb->request.pfgid = pfgid; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) clp_store_query_pci_fngrp(zdev, &rrb->response); else { @@ -143,7 +171,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) rrb->response.hdr.len = sizeof(rrb->response); rrb->request.fh = fh; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { rc = clp_store_query_pci_fn(zdev, &rrb->response); if (rc) @@ -214,7 +242,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) rrb->request.oc = command; rrb->request.ndas = nr_dma_as; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) { retries--; if (retries < 0) @@ -280,7 +308,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, rrb->request.resume_token = resume_token; /* Get PCI function handle list */ - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("List PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); @@ -391,3 +419,198 @@ int clp_rescan_pci_devices_simple(void) clp_free_block(rrb); return rc; } + +static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_BASE) ? 
-EOPNOTSUPP : 0; +} + +static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb) +{ + switch (lpcb->cmd) { + case 0x0001: /* store logical-processor characteristics */ + return clp_base_slpc(req, (void *) lpcb); + default: + return -EINVAL; + } +} + +static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_query(struct clp_req *req, + struct clp_req_rsp_query_pci *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_query_grp(struct clp_req *req, + struct clp_req_rsp_query_pci_grp *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 || + lpcb->request.reserved4 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb) +{ + switch (lpcb->cmd) { + case 0x0001: /* store logical-processor characteristics */ + return clp_pci_slpc(req, (void *) lpcb); + case 0x0002: /* list PCI functions */ + return clp_pci_list(req, (void *) lpcb); + case 0x0003: /* query PCI function */ + return clp_pci_query(req, (void *) lpcb); + case 0x0004: /* query PCI function group */ + return clp_pci_query_grp(req, (void *) lpcb); + default: + return -EINVAL; + } +} + +static int clp_normal_command(struct clp_req *req) +{ + struct clp_req_hdr *lpcb; + void __user *uptr; + int rc; + + rc = -EINVAL; + if (req->lps != 0 && req->lps != 2) + goto out; + + rc = -ENOMEM; + lpcb = clp_alloc_block(GFP_KERNEL); + if (!lpcb) + goto out; + + rc = -EFAULT; + uptr = (void __force __user *)(unsigned long) req->data_p; + if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0) + goto out_free; + + rc = -EINVAL; + if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0) + goto out_free; + + switch (req->lps) { + case 0: + rc = clp_base_command(req, lpcb); + break; + case 2: + rc = clp_pci_command(req, lpcb); + break; + } + if (rc) + goto out_free; + + rc = -EFAULT; + if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0) + goto out_free; + + rc = 0; + +out_free: + clp_free_block(lpcb); +out: + return rc; +} + +static int clp_immediate_command(struct clp_req *req) +{ + void __user *uptr; + unsigned long ilp; + int exists; + + if (req->cmd > 1 || clp_get_ilp(&ilp) != 0) + return -EINVAL; + + uptr = (void __force __user *)(unsigned long) req->data_p; + if (req->cmd == 0) { + /* Command code 0: test for a specific processor */ + exists = test_bit_inv(req->lps, &ilp); + return put_user(exists, (int __user *) uptr); + } + /* Command 
code 1: return bit mask of installed processors */ + return put_user(ilp, (unsigned long __user *) uptr); +} + +static long clp_misc_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct clp_req req; + void __user *argp; + + if (cmd != CLP_SYNC) + return -EINVAL; + + argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg; + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + if (req.r != 0) + return -EINVAL; + return req.c ? clp_immediate_command(&req) : clp_normal_command(&req); +} + +static int clp_misc_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static const struct file_operations clp_misc_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .release = clp_misc_release, + .unlocked_ioctl = clp_misc_ioctl, + .compat_ioctl = clp_misc_ioctl, + .llseek = no_llseek, +}; + +static struct miscdevice clp_misc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "clp", + .fops = &clp_misc_fops, +}; + +static int __init clp_misc_init(void) +{ + return misc_register(&clp_misc_device); +} + +device_initcall(clp_misc_init); diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 4129b0a..c555de3 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -128,10 +128,9 @@ static const struct file_operations debugfs_pci_perf_fops = { .release = single_release, }; -void zpci_debug_init_device(struct zpci_dev *zdev) +void zpci_debug_init_device(struct zpci_dev *zdev, const char *name) { - zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), - debugfs_root); + zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root); if (IS_ERR(zdev->debugfs_dev)) zdev->debugfs_dev = NULL; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 4638b93..a06ce80 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -217,27 +217,29 @@ void dma_cleanup_tables(unsigned long *table) dma_free_cpu_table(table); } -static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, +static unsigned long __dma_alloc_iommu(struct device *dev, unsigned long start, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long boundary_size; - boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1, + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, 0, boundary_size, 0); } -static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) +static unsigned long dma_alloc_iommu(struct device *dev, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long offset, flags; int wrap = 0; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); + offset = __dma_alloc_iommu(dev, zdev->next_bit, size); if (offset == -1) { /* wrap-around */ - offset = __dma_alloc_iommu(zdev, 0, size); + offset = __dma_alloc_iommu(dev, 0, size); wrap = 1; } @@ -251,8 +253,9 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) return offset; } -static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size) +static void dma_free_iommu(struct device *dev, unsigned long offset, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long flags; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); @@ -293,7 +296,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, /* This rounds up number of pages based on size and offset */ nr_pages = 
iommu_num_pages(pa, size, PAGE_SIZE); - iommu_page_index = dma_alloc_iommu(zdev, nr_pages); + iommu_page_index = dma_alloc_iommu(dev, nr_pages); if (iommu_page_index == -1) { ret = -ENOSPC; goto out_err; @@ -319,7 +322,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, return dma_addr + (offset & ~PAGE_MASK); out_free: - dma_free_iommu(zdev, iommu_page_index, nr_pages); + dma_free_iommu(dev, iommu_page_index, nr_pages); out_err: zpci_err("map error:\n"); zpci_err_dma(ret, pa); @@ -346,7 +349,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, atomic64_add(npages, &zdev->unmapped_pages); iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - dma_free_iommu(zdev, iommu_page_index, npages); + dma_free_iommu(dev, iommu_page_index, npages); } static void *s390_dma_alloc(struct device *dev, size_t size, diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b0e0475..fb2a9a5 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -46,11 +46,14 @@ struct zpci_ccdf_avail { static void __zpci_event_error(struct zpci_ccdf_err *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + struct pci_dev *pdev = NULL; zpci_err("error CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); + if (zdev) + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); @@ -58,6 +61,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) return; pdev->error_state = pci_channel_io_perm_failure; + pci_dev_put(pdev); } void zpci_event_error(void *data) @@ -69,9 +73,12 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + struct pci_dev *pdev = NULL; int ret; + if (zdev) + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", pdev ? 
pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); zpci_err("avail CCDF:\n"); @@ -138,6 +145,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } + if (pdev) + pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/arch/score/configs/spct6600_defconfig b/arch/score/configs/spct6600_defconfig index df1edbf..b2d8802 100644 --- a/arch/score/configs/spct6600_defconfig +++ b/arch/score/configs/spct6600_defconfig @@ -70,7 +70,6 @@ CONFIG_NFSD=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_CRYPTO_NULL=y diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c index b48459a..f3a0649 100644 --- a/arch/score/kernel/setup.c +++ b/arch/score/kernel/setup.c @@ -101,7 +101,7 @@ static void __init resource_init(void) res->name = "System RAM"; res->start = MEMORY_START; res->end = MEMORY_START + MEMORY_SIZE - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); request_resource(res, &code_resource); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index e13da05..17a4f15 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -847,14 +847,10 @@ config PCI config PCI_DOMAINS bool -source "drivers/pci/pcie/Kconfig" - source "drivers/pci/Kconfig" source "drivers/pcmcia/Kconfig" -source "drivers/pci/hotplug/Kconfig" - endmenu menu "Executable file formats" diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index e343dbd0..644314f 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -105,9 +105,6 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? 
15 : 14; } -/* generic DMA-mapping stuff */ -#include <asm-generic/pci-dma-compat.h> - #endif /* __KERNEL__ */ #endif /* __ASM_SH_PCI_H */ diff --git a/arch/sh/include/mach-common/mach/magicpanelr2.h b/arch/sh/include/mach-common/mach/magicpanelr2.h index 183a2f7..eb0cf20 100644 --- a/arch/sh/include/mach-common/mach/magicpanelr2.h +++ b/arch/sh/include/mach-common/mach/magicpanelr2.h @@ -13,7 +13,7 @@ #ifndef __ASM_SH_MAGICPANELR2_H #define __ASM_SH_MAGICPANELR2_H -#include <asm/gpio.h> +#include <linux/gpio.h> #define __IO_PREFIX mpr2 #include <asm/io_generic.h> diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index de19cfa..3f1c18b 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -78,17 +78,17 @@ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0, }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; unsigned long memory_start; @@ -202,7 +202,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, res->name = "System RAM"; res->start = start; res->end = end - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, res)) { pr_err("unable to request memory_resource 0x%lx 0x%lx\n", diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index de6be00..13f633a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -203,7 +203,7 @@ asmlinkage void start_secondary(void) set_cpu_online(cpu, true); per_cpu(cpu_state, cpu) = CPU_ONLINE; - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } extern struct { diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 6385f60..cc948db 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -35,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); } } diff --git a/arch/sparc/include/asm/gpio.h b/arch/sparc/include/asm/gpio.h deleted file mode 100644 index b3799d8..0000000 --- a/arch/sparc/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h index d9c031f..6e14fd1 100644 --- a/arch/sparc/include/asm/pci.h +++ b/arch/sparc/include/asm/pci.h @@ -5,7 +5,4 @@ #else #include <asm/pci_32.h> #endif - -#include <asm-generic/pci-dma-compat.h> - #endif diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index f2d30ca..cd1f592 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -696,14 +696,6 @@ tlb_fixup_done: call __bzero sub %o1, %o0, %o1 -#ifdef CONFIG_LOCKDEP - /* We have this call this super early, as even prom_init can grab - * spinlocks and thus call into the lockdep code. - */ - call lockdep_init - nop -#endif - call prom_init mov %l7, %o0 ! 
OpenPROM cif handler diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index b3a5d81..fb30e7c 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg) local_irq_enable(); wmb(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); /* We should never reach here! */ BUG(); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 19cd08d..8a6151a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -134,7 +134,7 @@ void smp_callin(void) local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void cpu_panic(void) diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 131eaf4..4977800 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -146,7 +146,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); } return pte; } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 6f21685..1cfe6aa 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2863,17 +2863,17 @@ void hugetlb_setup(struct pt_regs *regs) static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static inline resource_size_t compute_kern_paddr(void *addr) @@ -2909,7 +2909,7 @@ static int __init report_memory(void) res->name = "System RAM"; res->start = pavail[i].phys_addr; res->end = pavail[i].phys_addr + pavail[i].reg_size - 1; - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; if (insert_resource(&iomem_resource, res) < 0) { pr_warn("Resource insertion failed.\n"); diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index de4a4ff..8171930 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -455,8 +455,6 @@ config TILE_PCI_IO source "drivers/pci/Kconfig" -source "drivers/pci/pcie/Kconfig" - config TILE_USB tristate "Tilera USB host adapter support" default y @@ -467,8 +465,6 @@ config TILE_USB Provides USB host adapter support for the built-in EHCI and OHCI interfaces on TILE-Gx chips. 
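Several hunks above (score, sh, sparc and tile setup code) switch System RAM resources from plain IORESOURCE_MEM to IORESOURCE_SYSTEM_RAM. The point of the new flag is that it keeps the old type bits and only adds a modifier, so existing "is this MEM?" checks still match while callers that care can now identify real RAM explicitly. The snippet below illustrates that composition; the bit values are illustrative only, the real constants live in include/linux/ioport.h.

#include <stdio.h>

/* Illustrative values only -- the real definitions are in linux/ioport.h. */
#define IORESOURCE_MEM        0x00000200u
#define IORESOURCE_SYSRAM     0x01000000u   /* modifier bit for System RAM */
#define IORESOURCE_BUSY       0x80000000u
#define IORESOURCE_SYSTEM_RAM (IORESOURCE_MEM | IORESOURCE_SYSRAM)
#define IORESOURCE_TYPE_BITS  0x00001f00u

int main(void)
{
	unsigned int flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;

	/* A System RAM resource still looks like ordinary memory... */
	printf("type is MEM:       %d\n",
	       (flags & IORESOURCE_TYPE_BITS) == IORESOURCE_MEM);
	/* ...but can now be picked out as real RAM where that matters. */
	printf("marked System RAM: %d\n",
	       (flags & IORESOURCE_SYSRAM) != 0);
	return 0;
}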
-source "drivers/pci/hotplug/Kconfig" - endmenu menu "Executable file formats" diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 984fa00..3f3dfb8 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -374,7 +374,6 @@ CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ASYNC_RAID6_TEST=m CONFIG_KGDB=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SECURITY_NETWORK=y diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index 71ad9f7..ef9e27e 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -486,7 +486,6 @@ CONFIG_DEBUG_LIST=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ASYNC_RAID6_TEST=m -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SECURITY_NETWORK=y diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index dfedd7a..fe3de50 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -226,7 +226,4 @@ static inline int pcibios_assign_all_busses(void) /* Use any cpu for PCI. */ #define cpumask_of_pcibus(bus) cpu_online_mask -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include <asm-generic/pci-dma-compat.h> - #endif /* _ASM_TILE_PCI_H */ diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index bbb855d..a992238 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1632,14 +1632,14 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; /* @@ -1673,10 +1673,15 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved) kzalloc(sizeof(struct resource), GFP_ATOMIC); if (!res) return NULL; - res->name = reserved ? 
"Reserved" : "System RAM"; res->start = start_pfn << PAGE_SHIFT; res->end = (end_pfn << PAGE_SHIFT) - 1; res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + if (reserved) { + res->name = "Reserved"; + } else { + res->name = "System RAM"; + res->flags |= IORESOURCE_SYSRAM; + } if (insert_resource(&iomem_resource, res)) { kfree(res); return NULL; diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 20d52a9..6c0abaa 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -208,7 +208,7 @@ void online_secondary(void) /* Set up tile-timer clock-event device on this cpu */ setup_tile_timer(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } int __cpu_up(unsigned int cpu, struct task_struct *tidle) diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index c034dc3..e212c64 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -77,7 +77,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, else { if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE]) panic("Unexpected page size %#lx\n", sz); - return pte_alloc_map(mm, NULL, pmd, addr); + return pte_alloc_map(mm, pmd, addr); } } #else diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index d4e1fc4..a0582b7 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -896,17 +896,15 @@ void __init pgtable_cache_init(void) panic("pgtable_cache_init(): Cannot create pgd cache"); } -#ifdef CONFIG_DEBUG_PAGEALLOC -static long __write_once initfree; -#else static long __write_once initfree = 1; -#endif +static bool __write_once set_initfree_done; /* Select whether to free (1) or mark unusable (0) the __init pages. */ static int __init set_initfree(char *str) { long val; if (kstrtol(str, 0, &val) == 0) { + set_initfree_done = true; initfree = val; pr_info("initfree: %s free init pages\n", initfree ? 
"will" : "won't"); @@ -919,6 +917,11 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) { unsigned long addr = (unsigned long) begin; + /* Prefer user request first */ + if (!set_initfree_done) { + if (debug_pagealloc_enabled()) + initfree = 0; + } if (kdata_huge && !initfree) { pr_warn("Warning: ignoring initfree=0: incompatible with kdata=huge\n"); initfree = 1; diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 9591a66..3943e9d 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -31,7 +31,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, if (!pmd) goto out_pmd; - pte = pte_alloc_map(mm, NULL, pmd, proc); + pte = pte_alloc_map(mm, pmd, proc); if (!pte) goto out_pte; diff --git a/arch/unicore32/include/asm/pci.h b/arch/unicore32/include/asm/pci.h index 38b3f37..37e55d0 100644 --- a/arch/unicore32/include/asm/pci.h +++ b/arch/unicore32/include/asm/pci.h @@ -13,8 +13,6 @@ #define __UNICORE_PCI_H__ #ifdef __KERNEL__ -#include <asm-generic/pci-dma-compat.h> -#include <asm-generic/pci-bridge.h> #include <asm-generic/pci.h> #include <mach/hardware.h> /* for PCIBIOS_MIN_* */ @@ -23,5 +21,4 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); #endif /* __KERNEL__ */ - #endif diff --git a/arch/unicore32/include/mach/hardware.h b/arch/unicore32/include/mach/hardware.h index 9e20b5d..2514623 100644 --- a/arch/unicore32/include/mach/hardware.h +++ b/arch/unicore32/include/mach/hardware.h @@ -28,11 +28,6 @@ #define PCIBIOS_MIN_IO 0x4000 /* should lower than 64KB */ #define PCIBIOS_MIN_MEM io_v2p(PKUNITY_PCIMEM_BASE) -/* - * We override the standard dma-mask routines for bouncing. - */ -#define HAVE_ARCH_PCI_SET_DMA_MASK - #define pcibios_assign_all_busses() 1 #endif /* __MACH_PUV3_HARDWARE_H__ */ diff --git a/arch/unicore32/kernel/gpio.c b/arch/unicore32/kernel/gpio.c index cb12ec3..5ab2379 100644 --- a/arch/unicore32/kernel/gpio.c +++ b/arch/unicore32/kernel/gpio.c @@ -52,7 +52,7 @@ device_initcall(puv3_gpio_leds_init); static int puv3_gpio_get(struct gpio_chip *chip, unsigned offset) { - return readl(GPIO_GPLR) & GPIO_GPIO(offset); + return !!(readl(GPIO_GPLR) & GPIO_GPIO(offset)); } static void puv3_gpio_set(struct gpio_chip *chip, unsigned offset, int value) diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 3fa317f..c2bffa5 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -72,13 +72,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -211,7 +211,7 @@ request_standard_resources(struct meminfo *mi) res->name = "System RAM"; res->start = mi->bank[i].start; res->end = mi->bank[i].start + mi->bank[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index afccef552..2ec3d3a 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -276,7 +276,7 @@ retry: up_read(&mm->mmap_sem); /* - * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR + * Handle the "normal" case first - VM_FAULT_MAJOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | 
VM_FAULT_BADACCESS)))) diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c index 2ade20d..c572a28 100644 --- a/arch/unicore32/mm/pgd.c +++ b/arch/unicore32/mm/pgd.c @@ -54,7 +54,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) if (!new_pmd) goto no_pmd; - new_pte = pte_alloc_map(mm, NULL, new_pmd, 0); + new_pte = pte_alloc_map(mm, new_pmd, 0); if (!new_pte) goto no_pte; diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 1538562..eb3abf8 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,6 +1,7 @@ - obj-y += entry/ +obj-$(CONFIG_PERF_EVENTS) += events/ + obj-$(CONFIG_KVM) += kvm/ # Xen paravirtualization support diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c46662f..3c74b54 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -303,6 +303,9 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config DEBUG_RODATA + def_bool y + config PGTABLE_LEVELS int default 4 if X86_64 @@ -1160,22 +1163,23 @@ config MICROCODE bool "CPU microcode loading support" default y depends on CPU_SUP_AMD || CPU_SUP_INTEL - depends on BLK_DEV_INITRD select FW_LOADER ---help--- - If you say Y here, you will be able to update the microcode on - certain Intel and AMD processors. The Intel support is for the - IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, - Xeon etc. The AMD support is for families 0x10 and later. You will - obviously need the actual microcode binary data itself which is not - shipped with the Linux kernel. + Intel and AMD processors. The Intel support is for the IA32 family, + e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The + AMD support is for families 0x10 and later. You will obviously need + the actual microcode binary data itself which is not shipped with + the Linux kernel. - This option selects the general module only, you need to select - at least one vendor specific module as well. + The preferred method to load microcode from a detached initrd is described + in Documentation/x86/early-microcode.txt. For that you need to enable + CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the + initrd for microcode blobs. - To compile this driver as a module, choose M here: the module - will be called microcode. + In addition, you can build-in the microcode into the kernel. For that you + need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode + to the CONFIG_EXTRA_FIRMWARE config option. config MICROCODE_INTEL bool "Intel microcode loading support" @@ -2431,8 +2435,6 @@ config PCI_CNB20LE_QUIRK You should say N unless you know you need this. -source "drivers/pci/pcie/Kconfig" - source "drivers/pci/Kconfig" # x86_64 have no ISA slots, but can have ISA-style DMA. @@ -2588,8 +2590,6 @@ config AMD_NB source "drivers/pcmcia/Kconfig" -source "drivers/pci/hotplug/Kconfig" - config RAPIDIO tristate "RapidIO support" depends on PCI diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9b18ed9..67eec55 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -74,28 +74,16 @@ config EFI_PGT_DUMP issues with the mapping of the EFI runtime regions into that table. -config DEBUG_RODATA - bool "Write protect kernel read-only data structures" - default y - depends on DEBUG_KERNEL - ---help--- - Mark the kernel read-only data as write-protected in the pagetables, - in order to catch accidental (and incorrect) writes to such const - data. This is recommended so that we can catch kernel bugs sooner. - If in doubt, say "Y". 
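With DEBUG_RODATA promoted to def_bool y above, write-protecting the kernel's read-only data is no longer an optional debug feature on x86. The effect being relied on is the same one mprotect() gives user space: once the page-table entry drops the write permission, a stray store faults instead of silently corrupting data. Below is a small userspace analogy, not kernel code; the fork() is only there so the program can survive the intentional fault.

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	pid_t pid;
	int status;

	if (p == MAP_FAILED)
		return 1;

	strcpy(p, "const data");             /* writable while being set up */

	if (mprotect(p, pagesz, PROT_READ))  /* now make the page read-only */
		return 1;

	pid = fork();
	if (pid == 0) {
		p[0] = 'X';                  /* stray write: faults with SIGSEGV */
		_exit(0);                    /* never reached */
	}
	waitpid(pid, &status, 0);
	printf("stray write was caught: %d\n",
	       WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV);
	return 0;
}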
- config DEBUG_RODATA_TEST - bool "Testcase for the DEBUG_RODATA feature" - depends on DEBUG_RODATA + bool "Testcase for the marking rodata read-only" default y ---help--- - This option enables a testcase for the DEBUG_RODATA - feature as well as for the change_page_attr() infrastructure. + This option enables a testcase for the setting rodata read-only + as well as for the change_page_attr() infrastructure. If in doubt, say "N" config DEBUG_WX bool "Warn on W+X mappings at boot" - depends on DEBUG_RODATA select X86_PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. @@ -350,16 +338,6 @@ config DEBUG_IMR_SELFTEST If unsure say N here. -config X86_DEBUG_STATIC_CPU_HAS - bool "Debug alternatives" - depends on DEBUG_KERNEL - ---help--- - This option causes additional code to be generated which - fails if static_cpu_has() is used before alternatives have - run. - - If unsure, say N. - config X86_DEBUG_FPU bool "Debug the x86 FPU code" depends on DEBUG_KERNEL diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h index ea97697..4cb404f 100644 --- a/arch/x86/boot/cpuflags.h +++ b/arch/x86/boot/cpuflags.h @@ -1,7 +1,7 @@ #ifndef BOOT_CPUFLAGS_H #define BOOT_CPUFLAGS_H -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/processor-flags.h> struct cpu_features { diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 637097e..f72498d 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -17,7 +17,7 @@ #include "../include/asm/required-features.h" #include "../include/asm/disabled-features.h" -#include "../include/asm/cpufeature.h" +#include "../include/asm/cpufeatures.h" #include "../kernel/cpu/capflags.c" int main(void) diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a7661c4..0702d25 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -49,7 +49,6 @@ typedef unsigned int u32; /* This must be large enough to hold the entire setup */ u8 buf[SETUP_SECT_MAX*512]; -int is_big_kernel; #define PECOFF_RELOC_RESERVE 0x20 diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 028be48..265901a 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -288,7 +288,7 @@ CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_WARN_DEPRECATED is not set -CONFIG_FRAME_WARN=2048 +CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y @@ -303,7 +303,6 @@ CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index cb5b3ab..4f404a6 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -300,7 +300,6 @@ CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 3633ad6..064c7e2 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -639,16 +639,11 @@ static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm); 
- u32 *flags = &tfm->crt_flags; int err; - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } + err = xts_check_key(tfm, key, keylen); + if (err) + return err; /* first half of xts-key is for crypt */ err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2); diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 5c8b626..aa76cad 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -1503,13 +1503,9 @@ int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, u32 *flags = &tfm->crt_flags; int err; - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } + err = xts_check_key(tfm, key, keylen); + if (err) + return err; /* first half of xts-key is for crypt */ err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index fca4595..50e6847 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -329,13 +329,9 @@ static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, u32 *flags = &tfm->crt_flags; int err; - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } + err = xts_check_key(tfm, key, keylen); + if (err) + return err; /* first half of xts-key is for crypt */ err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 07d2c6c..27226df 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -33,7 +33,7 @@ #include <linux/crc32.h> #include <crypto/internal/hash.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> #include <asm/fpu/api.h> diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 0e98716..0857b1a 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -30,7 +30,7 @@ #include <linux/kernel.h> #include <crypto/internal/hash.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> #include <asm/fpu/internal.h> diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index a3fcfc9..cd4df93 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -30,7 +30,7 @@ #include <linux/string.h> #include <linux/kernel.h> #include <asm/fpu/api.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 5dc3702..6f778d3 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -332,16 +332,11 @@ int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; int err; - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return 
-EINVAL; - } + err = xts_check_key(tfm, key, keylen); + if (err) + return err; /* first half of xts-key is for crypt */ err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 3643dd5..8943407 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -309,16 +309,11 @@ static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; int err; - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } + err = xts_check_key(tfm, key, keylen); + if (err) + return err; /* first half of xts-key is for crypt */ err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index a841e97..a8a0224 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -762,6 +762,38 @@ static int sha1_mb_async_digest(struct ahash_request *req) return crypto_ahash_digest(mcryptd_req); } +static int sha1_mb_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_export(mcryptd_req, out); +} + +static int sha1_mb_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + struct crypto_shash *child = mcryptd_ahash_child(mcryptd_tfm); + struct mcryptd_hash_request_ctx *rctx; + struct shash_desc *desc; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + rctx = ahash_request_ctx(mcryptd_req); + desc = &rctx->desc; + desc->tfm = child; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_import(mcryptd_req, in); +} + static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) { struct mcryptd_ahash *mcryptd_tfm; @@ -796,8 +828,11 @@ static struct ahash_alg sha1_mb_async_alg = { .final = sha1_mb_async_final, .finup = sha1_mb_async_finup, .digest = sha1_mb_async_digest, + .export = sha1_mb_async_export, + .import = sha1_mb_async_import, .halg = { .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_hash_ctx), .base = { .cra_name = "sha1", .cra_driver_name = "sha1_mb", diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S index 2ab9560..c420d89 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S @@ -197,7 +197,7 @@ len_is_0: vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 - movl 4*32(state, idx, 4), DWORD_tmp + movl _args_digest+4*32(state, idx, 4), DWORD_tmp vmovdqu %xmm0, _result_digest(job_rax) movl DWORD_tmp, _result_digest+1*16(job_rax) diff --git 
a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 56d8a08..2ebb5e9 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -277,13 +277,9 @@ int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, u32 *flags = &tfm->crt_flags; int err; - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } + err = xts_check_key(tfm, key, keylen); + if (err) + return err; /* first half of xts-key is for crypt */ err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index e32206e..9a9e588 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with .byte 0xf1 .endm -#else /* CONFIG_X86_64 */ - -/* - * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These - * are different from the entry_32.S versions in not changing the segment - * registers. So only suitable for in kernel use, not when transitioning - * from or to user space. The resulting stack frame is not a standard - * pt_regs frame. The main use case is calling C code from assembler - * when all the registers need to be preserved. - */ - - .macro SAVE_ALL - pushl %eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - .endm - - .macro RESTORE_ALL - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax - .endm - #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 0366374..e79d93d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include <asm/traps.h> #include <asm/vdso.h> #include <asm/uaccess.h> +#include <asm/cpufeature.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void) CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit(); } +#else +static inline void enter_from_user_mode(void) {} #endif static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) @@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; -#ifdef CONFIG_CONTEXT_TRACKING - /* - * If TIF_NOHZ is set, we are required to call user_exit() before - * doing anything that could touch RCU. - */ - if (work & _TIF_NOHZ) { - enter_from_user_mode(); - work &= ~_TIF_NOHZ; - } -#endif - #ifdef CONFIG_SECCOMP /* * Do seccomp first -- it should minimize exposure of other @@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - /* - * If we stepped into a sysenter/syscall insn, it trapped in - * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. - * If user-mode had set TF itself, then it's still clear from - * do_debug() and we need to set it again to restore the user - * state. If we entered on the slow path, TF was already set. - */ - if (work & _TIF_SINGLESTEP) - regs->flags |= X86_EFLAGS_TF; - #ifdef CONFIG_SECCOMP /* * Call seccomp_phase2 before running the other hooks so that @@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) /* Called with IRQs disabled. 
*/ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) { + struct thread_info *ti = pt_regs_to_thread_info(regs); u32 cached_flags; if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) @@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) lockdep_sys_exit(); - cached_flags = - READ_ONCE(pt_regs_to_thread_info(regs)->flags); + cached_flags = READ_ONCE(ti->flags); if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) exit_to_usermode_loop(regs, cached_flags); +#ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. Make sure we clear it before + * returning to user mode. We need to clear it *after* signal + * handling, because syscall restart has a fixup for compat + * syscalls. The fixup is exercised by the ptrace_syscall_32 + * selftest. + */ + ti->status &= ~TS_COMPAT; +#endif + user_enter(); } @@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) syscall_slow_exit_work(regs, cached_flags); -#ifdef CONFIG_COMPAT + local_irq_disable(); + prepare_exit_to_usermode(regs); +} + +#ifdef CONFIG_X86_64 +__visible void do_syscall_64(struct pt_regs *regs) +{ + struct thread_info *ti = pt_regs_to_thread_info(regs); + unsigned long nr = regs->orig_ax; + + enter_from_user_mode(); + local_irq_enable(); + + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) + nr = syscall_trace_enter(regs); + /* - * Compat syscalls set TS_COMPAT. Make sure we clear it before - * returning to user mode. + * NB: Native and x32 syscalls are dispatched from the same + * table. The only functional difference is the x32 bit in + * regs->orig_ax, which changes the behavior of some syscalls. */ - ti->status &= ~TS_COMPAT; -#endif + if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { + regs->ax = sys_call_table[nr & __SYSCALL_MASK]( + regs->di, regs->si, regs->dx, + regs->r10, regs->r8, regs->r9); + } - local_irq_disable(); - prepare_exit_to_usermode(regs); + syscall_return_slowpath(regs); } +#endif #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) /* - * Does a 32-bit syscall. Called with IRQs on and does all entry and - * exit work and returns with IRQs off. This function is extremely hot - * in workloads that use it, and it's usually called from + * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does + * all entry and exit work and returns with IRQs off. This function is + * extremely hot in workloads that use it, and it's usually called from * do_fast_syscall_32, so forcibly inline it to improve performance. */ -#ifdef CONFIG_X86_32 -/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */ -__visible -#else -/* 64-bit kernels use do_syscall_32_irqs_off() instead. */ -static -#endif -__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) +static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) { struct thread_info *ti = pt_regs_to_thread_info(regs); unsigned int nr = (unsigned int)regs->orig_ax; @@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) syscall_return_slowpath(regs); } -#ifdef CONFIG_X86_64 -/* Handles INT80 on 64-bit kernels */ -__visible void do_syscall_32_irqs_off(struct pt_regs *regs) +/* Handles int $0x80 */ +__visible void do_int80_syscall_32(struct pt_regs *regs) { + enter_from_user_mode(); local_irq_enable(); do_syscall_32_irqs_on(regs); } -#endif /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. 
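The new do_syscall_64() above is essentially a masked, bounds-checked table dispatch with six register arguments. A minimal user-space model of that shape (the mask value, table size, and function names here are stand-ins, not the kernel's):

    #include <errno.h>
    #include <stdio.h>

    typedef long (*sys_call_fn)(long, long, long, long, long, long);

    static long demo_sys_hello(long a, long b, long c, long d, long e, long f)
    {
            (void)b; (void)c; (void)d; (void)e; (void)f;
            printf("demo syscall called, arg1=%ld\n", a);
            return 0;
    }

    #define DEMO_SYSCALL_MASK       0x3ff   /* stand-in for __SYSCALL_MASK */
    #define DEMO_NR_SYSCALLS        1       /* stand-in for NR_syscalls */

    static const sys_call_fn demo_table[DEMO_NR_SYSCALLS] = { demo_sys_hello };

    static long demo_do_syscall(unsigned long nr, long a1, long a2, long a3,
                                long a4, long a5, long a6)
    {
            nr &= DEMO_SYSCALL_MASK;        /* strip the x32 marker bit before the lookup */
            if (nr >= DEMO_NR_SYSCALLS)
                    return -ENOSYS;         /* the real code preloads pt_regs->ax with this */
            return demo_table[nr](a1, a2, a3, a4, a5, a6);
    }

    int main(void)
    {
            return (int)demo_do_syscall(0, 42, 0, 0, 0, 0, 0);
    }

Moving this dispatch into C is what lets the asm fast path shrink to "check for work, then either call the table directly or jump to the slow path".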
*/ __visible long do_fast_syscall_32(struct pt_regs *regs) @@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) */ regs->ip = landing_pad; - /* - * Fetch EBP from where the vDSO stashed it. - * - * WARNING: We are in CONTEXT_USER and RCU isn't paying attention! - */ + enter_from_user_mode(); + local_irq_enable(); + + /* Fetch EBP from where the vDSO stashed it. */ if ( #ifdef CONFIG_X86_64 /* @@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) /* User code screwed up. */ local_irq_disable(); regs->ax = -EFAULT; -#ifdef CONFIG_CONTEXT_TRACKING - enter_from_user_mode(); -#endif prepare_exit_to_usermode(regs); return 0; /* Keep it simple: use IRET. */ } diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bb3e376..10868aa 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -40,7 +40,7 @@ #include <asm/processor-flags.h> #include <asm/ftrace.h> #include <asm/irq_vectors.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/smap.h> @@ -287,14 +287,64 @@ need_resched: END(resume_kernel) #endif - # SYSENTER call handler stub +GLOBAL(__begin_SYSENTER_singlestep_region) +/* + * All code from here through __end_SYSENTER_singlestep_region is subject + * to being single-stepped if a user program sets TF and executes SYSENTER. + * There is absolutely nothing that we can do to prevent this from happening + * (thanks Intel!). To keep our handling of this situation as simple as + * possible, we handle TF just like AC and NT, except that our #DB handler + * will ignore all of the single-step traps generated in this range. + */ + +#ifdef CONFIG_XEN +/* + * Xen doesn't set %esp to be precisely what the normal SYSENTER + * entry point expects, so fix it up before using the normal path. + */ +ENTRY(xen_sysenter_target) + addl $5*4, %esp /* remove xen-provided frame */ + jmp sysenter_past_esp +#endif + +/* + * 32-bit SYSENTER entry. + * + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * if X86_FEATURE_SEP is available. This is the preferred system call + * entry on 32-bit systems. + * + * The SYSENTER instruction, in principle, should *only* occur in the + * vDSO. In practice, a small number of Android devices were shipped + * with a copy of Bionic that inlined a SYSENTER instruction. This + * never happened in any of Google's Bionic versions -- it only happened + * in a narrow range of Intel-provided versions. + * + * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs. + * IF and VM in RFLAGS are cleared (IOW: interrupts are off). + * SYSENTER does not save anything on the stack, + * and does not save old EIP (!!!), ESP, or EFLAGS. + * + * To avoid losing track of EFLAGS.VM (and thus potentially corrupting + * user and/or vm86 state), we explicitly disable the SYSENTER + * instruction in vm86 mode by reprogramming the MSRs. 
+ * + * Arguments: + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp user stack + * 0(%ebp) arg6 + */ ENTRY(entry_SYSENTER_32) movl TSS_sysenter_sp0(%esp), %esp sysenter_past_esp: pushl $__USER_DS /* pt_regs->ss */ pushl %ebp /* pt_regs->sp (stashed in bp) */ pushfl /* pt_regs->flags (except IF = 0) */ - ASM_CLAC /* Clear AC after saving FLAGS */ orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ pushl $__USER_CS /* pt_regs->cs */ pushl $0 /* pt_regs->ip = 0 (placeholder) */ @@ -302,6 +352,29 @@ sysenter_past_esp: SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ /* + * SYSENTER doesn't filter flags, so we need to clear NT, AC + * and TF ourselves. To save a few cycles, we can check whether + * either was set instead of doing an unconditional popfq. + * This needs to happen before enabling interrupts so that + * we don't get preempted with NT set. + * + * If TF is set, we will single-step all the way to here -- do_debug + * will ignore all the traps. (Yes, this is slow, but so is + * single-stepping in general. This allows us to avoid having + * a more complicated code to handle the case where a user program + * forces us to single-step through the SYSENTER entry code.) + * + * NB.: .Lsysenter_fix_flags is a label with the code under it moved + * out-of-line as an optimization: NT is unlikely to be set in the + * majority of the cases and instead of polluting the I$ unnecessarily, + * we're keeping that code behind a branch which will predict as + * not-taken and therefore its instructions won't be fetched. + */ + testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp) + jnz .Lsysenter_fix_flags +.Lsysenter_flags_fixed: + + /* * User mode is traced as though IRQs are on, and SYSENTER * turned them off. */ @@ -327,6 +400,15 @@ sysenter_past_esp: popl %eax /* pt_regs->ax */ /* + * Restore all flags except IF. (We restore IF separately because + * STI gives a one-instruction window in which we won't be interrupted, + * whereas POPF does not.) + */ + addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */ + btr $X86_EFLAGS_IF_BIT, (%esp) + popfl + + /* * Return back to the vDSO, which will pop ecx and edx. * Don't bother with DS and ES (they already contain __USER_DS). */ @@ -339,28 +421,63 @@ sysenter_past_esp: .popsection _ASM_EXTABLE(1b, 2b) PTGS_TO_GS_EX + +.Lsysenter_fix_flags: + pushl $X86_EFLAGS_FIXED + popfl + jmp .Lsysenter_flags_fixed +GLOBAL(__end_SYSENTER_singlestep_region) ENDPROC(entry_SYSENTER_32) - # system call handler stub +/* + * 32-bit legacy system call entry. + * + * 32-bit x86 Linux system calls traditionally used the INT $0x80 + * instruction. INT $0x80 lands here. + * + * This entry point can be used by any 32-bit perform system calls. + * Instances of INT $0x80 can be found inline in various programs and + * libraries. It is also used by the vDSO's __kernel_vsyscall + * fallback for hardware that doesn't support a faster entry method. + * Restarted 32-bit system calls also fall back to INT $0x80 + * regardless of what instruction was originally used to do the system + * call. (64-bit programs can use INT $0x80 as well, but they can + * only run on 64-bit kernels and therefore land in + * entry_INT80_compat.) + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. 
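The NT/AC/TF fixup added above only pays for a full flags rewrite when one of those bits is actually set; the common case falls straight through, which is why .Lsysenter_fix_flags is kept out of line. A small C model of that decision (flag values match the architectural EFLAGS bit positions; the function itself is illustrative):

    #include <stdio.h>

    #define DEMO_EFLAGS_TF          0x00000100UL    /* trap flag (single-step) */
    #define DEMO_EFLAGS_NT          0x00004000UL    /* nested task */
    #define DEMO_EFLAGS_AC          0x00040000UL    /* alignment check */
    #define DEMO_EFLAGS_FIXED       0x00000002UL    /* bit 1 is always set */

    /*
     * The entry code tests the *saved* flags word; if any dangerous bit is
     * set, it loads a clean FIXED value into the live register (the
     * push $X86_EFLAGS_FIXED; popfl pair). Otherwise nothing is touched.
     */
    static unsigned long demo_sanitized_live_flags(unsigned long saved_eflags)
    {
            if (saved_eflags & (DEMO_EFLAGS_TF | DEMO_EFLAGS_NT | DEMO_EFLAGS_AC))
                    return DEMO_EFLAGS_FIXED;       /* what popfl leaves behind */
            return saved_eflags;                    /* fast path: leave flags alone */
    }

    int main(void)
    {
            printf("0x%lx -> 0x%lx\n", 0x40202UL, demo_sanitized_live_flags(0x40202UL));
            printf("0x%lx -> 0x%lx\n", 0x202UL, demo_sanitized_live_flags(0x202UL));
            return 0;
    }

The first case (AC set) collapses to the fixed value; the second (only IF and the always-set bit) takes the predicted-not-taken fall-through.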
+ * + * Arguments: + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp arg6 + */ ENTRY(entry_INT80_32) ASM_CLAC pushl %eax /* pt_regs->orig_ax */ SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ /* - * User mode is traced as though IRQs are on. Unlike the 64-bit - * case, INT80 is a trap gate on 32-bit kernels, so interrupts - * are already on (unless user code is messing around with iopl). + * User mode is traced as though IRQs are on, and the interrupt gate + * turned them off. */ + TRACE_IRQS_OFF movl %esp, %eax - call do_syscall_32_irqs_on + call do_int80_syscall_32 .Lsyscall_32_done: restore_all: TRACE_IRQS_IRET restore_all_notrace: #ifdef CONFIG_X86_ESPFIX32 + ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS /* * Warning: PT_OLDSS(%esp) contains the wrong/random values if we @@ -387,19 +504,6 @@ ENTRY(iret_exc ) #ifdef CONFIG_X86_ESPFIX32 ldt_ss: -#ifdef CONFIG_PARAVIRT - /* - * The kernel can't run on a non-flat stack if paravirt mode - * is active. Rather than try to fixup the high bits of - * ESP, bypass this code entirely. This may break DOSemu - * and/or Wine support in a paravirt VM, although the option - * is still available to implement the setting of the high - * 16-bits in the INTERRUPT_RETURN paravirt-op. - */ - cmpl $0, pv_info+PARAVIRT_enabled - jne restore_nocheck -#endif - /* * Setup and switch to ESPFIX stack * @@ -632,14 +736,6 @@ ENTRY(spurious_interrupt_bug) END(spurious_interrupt_bug) #ifdef CONFIG_XEN -/* - * Xen doesn't set %esp to be precisely what the normal SYSENTER - * entry point expects, so fix it up before using the normal path. - */ -ENTRY(xen_sysenter_target) - addl $5*4, %esp /* remove xen-provided frame */ - jmp sysenter_past_esp - ENTRY(xen_hypervisor_callback) pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL @@ -939,51 +1035,48 @@ error_code: jmp ret_from_exception END(page_fault) -/* - * Debug traps and NMI can happen at the one SYSENTER instruction - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * - * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. - * - * We just load the right stack, and push the three (known) values - * by hand onto the new stack - while updating the return eip past - * the instruction that would have done it for sysenter. - */ -.macro FIX_STACK offset ok label - cmpw $__KERNEL_CS, 4(%esp) - jne \ok -\label: - movl TSS_sysenter_sp0 + \offset(%esp), %esp - pushfl - pushl $__KERNEL_CS - pushl $sysenter_past_esp -.endm - ENTRY(debug) + /* + * #DB can happen at the first instruction of + * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this + * happens, then we will be running on a very small stack. We + * need to detect this condition and switch to the thread + * stack before calling any C code at all. + * + * If you edit this code, keep in mind that NMIs can happen in here. + */ ASM_CLAC - cmpl $entry_SYSENTER_32, (%esp) - jne debug_stack_correct - FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn -debug_stack_correct: pushl $-1 # mark this as an int SAVE_ALL - TRACE_IRQS_OFF xorl %edx, %edx # error code 0 movl %esp, %eax # pt_regs pointer + + /* Are we currently on the SYSENTER stack? 
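The stack test spelled out just after this comment is a single unsigned comparison: subtract the current stack pointer from the end of the per-cpu SYSENTER stack and see whether the distance is smaller than the stack's size. A runnable model of that arithmetic (addresses and sizes below are made up):

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Mirrors the PER_CPU/subl/cmpl/jb sequence: the subtraction wraps for
     * pointers above the stack end, so one unsigned compare covers both
     * "below the stack" and "above the stack".
     */
    static int demo_on_sysenter_stack(uintptr_t sp, uintptr_t stack_end,
                                      size_t stack_size)
    {
            return (stack_end - sp) < stack_size;   /* unsigned, like cmpl/jb */
    }

    int main(void)
    {
            uintptr_t end = 0x1000;         /* one past the top of the stack */
            size_t size = 512;

            printf("%d\n", demo_on_sysenter_stack(0x0f00, end, size));  /* inside  -> 1 */
            printf("%d\n", demo_on_sysenter_stack(0x2000, end, size));  /* outside -> 0 */
            return 0;
    }

If the test says "inside", the #DB/NMI handlers switch to the thread stack before calling any C code, exactly as the new .Ldebug_from_sysenter_stack / .Lnmi_from_sysenter_stack paths do.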
*/ + PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Ldebug_from_sysenter_stack + + TRACE_IRQS_OFF + call do_debug + jmp ret_from_exception + +.Ldebug_from_sysenter_stack: + /* We're on the SYSENTER stack. Switch off. */ + movl %esp, %ebp + movl PER_CPU_VAR(cpu_current_top_of_stack), %esp + TRACE_IRQS_OFF call do_debug + movl %ebp, %esp jmp ret_from_exception END(debug) /* - * NMI is doubly nasty. It can happen _while_ we're handling - * a debug fault, and the debug fault hasn't yet been able to - * clear up the stack. So we first check whether we got an - * NMI on the sysenter entry path, but after that we need to - * check whether we got an NMI on the debug path where the debug - * fault happened on the sysenter path. + * NMI is doubly nasty. It can happen on the first instruction of + * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning + * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32 + * switched stacks. We handle both conditions by simply checking whether we + * interrupted kernel code running on the SYSENTER stack. */ ENTRY(nmi) ASM_CLAC @@ -994,41 +1087,32 @@ ENTRY(nmi) popl %eax je nmi_espfix_stack #endif - cmpl $entry_SYSENTER_32, (%esp) - je nmi_stack_fixup - pushl %eax - movl %esp, %eax - /* - * Do not access memory above the end of our stack page, - * it might not exist. - */ - andl $(THREAD_SIZE-1), %eax - cmpl $(THREAD_SIZE-20), %eax - popl %eax - jae nmi_stack_correct - cmpl $entry_SYSENTER_32, 12(%esp) - je nmi_debug_stack_check -nmi_stack_correct: - pushl %eax + + pushl %eax # pt_regs->orig_ax SAVE_ALL xorl %edx, %edx # zero error code movl %esp, %eax # pt_regs pointer + + /* Are we currently on the SYSENTER stack? */ + PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Lnmi_from_sysenter_stack + + /* Not on SYSENTER stack. */ call do_nmi jmp restore_all_notrace -nmi_stack_fixup: - FIX_STACK 12, nmi_stack_correct, 1 - jmp nmi_stack_correct - -nmi_debug_stack_check: - cmpw $__KERNEL_CS, 16(%esp) - jne nmi_stack_correct - cmpl $debug, (%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn, (%esp) - ja nmi_stack_correct - FIX_STACK 24, nmi_stack_correct, 1 - jmp nmi_stack_correct +.Lnmi_from_sysenter_stack: + /* + * We're on the SYSENTER stack. Switch off. No one (not even debug) + * is using the thread stack right now, so it's safe for us to use it. + */ + movl %esp, %ebp + movl PER_CPU_VAR(cpu_current_top_of_stack), %esp + call do_nmi + movl %ebp, %esp + jmp restore_all_notrace #ifdef CONFIG_X86_ESPFIX32 nmi_espfix_stack: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9d34d3c..858b555 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64) /* * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. * + * This is the only entry point used for 64-bit system calls. The + * hardware interface is reasonably well designed and the register to + * argument mapping Linux uses fits well with the registers that are + * available when SYSCALL is used. + * + * SYSCALL instructions can be found inlined in libc implementations as + * well as some other programs and libraries. 
There are also a handful + * of SYSCALL instructions in the vDSO used, for example, as a + * clock_gettimeofday fallback. + * * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, * then loads new ss, cs, and rip from previously programmed MSRs. * rflags gets masked by a value from another MSR (so CLD and CLAC @@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + TRACE_IRQS_OFF + /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ - /* - * Re-enable interrupts. - * We use 'rsp_scratch' as a scratch space, hence irq-off block above - * must execute atomically in the face of possible interrupt-driven - * task preemption. We must enable interrupts only after we're done - * with using rsp_scratch: - */ - ENABLE_INTERRUPTS(CLBR_NONE) pushq %r11 /* pt_regs->flags */ pushq $__USER_CS /* pt_regs->cs */ pushq %rcx /* pt_regs->ip */ @@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r11 /* pt_regs->r11 */ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz tracesys + /* + * If we need to do entry work or if we guess we'll need to do + * exit work, go straight to the slow path. + */ + testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz entry_SYSCALL64_slow_path + entry_SYSCALL_64_fastpath: + /* + * Easy case: enable interrupts and issue the syscall. If the syscall + * needs pt_regs, we'll call a stub that disables interrupts again + * and jumps to the slow path. + */ + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) #if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max, %rax #else @@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath: #endif ja 1f /* return -ENOSYS (already in pt_regs->ax) */ movq %r10, %rcx + + /* + * This call instruction is handled specially in stub_ptregs_64. + * It might end up jumping to the slow path. If it jumps, RAX + * and all argument registers are clobbered. + */ call *sys_call_table(, %rax, 8) +.Lentry_SYSCALL_64_after_fastpath_call: + movq %rax, RAX(%rsp) 1: -/* - * Syscall return path ending with SYSRET (fast path). - * Has incompletely filled pt_regs. - */ - LOCKDEP_SYS_EXIT - /* - * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, - * it is too small to ever cause noticeable irq latency. - */ - DISABLE_INTERRUPTS(CLBR_NONE) /* - * We must check ti flags with interrupts (or at least preemption) - * off because we must *never* return to userspace without - * processing exit work that is enqueued if we're preempted here. - * In particular, returning to userspace with any of the one-shot - * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is - * very bad. + * If we get here, then we know that pt_regs is clean for SYSRET64. + * If we see that no exit work is required (which we are required + * to check with IRQs off), then we can go straight to SYSRET64. 
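The exit check described here reduces to: with IRQs off, look at the thread-info work mask; if anything is pending, take the slow path (which ends in IRET), otherwise pt_regs is still clean enough for SYSRET64. A trivial model of that decision (the flag bits are illustrative, not the kernel's _TIF_* values):

    #include <stdio.h>

    #define DEMO_TIF_SIGPENDING     (1u << 0)
    #define DEMO_TIF_NEED_RESCHED   (1u << 1)
    #define DEMO_TIF_SYSCALL_TRACE  (1u << 2)
    #define DEMO_ALLWORK_MASK       (DEMO_TIF_SIGPENDING | DEMO_TIF_NEED_RESCHED | \
                                     DEMO_TIF_SYSCALL_TRACE)

    /* Decision made with interrupts disabled, so the flags cannot change
     * underneath us between the check and the return instruction. */
    static const char *demo_return_path(unsigned int ti_flags)
    {
            if (ti_flags & DEMO_ALLWORK_MASK)
                    return "slow path (IRET)";
            return "fast path (SYSRET64)";
    }

    int main(void)
    {
            printf("%s\n", demo_return_path(0));
            printf("%s\n", demo_return_path(DEMO_TIF_NEED_RESCHED));
            return 0;
    }

Checking with IRQs off is the whole point: a one-shot flag set by an interrupt after the check would otherwise be lost on the way back to user space.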
*/ + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ + jnz 1f - RESTORE_C_REGS_EXCEPT_RCX_R11 + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON /* user mode is traced as IRQs on */ movq RIP(%rsp), %rcx movq EFLAGS(%rsp), %r11 + RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp), %rsp - /* - * 64-bit SYSRET restores rip from rcx, - * rflags from r11 (but RF and VM bits are forced to 0), - * cs and ss are loaded from MSRs. - * Restoration of rflags re-enables interrupts. - * - * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss - * descriptor is not reinitialized. This means that we should - * avoid SYSRET with SS == NULL, which could happen if we schedule, - * exit the kernel, and re-enter using an interrupt vector. (All - * interrupt entries on x86_64 set SS to NULL.) We prevent that - * from happening by reloading SS in __switch_to. (Actually - * detecting the failure in 64-bit userspace is tricky but can be - * done.) - */ USERGS_SYSRET64 -GLOBAL(int_ret_from_sys_call_irqs_off) +1: + /* + * The fast path looked good when we started, but something changed + * along the way and we need to switch to the slow path. Calling + * raise(3) will trigger this, for example. IRQs are off. + */ TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - jmp int_ret_from_sys_call - - /* Do syscall entry tracing */ -tracesys: - movq %rsp, %rdi - movl $AUDIT_ARCH_X86_64, %esi - call syscall_trace_enter_phase1 - test %rax, %rax - jnz tracesys_phase2 /* if needed, run the slow path */ - RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ - movq ORIG_RAX(%rsp), %rax - jmp entry_SYSCALL_64_fastpath /* and return to the fast path */ - -tracesys_phase2: SAVE_EXTRA_REGS movq %rsp, %rdi - movl $AUDIT_ARCH_X86_64, %esi - movq %rax, %rdx - call syscall_trace_enter_phase2 - - /* - * Reload registers from stack in case ptrace changed them. - * We don't reload %rax because syscall_trace_entry_phase2() returned - * the value it wants us to use in the table lookup. - */ - RESTORE_C_REGS_EXCEPT_RAX - RESTORE_EXTRA_REGS -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max, %rax -#else - andl $__SYSCALL_MASK, %eax - cmpl $__NR_syscall_max, %eax -#endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10, %rcx /* fixup for C */ - call *sys_call_table(, %rax, 8) - movq %rax, RAX(%rsp) -1: - /* Use IRET because user could have changed pt_regs->foo */ + call syscall_return_slowpath /* returns with IRQs disabled */ + jmp return_from_SYSCALL_64 -/* - * Syscall return path ending with IRET. - * Has correct iret frame. - */ -GLOBAL(int_ret_from_sys_call) +entry_SYSCALL64_slow_path: + /* IRQs are off. */ SAVE_EXTRA_REGS movq %rsp, %rdi - call syscall_return_slowpath /* returns with IRQs disabled */ + call do_syscall_64 /* returns with IRQs disabled */ + +return_from_SYSCALL_64: RESTORE_EXTRA_REGS TRACE_IRQS_IRETQ /* we're about to change IF */ @@ -355,83 +324,45 @@ opportunistic_sysret_failed: jmp restore_c_regs_and_iret END(entry_SYSCALL_64) +ENTRY(stub_ptregs_64) + /* + * Syscalls marked as needing ptregs land here. + * If we are on the fast path, we need to save the extra regs, + * which we achieve by trying again on the slow path. If we are on + * the slow path, the extra regs are already saved. + * + * RAX stores a pointer to the C function implementing the syscall. + * IRQs are on. 
+ */ + cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) + jne 1f - .macro FORK_LIKE func -ENTRY(stub_\func) - SAVE_EXTRA_REGS 8 - jmp sys_\func -END(stub_\func) - .endm - - FORK_LIKE clone - FORK_LIKE fork - FORK_LIKE vfork - -ENTRY(stub_execve) - call sys_execve -return_from_execve: - testl %eax, %eax - jz 1f - /* exec failed, can use fast SYSRET code path in this case */ - ret -1: - /* must use IRET code path (pt_regs->cs may have changed) */ - addq $8, %rsp - ZERO_EXTRA_REGS - movq %rax, RAX(%rsp) - jmp int_ret_from_sys_call -END(stub_execve) -/* - * Remaining execve stubs are only 7 bytes long. - * ENTRY() often aligns to 16 bytes, which in this case has no benefits. - */ - .align 8 -GLOBAL(stub_execveat) - call sys_execveat - jmp return_from_execve -END(stub_execveat) - -#if defined(CONFIG_X86_X32_ABI) - .align 8 -GLOBAL(stub_x32_execve) - call compat_sys_execve - jmp return_from_execve -END(stub_x32_execve) - .align 8 -GLOBAL(stub_x32_execveat) - call compat_sys_execveat - jmp return_from_execve -END(stub_x32_execveat) -#endif - -/* - * sigreturn is special because it needs to restore all registers on return. - * This cannot be done with SYSRET, so use the IRET return path instead. - */ -ENTRY(stub_rt_sigreturn) /* - * SAVE_EXTRA_REGS result is not normally needed: - * sigreturn overwrites all pt_regs->GPREGS. - * But sigreturn can fail (!), and there is no easy way to detect that. - * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error, - * we SAVE_EXTRA_REGS here. + * Called from fast path -- disable IRQs again, pop return address + * and jump to slow path */ - SAVE_EXTRA_REGS 8 - call sys_rt_sigreturn -return_from_stub: - addq $8, %rsp - RESTORE_EXTRA_REGS - movq %rax, RAX(%rsp) - jmp int_ret_from_sys_call -END(stub_rt_sigreturn) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + popq %rax + jmp entry_SYSCALL64_slow_path -#ifdef CONFIG_X86_X32_ABI -ENTRY(stub_x32_rt_sigreturn) - SAVE_EXTRA_REGS 8 - call sys32_x32_rt_sigreturn - jmp return_from_stub -END(stub_x32_rt_sigreturn) -#endif +1: + /* Called from C */ + jmp *%rax /* called from C */ +END(stub_ptregs_64) + +.macro ptregs_stub func +ENTRY(ptregs_\func) + leaq \func(%rip), %rax + jmp stub_ptregs_64 +END(ptregs_\func) +.endm + +/* Instantiate ptregs_stub for each ptregs-using syscall */ +#define __SYSCALL_64_QUAL_(sym) +#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym +#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) +#include <asm/syscalls_64.h> /* * A newly forked process directly context switches into this address. @@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn) * rdi: prev task we switched from */ ENTRY(ret_from_fork) - LOCK ; btr $TIF_FORK, TI_flags(%r8) pushq $0x0002 @@ -447,28 +377,32 @@ ENTRY(ret_from_fork) call schedule_tail /* rdi: 'prev' task parameter */ - RESTORE_EXTRA_REGS - testb $3, CS(%rsp) /* from kernel_thread? */ + jnz 1f /* - * By the time we get here, we have no idea whether our pt_regs, - * ti flags, and ti status came from the 64-bit SYSCALL fast path, - * the slow path, or one of the 32-bit compat paths. - * Use IRET code path to return, since it can safely handle - * all of the above. + * We came from kernel_thread. This code path is quite twisted, and + * someone should clean it up. + * + * copy_thread_tls stashes the function pointer in RBX and the + * parameter to be passed in RBP. The called function is permitted + * to call do_execve and thereby jump to user mode. 
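For the kernel-thread leg of ret_from_fork, the function pointer and its argument were parked in the child's saved RBX/RBP by copy_thread_tls(), and the trampoline just calls fn(arg) and then pretends a syscall returned 0. A user-space sketch of that shape (structure and names are illustrative, not the kernel's pt_regs layout):

    #include <stdio.h>

    struct demo_saved_regs {
            long (*fn)(void *);     /* stand-in for the value stashed in RBX */
            void *arg;              /* stand-in for the value stashed in RBP */
            long ax;                /* stand-in for pt_regs->ax */
    };

    static long demo_kthread_fn(void *arg)
    {
            printf("kernel thread body ran with arg \"%s\"\n", (const char *)arg);
            return 0;
    }

    static void demo_ret_from_fork(struct demo_saved_regs *regs)
    {
            regs->fn(regs->arg);    /* may switch to user mode via do_execve */
            regs->ax = 0;           /* then fall through as if a syscall returned 0 */
    }

    int main(void)
    {
            struct demo_saved_regs regs = { demo_kthread_fn, "hello", -1 };

            demo_ret_from_fork(&regs);
            printf("pt_regs->ax is now %ld\n", regs.ax);
            return 0;
    }

Routing both legs through syscall_return_slowpath() afterwards is what removes the old dependence on int_ret_from_sys_call.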
*/ - jnz int_ret_from_sys_call + movq RBP(%rsp), %rdi + call *RBX(%rsp) + movl $0, RAX(%rsp) /* - * We came from kernel_thread - * nb: we depend on RESTORE_EXTRA_REGS above + * Fall through as though we're exiting a syscall. This makes a + * twisted sort of sense if we just called do_execve. */ - movq %rbp, %rdi - call *%rbx - movl $0, RAX(%rsp) - RESTORE_EXTRA_REGS - jmp int_ret_from_sys_call + +1: + movq %rsp, %rdi + call syscall_return_slowpath /* returns with IRQs disabled */ + TRACE_IRQS_ON /* user mode is traced as IRQS on */ + SWAPGS + jmp restore_regs_and_iret END(ret_from_fork) /* diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 3c990ee..847f2f0 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -19,12 +19,21 @@ .section .entry.text, "ax" /* - * 32-bit SYSENTER instruction entry. + * 32-bit SYSENTER entry. * - * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs. - * IF and VM in rflags are cleared (IOW: interrupts are off). + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * on 64-bit kernels running on Intel CPUs. + * + * The SYSENTER instruction, in principle, should *only* occur in the + * vDSO. In practice, a small number of Android devices were shipped + * with a copy of Bionic that inlined a SYSENTER instruction. This + * never happened in any of Google's Bionic versions -- it only happened + * in a narrow range of Intel-provided versions. + * + * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs. + * IF and VM in RFLAGS are cleared (IOW: interrupts are off). * SYSENTER does not save anything on the stack, - * and does not save old rip (!!!) and rflags. + * and does not save old RIP (!!!), RSP, or RFLAGS. * * Arguments: * eax system call number @@ -35,10 +44,6 @@ * edi arg5 * ebp user stack * 0(%ebp) arg6 - * - * This is purely a fast path. For anything complicated we use the int 0x80 - * path below. We set up a complete hardware stack frame to share code - * with the int 0x80 path. */ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ @@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat) */ pushfq /* pt_regs->flags (except IF = 0) */ orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ - ASM_CLAC /* Clear AC after saving FLAGS */ - pushq $__USER32_CS /* pt_regs->cs */ xorq %r8,%r8 pushq %r8 /* pt_regs->ip = 0 (placeholder) */ @@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat) cld /* - * Sysenter doesn't filter flags, so we need to clear NT + * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether - * NT was set instead of doing an unconditional popfq. + * either was set instead of doing an unconditional popfq. * This needs to happen before enabling interrupts so that * we don't get preempted with NT set. * + * If TF is set, we will single-step all the way to here -- do_debug + * will ignore all the traps. (Yes, this is slow, but so is + * single-stepping in general. This allows us to avoid having + * a more complicated code to handle the case where a user program + * forces us to single-step through the SYSENTER entry code.) + * * NB.: .Lsysenter_fix_flags is a label with the code under it moved * out-of-line as an optimization: NT is unlikely to be set in the * majority of the cases and instead of polluting the I$ unnecessarily, * we're keeping that code behind a branch which will predict as * not-taken and therefore its instructions won't be fetched. 
*/ - testl $X86_EFLAGS_NT, EFLAGS(%rsp) + testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp) jnz .Lsysenter_fix_flags .Lsysenter_flags_fixed: @@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat) pushq $X86_EFLAGS_FIXED popfq jmp .Lsysenter_flags_fixed +GLOBAL(__end_entry_SYSENTER_compat) ENDPROC(entry_SYSENTER_compat) /* - * 32-bit SYSCALL instruction entry. + * 32-bit SYSCALL entry. + * + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * on 64-bit kernels running on AMD CPUs. + * + * The SYSCALL instruction, in principle, should *only* occur in the + * vDSO. In practice, it appears that this really is the case. + * As evidence: + * + * - The calling convention for SYSCALL has changed several times without + * anyone noticing. * - * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, - * then loads new ss, cs, and rip from previously programmed MSRs. - * rflags gets masked by a value from another MSR (so CLD and CLAC - * are not needed). SYSCALL does not save anything on the stack - * and does not change rsp. + * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything + * user task that did SYSCALL without immediately reloading SS + * would randomly crash. * - * Note: rflags saving+masking-with-MSR happens only in Long mode + * - Most programmers do not directly target AMD CPUs, and the 32-bit + * SYSCALL instruction does not exist on Intel CPUs. Even on AMD + * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels + * because the SYSCALL instruction in legacy/native 32-bit mode (as + * opposed to compat mode) is sufficiently poorly designed as to be + * essentially unusable. + * + * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves + * RFLAGS to R11, then loads new SS, CS, and RIP from previously + * programmed MSRs. RFLAGS gets masked by a value from another MSR + * (so CLD and CLAC are not needed). SYSCALL does not save anything on + * the stack and does not change RSP. + * + * Note: RFLAGS saving+masking-with-MSR happens only in Long mode * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it). - * Don't get confused: rflags saving+masking depends on Long Mode Active bit + * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). * @@ -236,7 +267,21 @@ sysret32_from_system_call: END(entry_SYSCALL_compat) /* - * Emulated IA32 system calls via int 0x80. + * 32-bit legacy system call entry. + * + * 32-bit x86 Linux system calls traditionally used the INT $0x80 + * instruction. INT $0x80 lands here. + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. 
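As the comment notes, even 64-bit programs can reach this legacy entry point. A quick user-space check of that claim, assuming the kernel was built with IA-32 emulation (it will fault otherwise); 20 is getpid in the 32-bit syscall table, and r8-r11 are listed as clobbered because the compat entry zeroes them in the saved register image:

    #include <stdio.h>

    int main(void)
    {
            long ret;

            asm volatile("int $0x80"        /* lands in entry_INT80_compat */
                         : "=a" (ret)
                         : "a" (20L)        /* __NR_getpid in the i386 table */
                         : "r8", "r9", "r10", "r11", "memory");
            printf("int $0x80 getpid returned %ld\n", ret);
            return 0;
    }

Arguments, if any, would go in ebx, ecx, edx, esi, edi, and ebp, matching the register list documented above.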
* * Arguments: * eax system call number @@ -245,17 +290,8 @@ END(entry_SYSCALL_compat) * edx arg3 * esi arg4 * edi arg5 - * ebp arg6 (note: not saved in the stack frame, should not be touched) - * - * Notes: - * Uses the same stack frame as the x86-64 version. - * All registers except eax must be saved (but ptrace may violate that). - * Arguments are zero extended. For system calls that want sign extension and - * take long arguments a wrapper is needed. Most calls can just be called - * directly. - * Assumes it is only called from user space and entered with interrupts off. + * ebp arg6 */ - ENTRY(entry_INT80_compat) /* * Interrupts are off on entry. @@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat) TRACE_IRQS_OFF movq %rsp, %rdi - call do_syscall_32_irqs_off + call do_int80_syscall_32 .Lsyscall_32_done: /* Go back to user mode. */ diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 9a66498..8f895ee 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -6,17 +6,11 @@ #include <asm/asm-offsets.h> #include <asm/syscall.h> -#ifdef CONFIG_IA32_EMULATION -#define SYM(sym, compat) compat -#else -#define SYM(sym, compat) sym -#endif - -#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; +#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; #include <asm/syscalls_32.h> #undef __SYSCALL_I386 -#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat), +#define __SYSCALL_I386(nr, sym, qual) [nr] = sym, extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 41283d2..9dbc5ab 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -6,19 +6,14 @@ #include <asm/asm-offsets.h> #include <asm/syscall.h> -#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) +#define __SYSCALL_64_QUAL_(sym) sym +#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym -#ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat) -#else -# define __SYSCALL_X32(nr, sym, compat) /* nothing */ -#endif - -#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include <asm/syscalls_64.h> #undef __SYSCALL_64 -#define __SYSCALL_64(nr, sym, compat) [nr] = sym, +#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index dc1040a..2e5b565 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -21,7 +21,7 @@ 12 common brk sys_brk 13 64 rt_sigaction sys_rt_sigaction 14 common rt_sigprocmask sys_rt_sigprocmask -15 64 rt_sigreturn stub_rt_sigreturn +15 64 rt_sigreturn sys_rt_sigreturn/ptregs 16 64 ioctl sys_ioctl 17 common pread64 sys_pread64 18 common pwrite64 sys_pwrite64 @@ -62,10 +62,10 @@ 53 common 
socketpair sys_socketpair 54 64 setsockopt sys_setsockopt 55 64 getsockopt sys_getsockopt -56 common clone stub_clone -57 common fork stub_fork -58 common vfork stub_vfork -59 64 execve stub_execve +56 common clone sys_clone/ptregs +57 common fork sys_fork/ptregs +58 common vfork sys_vfork/ptregs +59 64 execve sys_execve/ptregs 60 common exit sys_exit 61 common wait4 sys_wait4 62 common kill sys_kill @@ -178,7 +178,7 @@ 169 common reboot sys_reboot 170 common sethostname sys_sethostname 171 common setdomainname sys_setdomainname -172 common iopl sys_iopl +172 common iopl sys_iopl/ptregs 173 common ioperm sys_ioperm 174 64 create_module 175 common init_module sys_init_module @@ -328,7 +328,7 @@ 319 common memfd_create sys_memfd_create 320 common kexec_file_load sys_kexec_file_load 321 common bpf sys_bpf -322 64 execveat stub_execveat +322 64 execveat sys_execveat/ptregs 323 common userfaultfd sys_userfaultfd 324 common membarrier sys_membarrier 325 common mlock2 sys_mlock2 @@ -339,14 +339,14 @@ # for native 64-bit operation. # 512 x32 rt_sigaction compat_sys_rt_sigaction -513 x32 rt_sigreturn stub_x32_rt_sigreturn +513 x32 rt_sigreturn sys32_x32_rt_sigreturn 514 x32 ioctl compat_sys_ioctl 515 x32 readv compat_sys_readv 516 x32 writev compat_sys_writev 517 x32 recvfrom compat_sys_recvfrom 518 x32 sendmsg compat_sys_sendmsg 519 x32 recvmsg compat_sys_recvmsg -520 x32 execve stub_x32_execve +520 x32 execve compat_sys_execve/ptregs 521 x32 ptrace compat_sys_ptrace 522 x32 rt_sigpending compat_sys_rt_sigpending 523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait @@ -371,4 +371,4 @@ 542 x32 getsockopt compat_sys_getsockopt 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit -545 x32 execveat stub_x32_execveat +545 x32 execveat compat_sys_execveat/ptregs diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh index 0e7f8ec..cd3d301 100644 --- a/arch/x86/entry/syscalls/syscalltbl.sh +++ b/arch/x86/entry/syscalls/syscalltbl.sh @@ -3,13 +3,63 @@ in="$1" out="$2" +syscall_macro() { + abi="$1" + nr="$2" + entry="$3" + + # Entry can be either just a function name or "function/qualifier" + real_entry="${entry%%/*}" + qualifier="${entry:${#real_entry}}" # Strip the function name + qualifier="${qualifier:1}" # Strip the slash, if any + + echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)" +} + +emit() { + abi="$1" + nr="$2" + entry="$3" + compat="$4" + + if [ "$abi" == "64" -a -n "$compat" ]; then + echo "a compat entry for a 64-bit syscall makes no sense" >&2 + exit 1 + fi + + if [ -z "$compat" ]; then + if [ -n "$entry" ]; then + syscall_macro "$abi" "$nr" "$entry" + fi + else + echo "#ifdef CONFIG_X86_32" + if [ -n "$entry" ]; then + syscall_macro "$abi" "$nr" "$entry" + fi + echo "#else" + syscall_macro "$abi" "$nr" "$compat" + echo "#endif" + fi +} + grep '^[0-9]' "$in" | sort -n | ( while read nr abi name entry compat; do abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` - if [ -n "$compat" ]; then - echo "__SYSCALL_${abi}($nr, $entry, $compat)" - elif [ -n "$entry" ]; then - echo "__SYSCALL_${abi}($nr, $entry, $entry)" + if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then + # COMMON is the same as 64, except that we don't expect X32 + # programs to use it. Our expectation has nothing to do with + # any generated code, so treat them the same. + emit 64 "$nr" "$entry" "$compat" + elif [ "$abi" == "X32" ]; then + # X32 is equivalent to 64 on an X32-compatible kernel. 
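The "/ptregs" qualifier in the table rows above flows through syscalltbl.sh into lines of the form __SYSCALL_64(nr, sym, qualifier), which syscall_64.c and entry_64.S then expand twice: once into declarations and once into table entries, with the ptregs qualifier selecting a "ptregs_"-prefixed stub. A self-contained sketch of that X-macro pattern (names are illustrative, not the generated header):

    #include <stdio.h>

    #define DEMO_QUAL_(sym)         sym
    #define DEMO_QUAL_ptregs(sym)   ptregs_##sym

    #define DEMO_SYSCALLS(X)        \
            X(0, demo_read, )       \
            X(1, demo_execve, ptregs)

    /* First expansion: extern-style declarations */
    #define X_DECL(nr, sym, qual) static long DEMO_QUAL_##qual(sym)(void);
    DEMO_SYSCALLS(X_DECL)
    #undef X_DECL

    typedef long (*demo_fn)(void);

    /* Second expansion: table entries, indexed by syscall number */
    #define X_ENTRY(nr, sym, qual) [nr] = DEMO_QUAL_##qual(sym),
    static const demo_fn demo_table[] = { DEMO_SYSCALLS(X_ENTRY) };
    #undef X_ENTRY

    static long demo_read(void)          { return 3; }
    static long ptregs_demo_execve(void) { return 59; }

    int main(void)
    {
            printf("%ld %ld\n", demo_table[0](), demo_table[1]());
            return 0;
    }

The same single source of truth (the .tbl file) therefore drives both the C table and the assembly stubs, which is what lets the old hand-written stub_clone/stub_execve wrappers go away.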
+ echo "#ifdef CONFIG_X86_X32_ABI" + emit 64 "$nr" "$entry" "$compat" + echo "#endif" + elif [ "$abi" == "I386" ]; then + emit "$abi" "$nr" "$entry" "$compat" + else + echo "Unknown abi $abi" >&2 + exit 1 fi done ) > "$out" diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 0224987..63a03bb 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, fprintf(outfile, "#include <asm/vdso.h>\n"); fprintf(outfile, "\n"); fprintf(outfile, - "static unsigned char raw_data[%lu] __page_aligned_data = {", + "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {", mapping_size); for (j = 0; j < stripped_len; j++) { if (j % 10 == 0) @@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, } fprintf(outfile, "\n};\n\n"); - fprintf(outfile, "static struct page *pages[%lu];\n\n", - mapping_size / 4096); - fprintf(outfile, "const struct vdso_image %s = {\n", name); fprintf(outfile, "\t.data = raw_data,\n"); fprintf(outfile, "\t.size = %lu,\n", mapping_size); - fprintf(outfile, "\t.text_mapping = {\n"); - fprintf(outfile, "\t\t.name = \"[vdso]\",\n"); - fprintf(outfile, "\t\t.pages = pages,\n"); - fprintf(outfile, "\t},\n"); if (alt_sec) { fprintf(outfile, "\t.alt = %lu,\n", (unsigned long)GET_LE(&alt_sec->sh_offset)); diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 08a317a..7853b53 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/mm_types.h> -#include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/vdso.h> diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 3a1d929..0109ac6 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -3,7 +3,7 @@ */ #include <asm/dwarf2.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> /* diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index b8f69e2..10f7045 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -20,6 +20,7 @@ #include <asm/page.h> #include <asm/hpet.h> #include <asm/desc.h> +#include <asm/cpufeature.h> #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; @@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1; void __init init_vdso_image(const struct vdso_image *image) { - int i; - int npages = (image->size) / PAGE_SIZE; - BUG_ON(image->size % PAGE_SIZE != 0); - for (i = 0; i < npages; i++) - image->text_mapping.pages[i] = - virt_to_page(image->data + i*PAGE_SIZE); apply_alternatives((struct alt_instr *)(image->data + image->alt), (struct alt_instr *)(image->data + image->alt + @@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) #endif } +static int vdso_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + const struct vdso_image *image = vma->vm_mm->context.vdso_image; + + if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size) + return VM_FAULT_SIGBUS; + + vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT)); + get_page(vmf->page); + return 0; +} + +static const struct vm_special_mapping text_mapping = { + .name = "[vdso]", + .fault = vdso_fault, +}; + +static int vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, 
struct vm_fault *vmf) +{ + const struct vdso_image *image = vma->vm_mm->context.vdso_image; + long sym_offset; + int ret = -EFAULT; + + if (!image) + return VM_FAULT_SIGBUS; + + sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) + + image->sym_vvar_start; + + /* + * Sanity check: a symbol offset of zero means that the page + * does not exist for this vdso image, not that the page is at + * offset zero relative to the text mapping. This should be + * impossible here, because sym_offset should only be zero for + * the page past the end of the vvar mapping. + */ + if (sym_offset == 0) + return VM_FAULT_SIGBUS; + + if (sym_offset == image->sym_vvar_page) { + ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, + __pa_symbol(&__vvar_page) >> PAGE_SHIFT); + } else if (sym_offset == image->sym_hpet_page) { +#ifdef CONFIG_HPET_TIMER + if (hpet_address && vclock_was_used(VCLOCK_HPET)) { + ret = vm_insert_pfn_prot( + vma, + (unsigned long)vmf->virtual_address, + hpet_address >> PAGE_SHIFT, + pgprot_noncached(PAGE_READONLY)); + } +#endif + } else if (sym_offset == image->sym_pvclock_page) { + struct pvclock_vsyscall_time_info *pvti = + pvclock_pvti_cpu0_va(); + if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { + ret = vm_insert_pfn( + vma, + (unsigned long)vmf->virtual_address, + __pa(pvti) >> PAGE_SHIFT); + } + } + + if (ret == 0 || ret == -EBUSY) + return VM_FAULT_NOPAGE; + + return VM_FAULT_SIGBUS; +} + static int map_vdso(const struct vdso_image *image, bool calculate_addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long addr, text_start; int ret = 0; - static struct page *no_pages[] = {NULL}; - static struct vm_special_mapping vvar_mapping = { + static const struct vm_special_mapping vvar_mapping = { .name = "[vvar]", - .pages = no_pages, + .fault = vvar_fault, }; - struct pvclock_vsyscall_time_info *pvti; if (calculate_addr) { addr = vdso_addr(current->mm->start_stack, @@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) text_start = addr - image->sym_vvar_start; current->mm->context.vdso = (void __user *)text_start; + current->mm->context.vdso_image = image; /* * MAYWRITE to allow gdb to COW and set breakpoints @@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) image->size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &image->text_mapping); + &text_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); @@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) vma = _install_special_mapping(mm, addr, -image->sym_vvar_start, - VM_READ|VM_MAYREAD, + VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| + VM_PFNMAP, &vvar_mapping); if (IS_ERR(vma)) { @@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) goto up_fail; } - if (image->sym_vvar_page) - ret = remap_pfn_range(vma, - text_start + image->sym_vvar_page, - __pa_symbol(&__vvar_page) >> PAGE_SHIFT, - PAGE_SIZE, - PAGE_READONLY); - - if (ret) - goto up_fail; - -#ifdef CONFIG_HPET_TIMER - if (hpet_address && image->sym_hpet_page) { - ret = io_remap_pfn_range(vma, - text_start + image->sym_hpet_page, - hpet_address >> PAGE_SHIFT, - PAGE_SIZE, - pgprot_noncached(PAGE_READONLY)); - - if (ret) - goto up_fail; - } -#endif - - pvti = pvclock_pvti_cpu0_va(); - if (pvti && image->sym_pvclock_page) { - ret = remap_pfn_range(vma, - text_start + image->sym_pvclock_page, - __pa(pvti) >> PAGE_SHIFT, - PAGE_SIZE, - PAGE_READONLY); - - if (ret) - goto up_fail; - } - up_fail: if (ret) 
current->mm->context.vdso = NULL; @@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg) #ifdef CONFIG_NUMA node = cpu_to_node(cpu); #endif - if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) + if (static_cpu_has(X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); /* diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c index 51e3304..0fb3a10 100644 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c @@ -16,6 +16,8 @@ #include <asm/vgtod.h> #include <asm/vvar.h> +int vclocks_used __read_mostly; + DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); void update_vsyscall_tz(void) @@ -26,12 +28,17 @@ void update_vsyscall_tz(void) void update_vsyscall(struct timekeeper *tk) { + int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; + /* Mark the new vclock used. */ + BUILD_BUG_ON(VCLOCK_MAX >= 32); + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); + gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + vdata->vclock_mode = vclock_mode; vdata->cycle_last = tk->tkr_mono.cycle_last; vdata->mask = tk->tkr_mono.mask; vdata->mult = tk->tkr_mono.mult; diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile new file mode 100644 index 0000000..fdfea15 --- /dev/null +++ b/arch/x86/events/Makefile @@ -0,0 +1,13 @@ +obj-y += core.o + +obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o +obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o +endif +obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/events/amd/core.c index 5861053..049ada8d 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/events/amd/core.c @@ -5,7 +5,7 @@ #include <linux/slab.h> #include <asm/apicdef.h> -#include "perf_event.h" +#include "../perf_event.h" static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/events/amd/ibs.c index 989d3c2..51087c2 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -14,7 +14,7 @@ #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" static u32 ibs_caps; @@ -670,7 +670,7 @@ static __init int perf_event_ibs_init(void) perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); - printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); + pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); return 0; } @@ -774,14 +774,14 @@ static int setup_ibs_ctl(int ibs_eilvt_off) pci_read_config_dword(cpu_cfg, IBSCTL, &value); if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x\n", value); + pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n", + value); return -EINVAL; } } while (1); if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for 
IBS\n"); + pr_debug("No CPU node configured for IBS\n"); return -ENODEV; } @@ -810,7 +810,7 @@ static void force_ibs_eilvt_setup(void) preempt_enable(); if (offset == APIC_EILVT_NR_MAX) { - printk(KERN_DEBUG "No EILVT entry available\n"); + pr_debug("No EILVT entry available\n"); return; } diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/events/amd/iommu.c index 97242a9..635e5eb 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -16,8 +16,8 @@ #include <linux/cpumask.h> #include <linux/slab.h> -#include "perf_event.h" -#include "perf_event_amd_iommu.h" +#include "../perf_event.h" +#include "iommu.h" #define COUNTER_SHIFT 16 diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/events/amd/iommu.h index 845d173..845d173 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.h +++ b/arch/x86/events/amd/iommu.h diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/events/amd/uncore.c index 8836fc9..3db9569 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -538,7 +538,7 @@ static int __init amd_uncore_init(void) if (ret) goto fail_nb; - printk(KERN_INFO "perf: AMD NB counters detected\n"); + pr_info("perf: AMD NB counters detected\n"); ret = 0; } @@ -552,7 +552,7 @@ static int __init amd_uncore_init(void) if (ret) goto fail_l2; - printk(KERN_INFO "perf: AMD L2I counters detected\n"); + pr_info("perf: AMD L2I counters detected\n"); ret = 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/events/core.c index d276b31..9b6ad08 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/events/core.c @@ -254,15 +254,16 @@ static bool check_hw_exists(void) * We still allow the PMU driver to operate: */ if (bios_fail) { - printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); - printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail); + pr_cont("Broken BIOS detected, complain to your hardware vendor.\n"); + pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", + reg_fail, val_fail); } return true; msr_fail: - printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); - printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + pr_cont("Broken PMU hardware detected, using software events only.\n"); + pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n", boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, reg, val_new); @@ -596,6 +597,19 @@ void x86_pmu_disable_all(void) } } +/* + * There may be PMI landing after enabled=0. The PMI hitting could be before or + * after disable_all. + * + * If PMI hits before disable_all, the PMU will be disabled in the NMI handler. + * It will not be re-enabled in the NMI handler again, because enabled=0. After + * handling the NMI, disable_all will be called, which will not change the + * state either. If PMI hits after disable_all, the PMU is already disabled + * before entering NMI handler. The NMI handler will not change the state + * either. + * + * So either situation is harmless. 
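The reasoning in the comment above can be seen in miniature if the software flag is cleared before the hardware is switched off: an NMI that observes enabled == 0 knows not to re-enable anything, whichever side of disable_all() it lands on. A plain-C sketch of that ordering (heavily simplified; the real code also relies on a compiler barrier and per-cpu data):

    #include <stdio.h>

    struct demo_pmu {
            volatile int enabled;   /* mirrors cpuc->enabled */
            int hw_on;              /* mirrors the global-enable MSR */
    };

    static void demo_pmu_disable(struct demo_pmu *pmu)
    {
            if (!pmu->enabled)
                    return;
            pmu->enabled = 0;       /* software flag cleared first ... */
            pmu->hw_on = 0;         /* ... then the counters are stopped */
    }

    static void demo_pmi_handler(struct demo_pmu *pmu)
    {
            pmu->hw_on = 0;         /* the handler always runs with counters stopped */
            /* ... process overflow records ... */
            if (pmu->enabled)       /* never true once demo_pmu_disable() has begun */
                    pmu->hw_on = 1;
    }

    int main(void)
    {
            struct demo_pmu pmu = { 1, 1 };

            demo_pmu_disable(&pmu);
            demo_pmi_handler(&pmu); /* a late PMI cannot turn the PMU back on */
            printf("enabled=%d hw_on=%d\n", pmu.enabled, pmu.hw_on);
            return 0;
    }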
+ */ static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/events/intel/bts.c index 2cad71d..b99dc92 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/events/intel/bts.c @@ -26,7 +26,7 @@ #include <asm-generic/sizes.h> #include <asm/perf_event.h> -#include "perf_event.h" +#include "../perf_event.h" struct bts_ctx { struct perf_output_handle handle; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/events/intel/core.c index fed2ab1..68fa55b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/events/intel/core.c @@ -18,7 +18,7 @@ #include <asm/hardirq.h> #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" /* * Intel PerfMon, used on Core and later. @@ -1502,7 +1502,15 @@ static __initconst const u64 knl_hw_cache_extra_regs }; /* - * Use from PMIs where the LBRs are already disabled. + * Used from PMIs where the LBRs are already disabled. + * + * This function could be called consecutively. It is required to remain in + * disabled state if called consecutively. + * + * During consecutive calls, the same disable value will be written to related + * registers, so the PMU state remains unchanged. hw.state in + * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive + * calls. */ static void __intel_pmu_disable_all(void) { @@ -1884,6 +1892,16 @@ again: if (__test_and_clear_bit(62, (unsigned long *)&status)) { handled++; x86_pmu.drain_pebs(regs); + /* + * There are cases where, even though, the PEBS ovfl bit is set + * in GLOBAL_OVF_STATUS, the PEBS events may also have their + * overflow bits set for their counters. We must clear them + * here because they have been processed as exact samples in + * the drain_pebs() routine. They must not be processed again + * in the for_each_bit_set() loop for regular samples below. + */ + status &= ~cpuc->pebs_enabled; + status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; } /* @@ -1929,7 +1947,10 @@ again: goto again; done: - __intel_pmu_enable_all(0, true); + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + if (cpuc->enabled) + __intel_pmu_enable_all(0, true); + /* * Only unmask the NMI after the overflow counters * have been reset. 
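The status masking added to the PMI handler above strips the PEBS counters' bits out of the overflow word once drain_pebs() has consumed their samples, so the generic per-bit loop below it does not process them again. A small model of that filtering (bit assignments are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t demo_filter_status(uint64_t status, uint64_t pebs_enabled,
                                       uint64_t active_counters)
    {
            status &= ~pebs_enabled;        /* already handled by the PEBS drain */
            status &= active_counters;      /* ignore bits we do not own */
            return status;
    }

    int main(void)
    {
            uint64_t status = 0x7;          /* counters 0-2 flagged as overflowed */
            uint64_t pebs   = 0x1;          /* counter 0 is a PEBS event */
            uint64_t mine   = 0xf;          /* counters 0-3 are in use */

            printf("remaining: 0x%llx\n",
                   (unsigned long long)demo_filter_status(status, pebs, mine));
            return 0;
    }

Only the non-PEBS overflows (0x6 here) survive into the regular sampling path, which is the double-counting the comment warns about.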
This avoids spurious NMIs on @@ -3396,6 +3417,7 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + intel_pmu_pebs_data_source_nhm(); x86_add_quirk(intel_nehalem_quirk); pr_cont("Nehalem events, "); @@ -3459,6 +3481,7 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + intel_pmu_pebs_data_source_nhm(); pr_cont("Westmere events, "); break; @@ -3581,7 +3604,7 @@ __init int intel_pmu_init(void) intel_pmu_lbr_init_hsw(); x86_pmu.event_constraints = intel_bdw_event_constraints; - x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; + x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints; x86_pmu.extra_regs = intel_snbep_extra_regs; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/events/intel/cqm.c index a316ca9..93cb412 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -7,7 +7,7 @@ #include <linux/perf_event.h> #include <linux/slab.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" #define MSR_IA32_PQR_ASSOC 0x0c8f #define MSR_IA32_QM_CTR 0x0c8e @@ -1244,15 +1244,12 @@ static struct pmu intel_cqm_pmu = { static inline void cqm_pick_event_reader(int cpu) { - int phys_id = topology_physical_package_id(cpu); - int i; + int reader; - for_each_cpu(i, &cqm_cpumask) { - if (phys_id == topology_physical_package_id(i)) - return; /* already got reader for this socket */ - } - - cpumask_set_cpu(cpu, &cqm_cpumask); + /* First online cpu in package becomes the reader */ + reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu)); + if (reader >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cqm_cpumask); } static void intel_cqm_cpu_starting(unsigned int cpu) @@ -1270,24 +1267,17 @@ static void intel_cqm_cpu_starting(unsigned int cpu) static void intel_cqm_cpu_exit(unsigned int cpu) { - int phys_id = topology_physical_package_id(cpu); - int i; + int target; - /* - * Is @cpu a designated cqm reader? - */ + /* Is @cpu the current cqm reader for this package ? 
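The reworked CQM reader selection above boils down to two mask operations: on CPU online, the package gets a reader only if it does not already have one; on CPU offline, the role is handed to any other CPU of the same package. A user-space model using a 64-bit mask in place of a cpumask (helper names are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    static int demo_any_set(uint64_t mask)
    {
            for (int cpu = 0; cpu < 64; cpu++)
                    if (mask & (1ULL << cpu))
                            return cpu;
            return 64;                      /* stand-in for nr_cpu_ids */
    }

    static void demo_cpu_starting(int cpu, uint64_t pkg_mask, uint64_t *readers)
    {
            /* First online CPU in the package becomes the reader. */
            if (demo_any_set(*readers & pkg_mask) >= 64)
                    *readers |= 1ULL << cpu;
    }

    static void demo_cpu_exit(int cpu, uint64_t pkg_mask, uint64_t *readers)
    {
            int target;

            /* Was @cpu the reader for its package? */
            if (!(*readers & (1ULL << cpu)))
                    return;
            *readers &= ~(1ULL << cpu);

            /* Hand the role to any other CPU in the same package. */
            target = demo_any_set(pkg_mask & ~(1ULL << cpu));
            if (target < 64)
                    *readers |= 1ULL << target;
    }

    int main(void)
    {
            uint64_t pkg0 = 0x0f;           /* CPUs 0-3 share a package */
            uint64_t readers = 0;

            demo_cpu_starting(0, pkg0, &readers);
            demo_cpu_starting(1, pkg0, &readers);   /* package already has a reader */
            demo_cpu_exit(0, pkg0, &readers);       /* reader role moves to CPU 1 */
            printf("reader mask: 0x%llx\n", (unsigned long long)readers);
            return 0;
    }

Compared with the old loop over all online CPUs, the mask intersection makes both the "already have a reader" and "pick a successor" questions a single lookup.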
*/ if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask)) return; - for_each_online_cpu(i) { - if (i == cpu) - continue; + /* Find another online reader in this package */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - if (phys_id == topology_physical_package_id(i)) { - cpumask_set_cpu(i, &cqm_cpumask); - break; - } - } + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &cqm_cpumask); } static int intel_cqm_cpu_notifier(struct notifier_block *nb, diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/events/intel/cstate.c index 75a38b5..7946c42 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,7 +89,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/events/intel/ds.c index 10602f0..ce7211a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/events/intel/ds.c @@ -5,7 +5,7 @@ #include <asm/perf_event.h> #include <asm/insn.h> -#include "perf_event.h" +#include "../perf_event.h" /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 @@ -51,7 +51,8 @@ union intel_x86_pebs_dse { #define OP_LH (P(OP, LOAD) | P(LVL, HIT)) #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) -static const u64 pebs_data_source[] = { +/* Version for Sandy Bridge and later */ +static u64 pebs_data_source[] = { P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */ OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ @@ -70,6 +71,14 @@ static const u64 pebs_data_source[] = { OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ }; +/* Patch up minor differences in the bits */ +void __init intel_pmu_pebs_data_source_nhm(void) +{ + pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT); + pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); + pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; @@ -269,7 +278,7 @@ static int alloc_pebs_buffer(int cpu) if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node); + buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; @@ -286,7 +295,7 @@ static int alloc_pebs_buffer(int cpu) per_cpu(insn_buffer, cpu) = ibuffer; } - max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; + max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; ds->pebs_buffer_base = (u64)(unsigned long)buffer; ds->pebs_index = ds->pebs_buffer_base; @@ -722,6 +731,30 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_bdw_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). 
*/ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + + struct event_constraint intel_skl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ @@ -1319,19 +1352,28 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; int format = x86_pmu.intel_cap.pebs_format; switch (format) { case 0: - printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); + pr_cont("PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); + /* + * Using >PAGE_SIZE buffers makes the WRMSR to + * PERF_GLOBAL_CTRL in intel_pmu_enable_all() + * mysteriously hang on Core2. + * + * As a workaround, we don't do this. + */ + x86_pmu.pebs_buffer_size = PAGE_SIZE; x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; break; case 1: - printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); + pr_cont("PEBS fmt1%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; break; @@ -1351,7 +1393,7 @@ void __init intel_ds_init(void) break; default: - printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); + pr_cont("no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; } } diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/events/intel/knc.c index 5b0c232..548d5f7 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/events/intel/knc.c @@ -5,7 +5,7 @@ #include <asm/hardirq.h> -#include "perf_event.h" +#include "../perf_event.h" static const u64 knc_perfmon_event_map[] = { @@ -263,7 +263,9 @@ again: goto again; done: - knc_pmu_enable_all(0); + /* Only restore PMU state when it's active. See x86_pmu_disable(). 
*/ + if (cpuc->enabled) + knc_pmu_enable_all(0); return handled; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/events/intel/lbr.c index 653f88d..69dd118 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -5,7 +5,7 @@ #include <asm/msr.h> #include <asm/insn.h> -#include "perf_event.h" +#include "../perf_event.h" enum { LBR_FORMAT_32 = 0x00, diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/events/intel/p4.c index f2e5678..0a5ede1 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/events/intel/p4.c @@ -13,7 +13,7 @@ #include <asm/hardirq.h> #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" #define P4_CNTR_LIMIT 3 /* diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/events/intel/p6.c index 7c1a0c0..1f5c47a 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/events/intel/p6.c @@ -1,7 +1,7 @@ #include <linux/perf_event.h> #include <linux/types.h> -#include "perf_event.h" +#include "../perf_event.h" /* * Not sure about some of these diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/events/intel/pt.c index c0bbd10..6af7cf7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/events/intel/pt.c @@ -29,8 +29,8 @@ #include <asm/io.h> #include <asm/intel_pt.h> -#include "perf_event.h" -#include "intel_pt.h" +#include "../perf_event.h" +#include "pt.h" static DEFINE_PER_CPU(struct pt, pt_ctx); diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/events/intel/pt.h index 336878a..336878a 100644 --- a/arch/x86/kernel/cpu/intel_pt.h +++ b/arch/x86/events/intel/pt.h diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/events/intel/rapl.c index 24a351a..b834a3f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -44,11 +44,14 @@ * the duration of the measurement. 
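The rapl.c header comment quoted in the hunk context here notes that counts are pre-scaled to 2^-32 Joules, so user space never needs the per-domain hardware unit. A small stand-alone example of the ldexp() conversion the comment goes on to mention, with an invented raw count (link with -lm):

#include <stdio.h>
#include <math.h>

int main(void)
{
        /* Invented raw count as read from the perf event */
        unsigned long long raw_count = 123456789ULL;

        /* Each increment is 2^-32 Joules, so scale with ldexp() */
        double joules = ldexp((double)raw_count, -32);

        printf("%llu raw counts = %.6f J\n", raw_count, joules);
        return 0;
}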
Tools may use a function such as * ldexp(raw_count, -32); */ + +#define pr_fmt(fmt) "RAPL PMU: " fmt + #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" /* * RAPL energy status counters @@ -107,7 +110,7 @@ static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ static struct kobj_attribute format_attr_##_var = \ __ATTR(_name, 0444, __rapl_##_var##_show, NULL) -#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ +#define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ static struct perf_pmu_events_attr event_attr_##v = { \ @@ -117,23 +120,33 @@ static struct perf_pmu_events_attr event_attr_##v = { \ }; struct rapl_pmu { - spinlock_t lock; - int n_active; /* number of active events */ - struct list_head active_list; - struct pmu *pmu; /* pointer to rapl_pmu_class */ - ktime_t timer_interval; /* in ktime_t unit */ - struct hrtimer hrtimer; + raw_spinlock_t lock; + int n_active; + int cpu; + struct list_head active_list; + struct pmu *pmu; + ktime_t timer_interval; + struct hrtimer hrtimer; }; -static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */ -static struct pmu rapl_pmu_class; +struct rapl_pmus { + struct pmu pmu; + unsigned int maxpkg; + struct rapl_pmu *pmus[]; +}; + + /* 1/2^hw_unit Joule */ +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; +static struct rapl_pmus *rapl_pmus; static cpumask_t rapl_cpu_mask; -static int rapl_cntr_mask; +static unsigned int rapl_cntr_mask; +static u64 rapl_timer_ms; -static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); -static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); +static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) +{ + return rapl_pmus->pmus[topology_logical_package_id(cpu)]; +} -static struct x86_pmu_quirk *rapl_quirks; static inline u64 rapl_read_counter(struct perf_event *event) { u64 raw; @@ -141,19 +154,10 @@ static inline u64 rapl_read_counter(struct perf_event *event) return raw; } -#define rapl_add_quirk(func_) \ -do { \ - static struct x86_pmu_quirk __quirk __initdata = { \ - .func = func_, \ - }; \ - __quirk.next = rapl_quirks; \ - rapl_quirks = &__quirk; \ -} while (0) - static inline u64 rapl_scale(u64 v, int cfg) { if (cfg > NR_RAPL_DOMAINS) { - pr_warn("invalid domain %d, failed to scale data\n", cfg); + pr_warn("Invalid domain %d, failed to scale data\n", cfg); return v; } /* @@ -206,27 +210,21 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) HRTIMER_MODE_REL_PINNED); } -static void rapl_stop_hrtimer(struct rapl_pmu *pmu) -{ - hrtimer_cancel(&pmu->hrtimer); -} - static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); struct perf_event *event; unsigned long flags; if (!pmu->n_active) return HRTIMER_NORESTART; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); - list_for_each_entry(event, &pmu->active_list, active_entry) { + list_for_each_entry(event, &pmu->active_list, active_entry) rapl_event_update(event); - } - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); hrtimer_forward_now(hrtimer, pmu->timer_interval); @@ -260,28 +258,28 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, static void rapl_pmu_event_start(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct 
rapl_pmu *pmu = event->pmu_private; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); __rapl_pmu_event_start(pmu, event); - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); } static void rapl_pmu_event_stop(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); /* mark event as deactivated and stopped */ if (!(hwc->state & PERF_HES_STOPPED)) { WARN_ON_ONCE(pmu->n_active <= 0); pmu->n_active--; if (pmu->n_active == 0) - rapl_stop_hrtimer(pmu); + hrtimer_cancel(&pmu->hrtimer); list_del(&event->active_entry); @@ -299,23 +297,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); } static int rapl_pmu_event_add(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (mode & PERF_EF_START) __rapl_pmu_event_start(pmu, event); - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); return 0; } @@ -329,15 +327,19 @@ static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; int bit, msr, ret = 0; + struct rapl_pmu *pmu; /* only look at RAPL events */ - if (event->attr.type != rapl_pmu_class.type) + if (event->attr.type != rapl_pmus->pmu.type) return -ENOENT; /* check only supported bits are set */ if (event->attr.config & ~RAPL_EVENT_MASK) return -EINVAL; + if (event->cpu < 0) + return -EINVAL; + /* * check event is known (determines counter) */ @@ -376,6 +378,9 @@ static int rapl_pmu_event_init(struct perf_event *event) return -EINVAL; /* must be done before validate_group */ + pmu = cpu_to_rapl_pmu(event->cpu); + event->cpu = pmu->cpu; + event->pmu_private = pmu; event->hw.event_base = msr; event->hw.config = cfg; event->hw.idx = bit; @@ -506,139 +511,62 @@ const struct attribute_group *rapl_attr_groups[] = { NULL, }; -static struct pmu rapl_pmu_class = { - .attr_groups = rapl_attr_groups, - .task_ctx_nr = perf_invalid_context, /* system-wide only */ - .event_init = rapl_pmu_event_init, - .add = rapl_pmu_event_add, /* must have */ - .del = rapl_pmu_event_del, /* must have */ - .start = rapl_pmu_event_start, - .stop = rapl_pmu_event_stop, - .read = rapl_pmu_event_read, -}; - static void rapl_cpu_exit(int cpu) { - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - int i, phys_id = topology_physical_package_id(cpu); - int target = -1; + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; - /* find a new cpu on same package */ - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (phys_id == topology_physical_package_id(i)) { - target = i; - break; - } - } - /* - * clear cpu from cpumask - * if was set in cpumask and still some cpu on package, - * then move to new cpu - */ - if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) - cpumask_set_cpu(target, &rapl_cpu_mask); + /* Check if exiting cpu is used for collecting rapl events */ + if 
(!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) + return; - WARN_ON(cpumask_empty(&rapl_cpu_mask)); - /* - * migrate events and context to new cpu - */ - if (target >= 0) - perf_pmu_migrate_context(pmu->pmu, cpu, target); + pmu->cpu = -1; + /* Find a new cpu to collect rapl events */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - /* cancel overflow polling timer for CPU */ - rapl_stop_hrtimer(pmu); + /* Migrate rapl events to the new target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &rapl_cpu_mask); + pmu->cpu = target; + perf_pmu_migrate_context(pmu->pmu, cpu, target); + } } static void rapl_cpu_init(int cpu) { - int i, phys_id = topology_physical_package_id(cpu); + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; - /* check if phys_is is already covered */ - for_each_cpu(i, &rapl_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) - return; - } - /* was not found, so add it */ - cpumask_set_cpu(cpu, &rapl_cpu_mask); -} - -static __init void rapl_hsw_server_quirk(void) -{ /* - * DRAM domain on HSW server has fixed energy unit which can be - * different than the unit from power unit MSR. - * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 - * of 2. Datasheet, September 2014, Reference Number: 330784-001 " + * Check if there is an online cpu in the package which collects rapl + * events already. */ - rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu)); + if (target < nr_cpu_ids) + return; + + cpumask_set_cpu(cpu, &rapl_cpu_mask); + pmu->cpu = cpu; } static int rapl_cpu_prepare(int cpu) { - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - int phys_id = topology_physical_package_id(cpu); - u64 ms; + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); if (pmu) return 0; - if (phys_id < 0) - return -1; - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); if (!pmu) - return -1; - spin_lock_init(&pmu->lock); + return -ENOMEM; + raw_spin_lock_init(&pmu->lock); INIT_LIST_HEAD(&pmu->active_list); - - pmu->pmu = &rapl_pmu_class; - - /* - * use reference of 200W for scaling the timeout - * to avoid missing counter overflows. 
- * 200W = 200 Joules/sec - * divide interval by 2 to avoid lockstep (2 * 100) - * if hw unit is 32, then we use 2 ms 1/200/2 - */ - if (rapl_hw_unit[0] < 32) - ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1)); - else - ms = 2; - - pmu->timer_interval = ms_to_ktime(ms); - + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + pmu->cpu = -1; rapl_hrtimer_init(pmu); - - /* set RAPL pmu for this cpu for now */ - per_cpu(rapl_pmu, cpu) = pmu; - per_cpu(rapl_pmu_to_free, cpu) = NULL; - - return 0; -} - -static void rapl_cpu_kfree(int cpu) -{ - struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); - - kfree(pmu); - - per_cpu(rapl_pmu_to_free, cpu) = NULL; -} - -static int rapl_cpu_dying(int cpu) -{ - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - - if (!pmu) - return 0; - - per_cpu(rapl_pmu, cpu) = NULL; - - per_cpu(rapl_pmu_to_free, cpu) = pmu; - + rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; return 0; } @@ -651,28 +579,20 @@ static int rapl_cpu_notifier(struct notifier_block *self, case CPU_UP_PREPARE: rapl_cpu_prepare(cpu); break; - case CPU_STARTING: - rapl_cpu_init(cpu); - break; - case CPU_UP_CANCELED: - case CPU_DYING: - rapl_cpu_dying(cpu); - break; + + case CPU_DOWN_FAILED: case CPU_ONLINE: - case CPU_DEAD: - rapl_cpu_kfree(cpu); + rapl_cpu_init(cpu); break; + case CPU_DOWN_PREPARE: rapl_cpu_exit(cpu); break; - default: - break; } - return NOTIFY_OK; } -static int rapl_check_hw_unit(void) +static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; int i; @@ -683,28 +603,107 @@ static int rapl_check_hw_unit(void) for (i = 0; i < NR_RAPL_DOMAINS; i++) rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + /* + * DRAM domain on HSW server and KNL has fixed energy unit which can be + * different than the unit from power unit MSR. See + * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 + * of 2. Datasheet, September 2014, Reference Number: 330784-001 " + */ + if (apply_quirk) + rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + + /* + * Calculate the timer rate: + * Use reference of 200W for scaling the timeout to avoid counter + * overflows. 
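The removed open-coded interval computation above and the new rapl_check_hw_unit() below apply the same rule: budget roughly 200 W per domain, halve the resulting time to overflow to avoid lockstep, and fall back to 2 ms when the energy unit is 32 bits or wider. A stand-alone rendering of that arithmetic for a typical 2^-16 J energy unit:

#include <stdio.h>

int main(void)
{
        int hw_unit = 16;               /* typical ENERGY_UNIT: 1 count = 2^-16 J */
        unsigned long long ms = 2;      /* fallback used when hw_unit >= 32 */

        if (hw_unit < 32) {
                /* A 32-bit counter overflows after 2^(32 - hw_unit) Joules;
                 * at 200 J/s that takes 2^(32 - hw_unit) / 200 seconds.
                 * Halve it (hence the extra -1) to stay well ahead. */
                ms = (1000 / (2 * 100)) * (1ULL << (32 - hw_unit - 1));
        }
        printf("hrtimer interval: %llu ms\n", ms);
        return 0;
}

With hw_unit = 16 the sketch prints 163840 ms, the same value the new rapl_timer_ms computation produces.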
200W = 200 Joules/sec + * Divide interval by 2 to avoid lockstep (2 * 100) + * if hw unit is 32, then we use 2 ms 1/200/2 + */ + rapl_timer_ms = 2; + if (rapl_hw_unit[0] < 32) { + rapl_timer_ms = (1000 / (2 * 100)); + rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1)); + } + return 0; +} + +static void __init rapl_advertise(void) +{ + int i; + + pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", + hweight32(rapl_cntr_mask), rapl_timer_ms); + + for (i = 0; i < NR_RAPL_DOMAINS; i++) { + if (rapl_cntr_mask & (1 << i)) { + pr_info("hw unit of domain %s 2^-%d Joules\n", + rapl_domain_names[i], rapl_hw_unit[i]); + } + } +} + +static int __init rapl_prepare_cpus(void) +{ + unsigned int cpu, pkg; + int ret; + + for_each_online_cpu(cpu) { + pkg = topology_logical_package_id(cpu); + if (rapl_pmus->pmus[pkg]) + continue; + + ret = rapl_cpu_prepare(cpu); + if (ret) + return ret; + rapl_cpu_init(cpu); + } + return 0; +} + +static void __init cleanup_rapl_pmus(void) +{ + int i; + + for (i = 0; i < rapl_pmus->maxpkg; i++) + kfree(rapl_pmus->pmus + i); + kfree(rapl_pmus); +} + +static int __init init_rapl_pmus(void) +{ + int maxpkg = topology_max_packages(); + size_t size; + + size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *); + rapl_pmus = kzalloc(size, GFP_KERNEL); + if (!rapl_pmus) + return -ENOMEM; + + rapl_pmus->maxpkg = maxpkg; + rapl_pmus->pmu.attr_groups = rapl_attr_groups; + rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; + rapl_pmus->pmu.event_init = rapl_pmu_event_init; + rapl_pmus->pmu.add = rapl_pmu_event_add; + rapl_pmus->pmu.del = rapl_pmu_event_del; + rapl_pmus->pmu.start = rapl_pmu_event_start; + rapl_pmus->pmu.stop = rapl_pmu_event_stop; + rapl_pmus->pmu.read = rapl_pmu_event_read; return 0; } -static const struct x86_cpu_id rapl_cpu_match[] = { +static const struct x86_cpu_id rapl_cpu_match[] __initconst = { [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, [1] = {}, }; static int __init rapl_pmu_init(void) { - struct rapl_pmu *pmu; - int cpu, ret; - struct x86_pmu_quirk *quirk; - int i; + bool apply_quirk = false; + int ret; - /* - * check for Intel processor family 6 - */ if (!x86_match_cpu(rapl_cpu_match)) - return 0; + return -ENODEV; - /* check supported CPU */ switch (boot_cpu_data.x86_model) { case 42: /* Sandy Bridge */ case 58: /* Ivy Bridge */ @@ -712,7 +711,7 @@ static int __init rapl_pmu_init(void) rapl_pmu_events_group.attrs = rapl_events_cln_attr; break; case 63: /* Haswell-Server */ - rapl_add_quirk(rapl_hsw_server_quirk); + apply_quirk = true; rapl_cntr_mask = RAPL_IDX_SRV; rapl_pmu_events_group.attrs = rapl_events_srv_attr; break; @@ -728,56 +727,41 @@ static int __init rapl_pmu_init(void) rapl_pmu_events_group.attrs = rapl_events_srv_attr; break; case 87: /* Knights Landing */ - rapl_add_quirk(rapl_hsw_server_quirk); + apply_quirk = true; rapl_cntr_mask = RAPL_IDX_KNL; rapl_pmu_events_group.attrs = rapl_events_knl_attr; - + break; default: - /* unsupported */ - return 0; + return -ENODEV; } - ret = rapl_check_hw_unit(); + + ret = rapl_check_hw_unit(apply_quirk); if (ret) return ret; - /* run cpu model quirks */ - for (quirk = rapl_quirks; quirk; quirk = quirk->next) - quirk->func(); - cpu_notifier_register_begin(); + ret = init_rapl_pmus(); + if (ret) + return ret; - for_each_online_cpu(cpu) { - ret = rapl_cpu_prepare(cpu); - if (ret) - goto out; - rapl_cpu_init(cpu); - } + cpu_notifier_register_begin(); - __perf_cpu_notifier(rapl_cpu_notifier); + ret = rapl_prepare_cpus(); + if (ret) + goto out; - ret = 
perf_pmu_register(&rapl_pmu_class, "power", -1); - if (WARN_ON(ret)) { - pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); - cpu_notifier_register_done(); - return -1; - } + ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); + if (ret) + goto out; - pmu = __this_cpu_read(rapl_pmu); + __perf_cpu_notifier(rapl_cpu_notifier); + cpu_notifier_register_done(); + rapl_advertise(); + return 0; - pr_info("RAPL PMU detected," - " API unit is 2^-32 Joules," - " %d fixed counters" - " %llu ms ovfl timer\n", - hweight32(rapl_cntr_mask), - ktime_to_ms(pmu->timer_interval)); - for (i = 0; i < NR_RAPL_DOMAINS; i++) { - if (rapl_cntr_mask & (1 << i)) { - pr_info("hw unit of domain %s 2^-%d Joules\n", - rapl_domain_names[i], rapl_hw_unit[i]); - } - } out: + pr_warn("Initialization failed (%d), disabled\n", ret); + cleanup_rapl_pmus(); cpu_notifier_register_done(); - - return 0; + return ret; } device_initcall(rapl_pmu_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/events/intel/uncore.c index 3bf41d4..7012d18 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,4 @@ -#include "perf_event_intel_uncore.h" +#include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; @@ -9,9 +9,9 @@ struct pci_driver *uncore_pci_driver; /* pci bus to socket mapping */ DEFINE_RAW_SPINLOCK(pci2phy_map_lock); struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); -struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +struct pci_extra_dev *uncore_extra_pci_dev; +static int max_packages; -static DEFINE_RAW_SPINLOCK(uncore_box_lock); /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; @@ -21,7 +21,7 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); -int uncore_pcibus_to_physid(struct pci_bus *bus) +static int uncore_pcibus_to_physid(struct pci_bus *bus) { struct pci2phy_map *map; int phys_id = -1; @@ -38,6 +38,16 @@ int uncore_pcibus_to_physid(struct pci_bus *bus) return phys_id; } +static void uncore_free_pcibus_map(void) +{ + struct pci2phy_map *map, *tmp; + + list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) { + list_del(&map->list); + kfree(map); + } +} + struct pci2phy_map *__find_pci2phy_map(int segment) { struct pci2phy_map *map, *alloc = NULL; @@ -82,43 +92,9 @@ ssize_t uncore_event_show(struct kobject *kobj, return sprintf(buf, "%s", event->config); } -struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) -{ - return container_of(event->pmu, struct intel_uncore_pmu, pmu); -} - struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - struct intel_uncore_box *box; - - box = *per_cpu_ptr(pmu->box, cpu); - if (box) - return box; - - raw_spin_lock(&uncore_box_lock); - /* Recheck in lock to handle races. 
*/ - if (*per_cpu_ptr(pmu->box, cpu)) - goto out; - list_for_each_entry(box, &pmu->box_list, list) { - if (box->phys_id == topology_physical_package_id(cpu)) { - atomic_inc(&box->refcnt); - *per_cpu_ptr(pmu->box, cpu) = box; - break; - } - } -out: - raw_spin_unlock(&uncore_box_lock); - - return *per_cpu_ptr(pmu->box, cpu); -} - -struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) -{ - /* - * perf core schedules event on the basis of cpu, uncore events are - * collected by one of the cpus inside a physical package. - */ - return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); + return pmu->boxes[topology_logical_package_id(cpu)]; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -207,7 +183,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) return config; } -static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) +static void uncore_assign_hw_event(struct intel_uncore_box *box, + struct perf_event *event, int idx) { struct hw_perf_event *hwc = &event->hw; @@ -302,24 +279,25 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, + int node) { + int i, size, numshared = type->num_shared_regs ; struct intel_uncore_box *box; - int i, size; - size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); + size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg); box = kzalloc_node(size, GFP_KERNEL, node); if (!box) return NULL; - for (i = 0; i < type->num_shared_regs; i++) + for (i = 0; i < numshared; i++) raw_spin_lock_init(&box->shared_regs[i].lock); uncore_pmu_init_hrtimer(box); - atomic_set(&box->refcnt, 1); box->cpu = -1; - box->phys_id = -1; + box->pci_phys_id = -1; + box->pkgid = -1; /* set default hrtimer timeout */ box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; @@ -341,7 +319,8 @@ static bool is_uncore_event(struct perf_event *event) } static int -uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) +uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, + bool dogrp) { struct perf_event *event; int n, max_count; @@ -402,7 +381,8 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve return &type->unconstrainted; } -static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event) +static void uncore_put_event_constraint(struct intel_uncore_box *box, + struct perf_event *event) { if (box->pmu->type->ops->put_constraint) box->pmu->type->ops->put_constraint(box, event); @@ -582,7 +562,7 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags) if (event == box->event_list[i]) { uncore_put_event_constraint(box, event); - while (++i < box->n_events) + for (++i; i < box->n_events; i++) box->event_list[i - 1] = box->event_list[i]; --box->n_events; @@ -676,6 +656,7 @@ static int uncore_pmu_event_init(struct perf_event *event) if (!box || box->cpu < 0) return -EINVAL; event->cpu = box->cpu; + event->pmu_private = box; event->hw.idx = -1; event->hw.last_tag = ~0ULL; @@ -760,64 +741,110 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) } ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); + if (!ret) + pmu->registered = true; return ret; 
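After this rework, uncore_pmu_to_box() above is a plain array lookup: one box pointer per package, indexed by topology_logical_package_id(), with no list walk and no locking. A minimal sketch of that shape in plain C; MAX_PACKAGES, boxes[] and cpu_to_pkg() are stand-ins for the kernel structures:

#include <stdio.h>
#include <stdlib.h>

struct box { int pkgid; };

#define MAX_PACKAGES 2

static struct box *boxes[MAX_PACKAGES];  /* one slot per package */

/* stand-in for topology_logical_package_id(cpu): 4 cpus per package here */
static int cpu_to_pkg(int cpu) { return cpu / 4; }

static struct box *pmu_to_box(int cpu)
{
        return boxes[cpu_to_pkg(cpu)];   /* O(1), no locking needed */
}

int main(void)
{
        for (int pkg = 0; pkg < MAX_PACKAGES; pkg++) {
                boxes[pkg] = malloc(sizeof(*boxes[pkg]));
                boxes[pkg]->pkgid = pkg;
        }
        printf("cpu 5 -> box of package %d\n", pmu_to_box(5)->pkgid);
        for (int pkg = 0; pkg < MAX_PACKAGES; pkg++)
                free(boxes[pkg]);
        return 0;
}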
} +static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) +{ + if (!pmu->registered) + return; + perf_pmu_unregister(&pmu->pmu); + pmu->registered = false; +} + +static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) +{ + struct intel_uncore_pmu *pmu = type->pmus; + struct intel_uncore_box *box; + int i, pkg; + + if (pmu) { + pkg = topology_physical_package_id(cpu); + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box) + uncore_box_exit(box); + } + } +} + +static void __init uncore_exit_boxes(void *dummy) +{ + struct intel_uncore_type **types; + + for (types = uncore_msr_uncores; *types; types++) + __uncore_exit_boxes(*types++, smp_processor_id()); +} + +static void uncore_free_boxes(struct intel_uncore_pmu *pmu) +{ + int pkg; + + for (pkg = 0; pkg < max_packages; pkg++) + kfree(pmu->boxes[pkg]); + kfree(pmu->boxes); +} + static void __init uncore_type_exit(struct intel_uncore_type *type) { + struct intel_uncore_pmu *pmu = type->pmus; int i; - for (i = 0; i < type->num_boxes; i++) - free_percpu(type->pmus[i].box); - kfree(type->pmus); - type->pmus = NULL; + if (pmu) { + for (i = 0; i < type->num_boxes; i++, pmu++) { + uncore_pmu_unregister(pmu); + uncore_free_boxes(pmu); + } + kfree(type->pmus); + type->pmus = NULL; + } kfree(type->events_group); type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) { - int i; - for (i = 0; types[i]; i++) - uncore_type_exit(types[i]); + for (; *types; types++) + uncore_type_exit(*types); } -static int __init uncore_type_init(struct intel_uncore_type *type) +static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) { struct intel_uncore_pmu *pmus; struct attribute_group *attr_group; struct attribute **attrs; + size_t size; int i, j; pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL); if (!pmus) return -ENOMEM; - type->pmus = pmus; + size = max_packages * sizeof(struct intel_uncore_box *); + for (i = 0; i < type->num_boxes; i++) { + pmus[i].func_id = setid ? 
i : -1; + pmus[i].pmu_idx = i; + pmus[i].type = type; + pmus[i].boxes = kzalloc(size, GFP_KERNEL); + if (!pmus[i].boxes) + return -ENOMEM; + } + + type->pmus = pmus; type->unconstrainted = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 0, type->num_counters, 0, 0); - for (i = 0; i < type->num_boxes; i++) { - pmus[i].func_id = -1; - pmus[i].pmu_idx = i; - pmus[i].type = type; - INIT_LIST_HEAD(&pmus[i].box_list); - pmus[i].box = alloc_percpu(struct intel_uncore_box *); - if (!pmus[i].box) - goto fail; - } - if (type->event_descs) { - i = 0; - while (type->event_descs[i].attr.attr.name) - i++; + for (i = 0; type->event_descs[i].attr.attr.name; i++); attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) + sizeof(*attr_group), GFP_KERNEL); if (!attr_group) - goto fail; + return -ENOMEM; attrs = (struct attribute **)(attr_group + 1); attr_group->name = "events"; @@ -831,25 +858,19 @@ static int __init uncore_type_init(struct intel_uncore_type *type) type->pmu_group = &uncore_pmu_attr_group; return 0; -fail: - uncore_type_exit(type); - return -ENOMEM; } -static int __init uncore_types_init(struct intel_uncore_type **types) +static int __init +uncore_types_init(struct intel_uncore_type **types, bool setid) { - int i, ret; + int ret; - for (i = 0; types[i]; i++) { - ret = uncore_type_init(types[i]); + for (; *types; types++) { + ret = uncore_type_init(*types, setid); if (ret) - goto fail; + return ret; } return 0; -fail: - while (--i >= 0) - uncore_type_exit(types[i]); - return ret; } /* @@ -857,28 +878,28 @@ fail: */ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - struct intel_uncore_type *type; - int phys_id; - bool first_box = false; + int phys_id, pkg, ret; phys_id = uncore_pcibus_to_physid(pdev->bus); if (phys_id < 0) return -ENODEV; + pkg = topology_phys_to_logical_pkg(phys_id); + if (WARN_ON_ONCE(pkg < 0)) + return -EINVAL; + if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { int idx = UNCORE_PCI_DEV_IDX(id->driver_data); - uncore_extra_pci_dev[phys_id][idx] = pdev; + + uncore_extra_pci_dev[pkg].dev[idx] = pdev; pci_set_drvdata(pdev, NULL); return 0; } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; - box = uncore_alloc_box(type, NUMA_NO_NODE); - if (!box) - return -ENOMEM; - /* * for performance monitoring unit with multiple boxes, * each box has a different function id. @@ -890,44 +911,60 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id * some device types. Hence PCI device idx would be 0 for all devices. * So increment pmu pointer to point to an unused array element. 
*/ - if (boot_cpu_data.x86_model == 87) + if (boot_cpu_data.x86_model == 87) { while (pmu->func_id >= 0) pmu++; + } + + if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) + return -EINVAL; + + box = uncore_alloc_box(type, NUMA_NO_NODE); + if (!box) + return -ENOMEM; + if (pmu->func_id < 0) pmu->func_id = pdev->devfn; else WARN_ON_ONCE(pmu->func_id != pdev->devfn); - box->phys_id = phys_id; + atomic_inc(&box->refcnt); + box->pci_phys_id = phys_id; + box->pkgid = pkg; box->pci_dev = pdev; box->pmu = pmu; uncore_box_init(box); pci_set_drvdata(pdev, box); - raw_spin_lock(&uncore_box_lock); - if (list_empty(&pmu->box_list)) - first_box = true; - list_add_tail(&box->list, &pmu->box_list); - raw_spin_unlock(&uncore_box_lock); + pmu->boxes[pkg] = box; + if (atomic_inc_return(&pmu->activeboxes) > 1) + return 0; - if (first_box) - uncore_pmu_register(pmu); - return 0; + /* First active box registers the pmu */ + ret = uncore_pmu_register(pmu); + if (ret) { + pci_set_drvdata(pdev, NULL); + pmu->boxes[pkg] = NULL; + uncore_box_exit(box); + kfree(box); + } + return ret; } static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu; - int i, cpu, phys_id; - bool last_box = false; + int i, phys_id, pkg; phys_id = uncore_pcibus_to_physid(pdev->bus); + pkg = topology_phys_to_logical_pkg(phys_id); + box = pci_get_drvdata(pdev); if (!box) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { - if (uncore_extra_pci_dev[phys_id][i] == pdev) { - uncore_extra_pci_dev[phys_id][i] = NULL; + if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { + uncore_extra_pci_dev[pkg].dev[i] = NULL; break; } } @@ -936,33 +973,20 @@ static void uncore_pci_remove(struct pci_dev *pdev) } pmu = box->pmu; - if (WARN_ON_ONCE(phys_id != box->phys_id)) + if (WARN_ON_ONCE(phys_id != box->pci_phys_id)) return; pci_set_drvdata(pdev, NULL); - - raw_spin_lock(&uncore_box_lock); - list_del(&box->list); - if (list_empty(&pmu->box_list)) - last_box = true; - raw_spin_unlock(&uncore_box_lock); - - for_each_possible_cpu(cpu) { - if (*per_cpu_ptr(pmu->box, cpu) == box) { - *per_cpu_ptr(pmu->box, cpu) = NULL; - atomic_dec(&box->refcnt); - } - } - - WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); + pmu->boxes[pkg] = NULL; + if (atomic_dec_return(&pmu->activeboxes) == 0) + uncore_pmu_unregister(pmu); + uncore_box_exit(box); kfree(box); - - if (last_box) - perf_pmu_unregister(&pmu->pmu); } static int __init uncore_pci_init(void) { + size_t size; int ret; switch (boot_cpu_data.x86_model) { @@ -999,25 +1023,40 @@ static int __init uncore_pci_init(void) ret = skl_uncore_pci_init(); break; default: - return 0; + return -ENODEV; } if (ret) return ret; - ret = uncore_types_init(uncore_pci_uncores); + size = max_packages * sizeof(struct pci_extra_dev); + uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); + if (!uncore_extra_pci_dev) { + ret = -ENOMEM; + goto err; + } + + ret = uncore_types_init(uncore_pci_uncores, false); if (ret) - return ret; + goto errtype; uncore_pci_driver->probe = uncore_pci_probe; uncore_pci_driver->remove = uncore_pci_remove; ret = pci_register_driver(uncore_pci_driver); - if (ret == 0) - pcidrv_registered = true; - else - uncore_types_exit(uncore_pci_uncores); + if (ret) + goto errtype; + + pcidrv_registered = true; + return 0; +errtype: + uncore_types_exit(uncore_pci_uncores); + kfree(uncore_extra_pci_dev); + uncore_extra_pci_dev = NULL; + uncore_free_pcibus_map(); +err: + uncore_pci_uncores = empty_uncore; return ret; } @@ -1027,173 +1066,139 @@ static void __init 
uncore_pci_exit(void) pcidrv_registered = false; pci_unregister_driver(uncore_pci_driver); uncore_types_exit(uncore_pci_uncores); - } -} - -/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */ -static LIST_HEAD(boxes_to_free); - -static void uncore_kfree_boxes(void) -{ - struct intel_uncore_box *box; - - while (!list_empty(&boxes_to_free)) { - box = list_entry(boxes_to_free.next, - struct intel_uncore_box, list); - list_del(&box->list); - kfree(box); + kfree(uncore_extra_pci_dev); + uncore_free_pcibus_map(); } } static void uncore_cpu_dying(int cpu) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - box = *per_cpu_ptr(pmu->box, cpu); - *per_cpu_ptr(pmu->box, cpu) = NULL; - if (box && atomic_dec_and_test(&box->refcnt)) - list_add(&box->list, &boxes_to_free); + int i, pkg; + + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); } } } -static int uncore_cpu_starting(int cpu) +static void uncore_cpu_starting(int cpu, bool init) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box, *exist; - int i, j, k, phys_id; - - phys_id = topology_physical_package_id(cpu); - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - box = *per_cpu_ptr(pmu->box, cpu); - /* called by uncore_cpu_init? */ - if (box && box->phys_id >= 0) { - uncore_box_init(box); - continue; - } + struct intel_uncore_box *box; + int i, pkg, ncpus = 1; - for_each_online_cpu(k) { - exist = *per_cpu_ptr(pmu->box, k); - if (exist && exist->phys_id == phys_id) { - atomic_inc(&exist->refcnt); - *per_cpu_ptr(pmu->box, cpu) = exist; - if (box) { - list_add(&box->list, - &boxes_to_free); - box = NULL; - } - break; - } - } + if (init) { + /* + * On init we get the number of online cpus in the package + * and set refcount for all of them. 
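The uncore_cpu_starting() change in this hunk takes the whole package's worth of references at once on the init path and lets only the caller that raises the count from zero run uncore_box_init(). A hedged user-space sketch of that first-one-in pattern, using C11 atomics in place of the kernel's atomic_t:

#include <stdio.h>
#include <stdatomic.h>

static atomic_int refcnt;

/* Returns 1 when this caller is the one that activated the box */
static int box_get(int ncpus)
{
        /* Mirrors: atomic_add_return(ncpus, &box->refcnt) == ncpus */
        return atomic_fetch_add(&refcnt, ncpus) == 0;
}

int main(void)
{
        if (box_get(4))         /* init=true: charge 4 package siblings at once */
                printf("first caller initializes the box\n");
        if (!box_get(1))        /* a later hotplugged cpu just takes a reference */
                printf("later caller only pins it (refcnt=%d)\n", atomic_load(&refcnt));
        return 0;
}

atomic_fetch_add() returns the old value, so the == 0 test plays the role of the kernel's atomic_add_return(ncpus, ...) == ncpus check.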
+ */ + ncpus = cpumask_weight(topology_core_cpumask(cpu)); + } - if (box) { - box->phys_id = phys_id; + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box) + continue; + /* The first cpu on a package activates the box */ + if (atomic_add_return(ncpus, &box->refcnt) == ncpus) uncore_box_init(box); - } } } - return 0; } -static int uncore_cpu_prepare(int cpu, int phys_id) +static int uncore_cpu_prepare(int cpu) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - if (pmu->func_id < 0) - pmu->func_id = j; - + int i, pkg; + + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + if (pmu->boxes[pkg]) + continue; + /* First cpu of a package allocates the box */ box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) return -ENOMEM; - box->pmu = pmu; - box->phys_id = phys_id; - *per_cpu_ptr(pmu->box, cpu) = box; + box->pkgid = pkg; + pmu->boxes[pkg] = box; } } return 0; } -static void -uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu) +static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, + int new_cpu) { - struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = type->pmus; struct intel_uncore_box *box; - int i, j; + int i, pkg; - for (i = 0; uncores[i]; i++) { - type = uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - if (old_cpu < 0) - box = uncore_pmu_to_box(pmu, new_cpu); - else - box = uncore_pmu_to_box(pmu, old_cpu); - if (!box) - continue; - - if (old_cpu < 0) { - WARN_ON_ONCE(box->cpu != -1); - box->cpu = new_cpu; - continue; - } + pkg = topology_logical_package_id(old_cpu < 0 ? 
new_cpu : old_cpu); + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box) + continue; - WARN_ON_ONCE(box->cpu != old_cpu); - if (new_cpu >= 0) { - uncore_pmu_cancel_hrtimer(box); - perf_pmu_migrate_context(&pmu->pmu, - old_cpu, new_cpu); - box->cpu = new_cpu; - } else { - box->cpu = -1; - } + if (old_cpu < 0) { + WARN_ON_ONCE(box->cpu != -1); + box->cpu = new_cpu; + continue; } + + WARN_ON_ONCE(box->cpu != old_cpu); + box->cpu = -1; + if (new_cpu < 0) + continue; + + uncore_pmu_cancel_hrtimer(box); + perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); + box->cpu = new_cpu; } } +static void uncore_change_context(struct intel_uncore_type **uncores, + int old_cpu, int new_cpu) +{ + for (; *uncores; uncores++) + uncore_change_type_ctx(*uncores, old_cpu, new_cpu); +} + static void uncore_event_exit_cpu(int cpu) { - int i, phys_id, target; + int target; - /* if exiting cpu is used for collecting uncore events */ + /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) return; - /* find a new cpu to collect uncore events */ - phys_id = topology_physical_package_id(cpu); - target = -1; - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (phys_id == topology_physical_package_id(i)) { - target = i; - break; - } - } + /* Find a new cpu to collect uncore events */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - /* migrate uncore events to the new cpu */ - if (target >= 0) + /* Migrate uncore events to the new target */ + if (target < nr_cpu_ids) cpumask_set_cpu(target, &uncore_cpu_mask); + else + target = -1; uncore_change_context(uncore_msr_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); @@ -1201,13 +1206,15 @@ static void uncore_event_exit_cpu(int cpu) static void uncore_event_init_cpu(int cpu) { - int i, phys_id; + int target; - phys_id = topology_physical_package_id(cpu); - for_each_cpu(i, &uncore_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) - return; - } + /* + * Check if there is an online cpu in the package + * which collects uncore events already. 
+ */ + target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu)); + if (target < nr_cpu_ids) + return; cpumask_set_cpu(cpu, &uncore_cpu_mask); @@ -1220,39 +1227,25 @@ static int uncore_cpu_notifier(struct notifier_block *self, { unsigned int cpu = (long)hcpu; - /* allocate/free data structure for uncore box */ switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - uncore_cpu_prepare(cpu, -1); - break; + return notifier_from_errno(uncore_cpu_prepare(cpu)); + case CPU_STARTING: - uncore_cpu_starting(cpu); + uncore_cpu_starting(cpu, false); + case CPU_DOWN_FAILED: + uncore_event_init_cpu(cpu); break; + case CPU_UP_CANCELED: case CPU_DYING: uncore_cpu_dying(cpu); break; - case CPU_ONLINE: - case CPU_DEAD: - uncore_kfree_boxes(); - break; - default: - break; - } - /* select the cpu that collects uncore events */ - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_FAILED: - case CPU_STARTING: - uncore_event_init_cpu(cpu); - break; case CPU_DOWN_PREPARE: uncore_event_exit_cpu(cpu); break; - default: - break; } - return NOTIFY_OK; } @@ -1265,9 +1258,29 @@ static struct notifier_block uncore_cpu_nb = { .priority = CPU_PRI_PERF + 1, }; -static void __init uncore_cpu_setup(void *dummy) +static int __init type_pmu_register(struct intel_uncore_type *type) { - uncore_cpu_starting(smp_processor_id()); + int i, ret; + + for (i = 0; i < type->num_boxes; i++) { + ret = uncore_pmu_register(&type->pmus[i]); + if (ret) + return ret; + } + return 0; +} + +static int __init uncore_msr_pmus_register(void) +{ + struct intel_uncore_type **types = uncore_msr_uncores; + int ret; + + for (; *types; types++) { + ret = type_pmu_register(*types); + if (ret) + return ret; + } + return 0; } static int __init uncore_cpu_init(void) @@ -1311,71 +1324,61 @@ static int __init uncore_cpu_init(void) knl_uncore_cpu_init(); break; default: - return 0; + return -ENODEV; } - ret = uncore_types_init(uncore_msr_uncores); + ret = uncore_types_init(uncore_msr_uncores, true); if (ret) - return ret; + goto err; + ret = uncore_msr_pmus_register(); + if (ret) + goto err; return 0; +err: + uncore_types_exit(uncore_msr_uncores); + uncore_msr_uncores = empty_uncore; + return ret; } -static int __init uncore_pmus_register(void) +static void __init uncore_cpu_setup(void *dummy) { - struct intel_uncore_pmu *pmu; - struct intel_uncore_type *type; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - uncore_pmu_register(pmu); - } - } - - return 0; + uncore_cpu_starting(smp_processor_id(), true); } -static void __init uncore_cpumask_init(void) -{ - int cpu; - - /* - * ony invoke once from msr or pci init code - */ - if (!cpumask_empty(&uncore_cpu_mask)) - return; +/* Lazy to avoid allocation of a few bytes for the normal case */ +static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC); - cpu_notifier_register_begin(); +static int __init uncore_cpumask_init(bool msr) +{ + unsigned int cpu; for_each_online_cpu(cpu) { - int i, phys_id = topology_physical_package_id(cpu); + unsigned int pkg = topology_logical_package_id(cpu); + int ret; - for_each_cpu(i, &uncore_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) { - phys_id = -1; - break; - } - } - if (phys_id < 0) + if (test_and_set_bit(pkg, packages)) continue; - - uncore_cpu_prepare(cpu, phys_id); + /* + * The first online cpu of each package allocates and takes + * the refcounts for all other online cpus in that package. 
+ * If msrs are not enabled no allocation is required. + */ + if (msr) { + ret = uncore_cpu_prepare(cpu); + if (ret) + return ret; + } uncore_event_init_cpu(cpu); + smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1); } - on_each_cpu(uncore_cpu_setup, NULL, 1); - __register_cpu_notifier(&uncore_cpu_nb); - - cpu_notifier_register_done(); + return 0; } - static int __init intel_uncore_init(void) { - int ret; + int pret, cret, ret; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; @@ -1383,19 +1386,27 @@ static int __init intel_uncore_init(void) if (cpu_has_hypervisor) return -ENODEV; - ret = uncore_pci_init(); - if (ret) - goto fail; - ret = uncore_cpu_init(); - if (ret) { - uncore_pci_exit(); - goto fail; - } - uncore_cpumask_init(); + max_packages = topology_max_packages(); + + pret = uncore_pci_init(); + cret = uncore_cpu_init(); - uncore_pmus_register(); + if (cret && pret) + return -ENODEV; + + cpu_notifier_register_begin(); + ret = uncore_cpumask_init(!cret); + if (ret) + goto err; + cpu_notifier_register_done(); return 0; -fail: + +err: + /* Undo box->init_box() */ + on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); + uncore_types_exit(uncore_msr_uncores); + uncore_pci_exit(); + cpu_notifier_register_done(); return ret; } device_initcall(intel_uncore_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/events/intel/uncore.h index a7086b8..79766b9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -1,8 +1,10 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/pci.h> +#include <asm/apicdef.h> + #include <linux/perf_event.h> -#include "perf_event.h" +#include "../perf_event.h" #define UNCORE_PMU_NAME_LEN 32 #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) @@ -19,11 +21,12 @@ #define UNCORE_EXTRA_PCI_DEV 0xff #define UNCORE_EXTRA_PCI_DEV_MAX 3 -/* support up to 8 sockets */ -#define UNCORE_SOCKET_MAX 8 - #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) +struct pci_extra_dev { + struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX]; +}; + struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; @@ -61,6 +64,7 @@ struct intel_uncore_type { struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); + void (*exit_box)(struct intel_uncore_box *); void (*disable_box)(struct intel_uncore_box *); void (*enable_box)(struct intel_uncore_box *); void (*disable_event)(struct intel_uncore_box *, struct perf_event *); @@ -73,13 +77,14 @@ struct intel_uncore_ops { }; struct intel_uncore_pmu { - struct pmu pmu; - char name[UNCORE_PMU_NAME_LEN]; - int pmu_idx; - int func_id; - struct intel_uncore_type *type; - struct intel_uncore_box ** __percpu box; - struct list_head box_list; + struct pmu pmu; + char name[UNCORE_PMU_NAME_LEN]; + int pmu_idx; + int func_id; + bool registered; + atomic_t activeboxes; + struct intel_uncore_type *type; + struct intel_uncore_box **boxes; }; struct intel_uncore_extra_reg { @@ -89,7 +94,8 @@ struct intel_uncore_extra_reg { }; struct intel_uncore_box { - int phys_id; + int pci_phys_id; + int pkgid; int n_active; /* number of active events */ int n_events; int cpu; /* cpu to collect events */ @@ -123,7 +129,6 @@ struct pci2phy_map { int pbus_to_physid[256]; }; -int uncore_pcibus_to_physid(struct pci_bus *bus); struct pci2phy_map *__find_pci2phy_map(int segment); ssize_t uncore_event_show(struct kobject *kobj, @@ -305,14 +310,30 @@ static inline void uncore_box_init(struct intel_uncore_box *box) } } +static 
inline void uncore_box_exit(struct intel_uncore_box *box) +{ + if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->exit_box) + box->pmu->type->ops->exit_box(box); + } +} + static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { - return (box->phys_id < 0); + return (box->pkgid < 0); +} + +static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct intel_uncore_pmu, pmu); +} + +static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +{ + return event->pmu_private; } -struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event); struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); -struct intel_uncore_box *uncore_event_to_box(struct perf_event *event); u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); @@ -328,7 +349,7 @@ extern struct intel_uncore_type **uncore_pci_uncores; extern struct pci_driver *uncore_pci_driver; extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; -extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; /* perf_event_intel_uncore_snb.c */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 2749965..cda5693 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1,5 +1,5 @@ /* Nehalem-EX/Westmere-EX uncore support */ -#include "perf_event_intel_uncore.h" +#include "uncore.h" /* NHM-EX event control */ #define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff @@ -201,6 +201,11 @@ static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); } +static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0); +} + static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) { unsigned msr = uncore_msr_box_ctl(box); @@ -250,6 +255,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p #define NHMEX_UNCORE_OPS_COMMON_INIT() \ .init_box = nhmex_uncore_msr_init_box, \ + .exit_box = nhmex_uncore_msr_exit_box, \ .disable_box = nhmex_uncore_msr_disable_box, \ .enable_box = nhmex_uncore_msr_enable_box, \ .disable_event = nhmex_uncore_msr_disable_event, \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 2bd030d..96531d2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,5 +1,5 @@ /* Nehalem/SandBridge/Haswell uncore support */ -#include "perf_event_intel_uncore.h" +#include "uncore.h" /* Uncore IMC PCI IDs */ #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 @@ -95,6 +95,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static void snb_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0); +} + static struct uncore_event_desc snb_uncore_events[] = { INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), { /* end: all zeroes */ }, @@ -116,6 +122,7 @@ static struct attribute_group snb_uncore_format_group = { 
static struct intel_uncore_ops snb_uncore_msr_ops = { .init_box = snb_uncore_msr_init_box, + .exit_box = snb_uncore_msr_exit_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = snb_uncore_msr_enable_event, .read_counter = uncore_msr_read_counter, @@ -231,6 +238,11 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box) box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; } +static void snb_uncore_imc_exit_box(struct intel_uncore_box *box) +{ + iounmap(box->io_addr); +} + static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) {} @@ -301,6 +313,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event) return -EINVAL; event->cpu = box->cpu; + event->pmu_private = box; event->hw.idx = -1; event->hw.last_tag = ~0ULL; @@ -458,6 +471,7 @@ static struct pmu snb_uncore_imc_pmu = { static struct intel_uncore_ops snb_uncore_imc_ops = { .init_box = snb_uncore_imc_init_box, + .exit_box = snb_uncore_imc_exit_box, .enable_box = snb_uncore_imc_enable_box, .disable_box = snb_uncore_imc_disable_box, .disable_event = snb_uncore_imc_disable_event, diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 33acb88..93f6bd9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,5 @@ /* SandyBridge-EP/IvyTown uncore support */ -#include "perf_event_intel_uncore.h" - +#include "uncore.h" /* SNB-EP Box level control */ #define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) @@ -987,7 +986,9 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve if (reg1->idx != EXTRA_REG_NONE) { int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx]; + int pkg = topology_phys_to_logical_pkg(box->pci_phys_id); + struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx]; + if (filter_pdev) { pci_write_config_dword(filter_pdev, reg1->reg, (u32)reg1->config); @@ -2521,14 +2522,16 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { void hswep_uncore_cpu_init(void) { + int pkg = topology_phys_to_logical_pkg(0); + if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; /* Detect 6-8 core systems with only two SBOXes */ - if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) { + if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { u32 capid4; - pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3], + pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3], 0x94, &capid4); if (((capid4 >> 6) & 0x3) == 0) hswep_uncore_sbox.num_boxes = 2; @@ -2875,11 +2878,13 @@ static struct intel_uncore_type bdx_uncore_sbox = { .format_group = &hswep_uncore_sbox_format_group, }; +#define BDX_MSR_UNCORE_SBOX 3 + static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, - &bdx_uncore_sbox, &hswep_uncore_pcu, + &bdx_uncore_sbox, NULL, }; @@ -2888,6 +2893,10 @@ void bdx_uncore_cpu_init(void) if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; + + /* BDX-DE doesn't have SBOX */ + if (boot_cpu_data.x86_model == 86) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; } static struct intel_uncore_type bdx_uncore_ha = { diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/events/msr.c index ec863b9..ec863b9 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ 
b/arch/x86/events/msr.c diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/events/perf_event.h index 7bb61e3..68155ca 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -586,6 +586,7 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1; int pebs_record_size; + int pebs_buffer_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); @@ -860,6 +861,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[]; extern struct event_constraint intel_hsw_pebs_event_constraints[]; +extern struct event_constraint intel_bdw_pebs_event_constraints[]; + extern struct event_constraint intel_skl_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); @@ -904,6 +907,8 @@ void intel_pmu_lbr_init_skl(void); void intel_pmu_lbr_init_knl(void); +void intel_pmu_pebs_data_source_nhm(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 7bfc85b..99afb66 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -152,12 +152,6 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection" /* - * This must be included *after* the definition of ALTERNATIVE due to - * <asm/arch_hweight.h> - */ -#include <asm/cpufeature.h> - -/* * Alternative instructions for different CPU types or capabilities. * * This allows to use optimized instructions even on generic binary diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 3c56ef1..5e828da 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -27,15 +27,23 @@ struct amd_l3_cache { }; struct threshold_block { - unsigned int block; - unsigned int bank; - unsigned int cpu; - u32 address; - u16 interrupt_enable; - bool interrupt_capable; - u16 threshold_limit; - struct kobject kobj; - struct list_head miscj; + unsigned int block; /* Number within bank */ + unsigned int bank; /* MCA bank the block belongs to */ + unsigned int cpu; /* CPU which controls MCA bank */ + u32 address; /* MSR address for the block */ + u16 interrupt_enable; /* Enable/Disable APIC interrupt */ + bool interrupt_capable; /* Bank can generate an interrupt. */ + + u16 threshold_limit; /* + * Value upon which threshold + * interrupt is generated. + */ + + struct kobject kobj; /* sysfs object */ + struct list_head miscj; /* + * List of threshold blocks + * within a bank. 
+ */ }; struct threshold_bank { diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c80f6b6..0899cfc 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -6,7 +6,6 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <asm/processor.h> #include <asm/apicdef.h> #include <linux/atomic.h> #include <asm/fixmap.h> diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 259a7c1..02e799f 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_HWEIGHT_H #define _ASM_X86_HWEIGHT_H +#include <asm/cpufeatures.h> + #ifdef CONFIG_64BIT /* popcnt %edi, %eax -- redundant REX prefix for alignment */ #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 189679a..f5063b6 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -44,19 +44,22 @@ /* Exception table entry */ #ifdef __ASSEMBLY__ -# define _ASM_EXTABLE(from,to) \ +# define _ASM_EXTABLE_HANDLE(from, to, handler) \ .pushsection "__ex_table","a" ; \ - .balign 8 ; \ + .balign 4 ; \ .long (from) - . ; \ .long (to) - . ; \ + .long (handler) - . ; \ .popsection -# define _ASM_EXTABLE_EX(from,to) \ - .pushsection "__ex_table","a" ; \ - .balign 8 ; \ - .long (from) - . ; \ - .long (to) - . + 0x7ffffff0 ; \ - .popsection +# define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + +# define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) + +# define _ASM_EXTABLE_EX(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ @@ -89,19 +92,24 @@ .endm #else -# define _ASM_EXTABLE(from,to) \ +# define _EXPAND_EXTABLE_HANDLE(x) #x +# define _ASM_EXTABLE_HANDLE(from, to, handler) \ " .pushsection \"__ex_table\",\"a\"\n" \ - " .balign 8\n" \ + " .balign 4\n" \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ + " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ " .popsection\n" -# define _ASM_EXTABLE_EX(from,to) \ - " .pushsection \"__ex_table\",\"a\"\n" \ - " .balign 8\n" \ - " .long (" #from ") - .\n" \ - " .long (" #to ") - . + 0x7ffffff0\n" \ - " .popsection\n" +# define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + +# define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) + +# define _ASM_EXTABLE_EX(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) + /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index a584e1c..bfb28ca 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -6,18 +6,17 @@ /* * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking + * And yes, this might be required on UP too when we're talking * to devices. */ #ifdef CONFIG_X86_32 -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. 
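A hedged sketch of what the three relative 32-bit words emitted by the _ASM_EXTABLE_HANDLE macro above amount to on the C side: each word stores "target minus the address of the word itself", so the absolute address is recovered by adding the word's own address back. The structure and helper names below are illustrative only, not the kernel's definitions:

#include <stdio.h>

/* Mirrors the three .long directives: each field holds target - &field. */
struct extable_entry {
        int insn;       /* offset to the faulting instruction */
        int fixup;      /* offset to the fixup code */
        int handler;    /* offset to the handler function */
};

static unsigned long ex_fixup_addr(const struct extable_entry *e)
{
        /* absolute address = address of the field + stored relative offset */
        return (unsigned long)&e->fixup + e->fixup;
}

int main(void)
{
        static char fixup_stub;                 /* stands in for fixup code */
        static struct extable_entry e;

        e.fixup = (int)((long)&fixup_stub - (long)&e.fixup);
        printf("%d\n", ex_fixup_addr(&e) == (unsigned long)&fixup_stub); /* 1 */
        return 0;
}

Storing self-relative offsets keeps each entry at 12 bytes regardless of pointer size and avoids load-time relocations, which is presumably why the .balign drops from 8 to 4.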
- */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") +#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") +#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") #else #define mb() asm volatile("mfence":::"memory") #define rmb() asm volatile("lfence":::"memory") diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index cfe3b95..7766d1c 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __set_bit(long nr, volatile unsigned long *addr) +static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr) * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. */ -static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } -static inline void __clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } @@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr) * No memory barrier is required here, because x86 cannot reorder stores past * older loads. Same principle as spin_unlock. */ -static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); __clear_bit(nr, addr); @@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(long nr, volatile unsigned long *addr) +static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); } @@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(long nr, volatile unsigned long *addr) +static __always_inline void change_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "xorb %1,%0" @@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
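The 32-bit mb()/rmb()/wmb() definitions above now expand to inline asm with "memory" and "cc" clobbers, so they double as compiler barriers. A stand-alone user-space approximation of the same idea, assuming x86-64 and GCC-style inline asm (simplified, no ALTERNATIVE patching):

#include <stdio.h>

/* Compiler barrier only: forbids the compiler from reordering memory
 * accesses across this point; emits no instruction. */
#define barrier()       asm volatile("" ::: "memory")

/* Full hardware barrier on x86-64: MFENCE orders loads and stores,
 * and the "memory" clobber keeps the compiler from reordering too. */
#define mb()            asm volatile("mfence" ::: "memory")

static int data, flag;

int main(void)
{
        data = 42;
        mb();           /* make the store to data globally visible first */
        flag = 1;
        barrier();
        printf("%d %d\n", data, flag);
        return 0;
}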
*/ -static inline int test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); } @@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); } @@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); } @@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -static inline int variable_test_bit(long nr, volatile const unsigned long *addr) +static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) { int oldbit; @@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); * * Undefined if no bit exists, so code should check against 0 first. */ -static inline unsigned long __ffs(unsigned long word) +static __always_inline unsigned long __ffs(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) @@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word) * * Undefined if no zero exists, so code should check against ~0UL first. */ -static inline unsigned long ffz(unsigned long word) +static __always_inline unsigned long ffz(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) @@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word) * * Undefined if no set bit exists, so code should check against 0 first. 
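For readers without the x86 asm background, the lock-prefixed bts/btr/btc helpers above can be approximated in portable C with the GCC/Clang __atomic builtins. A minimal sketch of the test_and_set_bit() semantics (the helper name is invented):

#include <stdio.h>

/* Portable approximation of the kernel's test_and_set_bit():
 * atomically OR the bit in and report its previous value. */
static int my_test_and_set_bit(long nr, volatile unsigned long *addr)
{
        unsigned long mask = 1UL << (nr % (8 * sizeof(unsigned long)));
        volatile unsigned long *word = addr + nr / (8 * sizeof(unsigned long));
        unsigned long old = __atomic_fetch_or(word, mask, __ATOMIC_SEQ_CST);

        return (old & mask) != 0;
}

int main(void)
{
        unsigned long bitmap[2] = { 0, 0 };

        printf("%d\n", my_test_and_set_bit(65, bitmap));        /* 0: was clear */
        printf("%d\n", my_test_and_set_bit(65, bitmap));        /* 1: now set */
        return 0;
}

Like the kernel version, @nr is not limited to a single word; the helper picks the right word of the bitmap first and then operates atomically on it.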
*/ -static inline unsigned long __fls(unsigned long word) +static __always_inline unsigned long __fls(unsigned long word) { asm("bsr %1,%0" : "=r" (word) @@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word) * set bit if value is nonzero. The first (least significant) bit * is at position 1. */ -static inline int ffs(int x) +static __always_inline int ffs(int x) { int r; @@ -434,7 +434,7 @@ static inline int ffs(int x) * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ -static inline int fls(int x) +static __always_inline int fls(int x) { int r; diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index e63aa38..61518cf 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -91,16 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size); #define mmio_flush_range(addr, size) clflush_cache_range(addr, size) -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); extern const int rodata_test_data; extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); -#else -static inline void set_kernel_text_rw(void) { } -static inline void set_kernel_text_ro(void) { } -#endif #ifdef CONFIG_DEBUG_RODATA_TEST int rodata_test(void); diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index eda81dc..d194266 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -3,10 +3,11 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#define VCLOCK_NONE 0 /* No vDSO clock available. */ -#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ -#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ -#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ +#define VCLOCK_NONE 0 /* No vDSO clock available. */ +#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ +#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ +#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ +#define VCLOCK_MAX 3 struct arch_clocksource_data { int vclock_mode; diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index ad19841..9733361 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -2,6 +2,7 @@ #define ASM_X86_CMPXCHG_H #include <linux/compiler.h> +#include <asm/cpufeatures.h> #include <asm/alternative.h> /* Provides LOCK_PREFIX */ /* diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7ad8c94..68e4e82 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -1,288 +1,7 @@ -/* - * Defines x86 CPU feature bits - */ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#include <asm/required-features.h> -#endif - -#ifndef _ASM_X86_DISABLED_FEATURES_H -#include <asm/disabled-features.h> -#endif - -#define NCAPINTS 16 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ - -/* - * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. 
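The bsf/bsr-based __ffs()/__fls()/ffs()/fls() helpers above have straightforward user-space counterparts built on the compiler's count-trailing/leading-zero builtins; a small sketch for comparison (names invented, 0-based indices as in __ffs/__fls):

#include <stdio.h>

/* Index of the least significant set bit, like the kernel's __ffs();
 * result is undefined for 0, so callers must check first. */
static unsigned long my_ffs(unsigned long word)
{
        return (unsigned long)__builtin_ctzl(word);
}

/* Index of the most significant set bit, like the kernel's __fls(). */
static unsigned long my_fls(unsigned long word)
{
        return 8 * sizeof(unsigned long) - 1 - (unsigned long)__builtin_clzl(word);
}

int main(void)
{
        printf("%lu %lu\n", my_ffs(0x90UL), my_fls(0x90UL));    /* 4 7 */
        return 0;
}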
- */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ - /* (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ -#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ -#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ -#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ -#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ - -/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ -/* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ -#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! 
*/ - -/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ - -/* Other features, Linux-defined mapping, word 3 */ -/* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -/* cpu types for specific tunings: */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ -/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ -/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ -#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. 
Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ -#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ -#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ - -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ -#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ -#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ -#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ - -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 
operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ -#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ - -/* - * Auxiliary flags: Linux defined - For features scattered in various - * CPUID levels like 0x6, 0xA etc, word 7. - * - * Reuse free bits when adding new feature flags! - */ - -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ - -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ - -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ - -/* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ - -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ - - -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ - -/* Extended state features, CPUID level 
0x0000000d:1 (eax), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ - -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ - -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ - -/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ - -/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ - -/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ -#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ -#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ -#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ -#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ - -/* - * BUG word(s) - */ -#define X86_BUG(x) (NCAPINTS*32 + (x)) - -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#include <asm/processor.h> #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; * is not relevant. */ #define cpu_feature_enabled(bit) \ - (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \ - cpu_has(&boot_cpu_data, bit)) + (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 
0 : static_cpu_has(bit)) #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) @@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) /* - * Do not add any more of those clumsy macros - use static_cpu_has_safe() for + * Do not add any more of those clumsy macros - use static_cpu_has() for * fast paths and boot_cpu_has() otherwise! */ -#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS) -extern void warn_pre_alternatives(void); -extern bool __static_cpu_has_safe(u16 bit); - +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). - * These are only valid after alternatives have run, but will statically - * patch the target code for additional performance. + * These will statically patch the target code for additional + * performance. */ -static __always_inline __pure bool __static_cpu_has(u16 bit) -{ -#ifdef CC_HAVE_ASM_GOTO - -#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS - - /* - * Catch too early usage of this before alternatives - * have run. - */ - asm_volatile_goto("1: jmp %l[t_warn]\n" - "2:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" - " .long 0\n" /* no replacement */ - " .word %P0\n" /* 1: do replace */ - " .byte 2b - 1b\n" /* source len */ - " .byte 0\n" /* replacement len */ - " .byte 0\n" /* pad len */ - ".previous\n" - /* skipping size check since replacement size = 0 */ - : : "i" (X86_FEATURE_ALWAYS) : : t_warn); - -#endif - - asm_volatile_goto("1: jmp %l[t_no]\n" - "2:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" - " .long 0\n" /* no replacement */ - " .word %P0\n" /* feature bit */ - " .byte 2b - 1b\n" /* source len */ - " .byte 0\n" /* replacement len */ - " .byte 0\n" /* pad len */ - ".previous\n" - /* skipping size check since replacement size = 0 */ - : : "i" (bit) : : t_no); - return true; - t_no: - return false; - -#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS - t_warn: - warn_pre_alternatives(); - return false; -#endif - -#else /* CC_HAVE_ASM_GOTO */ - - u8 flag; - /* Open-coded due to __stringify() in ALTERNATIVE() */ - asm volatile("1: movb $0,%0\n" - "2:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" - " .long 3f - .\n" - " .word %P1\n" /* feature bit */ - " .byte 2b - 1b\n" /* source len */ - " .byte 4f - 3f\n" /* replacement len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .discard,\"aw\",@progbits\n" - " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "3: movb $1,%0\n" - "4:\n" - ".previous\n" - : "=qm" (flag) : "i" (bit)); - return flag; - -#endif /* CC_HAVE_ASM_GOTO */ -} - -#define static_cpu_has(bit) \ -( \ - __builtin_constant_p(boot_cpu_has(bit)) ? \ - boot_cpu_has(bit) : \ - __builtin_constant_p(bit) ? 
\ - __static_cpu_has(bit) : \ - boot_cpu_has(bit) \ -) - -static __always_inline __pure bool _static_cpu_has_safe(u16 bit) +static __always_inline __pure bool _static_cpu_has(u16 bit) { -#ifdef CC_HAVE_ASM_GOTO - asm_volatile_goto("1: jmp %l[t_dynamic]\n" + asm_volatile_goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" @@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) " .byte 0\n" /* repl len */ " .byte 0\n" /* pad len */ ".previous\n" - : : "i" (bit), "i" (X86_FEATURE_ALWAYS) - : : t_dynamic, t_no); + ".section .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum],%[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".previous\n" + : : "i" (bit), "i" (X86_FEATURE_ALWAYS), + [bitnum] "i" (1 << (bit & 7)), + [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + : : t_yes, t_no); + t_yes: return true; t_no: return false; - t_dynamic: - return __static_cpu_has_safe(bit); -#else - u8 flag; - /* Open-coded due to __stringify() in ALTERNATIVE() */ - asm volatile("1: movb $2,%0\n" - "2:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 3f - .\n" /* repl offset */ - " .word %P2\n" /* always replace */ - " .byte 2b - 1b\n" /* source len */ - " .byte 4f - 3f\n" /* replacement len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .discard,\"aw\",@progbits\n" - " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "3: movb $0,%0\n" - "4:\n" - ".previous\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 5f - .\n" /* repl offset */ - " .word %P1\n" /* feature bit */ - " .byte 4b - 3b\n" /* src len */ - " .byte 6f - 5f\n" /* repl len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .discard,\"aw\",@progbits\n" - " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "5: movb $1,%0\n" - "6:\n" - ".previous\n" - : "=qm" (flag) - : "i" (bit), "i" (X86_FEATURE_ALWAYS)); - return (flag == 2 ? __static_cpu_has_safe(bit) : flag); -#endif /* CC_HAVE_ASM_GOTO */ } -#define static_cpu_has_safe(bit) \ +#define static_cpu_has(bit) \ ( \ __builtin_constant_p(boot_cpu_has(bit)) ? \ boot_cpu_has(bit) : \ - _static_cpu_has_safe(bit) \ + _static_cpu_has(bit) \ ) #else /* - * gcc 3.x is too stupid to do the static test; fall back to dynamic. + * Fall back to dynamic for gcc versions which don't support asm goto. Should be + * a minority now anyway. 
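The slow path added in the .altinstr_aux section above tests a single byte of the x86_capability bitmap: feature bits are still numbered word*32+bit, but the testb addresses the bitmap as bytes, i.e. byte bit>>3 with mask 1<<(bit&7). A stand-alone C sketch of that indexing, assuming a little-endian layout as on x86 (array contents and helper name are made up for the example):

#include <stdio.h>

#define NCAPINTS 16

/* Toy stand-in for boot_cpu_data.x86_capability[]. */
static unsigned int x86_capability[NCAPINTS];

/* Same addressing as the testb in the .altinstr_aux slow path:
 * pick byte bit>>3 of the bitmap and test bit (bit & 7) in it.
 * Works because x86 is little-endian. */
static int cap_test(unsigned short bit)
{
        const unsigned char *bytes = (const unsigned char *)x86_capability;

        return (bytes[bit >> 3] >> (bit & 7)) & 1;
}

int main(void)
{
        unsigned short bit = 4 * 32 + 5;        /* X86_FEATURE_VMX: word 4, bit 5 */

        x86_capability[bit / 32] |= 1u << (bit % 32);
        printf("%d\n", cap_test(bit));          /* 1 */
        return 0;
}

Keeping the fallback as a byte test means the jump target patched in by the alternatives machinery needs no register setup beyond what the asm goto constraints already provide.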
*/ #define static_cpu_has(bit) boot_cpu_has(bit) -#define static_cpu_has_safe(bit) boot_cpu_has(bit) #endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) @@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) #define static_cpu_has_bug(bit) static_cpu_has((bit)) -#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit)) #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) #define MAX_CPU_FEATURES (NCAPINTS * 32) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h new file mode 100644 index 0000000..074b760 --- /dev/null +++ b/arch/x86/include/asm/cpufeatures.h @@ -0,0 +1,300 @@ +#ifndef _ASM_X86_CPUFEATURES_H +#define _ASM_X86_CPUFEATURES_H + +#ifndef _ASM_X86_REQUIRED_FEATURES_H +#include <asm/required-features.h> +#endif + +#ifndef _ASM_X86_DISABLED_FEATURES_H +#include <asm/disabled-features.h> +#endif + +/* + * Defines x86 CPU feature bits + */ +#define NCAPINTS 16 /* N 32-bit words worth of info */ +#define NBUGINTS 1 /* N 32-bit bug flags */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ + /* (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. 
*/ +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ +/* cpu types for specific tunings: */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ +#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ +/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ +/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ +#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define 
X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ + +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ + +/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* 
Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ +#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ + +/* + * Auxiliary flags: Linux defined - For features scattered in various + * CPUID levels like 0x6, 0xA etc, word 7. + * + * Reuse free bits when adding new feature flags! + */ + +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ + +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ + +/* Virtualization flags: Linux defined, word 8 */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ + +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ + + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and 
Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ + +/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ + +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ + +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ + +/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ +#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ + +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ + +/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ + +/* + * BUG word(s) + */ +#define X86_BUG(x) (NCAPINTS*32 + (x)) + +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ + +#ifdef CONFIG_X86_32 +/* + * 64-bit kernels don't use X86_BUG_ESPFIX. 
Make the define conditional + * to avoid confusion. + */ +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#endif + +#endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 278441f..eb5deb4 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -98,4 +98,27 @@ struct desc_ptr { #endif /* !__ASSEMBLY__ */ +/* Access rights as returned by LAR */ +#define AR_TYPE_RODATA (0 * (1 << 9)) +#define AR_TYPE_RWDATA (1 * (1 << 9)) +#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9)) +#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9)) +#define AR_TYPE_XOCODE (4 * (1 << 9)) +#define AR_TYPE_XRCODE (5 * (1 << 9)) +#define AR_TYPE_XOCODE_CONF (6 * (1 << 9)) +#define AR_TYPE_XRCODE_CONF (7 * (1 << 9)) +#define AR_TYPE_MASK (7 * (1 << 9)) + +#define AR_DPL0 (0 * (1 << 13)) +#define AR_DPL3 (3 * (1 << 13)) +#define AR_DPL_MASK (3 * (1 << 13)) + +#define AR_A (1 << 8) /* "Accessed" */ +#define AR_S (1 << 12) /* If clear, "System" segment */ +#define AR_P (1 << 15) /* "Present" */ +#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */ +#define AR_L (1 << 21) /* "Long mode" for code segments */ +#define AR_DB (1 << 22) /* D/B, effect depends on type */ +#define AR_G (1 << 23) /* "Granularity" (limit in pages) */ + #endif /* _ASM_X86_DESC_DEFS_H */ diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 535192f..3c69fed 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len) /* Use early IO mappings for DMI because it's initialized early */ #define dmi_early_remap early_ioremap #define dmi_early_unmap early_iounmap -#define dmi_remap ioremap +#define dmi_remap ioremap_cache #define dmi_unmap iounmap #endif /* _ASM_X86_DMI_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0010c78..08b1f2f 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -25,6 +25,8 @@ #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" +#define MAX_CMDLINE_ADDRESS UINT_MAX + #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1514753..15340e3 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -256,7 +256,7 @@ extern int force_personality32; instruction set this CPU supports. This could be done in user space, but it's not easy, and we've already done it here. */ -#define ELF_HWCAP (boot_cpu_data.x86_capability[0]) +#define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX]) /* This yields a string that ld.so will use to load implementation specific libraries for optimization. 
This is more specific in diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6d7d0e5..8554f96 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve); extern int fixmaps_set; extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; +#define kmap_prot PAGE_KERNEL extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0fd440d..a212434 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -17,6 +17,7 @@ #include <asm/user.h> #include <asm/fpu/api.h> #include <asm/fpu/xstate.h> +#include <asm/cpufeature.h> /* * High level FPU state handling functions: @@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void); */ static __always_inline __pure bool use_eager_fpu(void) { - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); + return static_cpu_has(X86_FEATURE_EAGER_FPU); } static __always_inline __pure bool use_xsaveopt(void) { - return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); + return static_cpu_has(X86_FEATURE_XSAVEOPT); } static __always_inline __pure bool use_xsave(void) { - return static_cpu_has_safe(X86_FEATURE_XSAVE); + return static_cpu_has(X86_FEATURE_XSAVE); } static __always_inline __pure bool use_fxsr(void) { - return static_cpu_has_safe(X86_FEATURE_FXSR); + return static_cpu_has(X86_FEATURE_FXSR); } /* @@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) WARN_ON(system_state != SYSTEM_BOOTING); - if (static_cpu_has_safe(X86_FEATURE_XSAVES)) + if (static_cpu_has(X86_FEATURE_XSAVES)) XSTATE_OP(XSAVES, xstate, lmask, hmask, err); else XSTATE_OP(XSAVE, xstate, lmask, hmask, err); @@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) WARN_ON(system_state != SYSTEM_BOOTING); - if (static_cpu_has_safe(X86_FEATURE_XSAVES)) + if (static_cpu_has(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); @@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) * pending. Clear the x87 state here by setting it to fixed values. * "m" is a random variable that should be in L1. */ - if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { asm volatile( "fnclex\n\t" "emms\n\t" @@ -589,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) * If the task has used the math, pre-load the FPU on xsave processors * or if the past 5 consecutive context-switches used math. 
*/ - fpu.preload = new_fpu->fpstate_active && + fpu.preload = static_cpu_has(X86_FEATURE_FPU) && + new_fpu->fpstate_active && (use_eager_fpu() || new_fpu->counter > 5); if (old_fpu->fpregs_active) { diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index af30fde..f23cd8c 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -20,16 +20,15 @@ /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE) - -/* Supported features which require eager state saving */ -#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM) +/* Supported features which require eager state saving */ +#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) + /* All currently supported features */ #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 793179c..6e4d170 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -1,23 +1,44 @@ -#ifdef __ASSEMBLY__ +#ifndef _ASM_X86_FRAME_H +#define _ASM_X86_FRAME_H #include <asm/asm.h> -/* The annotation hides the frame from the unwinder and makes it look - like a ordinary ebp save/restore. This avoids some special cases for - frame pointer later */ +/* + * These are stack frame creation macros. They should be used by every + * callable non-leaf asm function to make kernel stack traces more reliable. + */ + #ifdef CONFIG_FRAME_POINTER - .macro FRAME - __ASM_SIZE(push,) %__ASM_REG(bp) - __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) - .endm - .macro ENDFRAME - __ASM_SIZE(pop,) %__ASM_REG(bp) - .endm -#else - .macro FRAME - .endm - .macro ENDFRAME - .endm -#endif - -#endif /* __ASSEMBLY__ */ + +#ifdef __ASSEMBLY__ + +.macro FRAME_BEGIN + push %_ASM_BP + _ASM_MOV %_ASM_SP, %_ASM_BP +.endm + +.macro FRAME_END + pop %_ASM_BP +.endm + +#else /* !__ASSEMBLY__ */ + +#define FRAME_BEGIN \ + "push %" _ASM_BP "\n" \ + _ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n" + +#define FRAME_END "pop %" _ASM_BP "\n" + +#endif /* __ASSEMBLY__ */ + +#define FRAME_OFFSET __ASM_SEL(4, 8) + +#else /* !CONFIG_FRAME_POINTER */ + +#define FRAME_BEGIN +#define FRAME_END +#define FRAME_OFFSET 0 + +#endif /* CONFIG_FRAME_POINTER */ + +#endif /* _ASM_X86_FRAME_H */ diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h deleted file mode 100644 index b3799d8..0000000 --- a/arch/x86/include/asm/gpio.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __LINUX_GPIO_H -#warning Include linux/gpio.h instead of asm/gpio.h -#include <linux/gpio.h> -#endif diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h index cd2ce40..ebea2c9 100644 --- a/arch/x86/include/asm/imr.h +++ b/arch/x86/include/asm/imr.h @@ -53,7 +53,7 @@ #define IMR_MASK (IMR_ALIGN - 1) int imr_add_range(phys_addr_t base, size_t size, - unsigned int rmask, unsigned int wmask, bool lock); + unsigned int rmask, unsigned int wmask); int imr_remove_range(phys_addr_t base, size_t size); diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index cfc9a0d..a4fe16e 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -57,67 +57,13 @@ static inline void __xapic_wait_icr_idle(void) cpu_relax(); } -static inline void -__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) -{ - 
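FRAME_BEGIN/FRAME_END above are meant for callable non-leaf assembly; the C-string variants can also wrap a call emitted from inline asm. A hedged sketch, assuming a 64-bit build, with a made-up callee:

/*
 * Illustrative only: give the callee a conventional frame when
 * CONFIG_FRAME_POINTER=y. 'demo_target' is a made-up symbol.
 */
static inline void demo_call_with_frame(void)
{
	asm volatile(FRAME_BEGIN
		     "call demo_target\n\t"
		     FRAME_END
		     : : : "rax", "rcx", "rdx", "rsi", "rdi",
			   "r8", "r9", "r10", "r11", "memory", "cc");
}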
/* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. - */ - __xapic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector, dest); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - native_apic_mem_write(APIC_ICR, cfg); -} +void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); /* * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -static inline void - __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) -{ - unsigned long cfg; - - /* - * Wait for idle. - */ - if (unlikely(vector == NMI_VECTOR)) - safe_apic_wait_icr_idle(); - else - __xapic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - native_apic_mem_write(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector, dest); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - native_apic_mem_write(APIC_ICR, cfg); -} +void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); extern void default_send_IPI_single(int cpu, int vector); extern void default_send_IPI_single_phys(int cpu, int vector); diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 78162f8..d0afb05 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -1,7 +1,7 @@ #ifndef _ASM_IRQ_WORK_H #define _ASM_IRQ_WORK_H -#include <asm/processor.h> +#include <asm/cpufeature.h> static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 44adbb8..01c8b50 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -32,6 +32,7 @@ #include <asm/mtrr.h> #include <asm/msr-index.h> #include <asm/asm.h> +#include <asm/kvm_page_track.h> #define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 @@ -214,6 +215,14 @@ struct kvm_mmu_memory_cache { void *objects[KVM_NR_MEM_OBJS]; }; +/* + * the pages used as guest page table on soft mmu are tracked by + * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used + * by indirect shadow page can not be more than 15 bits. + * + * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access, + * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. + */ union kvm_mmu_page_role { unsigned word; struct { @@ -276,7 +285,7 @@ struct kvm_mmu_page { #endif /* Number of writes since the last time traversal visited this page. */ - int write_flooding_count; + atomic_t write_flooding_count; }; struct kvm_pio_request { @@ -338,12 +347,8 @@ struct kvm_mmu { struct rsvd_bits_validate guest_rsvd_check; - /* - * Bitmap: bit set = last pte in walk - * index[0:1]: level (zero-based) - * index[2]: pte.ps - */ - u8 last_pte_bitmap; + /* Can have large pages at levels 2..last_nonleaf_level-1. 
*/ + u8 last_nonleaf_level; bool nx; @@ -498,7 +503,6 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_header_cache; struct fpu guest_fpu; - bool eager_fpu; u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; @@ -644,12 +648,13 @@ struct kvm_vcpu_arch { }; struct kvm_lpage_info { - int write_count; + int disallow_lpage; }; struct kvm_arch_memory_slot { struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; + unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; }; /* @@ -694,6 +699,8 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; + struct kvm_page_track_notifier_node mmu_sp_tracker; + struct kvm_page_track_notifier_head track_notifier_head; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; @@ -754,6 +761,8 @@ struct kvm_arch { bool irqchip_split; u8 nr_reserved_ioapic_pins; + + bool disabled_lapic_found; }; struct kvm_vm_stat { @@ -988,6 +997,8 @@ void kvm_mmu_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); void kvm_mmu_setup(struct kvm_vcpu *vcpu); +void kvm_mmu_init_vm(struct kvm *kvm); +void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask); @@ -1127,8 +1138,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); -void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h new file mode 100644 index 0000000..c2b8d24 --- /dev/null +++ b/arch/x86/include/asm/kvm_page_track.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_KVM_PAGE_TRACK_H +#define _ASM_X86_KVM_PAGE_TRACK_H + +enum kvm_page_track_mode { + KVM_PAGE_TRACK_WRITE, + KVM_PAGE_TRACK_MAX, +}; + +/* + * The notifier represented by @kvm_page_track_notifier_node is linked into + * the head which will be notified when guest is triggering the track event. + * + * Write access on the head is protected by kvm->mmu_lock, read access + * is protected by track_srcu. + */ +struct kvm_page_track_notifier_head { + struct srcu_struct track_srcu; + struct hlist_head track_notifier_list; +}; + +struct kvm_page_track_notifier_node { + struct hlist_node node; + + /* + * It is called when guest is writing the write-tracked page + * and write emulation is finished at that time. + * + * @vcpu: the vcpu where the write access happened. + * @gpa: the physical address written by guest. + * @new: the data was written to the address. + * @bytes: the written length. 
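A compressed sketch of a consumer of the write-track notifier declared in this header (the registration helper is declared further down); the callback body and names are placeholders, not code from the patch:

static void demo_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
			     const u8 *new, int bytes)
{
	/* inspect the 'bytes' bytes at 'new' that were written to 'gpa' */
}

static struct kvm_page_track_notifier_node demo_node = {
	.track_write	= demo_track_write,
};

static void demo_attach(struct kvm *kvm)
{
	kvm_page_track_register_notifier(kvm, &demo_node);
}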
+ */ + void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes); +}; + +void kvm_page_track_init(struct kvm *kvm); + +void kvm_page_track_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont); +int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages); + +void kvm_slot_page_track_add_page(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode); +void kvm_slot_page_track_remove_page(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode); +bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode); + +void +kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void +kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes); +#endif diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c1adf33..bc62e7c 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -#ifdef CONFIG_DEBUG_RODATA #define KVM_HYPERCALL \ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) -#else -/* On AMD processors, vmcall will generate a trap that we will - * then rewrite to the appropriate instruction. - */ -#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" -#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index e795f52..7e68f95 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -25,7 +25,6 @@ #include <linux/module.h> #include <linux/ftrace.h> -#ifdef CONFIG_LIVEPATCH static inline int klp_check_compiler_support(void) { #ifndef CC_USING_FENTRY @@ -40,8 +39,5 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; } -#else -#error Include linux/livepatch.h, not asm/livepatch.h -#endif #endif /* _ASM_X86_LIVEPATCH_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 2ea4527..92b6f65 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -40,8 +40,20 @@ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ /* AMD-specific bits */ -#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */ +#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ +#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ + +/* + * McaX field if set indicates a given bank supports MCA extensions: + * - Deferred error interrupt type is specifiable by bank. + * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers, + * But should not be used to determine MSR numbers. + * - TCC bit is present in MCx_STATUS. 
+ */ +#define MCI_CONFIG_MCAX 0x1 +#define MCI_IPID_MCATYPE 0xFFFF0000 +#define MCI_IPID_HWID 0xFFF /* * Note that the full MCACOD field of IA32_MCi_STATUS MSR is @@ -91,6 +103,16 @@ #define MCE_LOG_LEN 32 #define MCE_LOG_SIGNATURE "MACHINECHECK" +/* AMD Scalable MCA */ +#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 +#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 +#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 +#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a +#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) + /* * This structure contains all data related to the MCE log. Also * carries a signature to make it easier to find from external @@ -113,6 +135,7 @@ struct mca_config { bool ignore_ce; bool disabled; bool ser; + bool recovery; bool bios_cmci_threshold; u8 banks; s8 bootlog; @@ -287,4 +310,49 @@ struct cper_sec_mem_err; extern void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err); +/* + * Enumerate new IP types and HWID values in AMD processors which support + * Scalable MCA. + */ +#ifdef CONFIG_X86_MCE_AMD +enum amd_ip_types { + SMCA_F17H_CORE = 0, /* Core errors */ + SMCA_DF, /* Data Fabric */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ + SMCA_SMU, /* System Management Unit */ + N_AMD_IP_TYPES +}; + +struct amd_hwid { + const char *name; + unsigned int hwid; +}; + +extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES]; + +enum amd_core_mca_blocks { + SMCA_LS = 0, /* Load Store */ + SMCA_IF, /* Instruction Fetch */ + SMCA_L2_CACHE, /* L2 cache */ + SMCA_DE, /* Decoder unit */ + RES, /* Reserved */ + SMCA_EX, /* Execution unit */ + SMCA_FP, /* Floating Point */ + SMCA_L3_CACHE, /* L3 cache */ + N_CORE_MCA_BLOCKS +}; + +extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS]; + +enum amd_df_mca_blocks { + SMCA_CS = 0, /* Coherent Slave */ + SMCA_PIE, /* Power management, Interrupts, etc */ + N_DF_BLOCKS +}; + +extern const char * const amd_df_mcablock_names[N_DF_BLOCKS]; +#endif + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 1e1b07a..9d3a96c 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -3,6 +3,7 @@ #include <asm/cpu.h> #include <linux/earlycpio.h> +#include <linux/initrd.h> #define native_rdmsr(msr, val1, val2) \ do { \ @@ -143,4 +144,29 @@ static inline void reload_early_microcode(void) { } static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } #endif + +static inline unsigned long get_initrd_start(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + return initrd_start; +#else + return 0; +#endif +} + +static inline unsigned long get_initrd_start_addr(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD +#ifdef CONFIG_X86_32 + unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); + + return (unsigned long)__pa_nodebug(*initrd_start_p); +#else + return get_initrd_start(); +#endif +#else /* CONFIG_BLK_DEV_INITRD */ + return 0; +#endif +} + #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 8559b01..603417f 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h 
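Given the SMCA MSR and McaX definitions above, a bank's MCAX support could be probed roughly as follows (the helper name is illustrative; rdmsrl() is the usual MSR accessor):

static bool bank_supports_mcax(unsigned int bank)
{
	u64 config;

	rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), config);
	return config & MCI_CONFIG_MCAX;
}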
@@ -40,7 +40,6 @@ struct extended_sigtable { #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) -#define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ (((struct microcode_intel *)mc)->hdr.datasize ? \ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 55234d5..1ea0bae 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -19,7 +19,8 @@ typedef struct { #endif struct mutex lock; - void __user *vdso; + void __user *vdso; /* vdso base address */ + const struct vdso_image *vdso_image; /* vdso image in use */ atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */ } mm_context_t; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b05402e..2da46ac 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1,7 +1,12 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H -/* CPU model specific register (MSR) numbers */ +/* + * CPU model specific register (MSR) numbers. + * + * Do not add new entries to this file unless the definitions are shared + * between multiple compilation units. + */ /* x86-64 specific MSRs */ #define MSR_EFER 0xc0000080 /* extended feature register */ @@ -230,10 +235,10 @@ #define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) /* IA32_HWP_CAPABILITIES */ -#define HWP_HIGHEST_PERF(x) (x & 0xff) -#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) -#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) -#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ #define HWP_MIN_PERF(x) (x & 0xff) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index c70689b..0deeb2d 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -3,6 +3,8 @@ #include <linux/sched.h> +#include <asm/cpufeature.h> + #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 4625943..9ab7507 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -20,6 +20,9 @@ struct pci_sysdata { #ifdef CONFIG_X86_64 void *iommu; /* IOMMU private data */ #endif +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN + void *fwnode; /* IRQ domain for MSI assignment */ +#endif }; extern int pci_routeirq; @@ -32,6 +35,7 @@ extern int noioapicreroute; static inline int pci_domain_nr(struct pci_bus *bus) { struct pci_sysdata *sd = bus->sysdata; + return sd->domain; } @@ -41,6 +45,17 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + + return sd->fwnode; +} + +#define pci_root_bus_fwnode _pci_root_bus_fwnode +#endif + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ @@ -105,9 +120,6 @@ void native_restore_msi_irqs(struct pci_dev *dev); #include <asm/pci_64.h> #endif -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include <asm-generic/pci-dma-compat.h> - /* generic pci 
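The HWP_*_PERF() changes above parenthesize the macro argument and normalize the shift-then-mask form; decoding IA32_HWP_CAPABILITIES then looks roughly like this (the MSR name follows msr-index.h; the printout is purely illustrative):

static void demo_dump_hwp_caps(void)
{
	u64 cap;

	rdmsrl(MSR_HWP_CAPABILITIES, cap);
	pr_info("HWP perf: highest=%llu guaranteed=%llu lowest=%llu\n",
		HWP_HIGHEST_PERF(cap), HWP_GUARANTEED_PERF(cap),
		HWP_LOWEST_PERF(cap));
}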
stuff */ #include <asm-generic/pci.h> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 7bcb861..5a2ed3e 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -165,6 +165,7 @@ struct x86_pmu_capability { #define GLOBAL_STATUS_ASIF BIT_ULL(60) #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) #define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) +#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) /* * IBS cpuid feature detection diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index c57fd1e..bf8b35d 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -137,6 +137,11 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) arch_wb_cache_pmem(addr, size); } +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + clflush_cache_range((void __force *) addr, size); +} + static inline bool __arch_has_wmb_pmem(void) { /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 20c11d1..983738a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -13,7 +13,7 @@ struct vm86; #include <asm/types.h> #include <uapi/asm/sigcontext.h> #include <asm/current.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> @@ -24,7 +24,6 @@ struct vm86; #include <asm/fpu/types.h> #include <linux/personality.h> -#include <linux/cpumask.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/math64.h> @@ -129,6 +128,8 @@ struct cpuinfo_x86 { u16 booted_cores; /* Physical processor id: */ u16 phys_proc_id; + /* Logical processor id: */ + u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; /* Compute unit id */ @@ -298,10 +299,13 @@ struct tss_struct { */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; +#ifdef CONFIG_X86_32 /* - * Space for the temporary SYSENTER stack: + * Space for the temporary SYSENTER stack. 
*/ + unsigned long SYSENTER_stack_canary; unsigned long SYSENTER_stack[64]; +#endif } ____cacheline_aligned; diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index a4a7728..9b9b30b1 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -7,12 +7,23 @@ void syscall_init(void); +#ifdef CONFIG_X86_64 void entry_SYSCALL_64(void); -void entry_SYSCALL_compat(void); +#endif + +#ifdef CONFIG_X86_32 void entry_INT80_32(void); -void entry_INT80_compat(void); void entry_SYSENTER_32(void); +void __begin_SYSENTER_singlestep_region(void); +void __end_SYSENTER_singlestep_region(void); +#endif + +#ifdef CONFIG_IA32_EMULATION void entry_SYSENTER_compat(void); +void __end_entry_SYSENTER_compat(void); +void entry_SYSCALL_compat(void); +void entry_INT80_compat(void); +#endif void x86_configure_nx(void); void x86_report_nx(void); diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 0a52424..13b6cdd 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -7,7 +7,7 @@ extern char __brk_base[], __brk_limit[]; extern struct exception_table_entry __stop___ex_table[]; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; #endif diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 89db467..452c88b 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -13,7 +13,6 @@ X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc); int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask); diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index ba665eb..db33330 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -15,7 +15,7 @@ #include <linux/stringify.h> #include <asm/nops.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> /* "Raw" instruction opcodes */ #define __ASM_CLAC .byte 0x0f,0x01,0xca diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index dfcf072..20a3de5 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -16,7 +16,6 @@ #endif #include <asm/thread_info.h> #include <asm/cpumask.h> -#include <asm/cpufeature.h> extern int smp_num_siblings; extern unsigned int num_processors; diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index ff8b9a1..ca6ba36 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -78,6 +78,19 @@ int strcmp(const char *cs, const char *ct); #define memset(s, c, n) __memset(s, c, n) #endif +/** + * memcpy_mcsafe - copy memory with indication if a machine check happened + * + * @dst: destination address + * @src: source address + * @cnt: number of bytes to copy + * + * Low level memory copy function that catches machine checks + * + * Return true for success, false for fail + */ +bool memcpy_mcsafe(void *dst, const void *src, size_t cnt); + #endif /* __KERNEL__ */ #endif /* _ASM_X86_STRING_64_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index c7b5510..8286669 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -49,7 +49,7 @@ */ #ifndef __ASSEMBLY__ struct task_struct; -#include <asm/processor.h> +#include 
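memcpy_mcsafe() above returns true on success and false if a machine check was taken during the copy; a minimal sketch of a caller (the function name and the -EIO policy are illustrative):

static int demo_copy_mc(void *dst, const void *src, size_t len)
{
	if (!memcpy_mcsafe(dst, src, len))
		return -EIO;	/* machine check hit mid-copy */
	return 0;
}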
<asm/cpufeature.h> #include <linux/atomic.h> struct thread_info { @@ -134,10 +134,13 @@ struct thread_info { #define _TIF_ADDR32 (1 << TIF_ADDR32) #define _TIF_X32 (1 << TIF_X32) -/* work to do in syscall_trace_enter() */ +/* + * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for + * enter_from_user_mode() + */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) /* work to do on any return to user space */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6df2029..c24b422 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -5,8 +5,57 @@ #include <linux/sched.h> #include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/special_insns.h> +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + struct { u64 d[2]; } desc = { { pcid, addr } }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + * + * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and + * invpcid (%rcx), %rax in long mode. + */ + asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" + : : "m" (desc), "a" (type), "c" (&desc) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -104,6 +153,15 @@ static inline void __native_flush_tlb_global(void) { unsigned long flags; + if (static_cpu_has(X86_FEATURE_INVPCID)) { + /* + * Using INVPCID is considerably faster than a pair of writes + * to CR4 sandwiched inside an IRQ flag save/restore. + */ + invpcid_flush_all(); + return; + } + /* * Read-modify-write to CR4 - protect it from preemption and * from interrupts. 
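The invpcid_flush_*() helpers map one-to-one onto the INVPCID_TYPE_* values, and the hunk below uses invpcid_flush_all() exactly this way; a hedged sketch of the pattern, with a made-up wrapper name:

static void demo_flush_tlb_all(void)
{
	if (static_cpu_has(X86_FEATURE_INVPCID))
		invpcid_flush_all();	/* type 2: all mappings, including globals */
	else
		__flush_tlb_global();	/* legacy CR4.PGE toggle */
}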
(Use the raw variant because this code can diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0fb4648..7f991bd5 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -119,12 +119,23 @@ static inline void setup_node_to_cpumask_map(void) { } extern const struct cpumask *cpu_coregroup_mask(int cpu); +#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #ifdef ENABLE_TOPO_DEFINES #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) + +extern unsigned int __max_logical_packages; +#define topology_max_packages() (__max_logical_packages) +int topology_update_package_map(unsigned int apicid, unsigned int cpu); +extern int topology_phys_to_logical_pkg(unsigned int pkg); +#else +#define topology_max_packages() (1) +static inline int +topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } +static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 6d7c547..174c421 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -29,6 +29,8 @@ static inline cycles_t get_cycles(void) return rdtsc(); } +extern struct system_counterval_t convert_art_to_tsc(cycle_t art); + extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a4a30e4..c0f27d7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -90,12 +90,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un likely(!__range_not_ok(addr, size, user_addr_max())) /* - * The exception table consists of pairs of addresses relative to the - * exception table enty itself: the first is the address of an - * instruction that is allowed to fault, and the second is the address - * at which the program should continue. No registers are modified, - * so it is entirely up to the continuation code to figure out what to - * do. + * The exception table consists of triples of addresses relative to the + * exception table entry itself. The first address is of an instruction + * that is allowed to fault, the second is the target at which the program + * should continue. The third is a handler function to deal with the fault + * caused by the instruction in the first field. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. 
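topology_logical_package_id() yields a dense id suitable for indexing per-package data, unlike the sparse physical id; a small sketch with illustrative names and a fixed array size:

struct demo_pkg_state;				/* illustrative per-package data */
static struct demo_pkg_state *demo_pkg[64];	/* >= topology_max_packages() */

static struct demo_pkg_state *demo_get_pkg(int cpu)
{
	return demo_pkg[topology_logical_package_id(cpu)];
}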
This means when everything is well, @@ -104,13 +103,14 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un */ struct exception_table_entry { - int insn, fixup; + int insn, fixup, handler; }; /* This is not the generic standard exception_table_entry format */ #define ARCH_HAS_SORT_EXTABLE #define ARCH_HAS_SEARCH_EXTABLE -extern int fixup_exception(struct pt_regs *regs); +extern int fixup_exception(struct pt_regs *regs, int trapnr); +extern bool ex_has_fault_handler(unsigned long ip); extern int early_fixup_exception(unsigned long *ip); /* diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index b89c34c..3076986 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -8,7 +8,7 @@ #include <linux/errno.h> #include <linux/lockdep.h> #include <asm/alternative.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/page.h> /* diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index deabaf9..43dc55b 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -13,9 +13,6 @@ struct vdso_image { void *data; unsigned long size; /* Always a multiple of PAGE_SIZE */ - /* text_mapping.pages is big enough for data/size page pointers */ - struct vm_special_mapping text_mapping; - unsigned long alt, alt_len; long sym_vvar_start; /* Negative offset to the vvar area */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index f556c48..e728699 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -37,6 +37,12 @@ struct vsyscall_gtod_data { }; extern struct vsyscall_gtod_data vsyscall_gtod_data; +extern int vclocks_used; +static inline bool vclock_was_used(int vclock) +{ + return READ_ONCE(vclocks_used) & (1 << vclock); +} + static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) { unsigned ret; diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 7956412..9b1a918 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -226,7 +226,9 @@ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) /* Declare the various hypercall operations. */ -#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_POST_MESSAGE 0x005c +#define HVCALL_SIGNAL_EVENT 0x005d #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index d485232..62d4111 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -256,7 +256,7 @@ struct sigcontext_64 { __u16 cs; __u16 gs; __u16 fs; - __u16 __pad0; + __u16 ss; __u64 err; __u64 trapno; __u64 oldmask; @@ -341,9 +341,37 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; + + /* + * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), + * Linux saved and restored fs and gs in these slots. This + * was counterproductive, as fsbase and gsbase were never + * saved, so arch_prctl was presumably unreliable. + * + * These slots should never be reused without extreme caution: + * + * - Some DOSEMU versions stash fs and gs in these slots manually, + * thus overwriting anything the kernel expects to be preserved + * in these slots. 
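With insn, fixup and handler all stored relative to the entry itself, consumers recover absolute addresses by adding the field's own location; roughly, as a sketch matching the description above:

static inline unsigned long demo_ex_fixup_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}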
+ * + * - If these slots are ever needed for any other purpose, + * there is some risk that very old 64-bit binaries could get + * confused. I doubt that many such binaries still work, + * though, since the same patch in 2.5.64 also removed the + * 64-bit set_thread_area syscall, so it appears that there + * is no TLS API beyond modify_ldt that works in both pre- + * and post-2.5.64 kernels. + * + * If the kernel ever adds explicit fs, gs, fsbase, and gsbase + * save/restore, it will most likely need to be opt-in and use + * different context slots. + */ __u16 gs; __u16 fs; - __u16 __pad0; + union { + __u16 ss; /* If UC_SIGCONTEXT_SS */ + __u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */ + }; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h index b7c29c8..e3d1ec9 100644 --- a/arch/x86/include/uapi/asm/ucontext.h +++ b/arch/x86/include/uapi/asm/ucontext.h @@ -1,11 +1,54 @@ #ifndef _ASM_X86_UCONTEXT_H #define _ASM_X86_UCONTEXT_H -#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state - * information in the memory layout pointed - * by the fpstate pointer in the ucontext's - * sigcontext struct (uc_mcontext). - */ +/* + * Indicates the presence of extended state information in the memory + * layout pointed by the fpstate pointer in the ucontext's sigcontext + * struct (uc_mcontext). + */ +#define UC_FP_XSTATE 0x1 + +#ifdef __x86_64__ +/* + * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on + * kernels that save SS in the sigcontext. All kernels that set + * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp + * regardless of SS (i.e. they implement espfix). + * + * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS + * when delivering a signal that came from 64-bit code. + * + * Sigreturn restores SS as follows: + * + * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || + * saved CS is not 64-bit) + * new SS = saved SS (will fail IRET and signal if invalid) + * else + * new SS = a flat 32-bit data segment + * + * This behavior serves three purposes: + * + * - Legacy programs that construct a 64-bit sigcontext from scratch + * with zero or garbage in the SS slot (e.g. old CRIU) and call + * sigreturn will still work. + * + * - Old DOSEMU versions sometimes catch a signal from a segmented + * context, delete the old SS segment (with modify_ldt), and change + * the saved CS to a 64-bit segment. These DOSEMU versions expect + * sigreturn to send them back to 64-bit mode without killing them, + * despite the fact that the SS selector when the signal was raised is + * no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel + * will fix up SS for these DOSEMU versions. + * + * - Old and new programs that catch a signal and return without + * modifying the saved context will end up in exactly the state they + * started in, even if they were running in a segmented context when + * the signal was raised.. Old kernels would lose track of the + * previous SS value. 
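From user space, a handler can tell which of these behaviours applies by checking uc_flags against the flags defined just below; a minimal hypothetical sketch (assumes the uapi UC_* values are visible to the program):

#include <signal.h>
#include <ucontext.h>

static void demo_handler(int sig, siginfo_t *si, void *ctx_void)
{
	ucontext_t *ctx = ctx_void;

	if (ctx->uc_flags & UC_SIGCONTEXT_SS) {
		/* the kernel saved SS in the sigcontext (new behaviour) */
	}
	if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
		/* signal came from 64-bit code; sigreturn restores SS verbatim */
	}
}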
+ */ +#define UC_SIGCONTEXT_SS 0x2 +#define UC_STRICT_RESTORE_SS 0x4 +#endif #include <asm-generic/ucontext.h> diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 6e85f71..0a2bb1f 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -227,19 +227,11 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp) return 0; } -static int gart_fix_e820 __initdata = 1; +static bool gart_fix_e820 __initdata = true; static int __init parse_gart_mem(char *p) { - if (!p) - return -EINVAL; - - if (!strncmp(p, "off", 3)) - gart_fix_e820 = 0; - else if (!strncmp(p, "on", 2)) - gart_fix_e820 = 1; - - return 0; + return kstrtobool(p, &gart_fix_e820); } early_param("gart_fix_e820", parse_gart_mem); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8a5cdda..531b961 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2078,6 +2078,20 @@ int generic_processor_info(int apicid, int version) cpu = cpumask_next_zero(-1, cpu_present_mask); /* + * This can happen on physical hotplug. The sanity check at boot time + * is done from native_smp_prepare_cpus() after num_possible_cpus() is + * established. + */ + if (topology_update_package_map(apicid, cpu) < 0) { + int thiscpu = max + disabled_cpus; + + pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n", + thiscpu, apicid); + disabled_cpus++; + return -ENOSPC; + } + + /* * Validate version */ if (version == 0x0) { diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 9968f30..76f89e2 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -53,7 +53,7 @@ void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void _flat_send_IPI_mask(unsigned long mask, int vector) +static void _flat_send_IPI_mask(unsigned long mask, int vector) { unsigned long flags; diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c80c02c..ab5c2c6 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) unsigned long value; unsigned int id = (x >> 24) & 0xff; - if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { rdmsrl(MSR_FAM10H_NODE_ID, value); id |= (value << 2) & 0xff00; } @@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) this_cpu_write(cpu_llc_id, node); /* Account for nodes per socket in multi-core-module processors */ - if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { rdmsrl(MSR_FAM10H_NODE_ID, val); nodes = ((val >> 3) & 7) + 1; } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index eb45fc9..28bde88 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -18,6 +18,66 @@ #include <asm/proto.h> #include <asm/ipi.h> +void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) +{ + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. 
+ */ + __xapic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); +} + +/* + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). + */ +void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) +{ + unsigned long cfg; + + /* + * Wait for idle. + */ + if (unlikely(vector == NMI_VECTOR)) + safe_apic_wait_icr_idle(); + else + __xapic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + native_apic_mem_write(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); +} + void default_send_IPI_single_phys(int cpu, int vector) { unsigned long flags; diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 84a7524..5c04246 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -59,7 +59,6 @@ void common(void) { #ifdef CONFIG_PARAVIRT BLANK(); - OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6ce3902..ecdc1d2 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -7,7 +7,7 @@ #include <linux/lguest.h> #include "../../../drivers/lguest/lg.h" -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls[] = { #include <asm/syscalls_32.h> }; @@ -52,6 +52,11 @@ void foo(void) DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - offsetofend(struct tss_struct, SYSENTER_stack)); + /* Offset from cpu_tss to SYSENTER_stack */ + OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); + /* Size of SYSENTER_stack */ + DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f2edafb..d875f97 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -4,17 +4,11 @@ #include <asm/ia32.h> -#define __SYSCALL_64(nr, sym, compat) [nr] = 1, -#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, -#ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_X32(nr, sym, compat) [nr] = 1, -#else -# define __SYSCALL_X32(nr, sym, compat) /* nothing */ -#endif +#define __SYSCALL_64(nr, sym, qual) [nr] = 1, static char syscalls_64[] = { #include <asm/syscalls_64.h> }; -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls_ia32[] = { #include <asm/syscalls_32.h> }; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 5803130..0d373d7 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o - -ifdef CONFIG_PERF_EVENTS -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o -ifdef CONFIG_AMD_IOMMU -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o -endif -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o - -obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ - perf_event_intel_uncore_snb.o \ - perf_event_intel_uncore_snbep.o \ - perf_event_intel_uncore_nhmex.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o -endif - - obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o @@ -64,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ -cpufeature = $(src)/../../include/asm/cpufeature.h +cpufeature = $(src)/../../include/asm/cpufeatures.h targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a07956a..97c59fd 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) void (*f_vide)(void); u64 d, d2; - printk(KERN_INFO "AMD K6 stepping B detected - "); + pr_info("AMD K6 stepping B detected - "); /* * It looks like AMD fixed the 2.6.2 bug and improved indirect @@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c) d = d2-d; if (d > 20*K6_BUG_LOOP) - printk(KERN_CONT - "system stability may be impaired when more than 32 MB are used.\n"); + pr_cont("system stability may be impaired when more than 32 MB are used.\n"); else - printk(KERN_CONT "probably OK (after B9730xxxx).\n"); + pr_cont("probably OK (after B9730xxxx).\n"); } /* K6 with old style WHCR */ @@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + pr_info("Enabling old style K6 write allocation for %d Mb\n", mbytes); } return; @@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + pr_info("Enabling new style K6 write allocation for %d Mb\n", mbytes); } @@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) */ if (c->x86_model >= 6 && c->x86_model <= 10) { if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + pr_info("Enabling disabled K7/SSE Support.\n"); msr_clear_bit(MSR_K7_HWCR, 15); set_cpu_cap(c, X86_FEATURE_XMM); } @@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { - printk(KERN_INFO - "CPU: CLK_CTL MSR was %x. 
Reprogramming to %x\n", - l, ((l & 0x000fffff)|0x20000000)); + pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", + l, ((l & 0x000fffff)|0x20000000)); wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); } } @@ -485,7 +483,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { unsigned long pfn = tseg >> PAGE_SHIFT; - printk(KERN_DEBUG "tseg: %010llx\n", tseg); + pr_debug("tseg: %010llx\n", tseg); if (pfn_range_is_mapped(pfn, pfn + 1)) set_memory_4k((unsigned long)__va(tseg), 1); } @@ -500,8 +498,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) rdmsrl(MSR_K7_HWCR, val); if (!(val & BIT(24))) - printk(KERN_WARNING FW_BUG "TSC doesn't count " - "with P0 frequency!\n"); + pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n"); } } diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 04f0fe5..a972ac4 100644 --- a/arch/x86/kernel/cpu/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c @@ -15,7 +15,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #if !defined(CONFIG_SMP) - printk(KERN_INFO "CPU: "); + pr_info("CPU: "); print_cpu_info(&boot_cpu_data); #endif alternative_instructions(); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index ae20be6..1661d8e 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,7 +1,7 @@ #include <linux/bitops.h> #include <linux/kernel.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/e820.h> #include <asm/mtrr.h> #include <asm/msr.h> @@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c) rdmsr(MSR_VIA_FCR, lo, hi); lo |= ACE_FCR; /* enable ACE unit */ wrmsr(MSR_VIA_FCR, lo, hi); - printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); + pr_info("CPU: Enabled ACE h/w crypto\n"); } /* enable RNG unit, if present and disabled */ @@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c) rdmsr(MSR_VIA_RNG, lo, hi); lo |= RNG_ENABLE; /* enable RNG unit */ wrmsr(MSR_VIA_RNG, lo, hi); - printk(KERN_INFO "CPU: Enabled h/w RNG\n"); + pr_info("CPU: Enabled h/w RNG\n"); } /* store Centaur Extended Feature Flags as @@ -130,7 +130,7 @@ static void init_centaur(struct cpuinfo_x86 *c) name = "C6"; fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; fcr_clr = DPDC; - printk(KERN_NOTICE "Disabling bugged TSC.\n"); + pr_notice("Disabling bugged TSC.\n"); clear_cpu_cap(c, X86_FEATURE_TSC); break; case 8: @@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c) newlo = (lo|fcr_set) & (~fcr_clr); if (newlo != lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", + pr_info("Centaur FCR was 0x%X now 0x%X\n", lo, newlo); wrmsr(MSR_IDT_FCR1, newlo, hi); } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); + pr_info("Centaur FCR is 0x%X\n", lo); } /* Emulate MTRRs using Centaur's MCR. 
*/ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37830de..249461f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s) } __setup("nompx", x86_mpx_setup); +static int __init x86_noinvpcid_setup(char *s) +{ + /* noinvpcid doesn't accept parameters */ + if (s) + return -EINVAL; + + /* do not emit a message if the feature is not present */ + if (!boot_cpu_has(X86_FEATURE_INVPCID)) + return 0; + + setup_clear_cpu_cap(X86_FEATURE_INVPCID); + pr_info("noinvpcid: INVPCID feature disabled\n"); + return 0; +} +early_param("noinvpcid", x86_noinvpcid_setup); + #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; @@ -228,7 +244,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) lo |= 0x200000; wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); + pr_notice("CPU serial number disabled.\n"); clear_cpu_cap(c, X86_FEATURE_PN); /* Disabling the serial number may affect the cpuid level */ @@ -329,9 +345,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) if (!warn) continue; - printk(KERN_WARNING - "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", - x86_cap_flag(df->feature), df->level); + pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", + x86_cap_flag(df->feature), df->level); } } @@ -510,7 +525,7 @@ void detect_ht(struct cpuinfo_x86 *c) smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { - printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); + pr_info_once("CPU0: Hyper-Threading is disabled\n"); goto out; } @@ -531,10 +546,10 @@ void detect_ht(struct cpuinfo_x86 *c) out: if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); + pr_info("CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + pr_info("CPU: Processor Core ID: %d\n", + c->cpu_core_id); printed = 1; } #endif @@ -559,9 +574,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c) } } - printk_once(KERN_ERR - "CPU: vendor_id '%s' unknown, using generic init.\n" \ - "CPU: Your system may be unstable.\n", v); + pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \ + "CPU: Your system may be unstable.\n", v); c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; @@ -760,7 +774,7 @@ void __init early_cpu_init(void) int count = 0; #ifdef CONFIG_PROCESSOR_SELECT - printk(KERN_INFO "KERNEL supported cpus:\n"); + pr_info("KERNEL supported cpus:\n"); #endif for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { @@ -778,7 +792,7 @@ void __init early_cpu_init(void) for (j = 0; j < 2; j++) { if (!cpudev->c_ident[j]) continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + pr_info(" %s %s\n", cpudev->c_vendor, cpudev->c_ident[j]); } } @@ -803,6 +817,31 @@ static void detect_nopl(struct cpuinfo_x86 *c) #else set_cpu_cap(c, X86_FEATURE_NOPL); #endif + + /* + * ESPFIX is a strange bug. All real CPUs have it. Paravirt + * systems that run Linux at CPL > 0 may or may not have the + * issue, but, even if they have the issue, there's absolutely + * nothing we can do about it because we can't use the real IRET + * instruction. + * + * NB: For the time being, only 32-bit kernels support + * X86_BUG_ESPFIX as such. 
64-bit kernels directly choose + * whether to apply espfix using paravirt hooks. If any + * non-paravirt system ever shows up that does *not* have the + * ESPFIX issue, we can change this. + */ +#ifdef CONFIG_X86_32 +#ifdef CONFIG_PARAVIRT + do { + extern void native_iret(void); + if (pv_cpu_ops.iret == native_iret) + set_cpu_bug(c, X86_BUG_ESPFIX); + } while (0); +#else + set_cpu_bug(c, X86_BUG_ESPFIX); +#endif +#endif } static void generic_identify(struct cpuinfo_x86 *c) @@ -977,6 +1016,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif + /* The boot/hotplug time assigment got cleared, restore it */ + c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); } /* @@ -1061,7 +1102,7 @@ static void __print_cpu_msr(void) for (index = index_min; index < index_max; index++) { if (rdmsrl_safe(index, &val)) continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + pr_info(" MSR%08x: %016llx\n", index, val); } } } @@ -1100,19 +1141,19 @@ void print_cpu_info(struct cpuinfo_x86 *c) } if (vendor && !strstr(c->x86_model_id, vendor)) - printk(KERN_CONT "%s ", vendor); + pr_cont("%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + pr_cont("%s", c->x86_model_id); else - printk(KERN_CONT "%d86", c->x86); + pr_cont("%d86", c->x86); - printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); + pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); + pr_cont(", stepping: 0x%x)\n", c->x86_mask); else - printk(KERN_CONT ")\n"); + pr_cont(")\n"); print_cpu_msr(c); } @@ -1438,7 +1479,7 @@ void cpu_init(void) show_ucode_info_early(); - printk(KERN_INFO "Initializing CPU#%d\n", cpu); + pr_info("Initializing CPU#%d\n", cpu); if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || @@ -1475,20 +1516,6 @@ void cpu_init(void) } #endif -#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS -void warn_pre_alternatives(void) -{ - WARN(1, "You're using static_cpu_has before alternatives have run!\n"); -} -EXPORT_SYMBOL_GPL(warn_pre_alternatives); -#endif - -inline bool __static_cpu_has_safe(u16 bit) -{ - return boot_cpu_has(bit); -} -EXPORT_SYMBOL_GPL(__static_cpu_has_safe); - static void bsp_resume(void) { if (this_cpu->c_bsp_resume) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index aaf152e..6adef9c 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -8,6 +8,7 @@ #include <linux/timer.h> #include <asm/pci-direct.h> #include <asm/tsc.h> +#include <asm/cpufeature.h> #include "cpu.h" @@ -103,7 +104,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c) local_irq_restore(flags); if (ccr5 & 2) { /* possible wrong calibration done */ - printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); + pr_info("Recalibrating delay loop with SLOP bit reset\n"); calibrate_delay(); c->loops_per_jiffy = loops_per_jiffy; } @@ -115,7 +116,7 @@ static void set_cx86_reorder(void) { u8 ccr3; - printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); + pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n"); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ @@ -128,7 +129,7 @@ static void set_cx86_reorder(void) static void set_cx86_memwb(void) { - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); + pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 
bit 2: unlock NW bit */ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); @@ -268,7 +269,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) * VSA1 we work around however. */ - printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); + pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n"); isa_dma_bridge_buggy = 2; /* We do this before the PCI layer is running. However we @@ -426,7 +427,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c) if (dir0 == 5 || dir0 == 3) { unsigned char ccr3; unsigned long flags; - printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); + pr_info("Enabling CPUID on Cyrix processor.\n"); local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); /* enable MAPEN */ diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index d820d8e..73d391a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -56,7 +56,7 @@ detect_hypervisor_vendor(void) } if (max_pri) - printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name); + pr_info("Hypervisor detected: %s\n", x86_hyper->name); } void init_hypervisor(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 565648b..1f7fdb9 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -8,7 +8,7 @@ #include <linux/module.h> #include <linux/uaccess.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/pgtable.h> #include <asm/msr.h> #include <asm/bugs.h> @@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && c->microcode < 0x20e) { - printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); + pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); } @@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { - printk(KERN_INFO "Disabled fast string operations\n"); + pr_info("Disabled fast string operations\n"); setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); setup_clear_cpu_cap(X86_FEATURE_ERMS); } @@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) pr_info("Disabling PGE capability bit\n"); setup_clear_cpu_cap(X86_FEATURE_PGE); } + + if (c->cpuid_level >= 0x00000001) { + u32 eax, ebx, ecx, edx; + + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + /* + * If HTT (EDX[28]) is set EBX[16:23] contain the number of + * apicids which are reserved per package. Store the resulting + * shift value for the package management code. + */ + if (edx & (1U << 28)) + c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); + } } #ifdef CONFIG_X86_32 @@ -176,7 +189,7 @@ int ppro_with_ram_bug(void) boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + pr_info("Pentium Pro with Errata#50 detected. 
Taking evasive action.\n"); return 1; } return 0; @@ -225,7 +238,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_F00F); if (!f00f_workaround_enabled) { - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } } @@ -244,7 +257,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Forcefully enable PAE if kernel parameter "forcepae" is present. */ if (forcepae) { - printk(KERN_WARNING "PAE forced!\n"); + pr_warn("PAE forced!\n"); set_cpu_cap(c, X86_FEATURE_PAE); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0b6c523..de6626c 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -14,7 +14,7 @@ #include <linux/sysfs.h> #include <linux/pci.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/amd_nb.h> #include <asm/smp.h> @@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf, err = amd_set_l3_disable_slot(nb, cpu, slot, val); if (err) { if (err == -EEXIST) - pr_warning("L3 slot %d in use/index already disabled!\n", + pr_warn("L3 slot %d in use/index already disabled!\n", slot); return err; } diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index afa9f0d..fbb5e90 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -1,5 +1,5 @@ #include <asm/cpu_device_id.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/slab.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 4cfba43..517619e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -115,7 +115,7 @@ static int raise_local(void) int cpu = m->extcpu; if (m->inject_flags & MCJ_EXCEPTION) { - printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); + pr_info("Triggering MCE exception on CPU %d\n", cpu); switch (context) { case MCJ_CTX_IRQ: /* @@ -128,15 +128,15 @@ static int raise_local(void) raise_exception(m, NULL); break; default: - printk(KERN_INFO "Invalid MCE context\n"); + pr_info("Invalid MCE context\n"); ret = -EINVAL; } - printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); + pr_info("MCE exception done on CPU %d\n", cpu); } else if (m->status) { - printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); + pr_info("Starting machine check poll CPU %d\n", cpu); raise_poll(m); mce_notify_irq(); - printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); + pr_info("Machine check poll done on CPU %d\n", cpu); } else m->finished = 0; @@ -183,8 +183,7 @@ static void raise_mce(struct mce *m) start = jiffies; while (!cpumask_empty(mce_inject_cpumask)) { if (!time_before(jiffies, start + 2*HZ)) { - printk(KERN_ERR - "Timeout waiting for mce inject %lx\n", + pr_err("Timeout waiting for mce inject %lx\n", *cpumask_bits(mce_inject_cpumask)); break; } @@ -241,7 +240,7 @@ static int inject_init(void) { if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) return -ENOMEM; - printk(KERN_INFO "Machine check injector initialized\n"); + pr_info("Machine check injector initialized\n"); register_mce_write_callback(mce_write); register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify"); diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 9c682c2..5119766 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/debugfs.h> #include <asm/mce.h> +#include <asm/uaccess.h> #include "mce-internal.h" @@ -29,7 +30,7 @@ * panic situations) */ -enum context { IN_KERNEL = 1, IN_USER = 2 }; +enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 }; enum ser { SER_REQUIRED = 1, NO_SER = 2 }; enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; @@ -48,6 +49,7 @@ static struct severity { #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } #define KERNEL .context = IN_KERNEL #define USER .context = IN_USER +#define KERNEL_RECOV .context = IN_KERNEL_RECOV #define SER .ser = SER_REQUIRED #define NOSER .ser = NO_SER #define EXCP .excp = EXCP_CONTEXT @@ -87,6 +89,10 @@ static struct severity { EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) ), MCESEV( + PANIC, "In kernel and no restart IP", + EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0) + ), + MCESEV( DEFERRED, "Deferred error", NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED) ), @@ -123,6 +129,11 @@ static struct severity { MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV) ), MCESEV( + AR, "Action required: data load in error recoverable area of kernel", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), + KERNEL_RECOV + ), + MCESEV( AR, "Action required: data load error in a user process", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), USER @@ -170,6 +181,9 @@ static struct severity { ) /* always matches. keep at end */ }; +#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \ + (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) + /* * If mcgstatus indicated that ip/cs on the stack were * no good, then "m->cs" will be zero and we will have @@ -183,7 +197,11 @@ static struct severity { */ static int error_context(struct mce *m) { - return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; + if ((m->cs & 3) == 3) + return IN_USER; + if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) + return IN_KERNEL_RECOV; + return IN_KERNEL; } /* diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a006f4c..f0c921b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear) } } +static int do_memory_failure(struct mce *m) +{ + int flags = MF_ACTION_REQUIRED; + int ret; + + pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr); + if (!(m->mcgstatus & MCG_STATUS_RIPV)) + flags |= MF_MUST_KILL; + ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags); + if (ret) + pr_err("Memory error not recovered"); + return ret; +} + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. @@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); char *msg = "Unknown"; - u64 recover_paddr = ~0ull; - int flags = MF_ACTION_REQUIRED; int lmce = 0; /* If this CPU is offline, just bail out. */ @@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code) } /* - * At insane "tolerant" levels we take no action. 
Otherwise - * we only die if we have no other choice. For less serious - * issues we try to recover, or limit damage to the current - * process. + * If tolerant is at an insane level we drop requests to kill + * processes and continue even when there is no way out. */ - if (cfg->tolerant < 3) { - if (no_way_out) - mce_panic("Fatal machine check on current CPU", &m, msg); - if (worst == MCE_AR_SEVERITY) { - recover_paddr = m.addr; - if (!(m.mcgstatus & MCG_STATUS_RIPV)) - flags |= MF_MUST_KILL; - } else if (kill_it) { - force_sig(SIGBUS, current); - } - } + if (cfg->tolerant == 3) + kill_it = 0; + else if (no_way_out) + mce_panic("Fatal machine check on current CPU", &m, msg); if (worst > 0) mce_report_event(regs); @@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code) out: sync_core(); - if (recover_paddr == ~0ull) - goto done; + if (worst != MCE_AR_SEVERITY && !kill_it) + goto out_ist; - pr_err("Uncorrected hardware memory error in user-access at %llx", - recover_paddr); - /* - * We must call memory_failure() here even if the current process is - * doomed. We still need to mark the page as poisoned and alert any - * other users of the page. - */ - ist_begin_non_atomic(regs); - local_irq_enable(); - if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { - pr_err("Memory error not recovered"); - force_sig(SIGBUS, current); + /* Fault was in user mode and we need to take some action */ + if ((m.cs & 3) == 3) { + ist_begin_non_atomic(regs); + local_irq_enable(); + + if (kill_it || do_memory_failure(&m)) + force_sig(SIGBUS, current); + local_irq_disable(); + ist_end_non_atomic(); + } else { + if (!fixup_exception(regs, X86_TRAP_MC)) + mce_panic("Failed kernel mode recovery", &m, NULL); } - local_irq_disable(); - ist_end_non_atomic(); -done: + +out_ist: ist_exit(regs); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1576,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model == 45) quirk_no_way_out = quirk_sandybridge_ifu; + /* + * MCG_CAP.MCG_SER_P is necessary but not sufficient to know + * whether this processor will actually generate recoverable + * machine checks. Check to see if this is an E7 model Xeon. + * We can't do a model number check because E5 and E7 use the + * same model number. E5 doesn't support recovery, E7 does. + */ + if (mca_cfg.recovery || (mca_cfg.ser && + !strncmp(c->x86_model_id, + "Intel(R) Xeon(R) CPU E7-", 24))) + set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY); } if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; @@ -1617,10 +1630,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) case X86_VENDOR_AMD: { u32 ebx = cpuid_ebx(0x80000007); - mce_amd_feature_init(c); mce_flags.overflow_recov = !!(ebx & BIT(0)); mce_flags.succor = !!(ebx & BIT(1)); mce_flags.smca = !!(ebx & BIT(3)); + mce_amd_feature_init(c); break; } @@ -2028,6 +2041,8 @@ static int __init mcheck_enable(char *str) cfg->bootlog = (str[0] == 'b'); else if (!strcmp(str, "bios_cmci_threshold")) cfg->bios_cmci_threshold = true; + else if (!strcmp(str, "recovery")) + cfg->recovery = true; else if (isdigit(str[0])) { if (get_option(&str, &cfg->tolerant) == 2) get_option(&str, &(cfg->monarch_timeout)); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e99b150..9d656fd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1,5 +1,5 @@ /* - * (c) 2005-2015 Advanced Micro Devices, Inc. 
+ * (c) 2005-2016 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html @@ -28,7 +28,7 @@ #include <asm/msr.h> #include <asm/trace/irq_vectors.h> -#define NR_BLOCKS 9 +#define NR_BLOCKS 5 #define THRESHOLD_MAX 0xFFF #define INT_TYPE_APIC 0x00020000 #define MASK_VALID_HI 0x80000000 @@ -49,6 +49,19 @@ #define DEF_LVT_OFF 0x2 #define DEF_INT_TYPE_APIC 0x2 +/* Scalable MCA: */ + +/* Threshold LVT offset is at MSR0xC0000410[15:12] */ +#define SMCA_THR_LVT_OFF 0xF000 + +/* + * OS is required to set the MCAX bit to acknowledge that it is now using the + * new MSR ranges and new registers under each bank. It also means that the OS + * will configure deferred errors in the new MCx_CONFIG register. If the bit is + * not set, uncorrectable errors will cause a system panic. + */ +#define SMCA_MCAX_EN_OFF 0x1 + static const char * const th_names[] = { "load_store", "insn_fetch", @@ -58,6 +71,35 @@ static const char * const th_names[] = { "execution_unit", }; +/* Define HWID to IP type mappings for Scalable MCA */ +struct amd_hwid amd_hwids[] = { + [SMCA_F17H_CORE] = { "f17h_core", 0xB0 }, + [SMCA_DF] = { "data_fabric", 0x2E }, + [SMCA_UMC] = { "umc", 0x96 }, + [SMCA_PB] = { "param_block", 0x5 }, + [SMCA_PSP] = { "psp", 0xFF }, + [SMCA_SMU] = { "smu", 0x1 }, +}; +EXPORT_SYMBOL_GPL(amd_hwids); + +const char * const amd_core_mcablock_names[] = { + [SMCA_LS] = "load_store", + [SMCA_IF] = "insn_fetch", + [SMCA_L2_CACHE] = "l2_cache", + [SMCA_DE] = "decode_unit", + [RES] = "", + [SMCA_EX] = "execution_unit", + [SMCA_FP] = "floating_point", + [SMCA_L3_CACHE] = "l3_cache", +}; +EXPORT_SYMBOL_GPL(amd_core_mcablock_names); + +const char * const amd_df_mcablock_names[] = { + [SMCA_CS] = "coherent_slave", + [SMCA_PIE] = "pie", +}; +EXPORT_SYMBOL_GPL(amd_df_mcablock_names); + static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ @@ -84,6 +126,13 @@ struct thresh_restart { static inline bool is_shared_bank(int bank) { + /* + * Scalable MCA provides for only one core to have access to the MSRs of + * a shared bank. + */ + if (mce_flags.smca) + return false; + /* Bank 4 is for northbridge reporting and is thus shared */ return (bank == 4); } @@ -135,6 +184,14 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) } if (apic != msr) { + /* + * On SMCA CPUs, LVT offset is programmed at a different MSR, and + * the BIOS provides the value. The original field where LVT offset + * was set is reserved. Return early here: + */ + if (mce_flags.smca) + return 0; + pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, apic, b->bank, b->block, b->address, hi, lo); @@ -144,10 +201,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) return 1; }; -/* - * Called via smp_call_function_single(), must be called with correct - * cpu affinity. - */ +/* Reprogram MCx_MISC MSR behind this threshold bank. 
*/ static void threshold_restart_bank(void *_tr) { struct thresh_restart *tr = _tr; @@ -247,27 +301,116 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } +static u32 get_block_address(u32 current_addr, u32 low, u32 high, + unsigned int bank, unsigned int block) +{ + u32 addr = 0, offset = 0; + + if (mce_flags.smca) { + if (!block) { + addr = MSR_AMD64_SMCA_MCx_MISC(bank); + } else { + /* + * For SMCA enabled processors, BLKPTR field of the + * first MISC register (MCx_MISC0) indicates presence of + * additional MISC register set (MISC1-4). + */ + u32 low, high; + + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + return addr; + + if (!(low & MCI_CONFIG_MCAX)) + return addr; + + if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && + (low & MASK_BLKPTR_LO)) + addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + } + return addr; + } + + /* Fall back to method we used for older processors: */ + switch (block) { + case 0: + addr = MSR_IA32_MCx_MISC(bank); + break; + case 1: + offset = ((low & MASK_BLKPTR_LO) >> 21); + if (offset) + addr = MCG_XBLK_ADDR + offset; + break; + default: + addr = ++current_addr; + } + return addr; +} + +static int +prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + int offset, u32 misc_high) +{ + unsigned int cpu = smp_processor_id(); + struct threshold_block b; + int new; + + if (!block) + per_cpu(bank_map, cpu) |= (1 << bank); + + memset(&b, 0, sizeof(b)); + b.cpu = cpu; + b.bank = bank; + b.block = block; + b.address = addr; + b.interrupt_capable = lvt_interrupt_supported(bank, misc_high); + + if (!b.interrupt_capable) + goto done; + + b.interrupt_enable = 1; + + if (mce_flags.smca) { + u32 smca_low, smca_high; + u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank); + + if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) { + smca_high |= SMCA_MCAX_EN_OFF; + wrmsr(smca_addr, smca_low, smca_high); + } + + /* Gather LVT offset for thresholding: */ + if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) + goto out; + + new = (smca_low & SMCA_THR_LVT_OFF) >> 12; + } else { + new = (misc_high & MASK_LVTOFF_HI) >> 20; + } + + offset = setup_APIC_mce_threshold(offset, new); + + if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) + mce_threshold_vector = amd_threshold_interrupt; + +done: + mce_threshold_block_init(&b, offset); + +out: + return offset; +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { - struct threshold_block b; - unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - int offset = -1, new; + int offset = -1; for (bank = 0; bank < mca_cfg.banks; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { - if (block == 0) - address = MSR_IA32_MCx_MISC(bank); - else if (block == 1) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - break; - - address += MCG_XBLK_ADDR; - } else - ++address; + address = get_block_address(address, low, high, bank, block); + if (!address) + break; if (rdmsr_safe(address, &low, &high)) break; @@ -279,29 +422,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) (high & MASK_LOCKED_HI)) continue; - if (!block) - per_cpu(bank_map, cpu) |= (1 << bank); - - memset(&b, 0, sizeof(b)); - b.cpu = cpu; - b.bank = bank; - b.block = block; - b.address = address; - b.interrupt_capable = lvt_interrupt_supported(bank, high); - - if (!b.interrupt_capable) - goto init; - - b.interrupt_enable = 1; - new = 
(high & MASK_LVTOFF_HI) >> 20; - offset = setup_APIC_mce_threshold(offset, new); - - if ((offset == new) && - (mce_threshold_vector != amd_threshold_interrupt)) - mce_threshold_vector = amd_threshold_interrupt; - -init: - mce_threshold_block_init(&b, offset); + offset = prepare_threshold_block(bank, block, address, offset, high); } } @@ -394,16 +515,9 @@ static void amd_threshold_interrupt(void) if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; for (block = 0; block < NR_BLOCKS; ++block) { - if (block == 0) { - address = MSR_IA32_MCx_MISC(bank); - } else if (block == 1) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - break; - address += MCG_XBLK_ADDR; - } else { - ++address; - } + address = get_block_address(address, low, high, bank, block); + if (!address) + break; if (rdmsr_safe(address, &low, &high)) break; @@ -623,16 +737,11 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (err) goto out_free; recurse: - if (!block) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - return 0; - address += MCG_XBLK_ADDR; - } else { - ++address; - } + address = get_block_address(address, low, high, bank, ++block); + if (!address) + return 0; - err = allocate_threshold_blocks(cpu, bank, ++block, address); + err = allocate_threshold_blocks(cpu, bank, block, address); if (err) goto out_free; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 12402e1..2a0717b 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -26,14 +26,12 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); - printk(KERN_EMERG - "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", - smp_processor_id(), loaddr, lotype); + pr_emerg("CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", + smp_processor_id(), loaddr, lotype); if (lotype & (1<<5)) { - printk(KERN_EMERG - "CPU#%d: Possible thermal failure (CPU on fire ?).\n", - smp_processor_id()); + pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n", + smp_processor_id()); } add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); @@ -61,12 +59,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) /* Read registers before enabling: */ rdmsr(MSR_IA32_P5_MC_ADDR, l, h); rdmsr(MSR_IA32_P5_MC_TYPE, l, h); - printk(KERN_INFO - "Intel old style machine check architecture supported.\n"); + pr_info("Intel old style machine check architecture supported.\n"); /* Enable MCE: */ cr4_set_bits(X86_CR4_MCE); - printk(KERN_INFO - "Intel old style machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); + pr_info("Intel old style machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 2c5aaf8..0b445c2 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -190,7 +190,7 @@ static int therm_throt_process(bool new_event, int event, int level) /* if we just entered the thermal event */ if (new_event) { if (event == THERMAL_THROTTLING_EVENT) - printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, level == CORE_LEVEL ? 
"Core" : "Package", state->count); @@ -198,8 +198,7 @@ static int therm_throt_process(bool new_event, int event, int level) } if (old_event) { if (event == THERMAL_THROTTLING_EVENT) - printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", - this_cpu, + pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package"); return 1; } @@ -417,8 +416,8 @@ static void intel_thermal_interrupt(void) static void unexpected_thermal_interrupt(void) { - printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", - smp_processor_id()); + pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", + smp_processor_id()); } static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; @@ -499,7 +498,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { if (system_state == SYSTEM_BOOTING) - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); + pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } @@ -557,8 +556,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", - tm2 ? "TM2" : "TM1"); + pr_info_once("CPU0: Thermal monitoring enabled (%s)\n", + tm2 ? "TM2" : "TM1"); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 7245980..fcf9ae9 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -12,8 +12,8 @@ static void default_threshold_interrupt(void) { - printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n", - THRESHOLD_APIC_VECTOR); + pr_err("Unexpected threshold interrupt at vector %x\n", + THRESHOLD_APIC_VECTOR); } void (*mce_threshold_vector)(void) = default_threshold_interrupt; diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 01dd870..c6a722e 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -17,7 +17,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) { ist_enter(regs); - printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); + pr_emerg("CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); ist_exit(regs); @@ -39,6 +39,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c) cr4_set_bits(X86_CR4_MCE); - printk(KERN_INFO - "Winchip machine check reporting enabled on CPU#0.\n"); + pr_info("Winchip machine check reporting enabled on CPU#0.\n"); } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 2233f8a..8581963 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -431,10 +431,6 @@ int __init save_microcode_in_initrd_amd(void) else container = cont_va; - if (ucode_new_rev) - pr_info("microcode: updated early to new patch_level=0x%08x\n", - ucode_new_rev); - eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); @@ -469,8 +465,7 @@ void reload_ucode_amd(void) if (mc && rev < mc->hdr.patch_id) { if (!__apply_microcode_amd(mc)) { ucode_new_rev = mc->hdr.patch_id; - pr_info("microcode: reload patch_level=0x%08x\n", - ucode_new_rev); + pr_info("reload patch_level=0x%08x\n", ucode_new_rev); } } } @@ -793,15 +788,13 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) return -EINVAL; } - 
patch->data = kzalloc(patch_size, GFP_KERNEL); + patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL); if (!patch->data) { pr_err("Patch data allocation failure.\n"); kfree(patch); return -EINVAL; } - /* All looks ok, copy patch... */ - memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); INIT_LIST_HEAD(&patch->plist); patch->patch_id = mc_hdr->patch_id; patch->equiv_cpu = proc_id; @@ -953,10 +946,14 @@ struct microcode_ops * __init init_amd_microcode(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("AMD CPU family 0x%x not supported\n", c->x86); + pr_warn("AMD CPU family 0x%x not supported\n", c->x86); return NULL; } + if (ucode_new_rev) + pr_info_once("microcode updated early to new patch_level=0x%08x\n", + ucode_new_rev); + return &microcode_amd_ops; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index faec712..ac360bf 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -43,16 +43,8 @@ #define MICROCODE_VERSION "2.01" static struct microcode_ops *microcode_ops; - static bool dis_ucode_ldr; -static int __init disable_loader(char *str) -{ - dis_ucode_ldr = true; - return 1; -} -__setup("dis_ucode_ldr", disable_loader); - /* * Synchronization. * @@ -81,15 +73,16 @@ struct cpu_info_ctx { static bool __init check_loader_disabled_bsp(void) { + static const char *__dis_opt_str = "dis_ucode_ldr"; + #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); - const char *opt = "dis_ucode_ldr"; - const char *option = (const char *)__pa_nodebug(opt); + const char *option = (const char *)__pa_nodebug(__dis_opt_str); bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr); #else /* CONFIG_X86_64 */ const char *cmdline = boot_command_line; - const char *option = "dis_ucode_ldr"; + const char *option = __dis_opt_str; bool *res = &dis_ucode_ldr; #endif @@ -479,7 +472,7 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) enum ucode_state ustate; struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - if (uci && uci->valid) + if (uci->valid) return UCODE_OK; if (collect_cpu_info(cpu)) @@ -630,7 +623,7 @@ int __init microcode_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; int error; - if (paravirt_enabled() || dis_ucode_ldr) + if (dis_ucode_ldr) return -EINVAL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ee81c54..cbb3cf0 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -39,9 +39,15 @@ #include <asm/setup.h> #include <asm/msr.h> -static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +/* + * Temporary microcode blobs pointers storage. We note here the pointers to + * microcode blobs we've got from whatever storage (detached initrd, builtin). + * Later on, we put those into final storage mc_saved_data.mc_saved. 
+ */ +static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT]; + static struct mc_saved_data { - unsigned int mc_saved_count; + unsigned int num_saved; struct microcode_intel **mc_saved; } mc_saved_data; @@ -78,53 +84,50 @@ load_microcode_early(struct microcode_intel **saved, } static inline void -copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd, - unsigned long off, int num_saved) +copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs, + unsigned long off, int num_saved) { int i; for (i = 0; i < num_saved; i++) - mc_saved[i] = (struct microcode_intel *)(initrd[i] + off); + mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off); } #ifdef CONFIG_X86_32 static void -microcode_phys(struct microcode_intel **mc_saved_tmp, - struct mc_saved_data *mc_saved_data) +microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs) { int i; struct microcode_intel ***mc_saved; - mc_saved = (struct microcode_intel ***) - __pa_nodebug(&mc_saved_data->mc_saved); - for (i = 0; i < mc_saved_data->mc_saved_count; i++) { + mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved); + + for (i = 0; i < mcs->num_saved; i++) { struct microcode_intel *p; - p = *(struct microcode_intel **) - __pa_nodebug(mc_saved_data->mc_saved + i); + p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i); mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); } } #endif static enum ucode_state -load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, - unsigned long initrd_start, struct ucode_cpu_info *uci) +load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, + unsigned long offset, struct ucode_cpu_info *uci) { struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int count = mc_saved_data->mc_saved_count; + unsigned int count = mcs->num_saved; - if (!mc_saved_data->mc_saved) { - copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count); + if (!mcs->mc_saved) { + copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count); return load_microcode_early(mc_saved_tmp, count, uci); } else { #ifdef CONFIG_X86_32 - microcode_phys(mc_saved_tmp, mc_saved_data); + microcode_phys(mc_saved_tmp, mcs); return load_microcode_early(mc_saved_tmp, count, uci); #else - return load_microcode_early(mc_saved_data->mc_saved, - count, uci); + return load_microcode_early(mcs->mc_saved, count, uci); #endif } } @@ -175,25 +178,25 @@ matching_model_microcode(struct microcode_header_intel *mc_header, } static int -save_microcode(struct mc_saved_data *mc_saved_data, +save_microcode(struct mc_saved_data *mcs, struct microcode_intel **mc_saved_src, - unsigned int mc_saved_count) + unsigned int num_saved) { int i, j; struct microcode_intel **saved_ptr; int ret; - if (!mc_saved_count) + if (!num_saved) return -EINVAL; /* * Copy new microcode data. */ - saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL); + saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL); if (!saved_ptr) return -ENOMEM; - for (i = 0; i < mc_saved_count; i++) { + for (i = 0; i < num_saved; i++) { struct microcode_header_intel *mc_hdr; struct microcode_intel *mc; unsigned long size; @@ -207,20 +210,18 @@ save_microcode(struct mc_saved_data *mc_saved_data, mc_hdr = &mc->hdr; size = get_totalsize(mc_hdr); - saved_ptr[i] = kmalloc(size, GFP_KERNEL); + saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL); if (!saved_ptr[i]) { ret = -ENOMEM; goto err; } - - memcpy(saved_ptr[i], mc, size); } /* * Point to newly saved microcode. 
*/ - mc_saved_data->mc_saved = saved_ptr; - mc_saved_data->mc_saved_count = mc_saved_count; + mcs->mc_saved = saved_ptr; + mcs->num_saved = num_saved; return 0; @@ -284,22 +285,20 @@ static unsigned int _save_mc(struct microcode_intel **mc_saved, * BSP can stay in the platform. */ static enum ucode_state __init -get_matching_model_microcode(int cpu, unsigned long start, - void *data, size_t size, - struct mc_saved_data *mc_saved_data, - unsigned long *mc_saved_in_initrd, +get_matching_model_microcode(unsigned long start, void *data, size_t size, + struct mc_saved_data *mcs, unsigned long *mc_ptrs, struct ucode_cpu_info *uci) { - u8 *ucode_ptr = data; - unsigned int leftover = size; + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + struct microcode_header_intel *mc_header; + unsigned int num_saved = mcs->num_saved; enum ucode_state state = UCODE_OK; + unsigned int leftover = size; + u8 *ucode_ptr = data; unsigned int mc_size; - struct microcode_header_intel *mc_header; - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int mc_saved_count = mc_saved_data->mc_saved_count; int i; - while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { + while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) { if (leftover < sizeof(mc_header)) break; @@ -318,32 +317,31 @@ get_matching_model_microcode(int cpu, unsigned long start, * the platform, we need to find and save microcode patches * with the same family and model as the BSP. */ - if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != - UCODE_OK) { + if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) { ucode_ptr += mc_size; continue; } - mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count); + num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved); ucode_ptr += mc_size; } if (leftover) { state = UCODE_ERROR; - goto out; + return state; } - if (mc_saved_count == 0) { + if (!num_saved) { state = UCODE_NFOUND; - goto out; + return state; } - for (i = 0; i < mc_saved_count; i++) - mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; + for (i = 0; i < num_saved; i++) + mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start; + + mcs->num_saved = num_saved; - mc_saved_data->mc_saved_count = mc_saved_count; -out: return state; } @@ -373,7 +371,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); csig.pf = 1 << ((val[1] >> 18) & 7); } - native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + native_wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ sync_core(); @@ -396,11 +394,11 @@ static void show_saved_mc(void) unsigned int sig, pf, rev, total_size, data_size, date; struct ucode_cpu_info uci; - if (mc_saved_data.mc_saved_count == 0) { + if (!mc_saved_data.num_saved) { pr_debug("no microcode data saved.\n"); return; } - pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); + pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved); collect_cpu_info_early(&uci); @@ -409,7 +407,7 @@ static void show_saved_mc(void) rev = uci.cpu_sig.rev; pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); - for (i = 0; i < mc_saved_data.mc_saved_count; i++) { + for (i = 0; i < mc_saved_data.num_saved; i++) { struct microcode_header_intel *mc_saved_header; struct extended_sigtable *ext_header; int ext_sigcount; @@ -465,7 +463,7 @@ int save_mc_for_early(u8 *mc) { struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; unsigned int mc_saved_count_init; - unsigned int mc_saved_count; + 
unsigned int num_saved; struct microcode_intel **mc_saved; int ret = 0; int i; @@ -476,23 +474,23 @@ int save_mc_for_early(u8 *mc) */ mutex_lock(&x86_cpu_microcode_mutex); - mc_saved_count_init = mc_saved_data.mc_saved_count; - mc_saved_count = mc_saved_data.mc_saved_count; + mc_saved_count_init = mc_saved_data.num_saved; + num_saved = mc_saved_data.num_saved; mc_saved = mc_saved_data.mc_saved; - if (mc_saved && mc_saved_count) + if (mc_saved && num_saved) memcpy(mc_saved_tmp, mc_saved, - mc_saved_count * sizeof(struct microcode_intel *)); + num_saved * sizeof(struct microcode_intel *)); /* * Save the microcode patch mc in mc_save_tmp structure if it's a newer * version. */ - mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count); + num_saved = _save_mc(mc_saved_tmp, mc, num_saved); /* * Save the mc_save_tmp in global mc_saved_data. */ - ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); + ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved); if (ret) { pr_err("Cannot save microcode patch.\n"); goto out; @@ -536,7 +534,7 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp) static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; static __init enum ucode_state -scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, +scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, unsigned long start, unsigned long size, struct ucode_cpu_info *uci) { @@ -551,14 +549,18 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, cd.data = NULL; cd.size = 0; - cd = find_cpio_data(p, (void *)start, size, &offset); - if (!cd.data) { + /* try built-in microcode if no initrd */ + if (!size) { if (!load_builtin_intel_microcode(&cd)) return UCODE_ERROR; + } else { + cd = find_cpio_data(p, (void *)start, size, &offset); + if (!cd.data) + return UCODE_ERROR; } - return get_matching_model_microcode(0, start, cd.data, cd.size, - mc_saved_data, initrd, uci); + return get_matching_model_microcode(start, cd.data, cd.size, + mcs, mc_ptrs, uci); } /* @@ -567,14 +569,11 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, static void print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) { - int cpu = smp_processor_id(); - - pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", - cpu, - uci->cpu_sig.rev, - date & 0xffff, - date >> 24, - (date >> 16) & 0xff); + pr_info_once("microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", + uci->cpu_sig.rev, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); } #ifdef CONFIG_X86_32 @@ -603,19 +602,19 @@ void show_ucode_info_early(void) */ static void print_ucode(struct ucode_cpu_info *uci) { - struct microcode_intel *mc_intel; + struct microcode_intel *mc; int *delay_ucode_info_p; int *current_mc_date_p; - mc_intel = uci->mc; - if (mc_intel == NULL) + mc = uci->mc; + if (!mc) return; delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); current_mc_date_p = (int *)__pa_nodebug(&current_mc_date); *delay_ucode_info_p = 1; - *current_mc_date_p = mc_intel->hdr.date; + *current_mc_date_p = mc->hdr.date; } #else @@ -630,37 +629,35 @@ static inline void flush_tlb_early(void) static inline void print_ucode(struct ucode_cpu_info *uci) { - struct microcode_intel *mc_intel; + struct microcode_intel *mc; - mc_intel = uci->mc; - if (mc_intel == NULL) + mc = uci->mc; + if (!mc) return; - print_ucode_info(uci, mc_intel->hdr.date); + print_ucode_info(uci, mc->hdr.date); } #endif static int 
apply_microcode_early(struct ucode_cpu_info *uci, bool early) { - struct microcode_intel *mc_intel; + struct microcode_intel *mc; unsigned int val[2]; - mc_intel = uci->mc; - if (mc_intel == NULL) + mc = uci->mc; + if (!mc) return 0; /* write microcode via MSR 0x79 */ - native_wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) mc_intel->bits, - (unsigned long) mc_intel->bits >> 16 >> 16); - native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); + native_wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ sync_core(); /* get the current revision from MSR 0x8B */ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc_intel->hdr.rev) + if (val[1] != mc->hdr.rev) return -1; #ifdef CONFIG_X86_64 @@ -672,25 +669,26 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (early) print_ucode(uci); else - print_ucode_info(uci, mc_intel->hdr.date); + print_ucode_info(uci, mc->hdr.date); return 0; } /* * This function converts microcode patch offsets previously stored in - * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. + * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data. */ int __init save_microcode_in_initrd_intel(void) { - unsigned int count = mc_saved_data.mc_saved_count; + unsigned int count = mc_saved_data.num_saved; struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; int ret = 0; - if (count == 0) + if (!count) return ret; - copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count); + copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count); + ret = save_microcode(&mc_saved_data, mc_saved, count); if (ret) pr_err("Cannot save microcode patches from initrd.\n"); @@ -701,8 +699,7 @@ int __init save_microcode_in_initrd_intel(void) } static void __init -_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, - unsigned long *initrd, +_load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs, unsigned long start, unsigned long size) { struct ucode_cpu_info uci; @@ -710,11 +707,11 @@ _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, collect_cpu_info_early(&uci); - ret = scan_microcode(mc_saved_data, initrd, start, size, &uci); + ret = scan_microcode(mcs, mc_ptrs, start, size, &uci); if (ret != UCODE_OK) return; - ret = load_microcode(mc_saved_data, initrd, start, &uci); + ret = load_microcode(mcs, mc_ptrs, start, &uci); if (ret != UCODE_OK) return; @@ -728,53 +725,49 @@ void __init load_ucode_intel_bsp(void) struct boot_params *p; p = (struct boot_params *)__pa_nodebug(&boot_params); - start = p->hdr.ramdisk_image; size = p->hdr.ramdisk_size; - _load_ucode_intel_bsp( - (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), - (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), - start, size); + /* + * Set start only if we have an initrd image. We cannot use initrd_start + * because it is not set that early yet. + */ + start = (size ? p->hdr.ramdisk_image : 0); + + _load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data), + (unsigned long *)__pa_nodebug(&mc_tmp_ptrs), + start, size); #else - start = boot_params.hdr.ramdisk_image + PAGE_OFFSET; size = boot_params.hdr.ramdisk_size; + start = (size ? 
boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0); - _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size); + _load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size); #endif } void load_ucode_intel_ap(void) { - struct mc_saved_data *mc_saved_data_p; + unsigned long *mcs_tmp_p; + struct mc_saved_data *mcs_p; struct ucode_cpu_info uci; - unsigned long *mc_saved_in_initrd_p; - unsigned long initrd_start_addr; enum ucode_state ret; #ifdef CONFIG_X86_32 - unsigned long *initrd_start_p; - mc_saved_in_initrd_p = - (unsigned long *)__pa_nodebug(mc_saved_in_initrd); - mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); - initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); - initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); + mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs); + mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); #else - mc_saved_data_p = &mc_saved_data; - mc_saved_in_initrd_p = mc_saved_in_initrd; - initrd_start_addr = initrd_start; + mcs_tmp_p = mc_tmp_ptrs; + mcs_p = &mc_saved_data; #endif /* * If there is no valid ucode previously saved in memory, no need to * update ucode on this AP. */ - if (mc_saved_data_p->mc_saved_count == 0) + if (!mcs_p->num_saved) return; collect_cpu_info_early(&uci); - ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, - initrd_start_addr, &uci); - + ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci); if (ret != UCODE_OK) return; @@ -786,13 +779,13 @@ void reload_ucode_intel(void) struct ucode_cpu_info uci; enum ucode_state ret; - if (!mc_saved_data.mc_saved_count) + if (!mc_saved_data.num_saved) return; collect_cpu_info_early(&uci); ret = load_microcode_early(mc_saved_data.mc_saved, - mc_saved_data.mc_saved_count, &uci); + mc_saved_data.num_saved, &uci); if (ret != UCODE_OK) return; @@ -825,7 +818,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) * return 0 - no update found * return 1 - found update */ -static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) +static int get_matching_mc(struct microcode_intel *mc, int cpu) { struct cpu_signature cpu_sig; unsigned int csig, cpf, crev; @@ -836,39 +829,36 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) cpf = cpu_sig.pf; crev = cpu_sig.rev; - return has_newer_microcode(mc_intel, csig, cpf, crev); + return has_newer_microcode(mc, csig, cpf, crev); } static int apply_microcode_intel(int cpu) { - struct microcode_intel *mc_intel; + struct microcode_intel *mc; struct ucode_cpu_info *uci; + struct cpuinfo_x86 *c; unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - - uci = ucode_cpu_info + cpu; - mc_intel = uci->mc; /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); + if (WARN_ON(raw_smp_processor_id() != cpu)) + return -1; - if (mc_intel == NULL) + uci = ucode_cpu_info + cpu; + mc = uci->mc; + if (!mc) return 0; /* * Microcode on this CPU could be updated earlier. Only apply the - * microcode patch in mc_intel when it is newer than the one on this + * microcode patch in mc when it is newer than the one on this * CPU. 
*/ - if (get_matching_mc(mc_intel, cpu) == 0) + if (!get_matching_mc(mc, cpu)) return 0; /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) mc_intel->bits, - (unsigned long) mc_intel->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); + wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); + wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ sync_core(); @@ -876,16 +866,19 @@ static int apply_microcode_intel(int cpu) /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc_intel->hdr.rev) { + if (val[1] != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", - cpu_num, mc_intel->hdr.rev); + cpu, mc->hdr.rev); return -1; } + pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", - cpu_num, val[1], - mc_intel->hdr.date & 0xffff, - mc_intel->hdr.date >> 24, - (mc_intel->hdr.date >> 16) & 0xff); + cpu, val[1], + mc->hdr.date & 0xffff, + mc->hdr.date >> 24, + (mc->hdr.date >> 16) & 0xff); + + c = &cpu_data(cpu); uci->cpu_sig.rev = val[1]; c->microcode = val[1]; diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c index b96896b..2ce1a7d 100644 --- a/arch/x86/kernel/cpu/microcode/intel_lib.c +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c @@ -49,7 +49,7 @@ int microcode_sanity_check(void *mc, int print_err) unsigned long total_size, data_size, ext_table_size; struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; - int sum, orig_sum, ext_sigcount = 0, i; + u32 sum, orig_sum, ext_sigcount = 0, i; struct extended_signature *ext_sig; total_size = get_totalsize(mc_header); @@ -57,69 +57,85 @@ int microcode_sanity_check(void *mc, int print_err) if (data_size + MC_HEADER_SIZE > total_size) { if (print_err) - pr_err("error! Bad data size in microcode data file\n"); + pr_err("Error: bad microcode data file size.\n"); return -EINVAL; } if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { if (print_err) - pr_err("error! Unknown microcode update format\n"); + pr_err("Error: invalid/unknown microcode update format.\n"); return -EINVAL; } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); if (ext_table_size) { + u32 ext_table_sum = 0; + u32 *ext_tablep; + if ((ext_table_size < EXT_HEADER_SIZE) || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { if (print_err) - pr_err("error! Small exttable size in microcode data file\n"); + pr_err("Error: truncated extended signature table.\n"); return -EINVAL; } + ext_header = mc + MC_HEADER_SIZE + data_size; if (ext_table_size != exttable_size(ext_header)) { if (print_err) - pr_err("error! Bad exttable size in microcode data file\n"); + pr_err("Error: extended signature table size mismatch.\n"); return -EFAULT; } + ext_sigcount = ext_header->count; - } - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; + /* + * Check extended table checksum: the sum of all dwords that + * comprise a valid table must be 0. + */ + ext_tablep = (u32 *)ext_header; - i = ext_table_size / DWSIZE; + i = ext_table_size / sizeof(u32); while (i--) ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { if (print_err) - pr_warn("aborting, bad extended signature table checksum\n"); + pr_warn("Bad extended signature table checksum, aborting.\n"); return -EINVAL; } } - /* calculate the checksum */ + /* + * Calculate the checksum of update data and header. 
The checksum of + * valid update data and header including the extended signature table + * must be 0. + */ orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; + i = (MC_HEADER_SIZE + data_size) / sizeof(u32); while (i--) - orig_sum += ((int *)mc)[i]; + orig_sum += ((u32 *)mc)[i]; + if (orig_sum) { if (print_err) - pr_err("aborting, bad checksum\n"); + pr_err("Bad microcode data checksum, aborting.\n"); return -EINVAL; } + if (!ext_table_size) return 0; - /* check extended signature checksum */ + + /* + * Check extended signature checksum: 0 => valid. + */ for (i = 0; i < ext_sigcount; i++) { ext_sig = (void *)ext_header + EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + + sum = (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); if (sum) { if (print_err) - pr_err("aborting, bad checksum\n"); + pr_err("Bad extended signature checksum, aborting.\n"); return -EINVAL; } } diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 3f20710..6988c74 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h +# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h # IN=$1 @@ -49,8 +49,8 @@ dump_array() trap 'rm "$OUT"' EXIT ( - echo "#ifndef _ASM_X86_CPUFEATURE_H" - echo "#include <asm/cpufeature.h>" + echo "#ifndef _ASM_X86_CPUFEATURES_H" + echo "#include <asm/cpufeatures.h>" echo "#endif" echo "" diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 20e242e..4e7c693 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -161,8 +161,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("HyperV: features 0x%x, hints 0x%x\n", + ms_hyperv.features, ms_hyperv.hints); #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) { @@ -174,8 +174,8 @@ static void __init ms_hyperv_init_platform(void) rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); lapic_timer_frequency = hv_lapic_frequency; - printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n", - lapic_timer_frequency); + pr_info("HyperV: LAPIC Timer Frequency: %#x\n", + lapic_timer_frequency); } #endif diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index 316fe3e..3d68993 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -103,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t */ if (type != MTRR_TYPE_WRCOMB && (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { - pr_warning("mtrr: only write-combining%s supported\n", + pr_warn("mtrr: only write-combining%s supported\n", centaur_mcr_type ? 
" and uncacheable are" : " is"); return -EINVAL; } diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 0d98503..31e951c 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -57,9 +57,9 @@ static int __initdata nr_range; static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; -#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) +#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0) -#define BIOS_BUG_MSG KERN_WARNING \ +#define BIOS_BUG_MSG \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" static int __init @@ -81,9 +81,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, base, base + size); } if (debug_print) { - printk(KERN_DEBUG "After WB checking\n"); + pr_debug("After WB checking\n"); for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } @@ -101,7 +101,7 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { /* Var MTRR contains UC entry below 1M? Skip it: */ - printk(BIOS_BUG_MSG, i); + pr_warn(BIOS_BUG_MSG, i); if (base + size <= (1<<(20-PAGE_SHIFT))) continue; size -= (1<<(20-PAGE_SHIFT)) - base; @@ -114,11 +114,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, extra_remove_base + extra_remove_size); if (debug_print) { - printk(KERN_DEBUG "After UC checking\n"); + pr_debug("After UC checking\n"); for (i = 0; i < RANGE_NUM; i++) { if (!range[i].end) continue; - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } } @@ -126,9 +126,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, /* sort the ranges */ nr_range = clean_sort_range(range, RANGE_NUM); if (debug_print) { - printk(KERN_DEBUG "After sorting\n"); + pr_debug("After sorting\n"); for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } @@ -544,7 +544,7 @@ static void __init print_out_mtrr_range_state(void) start_base = to_size_factor(start_base, &start_factor), type = range_state[i].type; - printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n", i, start_base, start_factor, size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE) ? 
"UC" : @@ -713,7 +713,7 @@ int __init mtrr_cleanup(unsigned address_bits) return 0; /* Print original var MTRRs at first, for debugging: */ - printk(KERN_DEBUG "original variable MTRRs\n"); + pr_debug("original variable MTRRs\n"); print_out_mtrr_range_state(); memset(range, 0, sizeof(range)); @@ -733,7 +733,7 @@ int __init mtrr_cleanup(unsigned address_bits) x_remove_base, x_remove_size); range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM covered: %ldM\n", + pr_info("total RAM covered: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); if (mtrr_chunk_size && mtrr_gran_size) { @@ -745,12 +745,11 @@ int __init mtrr_cleanup(unsigned address_bits) if (!result[i].bad) { set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); + pr_debug("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } - printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " - "will find optimal one\n"); + pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n"); } i = 0; @@ -768,7 +767,7 @@ int __init mtrr_cleanup(unsigned address_bits) x_remove_base, x_remove_size, i); if (debug_print) { mtrr_print_out_one_result(i); - printk(KERN_INFO "\n"); + pr_info("\n"); } i++; @@ -779,7 +778,7 @@ int __init mtrr_cleanup(unsigned address_bits) index_good = mtrr_search_optimal_index(); if (index_good != -1) { - printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + pr_info("Found optimal setting for mtrr clean up\n"); i = index_good; mtrr_print_out_one_result(i); @@ -790,7 +789,7 @@ int __init mtrr_cleanup(unsigned address_bits) gran_size <<= 10; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); + pr_debug("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } else { @@ -799,8 +798,8 @@ int __init mtrr_cleanup(unsigned address_bits) mtrr_print_out_one_result(i); } - printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); - printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + pr_info("mtrr_cleanup: can not find optimal value\n"); + pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n"); return 0; } @@ -918,7 +917,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ if (!highest_pfn) { - printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); + pr_info("CPU MTRRs all blank - virtualized system.\n"); return 0; } @@ -973,7 +972,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) end_pfn); if (total_trim_size) { - pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); + pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", + total_trim_size >> 20); if (!changed_by_mtrr_cleanup) WARN_ON(1); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index c870af1..fcbcb2f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -55,7 +55,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) rdmsr(MSR_K8_SYSCFG, lo, hi); if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { - printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" + pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" " not cleared by BIOS, clearing this bit\n", smp_processor_id()); lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; @@ -501,14 +501,14 @@ void __init mtrr_state_warn(void) if (!mask) return; if (mask & 
MTRR_CHANGE_MASK_FIXED) - pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_VARIABLE) - pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n"); + pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_DEFTYPE) - pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); - printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); - printk(KERN_INFO "mtrr: corrected configuration.\n"); + pr_info("mtrr: probably your BIOS does not setup all CPUs.\n"); + pr_info("mtrr: corrected configuration.\n"); } /* @@ -519,8 +519,7 @@ void __init mtrr_state_warn(void) void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) { if (wrmsr_safe(msr, a, b) < 0) { - printk(KERN_ERR - "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", + pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", smp_processor_id(), msr, a, b); } } @@ -607,7 +606,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1ULL<<(hi - 1)) - 1); if (tmp != mask) { - printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); + pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); mask = tmp; } @@ -858,13 +857,13 @@ int generic_validate_add_page(unsigned long base, unsigned long size, boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; } if (!(base + size < 0x70000 || base > 0x7003F) && (type == MTRR_TYPE_WRCOMB || type == MTRR_TYPE_WRBACK)) { - pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); + pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); return -EINVAL; } } @@ -878,7 +877,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, lbase = lbase >> 1, last = last >> 1) ; if (lbase != last) { - pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); + pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); return -EINVAL; } return 0; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 5c3d149..10f8d47 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -47,7 +47,7 @@ #include <linux/smp.h> #include <linux/syscore_ops.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/e820.h> #include <asm/mtrr.h> #include <asm/msr.h> @@ -300,24 +300,24 @@ int mtrr_add_page(unsigned long base, unsigned long size, return error; if (type >= MTRR_NUM_TYPES) { - pr_warning("mtrr: type: %u invalid\n", type); + pr_warn("mtrr: type: %u invalid\n", type); return -EINVAL; } /* If the type is WC, check that this processor supports it */ if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - pr_warning("mtrr: your processor doesn't support write-combining\n"); + pr_warn("mtrr: your processor doesn't support write-combining\n"); return -ENOSYS; } if (!size) { - pr_warning("mtrr: zero sized request\n"); + pr_warn("mtrr: zero sized request\n"); return -EINVAL; } if ((base | (base + size - 1)) >> (boot_cpu_data.x86_phys_bits - 
PAGE_SHIFT)) { - pr_warning("mtrr: base or size exceeds the MTRR width\n"); + pr_warn("mtrr: base or size exceeds the MTRR width\n"); return -EINVAL; } @@ -348,7 +348,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, } else if (types_compatible(type, ltype)) continue; } - pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing" + pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing" " 0x%lx000,0x%lx000\n", base, size, lbase, lsize); goto out; @@ -357,7 +357,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (ltype != type) { if (types_compatible(type, ltype)) continue; - pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", base, size, mtrr_attrib_to_str(ltype), mtrr_attrib_to_str(type)); goto out; @@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, static int mtrr_check(unsigned long base, unsigned long size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - pr_warning("mtrr: size and base must be multiples of 4 kiB\n"); + pr_warn("mtrr: size and base must be multiples of 4 kiB\n"); pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); dump_stack(); return -1; @@ -493,16 +493,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) } } if (reg >= max) { - pr_warning("mtrr: register: %d too big\n", reg); + pr_warn("mtrr: register: %d too big\n", reg); goto out; } mtrr_if->get(reg, &lbase, &lsize, <ype); if (lsize < 1) { - pr_warning("mtrr: MTRR %d not used\n", reg); + pr_warn("mtrr: MTRR %d not used\n", reg); goto out; } if (mtrr_usage_table[reg] < 1) { - pr_warning("mtrr: reg: %d has count=0\n", reg); + pr_warn("mtrr: reg: %d has count=0\n", reg); goto out; } if (--mtrr_usage_table[reg] < 1) diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 819d949..f6f50c4 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -51,7 +51,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) for (i = 0; i < SANITY_CHECK_LOOPS; i++) { if (!rdrand_long(&tmp)) { clear_cpu_cap(c, X86_FEATURE_RDRAND); - printk_once(KERN_WARNING "rdrand: disabled\n"); + pr_warn_once("rdrand: disabled\n"); return; } } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 4c60eaf..cd53135 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -87,10 +87,10 @@ void detect_extended_topology(struct cpuinfo_x86 *c) c->x86_max_cores = (core_level_siblings / smp_num_siblings); if (!printed) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + pr_info("CPU: Physical Processor ID: %d\n", c->phys_proc_id); if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", + pr_info("CPU: Processor Core ID: %d\n", c->cpu_core_id); printed = 1; } diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 252da7a..3417856 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,6 +1,6 @@ #include <linux/kernel.h> #include <linux/mm.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/msr.h> #include "cpu.h" @@ -33,7 +33,7 @@ static void init_transmeta(struct cpuinfo_x86 *c) if (max >= 0x80860001) { cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); if (cpu_rev != 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", + pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n", (cpu_rev >> 24) & 0xff, (cpu_rev >> 16) & 0xff, (cpu_rev >> 8) & 0xff, 
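
A minimal standalone sketch of the revision decode used in the hunk above: the 32-bit revision word packs one field per byte, and the printk arguments simply shift and mask them out. The helper and the example value below are illustrative only and are not part of the patch.

#include <stdio.h>
#include <stdint.h>

/* Unpack a byte-packed revision word, e.g. 0x01020304 -> "1.2.3.4". */
static void print_packed_rev(uint32_t rev)
{
	printf("%u.%u.%u.%u\n",
	       (rev >> 24) & 0xff, (rev >> 16) & 0xff,
	       (rev >> 8) & 0xff, rev & 0xff);
}

int main(void)
{
	print_packed_rev(0x01020304);	/* prints 1.2.3.4 */
	return 0;
}
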
@@ -44,10 +44,10 @@ static void init_transmeta(struct cpuinfo_x86 *c) if (max >= 0x80860002) { cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); if (cpu_rev == 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", + pr_info("CPU: Processor revision %08X, %u MHz\n", new_cpu_rev, cpu_freq); } - printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", + pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", (cms_rev1 >> 24) & 0xff, (cms_rev1 >> 16) & 0xff, (cms_rev1 >> 8) & 0xff, @@ -76,7 +76,7 @@ static void init_transmeta(struct cpuinfo_x86 *c) (void *)&cpu_info[56], (void *)&cpu_info[60]); cpu_info[64] = '\0'; - printk(KERN_INFO "CPU: %s\n", cpu_info); + pr_info("CPU: %s\n", cpu_info); } /* Unhide possibly hidden capability flags */ diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 628a059..364e583 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -62,7 +62,7 @@ static unsigned long vmware_get_tsc_khz(void) tsc_hz = eax | (((uint64_t)ebx) << 32); do_div(tsc_hz, 1000); BUG_ON(tsc_hz >> 32); - printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", + pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", (unsigned long) tsc_hz / 1000, (unsigned long) tsc_hz % 1000); @@ -84,8 +84,7 @@ static void __init vmware_platform_setup(void) if (ebx != UINT_MAX) x86_platform.calibrate_tsc = vmware_get_tsc_khz; else - printk(KERN_WARNING - "Failed to get TSC freq from the hypervisor\n"); + pr_warn("Failed to get TSC freq from the hypervisor\n"); } /* diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 58f3431..9ef978d 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -57,10 +57,9 @@ struct crash_elf_data { struct kimage *image; /* * Total number of ram ranges we have after various adjustments for - * GART, crash reserved region etc. + * crash reserved region, etc. */ unsigned int max_nr_ranges; - unsigned long gart_start, gart_end; /* Pointer to elf header */ void *ehdr; @@ -201,17 +200,6 @@ static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) return 0; } -static int get_gart_ranges_callback(u64 start, u64 end, void *arg) -{ - struct crash_elf_data *ced = arg; - - ced->gart_start = start; - ced->gart_end = end; - - /* Not expecting more than 1 gart aperture */ - return 1; -} - /* Gather all the required information to prepare elf headers for ram regions */ static void fill_up_crash_elf_data(struct crash_elf_data *ced, @@ -226,22 +214,6 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->max_nr_ranges = nr_ranges; - /* - * We don't create ELF headers for GART aperture as an attempt - * to dump this memory in second kernel leads to hang/crash. - * If gart aperture is present, one needs to exclude that region - * and that could lead to need of extra phdr. - */ - walk_iomem_res("GART", IORESOURCE_MEM, 0, -1, - ced, get_gart_ranges_callback); - - /* - * If we have gart region, excluding that could potentially split - * a memory range, resulting in extra header. Account for that. 
- */ - if (ced->gart_end) - ced->max_nr_ranges++; - /* Exclusion of crash region could split memory ranges */ ced->max_nr_ranges++; @@ -350,13 +322,6 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced, return ret; } - /* Exclude GART region */ - if (ced->gart_end) { - ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end); - if (ret) - return ret; - } - return ret; } @@ -599,12 +564,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) /* Add ACPI tables */ cmd.type = E820_ACPI; flags = IORESOURCE_MEM | IORESOURCE_BUSY; - walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd, + walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd, memmap_entry_callback); /* Add ACPI Non-volatile Storage */ cmd.type = E820_NVS; - walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd, + walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, memmap_entry_callback); /* Add crashk_low_res region */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0d1ff4b..8efa57a 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -267,9 +267,8 @@ int __die(const char *str, struct pt_regs *regs, long err) #ifdef CONFIG_SMP printk("SMP "); #endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif + if (debug_pagealloc_enabled()) + printk("DEBUG_PAGEALLOC "); #ifdef CONFIG_KASAN printk("KASAN"); #endif diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 569c1e4..621b501 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -24,6 +24,7 @@ #include <asm/e820.h> #include <asm/proto.h> #include <asm/setup.h> +#include <asm/cpufeature.h> /* * The e820 map is the map that gets modified e.g. with command line parameters @@ -925,6 +926,41 @@ static const char *e820_type_to_string(int e820_type) } } +static unsigned long e820_type_to_iomem_type(int e820_type) +{ + switch (e820_type) { + case E820_RESERVED_KERN: + case E820_RAM: + return IORESOURCE_SYSTEM_RAM; + case E820_ACPI: + case E820_NVS: + case E820_UNUSABLE: + case E820_PRAM: + case E820_PMEM: + default: + return IORESOURCE_MEM; + } +} + +static unsigned long e820_type_to_iores_desc(int e820_type) +{ + switch (e820_type) { + case E820_ACPI: + return IORES_DESC_ACPI_TABLES; + case E820_NVS: + return IORES_DESC_ACPI_NV_STORAGE; + case E820_PMEM: + return IORES_DESC_PERSISTENT_MEMORY; + case E820_PRAM: + return IORES_DESC_PERSISTENT_MEMORY_LEGACY; + case E820_RESERVED_KERN: + case E820_RAM: + case E820_UNUSABLE: + default: + return IORES_DESC_NONE; + } +} + static bool do_mark_busy(u32 type, struct resource *res) { /* this is the legacy bios/dos rom-shadow + mmio region */ @@ -967,7 +1003,8 @@ void __init e820_reserve_resources(void) res->start = e820.map[i].addr; res->end = end; - res->flags = IORESOURCE_MEM; + res->flags = e820_type_to_iomem_type(e820.map[i].type); + res->desc = e820_type_to_iores_desc(e820.map[i].type); /* * don't register the region that could be conflicted with diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d25097c..0b1b9ab 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -114,6 +114,10 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); if (fpu->fpregs_active) { + /* + * Ignore return value -- we don't care if reg state + * is clobbered. 
+ */ copy_fpregs_to_fpstate(fpu); } else { this_cpu_write(fpu_fpregs_owner_ctx, NULL); @@ -189,8 +193,12 @@ void fpu__save(struct fpu *fpu) preempt_disable(); if (fpu->fpregs_active) { - if (!copy_fpregs_to_fpstate(fpu)) - fpregs_deactivate(fpu); + if (!copy_fpregs_to_fpstate(fpu)) { + if (use_eager_fpu()) + copy_kernel_to_fpregs(&fpu->state); + else + fpregs_deactivate(fpu); + } } preempt_enable(); } @@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state) } EXPORT_SYMBOL_GPL(fpstate_init); -/* - * Copy the current task's FPU state to a new task's FPU context. - * - * In both the 'eager' and the 'lazy' case we save hardware registers - * directly to the destination buffer. - */ -static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) +int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { + dst_fpu->counter = 0; + dst_fpu->fpregs_active = 0; + dst_fpu->last_cpu = -1; + + if (!src_fpu->fpstate_active || !cpu_has_fpu) + return 0; + WARN_ON_FPU(src_fpu != ¤t->thread.fpu); /* @@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) /* * Save current FPU registers directly into the child * FPU context, without any memory-to-memory copying. - * - * If the FPU context got destroyed in the process (FNSAVE - * done on old CPUs) then copy it back into the source - * context and mark the current task for lazy restore. + * In lazy mode, if the FPU context isn't loaded into + * fpregs, CR0.TS will be set and do_device_not_available + * will load the FPU context. * * We have to do all this with preemption disabled, * mostly because of the FNSAVE case, because in that @@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); - fpregs_deactivate(src_fpu); + + if (use_eager_fpu()) + copy_kernel_to_fpregs(&src_fpu->state); + else + fpregs_deactivate(src_fpu); } preempt_enable(); -} - -int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) -{ - dst_fpu->counter = 0; - dst_fpu->fpregs_active = 0; - dst_fpu->last_cpu = -1; - - if (src_fpu->fpstate_active && cpu_has_fpu) - fpu_copy(dst_fpu, src_fpu); return 0; } @@ -409,8 +411,10 @@ static inline void copy_init_fpstate_to_fpregs(void) { if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, -1); - else + else if (static_cpu_has(X86_FEATURE_FXSR)) copy_kernel_to_fxregs(&init_fpstate.fxsave); + else + copy_kernel_to_fregs(&init_fpstate.fsave); } /* @@ -423,7 +427,7 @@ void fpu__clear(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ - if (!use_eager_fpu()) { + if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) { /* FPU state will be reallocated lazily at the first use. 
*/ fpu__drop(fpu); } else { diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6d9f0a7..54c86ff 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -78,13 +78,15 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) cr0 &= ~(X86_CR0_TS | X86_CR0_EM); write_cr0(cr0); - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); + if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); + } #ifndef CONFIG_MATH_EMULATION if (!cpu_has_fpu) { @@ -132,7 +134,7 @@ static void __init fpu__init_system_generic(void) * Set up the legacy init FPU context. (xstate init might overwrite this * with a more modern format, if the CPU supports it.) */ - fpstate_init_fxstate(&init_fpstate.fxsave); + fpstate_init(&init_fpstate); fpu__init_system_mxcsr(); } @@ -260,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void) * not only saved the restores along the way, but we also have the * FPU ready to be used for the original task. * - * 'eager' switching is used on modern CPUs, there we switch the FPU + * 'lazy' is deprecated because it's almost never a performance win + * and it's much more complicated than 'eager'. + * + * 'eager' switching is by default on all CPUs, there we switch the FPU * state during every context switch, regardless of whether the task * has used FPU instructions in that time slice or not. This is done * because modern FPU context saving instructions are able to optimize @@ -271,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) * to use 'eager' restores, if we detect that a task is using the FPU * frequently. See the fpu->counter logic in fpu/internal.h for that. ] */ -static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; +static enum { ENABLE, DISABLE } eagerfpu = ENABLE; /* * Find supported xfeatures based on cpu features and command-line input. @@ -300,12 +305,6 @@ u64 __init fpu__get_supported_xfeatures_mask(void) static void __init fpu__clear_eager_fpu_features(void) { setup_clear_cpu_cap(X86_FEATURE_MPX); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - setup_clear_cpu_cap(X86_FEATURE_AVX512F); - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); } /* @@ -348,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { - /* - * No need to check "eagerfpu=auto" again, since it is the - * initial default. 
- */ if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { eagerfpu = DISABLE; fpu__clear_eager_fpu_features(); - } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) { - eagerfpu = ENABLE; } if (cmdline_find_option_bool(boot_command_line, "no387")) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d425cda5..6e8354f 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX512PF); setup_clear_cpu_cap(X86_FEATURE_AVX512ER); setup_clear_cpu_cap(X86_FEATURE_AVX512CD); + setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); + setup_clear_cpu_cap(X86_FEATURE_AVX512BW); + setup_clear_cpu_cap(X86_FEATURE_AVX512VL); setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 29408d6..702547c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end) static unsigned long text_ip_addr(unsigned long ip) { /* - * On x86_64, kernel text mappings are mapped read-only with - * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead - * of the kernel text mapping to modify the kernel text. + * On x86_64, kernel text mappings are mapped read-only, so we use + * the kernel identity mapping instead of the kernel text mapping + * to modify the kernel text. * * For 32bit kernels, these mappings are same and we can use * kernel identity mapping to modify code. @@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { } #endif /* Defined as markers to the end of the ftrace default trampolines */ -extern void ftrace_caller_end(void); extern void ftrace_regs_caller_end(void); -extern void ftrace_return(void); +extern void ftrace_epilogue(void); extern void ftrace_caller_op_ptr(void); extern void ftrace_regs_caller_op_ptr(void); @@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) op_offset = (unsigned long)ftrace_regs_caller_op_ptr; } else { start_offset = (unsigned long)ftrace_caller; - end_offset = (unsigned long)ftrace_caller_end; + end_offset = (unsigned long)ftrace_epilogue; op_offset = (unsigned long)ftrace_caller_op_ptr; } @@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* * Allocate enough size to store the ftrace_caller code, - * the jmp to ftrace_return, as well as the address of + * the jmp to ftrace_epilogue, as well as the address of * the ftrace_ops this trampoline is used for. 
*/ trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *)); @@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = (unsigned long)trampoline + size; - /* The trampoline ends with a jmp to ftrace_return */ - jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return); + /* The trampoline ends with a jmp to ftrace_epilogue */ + jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue); memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE); /* diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2c0f340..1f4422d 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -40,13 +40,8 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); /* Wipe all early page tables except for the kernel symbol map */ static void __init reset_early_page_tables(void) { - unsigned long i; - - for (i = 0; i < PTRS_PER_PGD-1; i++) - early_level4_pgt[i].pgd = 0; - + memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1)); next_early_pgt = 0; - write_cr3(__pa_nodebug(early_level4_pgt)); } @@ -54,7 +49,6 @@ static void __init reset_early_page_tables(void) int __init early_make_pgtable(unsigned long address) { unsigned long physaddr = address - __PAGE_OFFSET; - unsigned long i; pgdval_t pgd, *pgd_p; pudval_t pud, *pud_p; pmdval_t pmd, *pmd_p; @@ -81,8 +75,7 @@ again: } pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; - for (i = 0; i < PTRS_PER_PUD; i++) - pud_p[i] = 0; + memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; } pud_p += pud_index(address); @@ -97,8 +90,7 @@ again: } pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; - for (i = 0; i < PTRS_PER_PMD; i++) - pmd_p[i] = 0; + memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; } pmd = (physaddr & PMD_MASK) + early_pmd_flags; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 6bc9ae2..54cdbd2 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,7 +19,7 @@ #include <asm/setup.h> #include <asm/processor-flags.h> #include <asm/msr-index.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/percpu.h> #include <asm/nops.h> #include <asm/bootparam.h> @@ -389,6 +389,12 @@ default_entry: /* Make changes effective */ wrmsr + /* + * And make sure that all the mappings we set up have NX set from + * the beginning. + */ + orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4) + enable_paging: /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ffdc0e8..22fbf9d 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,7 +38,6 @@ #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) -L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) @@ -76,9 +75,7 @@ startup_64: subq $_text - __START_KERNEL_map, %rbp /* Is the address not 2M aligned? 
*/ - movq %rbp, %rax - andl $~PMD_PAGE_MASK, %eax - testl %eax, %eax + testl $~PMD_PAGE_MASK, %ebp jnz bad_address /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index b8e6ff5..be0ebbb 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -12,6 +12,7 @@ #include <linux/pm.h> #include <linux/io.h> +#include <asm/cpufeature.h> #include <asm/irqdomain.h> #include <asm/fixmap.h> #include <asm/hpet.h> diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 44256a6..ed15cd48 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; -#ifdef CONFIG_DEBUG_RODATA char opc[BREAK_INSTR_SIZE]; -#endif /* CONFIG_DEBUG_RODATA */ bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, @@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) return err; err = probe_kernel_write((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); -#ifdef CONFIG_DEBUG_RODATA if (!err) return err; /* @@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) return -EINVAL; bpt->type = BP_POKE_BREAKPOINT; -#endif /* CONFIG_DEBUG_RODATA */ + return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { -#ifdef CONFIG_DEBUG_RODATA int err; char opc[BREAK_INSTR_SIZE]; @@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) goto knl_write; return err; + knl_write: -#endif /* CONFIG_DEBUG_RODATA */ return probe_kernel_write((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 1deffe6..0f05dee 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -988,7 +988,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * In case the user-specified fault handler returned * zero, try to fix up. */ - if (fixup_exception(regs)) + if (fixup_exception(regs, trapnr)) return 1; /* diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 87e1762..ed48a9f 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -168,12 +168,14 @@ GLOBAL(ftrace_call) restore_mcount_regs /* - * The copied trampoline must call ftrace_return as it + * The copied trampoline must call ftrace_epilogue as it * still may need to call the function graph tracer. + * + * The code up to this label is copied into trampolines so + * think twice before adding any new code or changing the + * layout here. */ -GLOBAL(ftrace_caller_end) - -GLOBAL(ftrace_return) +GLOBAL(ftrace_epilogue) #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call) popfq /* - * As this jmp to ftrace_return can be a short jump + * As this jmp to ftrace_epilogue can be a short jump * it must not be copied into the trampoline. * The trampoline will add the code to jump * to the return. 
*/ GLOBAL(ftrace_regs_caller_end) - jmp ftrace_return + jmp ftrace_epilogue END(ftrace_regs_caller) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 30ca760..97340f2 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -408,7 +408,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.cpuflag = CPU_ENABLED; processor.cpufeature = (boot_cpu_data.x86 << 8) | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.featureflag = boot_cpu_data.x86_capability[0]; + processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; processor.reserved[0] = 0; processor.reserved[1] = 0; for (i = 0; i < 2; i++) { diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 64f9616..7f3550a 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -40,7 +40,7 @@ #include <linux/uaccess.h> #include <linux/gfp.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/msr.h> static struct class *msr_class; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 8a2cdd7..04b132a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -30,6 +30,7 @@ #include <asm/nmi.h> #include <asm/x86_init.h> #include <asm/reboot.h> +#include <asm/cache.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> @@ -69,7 +70,7 @@ struct nmi_stats { static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); -static int ignore_nmis; +static int ignore_nmis __read_mostly; int unknown_nmi_panic; /* diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 14415af..92f7014 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -13,11 +13,11 @@ static int found(u64 start, u64 end, void *data) static __init int register_e820_pmem(void) { - char *pmem = "Persistent Memory (legacy)"; struct platform_device *pdev; int rc; - rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found); + rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY, + IORESOURCE_MEM, 0, -1, NULL, found); if (rc <= 0) return 0; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9f7c21c..2915d54 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { */ .io_bitmap = { [0 ... 
IO_BITMAP_LONGS] = ~0 }, #endif +#ifdef CONFIG_X86_32 + .SYSENTER_stack_canary = STACK_END_MAGIC, +#endif }; EXPORT_PER_CPU_SYMBOL(cpu_tss); @@ -418,9 +421,9 @@ static void mwait_idle(void) if (!current_set_polling_and_test()) { trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { - smp_mb(); /* quirk */ + mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); - smp_mb(); /* quirk */ + mb(); /* quirk */ } __monitor((void *)¤t_thread_info()->flags, 0, 0); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d3d80e6..aa52c10 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -152,21 +152,21 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index cb6282c..548ddf7 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -61,7 +61,38 @@ regs->seg = GET_SEG(seg) | 3; \ } while (0) -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +#ifdef CONFIG_X86_64 +/* + * If regs->ss will cause an IRET fault, change it. Otherwise leave it + * alone. Using this generally makes no sense unless + * user_64bit_mode(regs) would return true. + */ +static void force_valid_ss(struct pt_regs *regs) +{ + u32 ar; + asm volatile ("lar %[old_ss], %[ar]\n\t" + "jz 1f\n\t" /* If invalid: */ + "xorl %[ar], %[ar]\n\t" /* set ar = 0 */ + "1:" + : [ar] "=r" (ar) + : [old_ss] "rm" ((u16)regs->ss)); + + /* + * For a valid 64-bit user context, we need DPL 3, type + * read-write data or read-write exp-down data, and S and P + * set. We can't use VERW because VERW doesn't check the + * P bit. + */ + ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK; + if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) && + ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN)) + regs->ss = __USER_DS; +} +#endif + +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc, + unsigned long uc_flags) { unsigned long buf_val; void __user *buf; @@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ -#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); -#else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); -#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_X86_64 + /* + * Fix up SS if needed for the benefit of old DOSEMU and + * CRIU. 
+ */ + if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && + user_64bit_mode(regs))) + force_valid_ss(regs); +#endif get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_ex(regs->cs, &sc->cs); put_user_ex(0, &sc->gs); put_user_ex(0, &sc->fs); + put_user_ex(regs->ss, &sc->ss); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, return 0; } #else /* !CONFIG_X86_32 */ +static unsigned long frame_uc_flags(struct pt_regs *regs) +{ + unsigned long flags; + + if (cpu_has_xsave) + flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; + else + flags = UC_SIGCONTEXT_SS; + + if (likely(user_64bit_mode(regs))) + flags |= UC_STRICT_RESTORE_SS; + + return flags; +} + static int __setup_rt_frame(int sig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { @@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, put_user_try { /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); save_altstack_ex(&frame->uc.uc_stack, regs->sp); @@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* Set up the CS register to run signal handlers in 64-bit mode, - even if the handler happens to be interrupting 32-bit code. */ + /* + * Set up the CS and SS registers to run signal handlers in + * 64-bit mode, even if the handler happens to be interrupting + * 32-bit or 16-bit code. + * + * SS is subtle. In 64-bit mode, we don't need any particular + * SS descriptor, but we do need SS to be valid. It's possible + * that the old SS is entirely bogus -- this can happen if the + * signal we're trying to deliver is #GP or #SS caused by a bad + * SS value. We also have a compatbility issue here: DOSEMU + * relies on the contents of the SS register indicating the + * SS value at the time of the signal, even though that code in + * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU + * avoids relying on sigreturn to restore SS; instead it uses + * a trampoline.) So we do our best: if the old SS was valid, + * we keep it. Otherwise we replace it. + */ regs->cs = __USER_CS; + if (unlikely(regs->ss != __USER_DS)) + force_valid_ss(regs); + return 0; } #endif /* CONFIG_X86_32 */ @@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, put_user_try { /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); @@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->sc)) + /* + * x86_32 has no uc_flags bits relevant to restore_sigcontext. + * Save a few cycles by skipping the __get_user. 
+ */ + if (restore_sigcontext(regs, &frame->sc, 0)) goto badframe; return regs->ax; @@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; sigset_t set; + unsigned long uc_flags; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; + if (__get_user(uc_flags, &frame->uc.uc_flags)) + goto badframe; set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) @@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { -#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64) +#ifdef CONFIG_X86_64 + if (is_ia32_task()) + return __NR_ia32_restart_syscall; +#endif +#ifdef CONFIG_X86_X32_ABI + return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); +#else return __NR_restart_syscall; -#else /* !CONFIG_X86_32 && CONFIG_X86_64 */ - return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : - __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); -#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */ +#endif } /* @@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_x32 __user *frame; sigset_t set; + unsigned long uc_flags; frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); @@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; + if (__get_user(uc_flags, &frame->uc.uc_flags)) + goto badframe; set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 24d57f7..643dbdc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -97,6 +97,14 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* Logical package management. We might want to allocate that dynamically */ +static int *physical_to_logical_pkg __read_mostly; +static unsigned long *physical_package_map __read_mostly;; +static unsigned long *logical_package_map __read_mostly; +static unsigned int max_physical_pkg_id __read_mostly; +unsigned int __max_logical_packages __read_mostly; +EXPORT_SYMBOL(__max_logical_packages); + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -248,7 +256,98 @@ static void notrace start_secondary(void *unused) x86_cpuinit.setup_percpu_clockev(); wmb(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + +int topology_update_package_map(unsigned int apicid, unsigned int cpu) +{ + unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + /* Called from early boot ? 
*/ + if (!physical_package_map) + return 0; + + if (pkg >= max_physical_pkg_id) + return -EINVAL; + + /* Set the logical package id */ + if (test_and_set_bit(pkg, physical_package_map)) + goto found; + + if (pkg < __max_logical_packages) { + set_bit(pkg, logical_package_map); + physical_to_logical_pkg[pkg] = pkg; + goto found; + } + new = find_first_zero_bit(logical_package_map, __max_logical_packages); + if (new >= __max_logical_packages) { + physical_to_logical_pkg[pkg] = -1; + pr_warn("APIC(%x) Package %u exceeds logical package map\n", + apicid, pkg); + return -ENOSPC; + } + set_bit(new, logical_package_map); + pr_info("APIC(%x) Converting physical %u to logical package %u\n", + apicid, pkg, new); + physical_to_logical_pkg[pkg] = new; + +found: + cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; + return 0; +} + +/** + * topology_phys_to_logical_pkg - Map a physical package id to a logical + * + * Returns logical package id or -1 if not found + */ +int topology_phys_to_logical_pkg(unsigned int phys_pkg) +{ + if (phys_pkg >= max_physical_pkg_id) + return -1; + return physical_to_logical_pkg[phys_pkg]; +} +EXPORT_SYMBOL(topology_phys_to_logical_pkg); + +static void __init smp_init_package_map(void) +{ + unsigned int ncpus, cpu; + size_t size; + + /* + * Today neither Intel nor AMD support heterogenous systems. That + * might change in the future.... + */ + ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings; + __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + + /* + * Possibly larger than what we need as the number of apic ids per + * package can be smaller than the actual used apic ids. + */ + max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); + size = max_physical_pkg_id * sizeof(unsigned int); + physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); + memset(physical_to_logical_pkg, 0xff, size); + size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); + physical_package_map = kzalloc(size, GFP_KERNEL); + size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long); + logical_package_map = kzalloc(size, GFP_KERNEL); + + pr_info("Max logical packages: %u\n", __max_logical_packages); + + for_each_present_cpu(cpu) { + unsigned int apicid = apic->cpu_present_to_apicid(cpu); + + if (apicid == BAD_APICID || !apic->apic_id_valid(apicid)) + continue; + if (!topology_update_package_map(apicid, cpu)) + continue; + pr_warn("CPU %u APICId %x disabled\n", cpu, apicid); + per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID; + set_cpu_possible(cpu, false); + set_cpu_present(cpu, false); + } } void __init smp_store_boot_cpu_info(void) @@ -258,6 +357,7 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; + smp_init_package_map(); } /* diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 91a4496..e72a07f 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -135,7 +135,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, pmd = pmd_alloc(&tboot_mm, pud, vaddr); if (!pmd) return -1; - pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr); + pte = pte_alloc_map(&tboot_mm, pmd, vaddr); if (!pte) return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 3f92ce0..27538f1 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -142,7 +142,6 @@ static int test_NX(void) * by the error message */ -#ifdef CONFIG_DEBUG_RODATA /* Test 3: Check if the .rodata section is executable */ if 
(rodata_test_data != 0xC3) { printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); @@ -151,7 +150,6 @@ static int test_NX(void) printk(KERN_ERR "test_nx: .rodata section is executable\n"); ret = -ENODEV; } -#endif #if 0 /* Test 4: Check if the .data section of a module is executable */ diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index 5ecbfe5..cb4a01b 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c @@ -76,5 +76,5 @@ int rodata_test(void) } MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure"); +MODULE_DESCRIPTION("Testcase for marking rodata as read-only"); MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ade185a..06cbe25 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss; DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); -static inline void conditional_sti(struct pt_regs *regs) +static inline void cond_local_irq_enable(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } -static inline void preempt_conditional_sti(struct pt_regs *regs) -{ - preempt_count_inc(); - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void conditional_cli(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_disable(); -} - -static inline void preempt_conditional_cli(struct pt_regs *regs) +static inline void cond_local_irq_disable(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - preempt_count_dec(); } void ist_enter(struct pt_regs *regs) @@ -199,7 +185,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, } if (!user_mode(regs)) { - if (!fixup_exception(regs)) { + if (!fixup_exception(regs, trapnr)) { tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); @@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; -#ifdef CONFIG_X86_64 if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", @@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, print_vma_addr(" in ", regs->ip); pr_cont("\n"); } -#endif force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); } @@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { - conditional_sti(regs); + cond_local_irq_enable(regs); do_trap(trapnr, signr, str, regs, error_code, fill_trap_info(regs, signr, trapnr, &info)); } @@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) if (notify_die(DIE_TRAP, "bounds", regs, error_code, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) return; - conditional_sti(regs); + cond_local_irq_enable(regs); if (!user_mode(regs)) die("bounds", regs, error_code); @@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code) struct task_struct *tsk; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - conditional_sti(regs); + cond_local_irq_enable(regs); if (v8086_mode(regs)) { local_irq_enable(); @@ -453,7 +437,7 @@ do_general_protection(struct pt_regs *regs, long error_code) tsk = current; if (!user_mode(regs)) { - if 
(fixup_exception(regs)) + if (fixup_exception(regs, X86_TRAP_GP)) return; tsk->thread.error_code = error_code; @@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) * as we may switch to the interrupt stack. */ debug_stack_usage_inc(); - preempt_conditional_sti(regs); + preempt_disable(); + cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); exit: ist_exit(regs); @@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) NOKPROBE_SYMBOL(fixup_bad_iret); #endif +static bool is_sysenter_singlestep(struct pt_regs *regs) +{ + /* + * We don't try for precision here. If we're anywhere in the region of + * code that can be single-stepped in the SYSENTER entry path, then + * assume that this is a useless single-step trap due to SYSENTER + * being invoked with TF set. (We don't know in advance exactly + * which instructions will be hit because BTF could plausibly + * be set.) + */ +#ifdef CONFIG_X86_32 + return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) < + (unsigned long)__end_SYSENTER_singlestep_region - + (unsigned long)__begin_SYSENTER_singlestep_region; +#elif defined(CONFIG_IA32_EMULATION) + return (regs->ip - (unsigned long)entry_SYSENTER_compat) < + (unsigned long)__end_entry_SYSENTER_compat - + (unsigned long)entry_SYSENTER_compat; +#else + return false; +#endif +} + /* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. Therefore @@ -605,11 +614,42 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) ist_enter(regs); get_debugreg(dr6, 6); + /* + * The Intel SDM says: + * + * Certain debug exceptions may clear bits 0-3. The remaining + * contents of the DR6 register are never cleared by the + * processor. To avoid confusion in identifying debug + * exceptions, debug handlers should clear the register before + * returning to the interrupted task. + * + * Keep it simple: clear DR6 immediately. + */ + set_debugreg(0, 6); /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." Clear TIF_BLOCKSTEP to keep + * TIF_BLOCKSTEP in sync with the hardware BTF flag. + */ + clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); + + if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) && + is_sysenter_singlestep(regs))) { + dr6 &= ~DR_STEP; + if (!dr6) + goto exit; + /* + * else we might have gotten a single-step trap and hit a + * watchpoint at the same time, in which case we should fall + * through and handle the watchpoint. + */ + } + + /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. @@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if (!dr6 && user_mode(regs)) user_icebp = 1; - /* Catch kmemcheck conditions first of all! */ + /* Catch kmemcheck conditions! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) goto exit; - /* DR6 may or may not be cleared by the CPU */ - set_debugreg(0, 6); - - /* - * The processor cleared BTF, so don't mark that we need it set. 
- */ - clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); - /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; @@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_inc(); /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); + preempt_disable(); + cond_local_irq_enable(regs); if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); goto exit; } - /* - * Single-stepping through system calls: ignore any exceptions in - * kernel space, but re-enable TF when returning to user mode. - * - * We already checked v86 mode above, so we can check for kernel mode - * by just checking the CPL of CS. - */ - if ((dr6 & DR_STEP) && !user_mode(regs)) { + if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { + /* + * Historical junk that used to handle SYSENTER single-stepping. + * This should be unreachable now. If we survive for a while + * without anyone hitting this warning, we'll turn this into + * an oops. + */ tsk->thread.debugreg6 &= ~DR_STEP; set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; @@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); exit: +#if defined(CONFIG_X86_32) + /* + * This is the most likely code path that involves non-trivial use + * of the SYSENTER stack. Check that we haven't overrun it. 
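The CONFIG_X86_32 hunk above warns when cpu_tss.SYSENTER_stack_canary no longer holds STACK_END_MAGIC. A tiny userspace sketch of the same end-of-stack canary trick; the struct is an invented miniature, not the real TSS layout.

#include <stdio.h>

#define STACK_END_MAGIC 0x57AC6E9DUL    /* the kernel's end-of-stack magic */
#define STACK_WORDS 64

/* Hypothetical miniature "SYSENTER stack": the canary sits at the end
 * that an overrun would clobber first. */
struct tiny_stack {
        unsigned long canary;
        unsigned long slots[STACK_WORDS];
};

static void check(const struct tiny_stack *s)
{
        if (s->canary != STACK_END_MAGIC)
                fprintf(stderr, "Overran or corrupted SYSENTER stack\n");
}

int main(void)
{
        struct tiny_stack s = { .canary = STACK_END_MAGIC };

        check(&s);          /* silent: canary intact                     */
        s.canary = 0;       /* simulate an overrun clobbering the canary */
        check(&s);          /* prints the warning                        */
        return 0;
}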
+ */ + WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, + "Overran or corrupted SYSENTER stack\n"); +#endif ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); @@ -696,10 +738,10 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) return; - conditional_sti(regs); + cond_local_irq_enable(regs); if (!user_mode(regs)) { - if (!fixup_exception(regs)) { + if (!fixup_exception(regs, trapnr)) { task->thread.error_code = error_code; task->thread.trap_nr = trapnr; die(str, regs, error_code); @@ -743,20 +785,19 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { - conditional_sti(regs); + cond_local_irq_enable(regs); } dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION - if (read_cr0() & X86_CR0_EM) { + if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) { struct math_emu_info info = { }; - conditional_sti(regs); + cond_local_irq_enable(regs); info.regs = regs; math_emulate(&info); @@ -765,7 +806,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif fpu__restore(¤t->thread.fpu); /* interrupts still off */ #ifdef CONFIG_X86_32 - conditional_sti(regs); + cond_local_irq_enable(regs); #endif } NOKPROBE_SYMBOL(do_device_not_available); @@ -868,7 +909,7 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); + set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3d743da..5638044 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -43,6 +43,11 @@ static DEFINE_STATIC_KEY_FALSE(__use_tsc); int tsc_clocksource_reliable; +static u32 art_to_tsc_numerator; +static u32 art_to_tsc_denominator; +static u64 art_to_tsc_offset; +struct clocksource *art_related_clocksource; + /* * Use a ring-buffer like data structure, where a writer advances the head by * writing a new data entry and a reader advances the tail when it observes a @@ -964,6 +969,37 @@ core_initcall(cpufreq_tsc); #endif /* CONFIG_CPU_FREQ */ +#define ART_CPUID_LEAF (0x15) +#define ART_MIN_DENOMINATOR (1) + + +/* + * If ART is present detect the numerator:denominator to convert to TSC + */ +static void detect_art(void) +{ + unsigned int unused[2]; + + if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) + return; + + cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, + &art_to_tsc_numerator, unused, unused+1); + + /* Don't enable ART in a VM, non-stop TSC required */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || + !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || + art_to_tsc_denominator < ART_MIN_DENOMINATOR) + return; + + if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset)) + return; + + /* Make this sticky over multiple CPU init calls */ + setup_force_cpu_cap(X86_FEATURE_ART); +} + + /* clocksource code */ static struct clocksource clocksource_tsc; @@ -1071,6 +1107,25 @@ int unsynchronized_tsc(void) return 0; } +/* + * Convert ART to TSC given numerator/denominator found in detect_art() + */ +struct system_counterval_t convert_art_to_tsc(cycle_t art) +{ + u64 tmp, res, rem; + + rem = do_div(art, art_to_tsc_denominator); + + res = art * art_to_tsc_numerator; + tmp 
= rem * art_to_tsc_numerator; + + do_div(tmp, art_to_tsc_denominator); + res += tmp + art_to_tsc_offset; + + return (struct system_counterval_t) {.cs = art_related_clocksource, + .cycles = res}; +} +EXPORT_SYMBOL(convert_art_to_tsc); static void tsc_refine_calibration_work(struct work_struct *work); static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); @@ -1142,6 +1197,8 @@ static void tsc_refine_calibration_work(struct work_struct *work) (unsigned long)tsc_khz % 1000); out: + if (boot_cpu_has(X86_FEATURE_ART)) + art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); } @@ -1235,6 +1292,8 @@ void __init tsc_init(void) mark_tsc_unstable("TSCs unsynchronized"); check_system_tsc_reliable(); + + detect_art(); } #ifdef CONFIG_SMP @@ -1246,14 +1305,14 @@ void __init tsc_init(void) */ unsigned long calibrate_delay_is_known(void) { - int i, cpu = smp_processor_id(); + int sibling, cpu = smp_processor_id(); if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) return 0; - for_each_online_cpu(i) - if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) - return cpu_data(i).loops_per_jiffy; + sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu); + if (sibling < nr_cpu_ids) + return cpu_data(sibling).loops_per_jiffy; return 0; } #endif diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 07efb35..014ea59 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -30,7 +30,7 @@ * appropriately. Either display a message or halt. */ -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/msr-index.h> verify_cpu: diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e574b85..3dce1ca 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) /* make room for real-mode segments */ tsk->thread.sp0 += 16; - if (static_cpu_has_safe(X86_FEATURE_SEP)) + if (static_cpu_has(X86_FEATURE_SEP)) tsk->thread.sysenter_cs = 0; load_sp0(tss, &tsk->thread); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 74e4bf1..5af9958 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -41,29 +41,28 @@ ENTRY(phys_startup_64) jiffies_64 = jiffies; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* - * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA - * we retain large page mappings for boundaries spanning kernel text, rodata - * and data sections. + * On 64-bit, align RODATA to 2MB so we retain large page mappings for + * boundaries spanning kernel text, rodata and data sections. * * However, kernel identity mappings will have different RWX permissions * to the pages mapping to text and to the pages padding (which are freed) the * text section. Hence kernel identity mappings will be broken to smaller * pages. For 64-bit, kernel text and kernel identity mappings are different, - * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, - * as well as retain 2MB large page mappings for kernel text. + * so we can enable protection checks as well as retain 2MB large page + * mappings for kernel text. */ -#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); +#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); -#define X64_ALIGN_DEBUG_RODATA_END \ +#define X64_ALIGN_RODATA_END \ . 
= ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; #else -#define X64_ALIGN_DEBUG_RODATA_BEGIN -#define X64_ALIGN_DEBUG_RODATA_END +#define X64_ALIGN_RODATA_BEGIN +#define X64_ALIGN_RODATA_END #endif @@ -112,13 +111,11 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 -#if defined(CONFIG_DEBUG_RODATA) /* .text should occupy whole number of pages */ . = ALIGN(PAGE_SIZE); -#endif - X64_ALIGN_DEBUG_RODATA_BEGIN + X64_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) - X64_ALIGN_DEBUG_RODATA_END + X64_ALIGN_RODATA_END /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { @@ -195,6 +192,17 @@ SECTIONS :init #endif + /* + * Section for code used exclusively before alternatives are run. All + * references to such code must be patched out by alternatives, normally + * by using X86_FEATURE_ALWAYS CPU feature bit. + * + * See static_cpu_has() for an example. + */ + .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) { + *(.altinstr_aux) + } + INIT_DATA_SECTION(16) .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a0695be..cd05942 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -37,6 +37,8 @@ EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(_copy_to_user); +EXPORT_SYMBOL_GPL(memcpy_mcsafe); + EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a1ff508..464fa47 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -13,9 +13,10 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ - hyperv.o + hyperv.o page_track.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o + kvm-intel-y += vmx.o pmu_intel.o kvm-amd-y += svm.o pmu_amd.o diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index 9dc091a..308b859 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -51,11 +51,9 @@ struct kvm_assigned_dev_kernel { static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) { - struct list_head *ptr; struct kvm_assigned_dev_kernel *match; - list_for_each(ptr, head) { - match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + list_for_each_entry(match, head, list) { if (match->assigned_dev_id == assigned_dev_id) return match; } @@ -373,14 +371,10 @@ static void kvm_free_assigned_device(struct kvm *kvm, void kvm_free_all_assigned_devices(struct kvm *kvm) { - struct list_head *ptr, *ptr2; - struct kvm_assigned_dev_kernel *assigned_dev; - - list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { - assigned_dev = list_entry(ptr, - struct kvm_assigned_dev_kernel, - list); + struct kvm_assigned_dev_kernel *assigned_dev, *tmp; + list_for_each_entry_safe(assigned_dev, tmp, + &kvm->arch.assigned_dev_head, list) { kvm_free_assigned_device(kvm, assigned_dev); } } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6525e92..0029644 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -46,11 +46,18 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) return ret; } +bool kvm_mpx_supported(void) +{ + return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) + && kvm_x86_ops->mpx_supported()); +} +EXPORT_SYMBOL_GPL(kvm_mpx_supported); + u64 kvm_supported_xcr0(void) { u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; - if 
(!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); return xcr0; @@ -97,8 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); - if (vcpu->arch.eager_fpu) + if (use_eager_fpu()) kvm_x86_ops->fpu_activate(vcpu); /* @@ -295,7 +301,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; - unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; + unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; /* cpuid 1.edx */ diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c8eda14..66a6581 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -5,6 +5,7 @@ #include <asm/cpu.h> int kvm_update_cpuid(struct kvm_vcpu *vcpu); +bool kvm_mpx_supported(void); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, @@ -135,14 +136,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_RTM)); } -static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_MPX)); -} - static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index c58ba67..5ff3485 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1043,6 +1043,27 @@ bool kvm_hv_hypercall_enabled(struct kvm *kvm) return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; } +static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) +{ + bool longmode; + + longmode = is_64_bit_mode(vcpu); + if (longmode) + kvm_register_write(vcpu, VCPU_REGS_RAX, result); + else { + kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32); + kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff); + } +} + +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); + return 1; +} + int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { u64 param, ingpa, outgpa, ret; @@ -1055,7 +1076,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) */ if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { kvm_queue_exception(vcpu, UD_VECTOR); - return 0; + return 1; } longmode = is_64_bit_mode(vcpu); @@ -1083,22 +1104,33 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); + /* Hypercall continuation is not supported yet */ + if (rep_cnt || rep_idx) { + res = HV_STATUS_INVALID_HYPERCALL_CODE; + goto set_result; + } + switch (code) { - case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: + case HVCALL_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu); break; + case HVCALL_POST_MESSAGE: + case HVCALL_SIGNAL_EVENT: + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; + vcpu->run->hyperv.u.hcall.input = param; + vcpu->run->hyperv.u.hcall.params[0] = ingpa; + vcpu->run->hyperv.u.hcall.params[1] 
= outgpa; + vcpu->arch.complete_userspace_io = + kvm_hv_hypercall_complete_userspace; + return 0; default: res = HV_STATUS_INVALID_HYPERCALL_CODE; break; } +set_result: ret = res | (((u64)rep_done & 0xfff) << 32); - if (longmode) { - kvm_register_write(vcpu, VCPU_REGS_RAX, ret); - } else { - kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); - kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); - } - + kvm_hv_hypercall_set_result(vcpu, ret); return 1; } diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b0ea42b..a4bf5b4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -51,32 +51,9 @@ #define RW_STATE_WORD0 3 #define RW_STATE_WORD1 4 -/* Compute with 96 bit intermediate result: (a*b)/c */ -static u64 muldiv64(u64 a, u32 b, u32 c) +static void pit_set_gate(struct kvm_pit *pit, int channel, u32 val) { - union { - u64 ll; - struct { - u32 low, high; - } l; - } u, res; - u64 rl, rh; - - u.ll = a; - rl = (u64)u.l.low * (u64)b; - rh = (u64)u.l.high * (u64)b; - rh += (rl >> 32); - res.l.high = div64_u64(rh, c); - res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c); - return res.ll; -} - -static void pit_set_gate(struct kvm *kvm, int channel, u32 val) -{ - struct kvm_kpit_channel_state *c = - &kvm->arch.vpit->pit_state.channels[channel]; - - WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; switch (c->mode) { default: @@ -97,18 +74,16 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val) c->gate = val; } -static int pit_get_gate(struct kvm *kvm, int channel) +static int pit_get_gate(struct kvm_pit *pit, int channel) { - WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); - - return kvm->arch.vpit->pit_state.channels[channel].gate; + return pit->pit_state.channels[channel].gate; } -static s64 __kpit_elapsed(struct kvm *kvm) +static s64 __kpit_elapsed(struct kvm_pit *pit) { s64 elapsed; ktime_t remaining; - struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; + struct kvm_kpit_state *ps = &pit->pit_state; if (!ps->period) return 0; @@ -128,26 +103,23 @@ static s64 __kpit_elapsed(struct kvm *kvm) return elapsed; } -static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c, +static s64 kpit_elapsed(struct kvm_pit *pit, struct kvm_kpit_channel_state *c, int channel) { if (channel == 0) - return __kpit_elapsed(kvm); + return __kpit_elapsed(pit); return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); } -static int pit_get_count(struct kvm *kvm, int channel) +static int pit_get_count(struct kvm_pit *pit, int channel) { - struct kvm_kpit_channel_state *c = - &kvm->arch.vpit->pit_state.channels[channel]; + struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; s64 d, t; int counter; - WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); - - t = kpit_elapsed(kvm, c, channel); - d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); + t = kpit_elapsed(pit, c, channel); + d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC); switch (c->mode) { case 0: @@ -167,17 +139,14 @@ static int pit_get_count(struct kvm *kvm, int channel) return counter; } -static int pit_get_out(struct kvm *kvm, int channel) +static int pit_get_out(struct kvm_pit *pit, int channel) { - struct kvm_kpit_channel_state *c = - &kvm->arch.vpit->pit_state.channels[channel]; + struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; s64 d, t; int out; - WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); - - t = kpit_elapsed(kvm, c, 
channel); - d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); + t = kpit_elapsed(pit, c, channel); + d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC); switch (c->mode) { default: @@ -202,29 +171,23 @@ static int pit_get_out(struct kvm *kvm, int channel) return out; } -static void pit_latch_count(struct kvm *kvm, int channel) +static void pit_latch_count(struct kvm_pit *pit, int channel) { - struct kvm_kpit_channel_state *c = - &kvm->arch.vpit->pit_state.channels[channel]; - - WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; if (!c->count_latched) { - c->latched_count = pit_get_count(kvm, channel); + c->latched_count = pit_get_count(pit, channel); c->count_latched = c->rw_mode; } } -static void pit_latch_status(struct kvm *kvm, int channel) +static void pit_latch_status(struct kvm_pit *pit, int channel) { - struct kvm_kpit_channel_state *c = - &kvm->arch.vpit->pit_state.channels[channel]; - - WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; if (!c->status_latched) { /* TODO: Return NULL COUNT (bit 6). */ - c->status = ((pit_get_out(kvm, channel) << 7) | + c->status = ((pit_get_out(pit, channel) << 7) | (c->rw_mode << 4) | (c->mode << 1) | c->bcd); @@ -232,26 +195,24 @@ static void pit_latch_status(struct kvm *kvm, int channel) } } +static inline struct kvm_pit *pit_state_to_pit(struct kvm_kpit_state *ps) +{ + return container_of(ps, struct kvm_pit, pit_state); +} + static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) { struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, irq_ack_notifier); - int value; - - spin_lock(&ps->inject_lock); - value = atomic_dec_return(&ps->pending); - if (value < 0) - /* spurious acks can be generated if, for example, the - * PIC is being reset. Handle it gracefully here - */ - atomic_inc(&ps->pending); - else if (value > 0) - /* in this case, we had multiple outstanding pit interrupts - * that we needed to inject. Reinject - */ - queue_kthread_work(&ps->pit->worker, &ps->pit->expired); - ps->irq_ack = 1; - spin_unlock(&ps->inject_lock); + struct kvm_pit *pit = pit_state_to_pit(ps); + + atomic_set(&ps->irq_ack, 1); + /* irq_ack should be set before pending is read. Order accesses with + * inc(pending) in pit_timer_fn and xchg(irq_ack, 0) in pit_do_work. + */ + smp_mb(); + if (atomic_dec_if_positive(&ps->pending) > 0) + queue_kthread_work(&pit->worker, &pit->expired); } void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) @@ -282,45 +243,36 @@ static void pit_do_work(struct kthread_work *work) struct kvm_vcpu *vcpu; int i; struct kvm_kpit_state *ps = &pit->pit_state; - int inject = 0; - /* Try to inject pending interrupts when - * last one has been acked. + if (atomic_read(&ps->reinject) && !atomic_xchg(&ps->irq_ack, 0)) + return; + + kvm_set_irq(kvm, pit->irq_source_id, 0, 1, false); + kvm_set_irq(kvm, pit->irq_source_id, 0, 0, false); + + /* + * Provides NMI watchdog support via Virtual Wire mode. + * The route is: PIT -> LVT0 in NMI mode. + * + * Note: Our Virtual Wire implementation does not follow + * the MP specification. We propagate a PIT interrupt to all + * VCPUs and only when LVT0 is in NMI mode. The interrupt can + * also be simultaneously delivered through PIC and IOAPIC. 
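The reworked kvm_pit_ack_irq()/pit_do_work()/pit_timer_fn() trio in this hunk drops the old inject_lock and relies on ordering between two atomics instead: the ack side publishes irq_ack before reading pending, and the inject side bumps pending before consuming irq_ack. A compressed C11 sketch of that protocol, with invented function names and atomic_dec_if_positive() semantics matching the kernel's (returns the decremented value, never stores a negative one):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int pending = 0;   /* timer ticks not yet injected         */
static atomic_int irq_ack = 1;   /* 1 once the guest acked the last tick */

static int dec_if_positive(atomic_int *v)
{
        int old = atomic_load(v);

        while (old > 0 && !atomic_compare_exchange_weak(v, &old, old - 1))
                ;
        return old - 1;          /* may be negative if nothing was pending */
}

/* Timer side (cf. pit_timer_fn): record the tick, then kick the worker. */
static void timer_tick(void)
{
        atomic_fetch_add(&pending, 1);
}

/* Worker side (cf. pit_do_work with reinject on): inject only if the
 * previous tick was acked; otherwise the ack path re-queues the worker. */
static bool may_inject(void)
{
        return atomic_exchange(&irq_ack, 0) != 0;
}

/* Ack side (cf. kvm_pit_ack_irq): publish the ack *before* looking at
 * pending.  The kernel needs the explicit smp_mb() because its
 * atomic_set()/atomic_read() are relaxed; C11 seq_cst operations already
 * give this ordering, the fence just mirrors the kernel's barrier. */
static bool ack_and_check(void)
{
        atomic_store(&irq_ack, 1);
        atomic_thread_fence(memory_order_seq_cst);
        return dec_if_positive(&pending) > 0;  /* true: re-queue the worker */
}

int main(void)
{
        timer_tick();                              /* guest PIT tick arrives */
        printf("inject:  %d\n", may_inject());     /* 1: last tick was acked */
        printf("requeue: %d\n", ack_and_check());  /* 0: nothing extra queued */
        return 0;
}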
*/ - spin_lock(&ps->inject_lock); - if (ps->irq_ack) { - ps->irq_ack = 0; - inject = 1; - } - spin_unlock(&ps->inject_lock); - if (inject) { - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false); - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false); - - /* - * Provides NMI watchdog support via Virtual Wire mode. - * The route is: PIT -> PIC -> LVT0 in NMI mode. - * - * Note: Our Virtual Wire implementation is simplified, only - * propagating PIT interrupts to all VCPUs when they have set - * LVT0 to NMI delivery. Other PIC interrupts are just sent to - * VCPU0, and only if its LVT0 is in EXTINT mode. - */ - if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_apic_nmi_wd_deliver(vcpu); - } + if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_apic_nmi_wd_deliver(vcpu); } static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) { struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer); - struct kvm_pit *pt = ps->kvm->arch.vpit; + struct kvm_pit *pt = pit_state_to_pit(ps); - if (ps->reinject || !atomic_read(&ps->pending)) { + if (atomic_read(&ps->reinject)) atomic_inc(&ps->pending); - queue_kthread_work(&pt->worker, &pt->expired); - } + + queue_kthread_work(&pt->worker, &pt->expired); if (ps->is_periodic) { hrtimer_add_expires_ns(&ps->timer, ps->period); @@ -329,30 +281,54 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) return HRTIMER_NORESTART; } -static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) +static inline void kvm_pit_reset_reinject(struct kvm_pit *pit) { - struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; + atomic_set(&pit->pit_state.pending, 0); + atomic_set(&pit->pit_state.irq_ack, 1); +} + +void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject) +{ + struct kvm_kpit_state *ps = &pit->pit_state; + struct kvm *kvm = pit->kvm; + + if (atomic_read(&ps->reinject) == reinject) + return; + + if (reinject) { + /* The initial state is preserved while ps->reinject == 0. 
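pit_get_count()/pit_get_out() above and create_pit_timer() just below now convert between PIT ticks and nanoseconds with mul_u64_u32_div() rather than the open-coded 96-bit muldiv64(). A standalone sketch of the same (a * mul) / div computation, assuming a compiler that provides unsigned __int128, with one worked value at the i8254's roughly 1.19318 MHz input clock:

#include <stdint.h>
#include <stdio.h>

#define PIT_FREQ     1193182u       /* approximate i8254 input clock, Hz */
#define NSEC_PER_SEC 1000000000u

/* Same contract as the kernel's mul_u64_u32_div(a, mul, div):
 * (a * mul) / div with a wide intermediate so nothing overflows. */
static uint64_t mul_u64_u32_div(uint64_t a, uint32_t mul, uint32_t div)
{
        return (uint64_t)(((unsigned __int128)a * mul) / div);
}

int main(void)
{
        /* A full 16-bit reload value of 65536 ticks is the classic
         * 18.2 Hz timer period. */
        uint64_t ns = mul_u64_u32_div(65536, NSEC_PER_SEC, PIT_FREQ);

        printf("65536 PIT ticks = %llu ns (~54.9 ms)\n",
               (unsigned long long)ns);
        return 0;
}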
*/ + kvm_pit_reset_reinject(pit); + kvm_register_irq_ack_notifier(kvm, &ps->irq_ack_notifier); + kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); + } else { + kvm_unregister_irq_ack_notifier(kvm, &ps->irq_ack_notifier); + kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); + } + + atomic_set(&ps->reinject, reinject); +} + +static void create_pit_timer(struct kvm_pit *pit, u32 val, int is_period) +{ + struct kvm_kpit_state *ps = &pit->pit_state; + struct kvm *kvm = pit->kvm; s64 interval; if (!ioapic_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) return; - interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); + interval = mul_u64_u32_div(val, NSEC_PER_SEC, KVM_PIT_FREQ); pr_debug("create pit timer, interval is %llu nsec\n", interval); /* TODO The new value only affected after the retriggered */ hrtimer_cancel(&ps->timer); - flush_kthread_work(&ps->pit->expired); + flush_kthread_work(&pit->expired); ps->period = interval; ps->is_periodic = is_period; - ps->timer.function = pit_timer_fn; - ps->kvm = ps->pit->kvm; - - atomic_set(&ps->pending, 0); - ps->irq_ack = 1; + kvm_pit_reset_reinject(pit); /* * Do not allow the guest to program periodic timers with small @@ -375,11 +351,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) HRTIMER_MODE_ABS); } -static void pit_load_count(struct kvm *kvm, int channel, u32 val) +static void pit_load_count(struct kvm_pit *pit, int channel, u32 val) { - struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; - - WARN_ON(!mutex_is_locked(&ps->lock)); + struct kvm_kpit_state *ps = &pit->pit_state; pr_debug("load_count val is %d, channel is %d\n", val, channel); @@ -404,29 +378,33 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) case 1: /* FIXME: enhance mode 4 precision */ case 4: - create_pit_timer(kvm, val, 0); + create_pit_timer(pit, val, 0); break; case 2: case 3: - create_pit_timer(kvm, val, 1); + create_pit_timer(pit, val, 1); break; default: - destroy_pit_timer(kvm->arch.vpit); + destroy_pit_timer(pit); } } -void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start) +void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val, + int hpet_legacy_start) { u8 saved_mode; + + WARN_ON_ONCE(!mutex_is_locked(&pit->pit_state.lock)); + if (hpet_legacy_start) { /* save existing mode for later reenablement */ WARN_ON(channel != 0); - saved_mode = kvm->arch.vpit->pit_state.channels[0].mode; - kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */ - pit_load_count(kvm, channel, val); - kvm->arch.vpit->pit_state.channels[0].mode = saved_mode; + saved_mode = pit->pit_state.channels[0].mode; + pit->pit_state.channels[0].mode = 0xff; /* disable timer */ + pit_load_count(pit, channel, val); + pit->pit_state.channels[0].mode = saved_mode; } else { - pit_load_count(kvm, channel, val); + pit_load_count(pit, channel, val); } } @@ -452,7 +430,6 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu, { struct kvm_pit *pit = dev_to_pit(this); struct kvm_kpit_state *pit_state = &pit->pit_state; - struct kvm *kvm = pit->kvm; int channel, access; struct kvm_kpit_channel_state *s; u32 val = *(u32 *) data; @@ -476,9 +453,9 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu, s = &pit_state->channels[channel]; if (val & (2 << channel)) { if (!(val & 0x20)) - pit_latch_count(kvm, channel); + pit_latch_count(pit, channel); if (!(val & 0x10)) - pit_latch_status(kvm, channel); + pit_latch_status(pit, channel); } } } else { @@ -486,7 +463,7 @@ static int 
pit_ioport_write(struct kvm_vcpu *vcpu, s = &pit_state->channels[channel]; access = (val >> 4) & KVM_PIT_CHANNEL_MASK; if (access == 0) { - pit_latch_count(kvm, channel); + pit_latch_count(pit, channel); } else { s->rw_mode = access; s->read_state = access; @@ -503,17 +480,17 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu, switch (s->write_state) { default: case RW_STATE_LSB: - pit_load_count(kvm, addr, val); + pit_load_count(pit, addr, val); break; case RW_STATE_MSB: - pit_load_count(kvm, addr, val << 8); + pit_load_count(pit, addr, val << 8); break; case RW_STATE_WORD0: s->write_latch = val; s->write_state = RW_STATE_WORD1; break; case RW_STATE_WORD1: - pit_load_count(kvm, addr, s->write_latch | (val << 8)); + pit_load_count(pit, addr, s->write_latch | (val << 8)); s->write_state = RW_STATE_WORD0; break; } @@ -529,7 +506,6 @@ static int pit_ioport_read(struct kvm_vcpu *vcpu, { struct kvm_pit *pit = dev_to_pit(this); struct kvm_kpit_state *pit_state = &pit->pit_state; - struct kvm *kvm = pit->kvm; int ret, count; struct kvm_kpit_channel_state *s; if (!pit_in_range(addr)) @@ -566,20 +542,20 @@ static int pit_ioport_read(struct kvm_vcpu *vcpu, switch (s->read_state) { default: case RW_STATE_LSB: - count = pit_get_count(kvm, addr); + count = pit_get_count(pit, addr); ret = count & 0xff; break; case RW_STATE_MSB: - count = pit_get_count(kvm, addr); + count = pit_get_count(pit, addr); ret = (count >> 8) & 0xff; break; case RW_STATE_WORD0: - count = pit_get_count(kvm, addr); + count = pit_get_count(pit, addr); ret = count & 0xff; s->read_state = RW_STATE_WORD1; break; case RW_STATE_WORD1: - count = pit_get_count(kvm, addr); + count = pit_get_count(pit, addr); ret = (count >> 8) & 0xff; s->read_state = RW_STATE_WORD0; break; @@ -600,14 +576,13 @@ static int speaker_ioport_write(struct kvm_vcpu *vcpu, { struct kvm_pit *pit = speaker_to_pit(this); struct kvm_kpit_state *pit_state = &pit->pit_state; - struct kvm *kvm = pit->kvm; u32 val = *(u32 *) data; if (addr != KVM_SPEAKER_BASE_ADDRESS) return -EOPNOTSUPP; mutex_lock(&pit_state->lock); pit_state->speaker_data_on = (val >> 1) & 1; - pit_set_gate(kvm, 2, val & 1); + pit_set_gate(pit, 2, val & 1); mutex_unlock(&pit_state->lock); return 0; } @@ -618,7 +593,6 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu, { struct kvm_pit *pit = speaker_to_pit(this); struct kvm_kpit_state *pit_state = &pit->pit_state; - struct kvm *kvm = pit->kvm; unsigned int refresh_clock; int ret; if (addr != KVM_SPEAKER_BASE_ADDRESS) @@ -628,8 +602,8 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu, refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1; mutex_lock(&pit_state->lock); - ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) | - (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4)); + ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(pit, 2) | + (pit_get_out(pit, 2) << 5) | (refresh_clock << 4)); if (len > sizeof(ret)) len = sizeof(ret); memcpy(data, (char *)&ret, len); @@ -637,33 +611,28 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu, return 0; } -void kvm_pit_reset(struct kvm_pit *pit) +static void kvm_pit_reset(struct kvm_pit *pit) { int i; struct kvm_kpit_channel_state *c; - mutex_lock(&pit->pit_state.lock); pit->pit_state.flags = 0; for (i = 0; i < 3; i++) { c = &pit->pit_state.channels[i]; c->mode = 0xff; c->gate = (i != 2); - pit_load_count(pit->kvm, i, 0); + pit_load_count(pit, i, 0); } - mutex_unlock(&pit->pit_state.lock); - atomic_set(&pit->pit_state.pending, 0); - pit->pit_state.irq_ack = 1; + 
kvm_pit_reset_reinject(pit); } static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) { struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); - if (!mask) { - atomic_set(&pit->pit_state.pending, 0); - pit->pit_state.irq_ack = 1; - } + if (!mask) + kvm_pit_reset_reinject(pit); } static const struct kvm_io_device_ops pit_dev_ops = { @@ -690,14 +659,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) return NULL; pit->irq_source_id = kvm_request_irq_source_id(kvm); - if (pit->irq_source_id < 0) { - kfree(pit); - return NULL; - } + if (pit->irq_source_id < 0) + goto fail_request; mutex_init(&pit->pit_state.lock); - mutex_lock(&pit->pit_state.lock); - spin_lock_init(&pit->pit_state.inject_lock); pid = get_pid(task_tgid(current)); pid_nr = pid_vnr(pid); @@ -706,36 +671,30 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) init_kthread_worker(&pit->worker); pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker, "kvm-pit/%d", pid_nr); - if (IS_ERR(pit->worker_task)) { - mutex_unlock(&pit->pit_state.lock); - kvm_free_irq_source_id(kvm, pit->irq_source_id); - kfree(pit); - return NULL; - } + if (IS_ERR(pit->worker_task)) + goto fail_kthread; + init_kthread_work(&pit->expired, pit_do_work); - kvm->arch.vpit = pit; pit->kvm = kvm; pit_state = &pit->pit_state; - pit_state->pit = pit; hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + pit_state->timer.function = pit_timer_fn; + pit_state->irq_ack_notifier.gsi = 0; pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; - kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); - pit_state->reinject = true; - mutex_unlock(&pit->pit_state.lock); + pit->mask_notifier.func = pit_mask_notifer; kvm_pit_reset(pit); - pit->mask_notifier.func = pit_mask_notifer; - kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); + kvm_pit_set_reinject(pit, true); kvm_iodevice_init(&pit->dev, &pit_dev_ops); ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH, &pit->dev); if (ret < 0) - goto fail; + goto fail_register_pit; if (flags & KVM_PIT_SPEAKER_DUMMY) { kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); @@ -743,42 +702,35 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) KVM_SPEAKER_BASE_ADDRESS, 4, &pit->speaker_dev); if (ret < 0) - goto fail_unregister; + goto fail_register_speaker; } return pit; -fail_unregister: +fail_register_speaker: kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); - -fail: - kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); - kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); - kvm_free_irq_source_id(kvm, pit->irq_source_id); +fail_register_pit: + kvm_pit_set_reinject(pit, false); kthread_stop(pit->worker_task); +fail_kthread: + kvm_free_irq_source_id(kvm, pit->irq_source_id); +fail_request: kfree(pit); return NULL; } void kvm_free_pit(struct kvm *kvm) { - struct hrtimer *timer; - - if (kvm->arch.vpit) { - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, - &kvm->arch.vpit->speaker_dev); - kvm_unregister_irq_mask_notifier(kvm, 0, - &kvm->arch.vpit->mask_notifier); - kvm_unregister_irq_ack_notifier(kvm, - &kvm->arch.vpit->pit_state.irq_ack_notifier); - mutex_lock(&kvm->arch.vpit->pit_state.lock); - timer = &kvm->arch.vpit->pit_state.timer; - hrtimer_cancel(timer); - flush_kthread_work(&kvm->arch.vpit->expired); - kthread_stop(kvm->arch.vpit->worker_task); - kvm_free_irq_source_id(kvm, 
kvm->arch.vpit->irq_source_id); - mutex_unlock(&kvm->arch.vpit->pit_state.lock); - kfree(kvm->arch.vpit); + struct kvm_pit *pit = kvm->arch.vpit; + + if (pit) { + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev); + kvm_pit_set_reinject(pit, false); + hrtimer_cancel(&pit->pit_state.timer); + flush_kthread_work(&pit->expired); + kthread_stop(pit->worker_task); + kvm_free_irq_source_id(kvm, pit->irq_source_id); + kfree(pit); } } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index c84990b..2f5af07 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -22,19 +22,18 @@ struct kvm_kpit_channel_state { }; struct kvm_kpit_state { + /* All members before "struct mutex lock" are protected by the lock. */ struct kvm_kpit_channel_state channels[3]; u32 flags; bool is_periodic; s64 period; /* unit: ns */ struct hrtimer timer; - atomic_t pending; /* accumulated triggered timers */ - bool reinject; - struct kvm *kvm; u32 speaker_data_on; + struct mutex lock; - struct kvm_pit *pit; - spinlock_t inject_lock; - unsigned long irq_ack; + atomic_t reinject; + atomic_t pending; /* accumulated triggered timers */ + atomic_t irq_ack; struct kvm_irq_ack_notifier irq_ack_notifier; }; @@ -57,9 +56,11 @@ struct kvm_pit { #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 #define KVM_PIT_CHANNEL_MASK 0x3 -void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); void kvm_free_pit(struct kvm *kvm); -void kvm_pit_reset(struct kvm_pit *pit); + +void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val, + int hpet_legacy_start); +void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject); #endif diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 1facfd6..9db47090 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -94,7 +94,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPUS); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); @@ -117,16 +117,16 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) return; new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); - old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); if (new_val == old_val) return; if (new_val) { - __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); ioapic->rtc_status.pending_eoi++; } else { - __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); ioapic->rtc_status.pending_eoi--; rtc_status_pending_eoi_check_valid(ioapic); } @@ -156,7 +156,8 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) { - if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) { + if (test_and_clear_bit(vcpu->vcpu_id, + ioapic->rtc_status.dest_map.map)) { --ioapic->rtc_status.pending_eoi; rtc_status_pending_eoi_check_valid(ioapic); } @@ -236,10 +237,17 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) void kvm_ioapic_scan_entry(struct kvm_vcpu 
*vcpu, ulong *ioapic_handled_vectors) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + struct dest_map *dest_map = &ioapic->rtc_status.dest_map; union kvm_ioapic_redirect_entry *e; int index; spin_lock(&ioapic->lock); + + /* Make sure we see any missing RTC EOI */ + if (test_bit(vcpu->vcpu_id, dest_map->map)) + __set_bit(dest_map->vectors[vcpu->vcpu_id], + ioapic_handled_vectors); + for (index = 0; index < IOAPIC_NUM_PINS; index++) { e = &ioapic->redirtbl[index]; if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || @@ -346,7 +354,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) */ BUG_ON(ioapic->rtc_status.pending_eoi != 0); ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, - ioapic->rtc_status.dest_map); + &ioapic->rtc_status.dest_map); ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); } else ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); @@ -407,8 +415,14 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work) static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, struct kvm_ioapic *ioapic, int vector, int trigger_mode) { - int i; + struct dest_map *dest_map = &ioapic->rtc_status.dest_map; struct kvm_lapic *apic = vcpu->arch.apic; + int i; + + /* RTC special handling */ + if (test_bit(vcpu->vcpu_id, dest_map->map) && + vector == dest_map->vectors[vcpu->vcpu_id]) + rtc_irq_eoi(ioapic, vcpu); for (i = 0; i < IOAPIC_NUM_PINS; i++) { union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; @@ -416,8 +430,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, if (ent->fields.vector != vector) continue; - if (i == RTC_GSI) - rtc_irq_eoi(ioapic, vcpu); /* * We are dropping lock while calling ack notifiers because ack * notifier callbacks for assigned devices call into IOAPIC diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 2d16dc2..7d2692a 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -40,9 +40,21 @@ struct kvm_vcpu; #define RTC_GSI -1U #endif +struct dest_map { + /* vcpu bitmap where IRQ has been sent */ + DECLARE_BITMAP(map, KVM_MAX_VCPUS); + + /* + * Vector sent to a given vcpu, only valid when + * the vcpu's bit in map is set + */ + u8 vectors[KVM_MAX_VCPUS]; +}; + + struct rtc_status { int pending_eoi; - DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); + struct dest_map dest_map; }; union kvm_ioapic_redirect_entry { @@ -118,7 +130,8 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, unsigned long *dest_map); + struct kvm_lapic_irq *irq, + struct dest_map *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 3982b47..95fcc7b 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -33,7 +33,10 @@ */ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return apic_has_pending_timer(vcpu); + if (lapic_in_kernel(vcpu)) + return apic_has_pending_timer(vcpu); + + return 0; } EXPORT_SYMBOL(kvm_cpu_has_pending_timer); @@ -137,8 +140,8 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) { - kvm_inject_apic_timer_irqs(vcpu); - /* TODO: PIT, RTC etc. 
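The new struct dest_map above pairs a vCPU bitmap with a per-vCPU vector so the RTC EOI bookkeeping can later tell which vector was actually delivered where. A miniature userspace version of that record/consume pattern; the sizes, names and 32-bit map are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_VCPUS 16

struct dest_map {
        uint32_t map;                /* bit i: vector pending EOI on vCPU i */
        uint8_t  vectors[MAX_VCPUS]; /* valid only while bit i is set       */
};

/* Delivery side (cf. __apic_accept_irq): remember where and what. */
static void record(struct dest_map *dm, int vcpu, uint8_t vector)
{
        dm->map |= 1u << vcpu;
        dm->vectors[vcpu] = vector;
}

/* EOI side (cf. __kvm_ioapic_update_eoi): only treat the EOI as the RTC
 * ack if it matches the vector recorded for this vCPU. */
static bool consume(struct dest_map *dm, int vcpu, uint8_t vector)
{
        if (!(dm->map & (1u << vcpu)) || dm->vectors[vcpu] != vector)
                return false;
        dm->map &= ~(1u << vcpu);
        return true;
}

int main(void)
{
        struct dest_map dm = { 0 };

        record(&dm, 3, 0xe2);
        printf("%d\n", consume(&dm, 3, 0x20));  /* 0: wrong vector        */
        printf("%d\n", consume(&dm, 3, 0xe2));  /* 1: pending RTC EOI hit */
        return 0;
}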
*/ + if (lapic_in_kernel(vcpu)) + kvm_inject_apic_timer_irqs(vcpu); } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ae5c78f..61ebdc1 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -109,14 +109,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return ret; } -static inline int lapic_in_kernel(struct kvm_vcpu *vcpu) -{ - /* Same as irqchip_in_kernel(vcpu->kvm), but with less - * pointer chasing and no unnecessary memory barriers. - */ - return vcpu->arch.apic != NULL; -} - void kvm_pic_reset(struct kvm_kpic_state *s); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 8fc89ef..54ead79 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -34,6 +34,7 @@ #include "lapic.h" #include "hyperv.h" +#include "x86.h" static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, @@ -53,10 +54,12 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, } int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, unsigned long *dest_map) + struct kvm_lapic_irq *irq, struct dest_map *dest_map) { int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + unsigned int dest_vcpus = 0; if (irq->dest_mode == 0 && irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { @@ -67,6 +70,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) return r; + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); + kvm_for_each_vcpu(i, vcpu, kvm) { if (!kvm_apic_present(vcpu)) continue; @@ -80,13 +85,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, r = 0; r += kvm_apic_set_irq(vcpu, irq, dest_map); } else if (kvm_lapic_enabled(vcpu)) { - if (!lowest) - lowest = vcpu; - else if (kvm_apic_compare_prio(vcpu, lowest) < 0) - lowest = vcpu; + if (!kvm_vector_hashing_enabled()) { + if (!lowest) + lowest = vcpu; + else if (kvm_apic_compare_prio(vcpu, lowest) < 0) + lowest = vcpu; + } else { + __set_bit(i, dest_vcpu_bitmap); + dest_vcpus++; + } } } + if (dest_vcpus != 0) { + int idx = kvm_vector_to_index(irq->vector, dest_vcpus, + dest_vcpu_bitmap, KVM_MAX_VCPUS); + + lowest = kvm_get_vcpu(kvm, idx); + } + if (lowest) r = kvm_apic_set_irq(lowest, irq, dest_map); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 36591fa..443d2a5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -281,7 +281,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *feat; u32 v = APIC_VERSION; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return; feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); @@ -475,26 +475,20 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { - int highest_irr; - /* This may race with setting of irr in __apic_accept_irq() and * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq * will cause vmexit immediately and the value will be recalculated * on the next vmentry. 
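With vector hashing enabled, kvm_irq_delivery_to_apic() above stops hunting for the lowest-priority APIC and instead picks the (vector % n)-th set bit of the candidate bitmap via kvm_vector_to_index(). A small standalone version of that selection with one worked case; as in the kernel, the caller must guarantee that dest_vcpus never exceeds the number of set bits:

#include <stdint.h>
#include <stdio.h>

/* Pick the (vector % dest_vcpus)-th set bit of the candidate bitmap,
 * mirroring kvm_vector_to_index()'s find_next_bit() walk. */
static int vector_to_index(uint32_t vector, uint32_t dest_vcpus,
                           uint32_t bitmap)
{
        uint32_t mod = vector % dest_vcpus;
        int idx = -1;

        for (uint32_t i = 0; i <= mod; i++) {
                do {
                        idx++;
                } while (!(bitmap & (1u << idx)));
        }
        return idx;
}

int main(void)
{
        /* Candidates are vCPUs 1, 4 and 6; vector 0x31 hashes to
         * 49 % 3 = 1, so the second set bit (vCPU 4) gets the interrupt. */
        printf("%d\n",
               vector_to_index(0x31, 3, (1u << 1) | (1u << 4) | (1u << 6)));
        return 0;
}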
*/ - if (!kvm_vcpu_has_lapic(vcpu)) - return 0; - highest_irr = apic_find_highest_irr(vcpu->arch.apic); - - return highest_irr; + return apic_find_highest_irr(vcpu->arch.apic); } static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode, - unsigned long *dest_map); + struct dest_map *dest_map); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, - unsigned long *dest_map) + struct dest_map *dest_map) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -675,8 +669,33 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, } } +int kvm_vector_to_index(u32 vector, u32 dest_vcpus, + const unsigned long *bitmap, u32 bitmap_size) +{ + u32 mod; + int i, idx = -1; + + mod = vector % dest_vcpus; + + for (i = 0; i <= mod; i++) { + idx = find_next_bit(bitmap, bitmap_size, idx + 1); + BUG_ON(idx == bitmap_size); + } + + return idx; +} + +static void kvm_apic_disabled_lapic_found(struct kvm *kvm) +{ + if (!kvm->arch.disabled_lapic_found) { + kvm->arch.disabled_lapic_found = true; + printk(KERN_INFO + "Disabled LAPIC found during irq injection\n"); + } +} + bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) + struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) { struct kvm_apic_map *map; unsigned long bitmap = 1; @@ -727,21 +746,42 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, dst = map->logical_map[cid]; - if (kvm_lowest_prio_delivery(irq)) { + if (!kvm_lowest_prio_delivery(irq)) + goto set_irq; + + if (!kvm_vector_hashing_enabled()) { int l = -1; for_each_set_bit(i, &bitmap, 16) { if (!dst[i]) continue; if (l < 0) l = i; - else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0) + else if (kvm_apic_compare_prio(dst[i]->vcpu, + dst[l]->vcpu) < 0) l = i; } - bitmap = (l >= 0) ? 1 << l : 0; + } else { + int idx; + unsigned int dest_vcpus; + + dest_vcpus = hweight16(bitmap); + if (dest_vcpus == 0) + goto out; + + idx = kvm_vector_to_index(irq->vector, + dest_vcpus, &bitmap, 16); + + if (!dst[idx]) { + kvm_apic_disabled_lapic_found(kvm); + goto out; + } + + bitmap = (idx >= 0) ? 1 << idx : 0; } } +set_irq: for_each_set_bit(i, &bitmap, 16) { if (!dst[i]) continue; @@ -754,6 +794,20 @@ out: return ret; } +/* + * This routine tries to handler interrupts in posted mode, here is how + * it deals with different cases: + * - For single-destination interrupts, handle it in posted mode + * - Else if vector hashing is enabled and it is a lowest-priority + * interrupt, handle it in posted mode and use the following mechanism + * to find the destinaiton vCPU. + * 1. For lowest-priority interrupts, store all the possible + * destination vCPUs in an array. + * 2. Use "guest vector % max number of destination vCPUs" to find + * the right destination vCPU in the array for the lowest-priority + * interrupt. + * - Otherwise, use remapped mode to inject the interrupt. 
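The routing policy spelled out in the comment above (a single destination, or a lowest-priority interrupt with vector hashing, can be posted; everything else stays remapped) reads naturally as one small decision function. The sketch below is only a paraphrase of that comment with illustrative names, not kernel code:

#include <stdbool.h>
#include <stdio.h>

enum irq_path { POSTED, REMAPPED };

static enum irq_path choose_path(unsigned int ndest, bool lowest_prio,
                                 bool vector_hashing)
{
        if (ndest == 1)
                return POSTED;                /* single destination          */
        if (lowest_prio && vector_hashing)
                return POSTED;                /* hashing picks one dest vCPU */
        return REMAPPED;                      /* multicast, broadcast, ...   */
}

int main(void)
{
        printf("%d %d %d\n",
               choose_path(1, false, false),  /* 0: posted   */
               choose_path(4, true,  true),   /* 0: posted   */
               choose_path(4, true,  false)); /* 1: remapped */
        return 0;
}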
+ */ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu) { @@ -795,16 +849,37 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, if (cid >= ARRAY_SIZE(map->logical_map)) goto out; - for_each_set_bit(i, &bitmap, 16) { - dst = map->logical_map[cid][i]; - if (++r == 2) + if (kvm_vector_hashing_enabled() && + kvm_lowest_prio_delivery(irq)) { + int idx; + unsigned int dest_vcpus; + + dest_vcpus = hweight16(bitmap); + if (dest_vcpus == 0) goto out; - } - if (dst && kvm_apic_present(dst->vcpu)) + idx = kvm_vector_to_index(irq->vector, dest_vcpus, + &bitmap, 16); + + dst = map->logical_map[cid][idx]; + if (!dst) { + kvm_apic_disabled_lapic_found(kvm); + goto out; + } + *dest_vcpu = dst->vcpu; - else - goto out; + } else { + for_each_set_bit(i, &bitmap, 16) { + dst = map->logical_map[cid][i]; + if (++r == 2) + goto out; + } + + if (dst && kvm_apic_present(dst->vcpu)) + *dest_vcpu = dst->vcpu; + else + goto out; + } } ret = true; @@ -819,7 +894,7 @@ out: */ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode, - unsigned long *dest_map) + struct dest_map *dest_map) { int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; @@ -839,8 +914,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, result = 1; - if (dest_map) - __set_bit(vcpu->vcpu_id, dest_map); + if (dest_map) { + __set_bit(vcpu->vcpu_id, dest_map->map); + dest_map->vectors[vcpu->vcpu_id] = vector; + } if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { if (trig_mode) @@ -1195,7 +1272,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic) static void apic_timer_expired(struct kvm_lapic *apic) { struct kvm_vcpu *vcpu = apic->vcpu; - wait_queue_head_t *q = &vcpu->wq; + struct swait_queue_head *q = &vcpu->wq; struct kvm_timer *ktimer = &apic->lapic_timer; if (atomic_read(&apic->lapic_timer.pending)) @@ -1204,8 +1281,8 @@ static void apic_timer_expired(struct kvm_lapic *apic) atomic_inc(&apic->lapic_timer.pending); kvm_set_pending_timer(vcpu); - if (waitqueue_active(q)) - wake_up_interruptible(q); + if (swait_active(q)) + swake_up(q); if (apic_lvtt_tscdeadline(apic)) ktimer->expired_tscdeadline = ktimer->tscdeadline; @@ -1239,7 +1316,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u64 guest_tsc, tsc_deadline; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return; if (apic->lapic_timer.expired_tscdeadline == 0) @@ -1515,8 +1592,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_has_lapic(vcpu)) - apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); + apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); @@ -1566,7 +1642,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) return 0; @@ -1577,7 +1653,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) return; @@ -1590,9 +1666,6 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { struct kvm_lapic *apic = vcpu->arch.apic; - 
if (!kvm_vcpu_has_lapic(vcpu)) - return; - apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); } @@ -1601,9 +1674,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) { u64 tpr; - if (!kvm_vcpu_has_lapic(vcpu)) - return 0; - tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); return (tpr & 0xf0) >> 4; @@ -1728,8 +1798,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && - apic_lvt_enabled(apic, APIC_LVTT)) + if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT)) return atomic_read(&apic->lapic_timer.pending); return 0; @@ -1826,7 +1895,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; int highest_irr; - if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic)) + if (!apic_enabled(apic)) return -1; apic_update_ppr(apic); @@ -1854,9 +1923,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu)) - return; - if (atomic_read(&apic->lapic_timer.pending) > 0) { kvm_apic_local_deliver(apic, APIC_LVTT); if (apic_lvtt_tscdeadline(apic)) @@ -1932,7 +1998,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) { struct hrtimer *timer; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return; timer = &vcpu->arch.apic->lapic_timer.timer; @@ -2105,7 +2171,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return 1; /* if this is ICR write vector before command */ @@ -2119,7 +2185,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) struct kvm_lapic *apic = vcpu->arch.apic; u32 low, high = 0; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return 1; if (apic_reg_read(apic, reg, 4, &low)) @@ -2151,7 +2217,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) u8 sipi_vector; unsigned long pe; - if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) + if (!lapic_in_kernel(vcpu) || !apic->pending_events) return; /* diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 41bdb35..f71183e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -42,6 +42,9 @@ struct kvm_lapic { unsigned long pending_events; unsigned int sipi_vector; }; + +struct dest_map; + int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); @@ -60,11 +63,11 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); void __kvm_apic_update_irr(u32 *pir, void *regs); void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, - unsigned long *dest_map); + struct dest_map *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); + struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); @@ -103,7 +106,7 @@ static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off) extern struct static_key kvm_no_apic_vcpu; -static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu) +static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu) { if (static_key_false(&kvm_no_apic_vcpu)) return vcpu->arch.apic; @@ 
-130,7 +133,7 @@ static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic) static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) { - return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); + return lapic_in_kernel(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); } static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) @@ -150,7 +153,7 @@ static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { - return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; + return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events; } static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) @@ -161,7 +164,7 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) { - return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } static inline int kvm_apic_id(struct kvm_lapic *apic) @@ -175,4 +178,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); +int kvm_vector_to_index(u32 vector, u32 dest_vcpus, + const unsigned long *bitmap, u32 bitmap_size); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 95a955d..c512f09 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -41,6 +41,7 @@ #include <asm/cmpxchg.h> #include <asm/io.h> #include <asm/vmx.h> +#include <asm/kvm_page_track.h> /* * When setting this variable to true it enables Two-Dimensional-Paging @@ -776,62 +777,85 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, return &slot->arch.lpage_info[level - 2][idx]; } +static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, + gfn_t gfn, int count) +{ + struct kvm_lpage_info *linfo; + int i; + + for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + linfo = lpage_info_slot(gfn, slot, i); + linfo->disallow_lpage += count; + WARN_ON(linfo->disallow_lpage < 0); + } +} + +void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) +{ + update_gfn_disallow_lpage_count(slot, gfn, 1); +} + +void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) +{ + update_gfn_disallow_lpage_count(slot, gfn, -1); +} + static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; - struct kvm_lpage_info *linfo; gfn_t gfn; - int i; + kvm->arch.indirect_shadow_pages++; gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); - for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - linfo = lpage_info_slot(gfn, slot, i); - linfo->write_count += 1; - } - kvm->arch.indirect_shadow_pages++; + + /* the non-leaf shadow pages are keeping readonly. 
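In the account_shadowed()/unaccount_shadowed() rework here, the old write_count becomes disallow_lpage, which is deliberately a counter rather than a flag: independent users (a shadow page at one level, the new write-tracking API) can each stack and later remove their veto on huge mappings. A toy illustration of why a count is needed:

#include <stdio.h>

/* Per-gfn, per-level bookkeeping in the spirit of kvm_lpage_info. */
struct lpage_info {
        int disallow_lpage;
};

static void disallow(struct lpage_info *li)
{
        li->disallow_lpage++;
}

static void allow(struct lpage_info *li)
{
        li->disallow_lpage--;
        if (li->disallow_lpage < 0)
                fprintf(stderr, "unbalanced allow/disallow\n"); /* cf. WARN_ON */
}

static int lpage_is_disallowed(const struct lpage_info *li)
{
        return li->disallow_lpage != 0;
}

int main(void)
{
        struct lpage_info li = { 0 };

        disallow(&li);          /* first user, e.g. a shadow page      */
        disallow(&li);          /* second user, e.g. write tracking    */
        allow(&li);
        printf("%d\n", lpage_is_disallowed(&li)); /* 1: still disallowed */
        allow(&li);
        printf("%d\n", lpage_is_disallowed(&li)); /* 0: huge page OK again */
        return 0;
}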
*/ + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return kvm_slot_page_track_add_page(kvm, slot, gfn, + KVM_PAGE_TRACK_WRITE); + + kvm_mmu_gfn_disallow_lpage(slot, gfn); } static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; - struct kvm_lpage_info *linfo; gfn_t gfn; - int i; + kvm->arch.indirect_shadow_pages--; gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); - for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - linfo = lpage_info_slot(gfn, slot, i); - linfo->write_count -= 1; - WARN_ON(linfo->write_count < 0); - } - kvm->arch.indirect_shadow_pages--; + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return kvm_slot_page_track_remove_page(kvm, slot, gfn, + KVM_PAGE_TRACK_WRITE); + + kvm_mmu_gfn_allow_lpage(slot, gfn); } -static int __has_wrprotected_page(gfn_t gfn, int level, - struct kvm_memory_slot *slot) +static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, + struct kvm_memory_slot *slot) { struct kvm_lpage_info *linfo; if (slot) { linfo = lpage_info_slot(gfn, slot, level); - return linfo->write_count; + return !!linfo->disallow_lpage; } - return 1; + return true; } -static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level) +static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, + int level) { struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - return __has_wrprotected_page(gfn, level, slot); + return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); } static int host_mapping_level(struct kvm *kvm, gfn_t gfn) @@ -897,7 +921,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, max_level = min(kvm_x86_ops->get_lpage_level(), host_level); for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) - if (__has_wrprotected_page(large_gfn, level, slot)) + if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot)) break; return level - 1; @@ -1323,23 +1347,29 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) +bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn) { - struct kvm_memory_slot *slot; struct kvm_rmap_head *rmap_head; int i; bool write_protected = false; - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { rmap_head = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true); + write_protected |= __rmap_write_protect(kvm, rmap_head, true); } return write_protected; } +static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) +{ + struct kvm_memory_slot *slot; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn); +} + static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; @@ -1754,7 +1784,7 @@ static void mark_unsync(u64 *spte) static int nonpaging_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - return 1; + return 0; } static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) @@ -1840,13 +1870,16 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, return nr_unsync_leaf; } +#define INVALID_INDEX (-1) + static int mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { + pvec->nr = 0; if (!sp->unsync_children) return 0; - mmu_pages_add(pvec, sp, 
0); + mmu_pages_add(pvec, sp, INVALID_INDEX); return __mmu_unsync_walk(sp, pvec); } @@ -1883,37 +1916,35 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, if ((_sp)->role.direct || (_sp)->role.invalid) {} else /* @sp->gfn should be write-protected at the call site */ -static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - struct list_head *invalid_list, bool clear_unsync) +static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + struct list_head *invalid_list) { if (sp->role.cr4_pae != !!is_pae(vcpu)) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); - return 1; + return false; } - if (clear_unsync) - kvm_unlink_unsync_page(vcpu->kvm, sp); - - if (vcpu->arch.mmu.sync_page(vcpu, sp)) { + if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); - return 1; + return false; } - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - return 0; + return true; } -static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) +static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, + struct list_head *invalid_list, + bool remote_flush, bool local_flush) { - LIST_HEAD(invalid_list); - int ret; - - ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); - if (ret) - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + if (!list_empty(invalid_list)) { + kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list); + return; + } - return ret; + if (remote_flush) + kvm_flush_remote_tlbs(vcpu->kvm); + else if (local_flush) + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } #ifdef CONFIG_KVM_MMU_AUDIT @@ -1923,46 +1954,38 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } static void mmu_audit_disable(void) { } #endif -static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { - return __kvm_sync_page(vcpu, sp, invalid_list, true); + kvm_unlink_unsync_page(vcpu->kvm, sp); + return __kvm_sync_page(vcpu, sp, invalid_list); } /* @gfn should be write-protected at the call site */ -static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) +static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, + struct list_head *invalid_list) { struct kvm_mmu_page *s; - LIST_HEAD(invalid_list); - bool flush = false; + bool ret = false; for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (!s->unsync) continue; WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); - kvm_unlink_unsync_page(vcpu->kvm, s); - if ((s->role.cr4_pae != !!is_pae(vcpu)) || - (vcpu->arch.mmu.sync_page(vcpu, s))) { - kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); - continue; - } - flush = true; + ret |= kvm_sync_page(vcpu, s, invalid_list); } - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); - if (flush) - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + return ret; } struct mmu_page_path { - struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; - unsigned int idx[PT64_ROOT_LEVEL-1]; + struct kvm_mmu_page *parent[PT64_ROOT_LEVEL]; + unsigned int idx[PT64_ROOT_LEVEL]; }; #define for_each_sp(pvec, sp, parents, i) \ - for (i = mmu_pages_next(&pvec, &parents, -1), \ - sp = pvec.page[i].sp; \ + for (i = mmu_pages_first(&pvec, &parents); \ i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ i = mmu_pages_next(&pvec, &parents, i)) @@ -1974,19 +1997,43 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec, for (n = i+1; n < pvec->nr; n++) { struct kvm_mmu_page *sp = pvec->page[n].sp; + unsigned idx = pvec->page[n].idx; + int 
level = sp->role.level; - if (sp->role.level == PT_PAGE_TABLE_LEVEL) { - parents->idx[0] = pvec->page[n].idx; - return n; - } + parents->idx[level-1] = idx; + if (level == PT_PAGE_TABLE_LEVEL) + break; - parents->parent[sp->role.level-2] = sp; - parents->idx[sp->role.level-1] = pvec->page[n].idx; + parents->parent[level-2] = sp; } return n; } +static int mmu_pages_first(struct kvm_mmu_pages *pvec, + struct mmu_page_path *parents) +{ + struct kvm_mmu_page *sp; + int level; + + if (pvec->nr == 0) + return 0; + + WARN_ON(pvec->page[0].idx != INVALID_INDEX); + + sp = pvec->page[0].sp; + level = sp->role.level; + WARN_ON(level == PT_PAGE_TABLE_LEVEL); + + parents->parent[level-2] = sp; + + /* Also set up a sentinel. Further entries in pvec are all + * children of sp, so this element is never overwritten. + */ + parents->parent[level-1] = NULL; + return mmu_pages_next(pvec, parents, 0); +} + static void mmu_pages_clear_parents(struct mmu_page_path *parents) { struct kvm_mmu_page *sp; @@ -1994,22 +2041,14 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents) do { unsigned int idx = parents->idx[level]; - sp = parents->parent[level]; if (!sp) return; + WARN_ON(idx == INVALID_INDEX); clear_unsync_child_bit(sp, idx); level++; - } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); -} - -static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, - struct mmu_page_path *parents, - struct kvm_mmu_pages *pvec) -{ - parents->parent[parent->role.level-1] = NULL; - pvec->nr = 0; + } while (!sp->unsync_children); } static void mmu_sync_children(struct kvm_vcpu *vcpu, @@ -2020,30 +2059,36 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, struct mmu_page_path parents; struct kvm_mmu_pages pages; LIST_HEAD(invalid_list); + bool flush = false; - kvm_mmu_pages_init(parent, &parents, &pages); while (mmu_unsync_walk(parent, &pages)) { bool protected = false; for_each_sp(pages, sp, parents, i) protected |= rmap_write_protect(vcpu, sp->gfn); - if (protected) + if (protected) { kvm_flush_remote_tlbs(vcpu->kvm); + flush = false; + } for_each_sp(pages, sp, parents, i) { - kvm_sync_page(vcpu, sp, &invalid_list); + flush |= kvm_sync_page(vcpu, sp, &invalid_list); mmu_pages_clear_parents(&parents); } - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); - cond_resched_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_pages_init(parent, &parents, &pages); + if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) { + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); + cond_resched_lock(&vcpu->kvm->mmu_lock); + flush = false; + } } + + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); } static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) { - sp->write_flooding_count = 0; + atomic_set(&sp->write_flooding_count, 0); } static void clear_sp_write_flooding_count(u64 *spte) @@ -2069,6 +2114,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, unsigned quadrant; struct kvm_mmu_page *sp; bool need_sync = false; + bool flush = false; + LIST_HEAD(invalid_list); role = vcpu->arch.mmu.base_role; role.level = level; @@ -2092,8 +2139,16 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (sp->role.word != role.word) continue; - if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) - break; + if (sp->unsync) { + /* The page is good, but __kvm_sync_page might still end + * up zapping it. If so, break in order to rebuild it. 
+ */ + if (!__kvm_sync_page(vcpu, sp, &invalid_list)) + break; + + WARN_ON(!list_empty(&invalid_list)); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } if (sp->unsync_children) kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); @@ -2112,16 +2167,24 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, hlist_add_head(&sp->hash_link, &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); if (!direct) { - if (rmap_write_protect(vcpu, gfn)) + /* + * we should do write protection before syncing pages + * otherwise the content of the synced shadow page may + * be inconsistent with guest page table. + */ + account_shadowed(vcpu->kvm, sp); + if (level == PT_PAGE_TABLE_LEVEL && + rmap_write_protect(vcpu, gfn)) kvm_flush_remote_tlbs(vcpu->kvm); - if (level > PT_PAGE_TABLE_LEVEL && need_sync) - kvm_sync_pages(vcpu, gfn); - account_shadowed(vcpu->kvm, sp); + if (level > PT_PAGE_TABLE_LEVEL && need_sync) + flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); } sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); + + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); return sp; } @@ -2269,7 +2332,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm, if (parent->role.level == PT_PAGE_TABLE_LEVEL) return 0; - kvm_mmu_pages_init(parent, &parents, &pages); while (mmu_unsync_walk(parent, &pages)) { struct kvm_mmu_page *sp; @@ -2278,7 +2340,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm, mmu_pages_clear_parents(&parents); zapped++; } - kvm_mmu_pages_init(parent, &parents, &pages); } return zapped; @@ -2354,8 +2415,8 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, if (list_empty(&kvm->arch.active_mmu_pages)) return false; - sp = list_entry(kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); + sp = list_last_entry(&kvm->arch.active_mmu_pages, + struct kvm_mmu_page, link); kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); return true; @@ -2408,7 +2469,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); -static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { trace_kvm_mmu_unsync_page(sp); ++vcpu->kvm->stat.mmu_unsync; @@ -2417,37 +2478,26 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) kvm_mmu_mark_parents_unsync(sp); } -static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) +static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, + bool can_unsync) { - struct kvm_mmu_page *s; - - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { - if (s->unsync) - continue; - WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); - __kvm_unsync_page(vcpu, s); - } -} + struct kvm_mmu_page *sp; -static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, - bool can_unsync) -{ - struct kvm_mmu_page *s; - bool need_unsync = false; + if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) + return true; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (!can_unsync) - return 1; + return true; - if (s->role.level != PT_PAGE_TABLE_LEVEL) - return 1; + if (sp->unsync) + continue; - if (!s->unsync) - need_unsync = true; + WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); + kvm_unsync_page(vcpu, sp); } - if (need_unsync) - kvm_unsync_pages(vcpu, gfn); - return 0; + + return false; } static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) @@ -2503,7 +2553,7 @@ static int 
set_spte(struct kvm_vcpu *vcpu, u64 *sptep, * be fixed if guest refault. */ if (level > PT_PAGE_TABLE_LEVEL && - has_wrprotected_page(vcpu, gfn, level)) + mmu_gfn_lpage_is_disallowed(vcpu, gfn, level)) goto done; spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; @@ -2768,7 +2818,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && PageTransCompound(pfn_to_page(pfn)) && - !has_wrprotected_page(vcpu, gfn, PT_DIRECTORY_LEVEL)) { + !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { unsigned long mask; /* * mmu_notifier_retry was successful and we hold the @@ -2796,20 +2846,16 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, kvm_pfn_t pfn, unsigned access, int *ret_val) { - bool ret = true; - /* The pfn is invalid, report the error! */ if (unlikely(is_error_pfn(pfn))) { *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn); - goto exit; + return true; } if (unlikely(is_noslot_pfn(pfn))) vcpu_cache_mmio_info(vcpu, gva, gfn, access); - ret = false; -exit: - return ret; + return false; } static bool page_fault_can_be_fast(u32 error_code) @@ -3273,7 +3319,7 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); } -static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) +static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) { if (direct) return vcpu_match_mmio_gpa(vcpu, addr); @@ -3332,7 +3378,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) u64 spte; bool reserved; - if (quickly_check_mmio_pf(vcpu, addr, direct)) + if (mmio_info_in_cache(vcpu, addr, direct)) return RET_MMIO_PF_EMULATE; reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); @@ -3362,20 +3408,53 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) } EXPORT_SYMBOL_GPL(handle_mmio_page_fault); +static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, + u32 error_code, gfn_t gfn) +{ + if (unlikely(error_code & PFERR_RSVD_MASK)) + return false; + + if (!(error_code & PFERR_PRESENT_MASK) || + !(error_code & PFERR_WRITE_MASK)) + return false; + + /* + * guest is writing the page which is write tracked which can + * not be fixed by page fault handler. 
+ */ + if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) + return true; + + return false; +} + +static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) +{ + struct kvm_shadow_walk_iterator iterator; + u64 spte; + + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return; + + walk_shadow_page_lockless_begin(vcpu); + for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { + clear_sp_write_flooding_count(iterator.sptep); + if (!is_shadow_present_pte(spte)) + break; + } + walk_shadow_page_lockless_end(vcpu); +} + static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, bool prefault) { - gfn_t gfn; + gfn_t gfn = gva >> PAGE_SHIFT; int r; pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); - if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gva, true); - - if (likely(r != RET_MMIO_PF_INVALID)) - return r; - } + if (page_fault_handle_page_track(vcpu, error_code, gfn)) + return 1; r = mmu_topup_memory_caches(vcpu); if (r) @@ -3383,7 +3462,6 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); - gfn = gva >> PAGE_SHIFT; return nonpaging_map(vcpu, gva & PAGE_MASK, error_code, gfn, prefault); @@ -3460,12 +3538,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); - if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gpa, true); - - if (likely(r != RET_MMIO_PF_INVALID)) - return r; - } + if (page_fault_handle_page_track(vcpu, error_code, gfn)) + return 1; r = mmu_topup_memory_caches(vcpu); if (r) @@ -3558,13 +3632,24 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, return false; } -static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) +static inline bool is_last_gpte(struct kvm_mmu *mmu, + unsigned level, unsigned gpte) { - unsigned index; + /* + * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set + * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means + * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. + */ + gpte |= level - PT_PAGE_TABLE_LEVEL - 1; - index = level - 1; - index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2); - return mmu->last_pte_bitmap & (1 << index); + /* + * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. + * If it is clear, there are no large pages at this level, so clear + * PT_PAGE_SIZE_MASK in gpte if that is the case. + */ + gpte &= level - mmu->last_nonleaf_level; + + return gpte & PT_PAGE_SIZE_MASK; } #define PTTYPE_EPT 18 /* arbitrary */ @@ -3721,13 +3806,15 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { + bool uses_nx = context->nx || context->base_role.smep_andnot_wp; + /* * Passing "true" to the last argument is okay; it adds a check * on bit 8 of the SPTEs which KVM doesn't use anyway. 
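
The rewritten is_last_gpte() above replaces the old last_pte_bitmap lookup with pure unsigned-underflow bit arithmetic. A small standalone check of the trick, assuming the usual KVM constants (PT_PAGE_TABLE_LEVEL == 1, PT_PAGE_SIZE_MASK == bit 7) and mirroring the helper without the mmu argument:

    #include <stdio.h>

    /* Assumed constants, matching the usual KVM definitions:
     * PT_PAGE_TABLE_LEVEL is 1 and PT_PAGE_SIZE_MASK is the PS bit (bit 7). */
    #define PT_PAGE_TABLE_LEVEL 1u
    #define PT_PAGE_SIZE_MASK   (1u << 7)

    static unsigned is_last_gpte(unsigned last_nonleaf_level,
                                 unsigned level, unsigned gpte)
    {
            /* level == 1: RHS underflows to 0xffffffff, so bit 7 is forced on;
             * level >= 2: RHS is 0..2, bit 7 is left alone. */
            gpte |= level - PT_PAGE_TABLE_LEVEL - 1;

            /* level < last_nonleaf_level: RHS underflows, every bit survives;
             * level >= last_nonleaf_level: RHS is small, bit 7 is masked off. */
            gpte &= level - last_nonleaf_level;

            return gpte & PT_PAGE_SIZE_MASK;
    }

    int main(void)
    {
            /* 64-bit paging: root_level == last_nonleaf_level == 4. */
            printf("%x\n", is_last_gpte(4, 1, 0));                 /* 80: a PTE is always a leaf */
            printf("%x\n", is_last_gpte(4, 2, PT_PAGE_SIZE_MASK)); /* 80: 2M-page PDE */
            printf("%x\n", is_last_gpte(4, 2, 0));                 /*  0: non-leaf PDE */
            printf("%x\n", is_last_gpte(4, 4, PT_PAGE_SIZE_MASK)); /*  0: PS has no meaning in a PML4E */
            return 0;
    }
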
*/ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, - context->shadow_root_level, context->nx, + context->shadow_root_level, uses_nx, guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), true); } @@ -3836,22 +3923,13 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, } } -static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) +static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { - u8 map; - unsigned level, root_level = mmu->root_level; - const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */ - - if (root_level == PT32E_ROOT_LEVEL) - --root_level; - /* PT_PAGE_TABLE_LEVEL always terminates */ - map = 1 | (1 << ps_set_index); - for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) { - if (level <= PT_PDPE_LEVEL - && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu))) - map |= 1 << (ps_set_index | (level - 1)); - } - mmu->last_pte_bitmap = map; + unsigned root_level = mmu->root_level; + + mmu->last_nonleaf_level = root_level; + if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu)) + mmu->last_nonleaf_level++; } static void paging64_init_context_common(struct kvm_vcpu *vcpu, @@ -3863,7 +3941,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, reset_rsvds_bits_mask(vcpu, context); update_permission_bitmask(vcpu, context, false); - update_last_pte_bitmap(vcpu, context); + update_last_nonleaf_level(vcpu, context); MMU_WARN_ON(!is_pae(vcpu)); context->page_fault = paging64_page_fault; @@ -3890,7 +3968,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu, reset_rsvds_bits_mask(vcpu, context); update_permission_bitmask(vcpu, context, false); - update_last_pte_bitmap(vcpu, context); + update_last_nonleaf_level(vcpu, context); context->page_fault = paging32_page_fault; context->gva_to_gpa = paging32_gva_to_gpa; @@ -3948,7 +4026,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) } update_permission_bitmask(vcpu, context, false); - update_last_pte_bitmap(vcpu, context); + update_last_nonleaf_level(vcpu, context); reset_tdp_shadow_zero_bits_mask(vcpu, context); } @@ -4054,7 +4132,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) } update_permission_bitmask(vcpu, g_context, false); - update_last_pte_bitmap(vcpu, g_context); + update_last_nonleaf_level(vcpu, g_context); } static void init_kvm_mmu(struct kvm_vcpu *vcpu) @@ -4125,18 +4203,6 @@ static bool need_remote_flush(u64 old, u64 new) return (old & ~new & PT64_PERM_MASK) != 0; } -static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, - bool remote_flush, bool local_flush) -{ - if (zap_page) - return; - - if (remote_flush) - kvm_flush_remote_tlbs(vcpu->kvm); - else if (local_flush) - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); -} - static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, const u8 *new, int *bytes) { @@ -4186,7 +4252,8 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp) if (sp->role.level == PT_PAGE_TABLE_LEVEL) return false; - return ++sp->write_flooding_count >= 3; + atomic_inc(&sp->write_flooding_count); + return atomic_read(&sp->write_flooding_count) >= 3; } /* @@ -4248,15 +4315,15 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) return spte; } -void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes) +static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; LIST_HEAD(invalid_list); u64 
entry, gentry, *spte; int npte; - bool remote_flush, local_flush, zap_page; + bool remote_flush, local_flush; union kvm_mmu_page_role mask = { }; mask.cr0_wp = 1; @@ -4273,7 +4340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; - zap_page = remote_flush = local_flush = false; + remote_flush = local_flush = false; pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); @@ -4293,8 +4360,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { - zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, - &invalid_list); + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_flooded; continue; } @@ -4316,8 +4382,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ++spte; } } - mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush); kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); spin_unlock(&vcpu->kvm->mmu_lock); } @@ -4354,32 +4419,34 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu) kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); } -static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr) -{ - if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu)) - return vcpu_match_mmio_gpa(vcpu, addr); - - return vcpu_match_mmio_gva(vcpu, addr); -} - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, void *insn, int insn_len) { int r, emulation_type = EMULTYPE_RETRY; enum emulation_result er; + bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu); + + if (unlikely(error_code & PFERR_RSVD_MASK)) { + r = handle_mmio_page_fault(vcpu, cr2, direct); + if (r == RET_MMIO_PF_EMULATE) { + emulation_type = 0; + goto emulate; + } + if (r == RET_MMIO_PF_RETRY) + return 1; + if (r < 0) + return r; + } r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); if (r < 0) - goto out; - - if (!r) { - r = 1; - goto out; - } + return r; + if (!r) + return 1; - if (is_mmio_page_fault(vcpu, cr2)) + if (mmio_info_in_cache(vcpu, cr2, direct)) emulation_type = 0; - +emulate: er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); switch (er) { @@ -4393,8 +4460,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, default: BUG(); } -out: - return r; } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); @@ -4463,6 +4528,21 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) init_kvm_mmu(vcpu); } +void kvm_mmu_init_vm(struct kvm *kvm) +{ + struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + + node->track_write = kvm_mmu_pte_write; + kvm_page_track_register_notifier(kvm, node); +} + +void kvm_mmu_uninit_vm(struct kvm *kvm) +{ + struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + + kvm_page_track_unregister_notifier(kvm, node); +} + /* The return value indicates if tlb flush on all vcpus is needed. 
*/ typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 55ffb7b..58fe98a 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -174,4 +174,9 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); + +void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); +void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); +bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn); #endif diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c new file mode 100644 index 0000000..11f7643 --- /dev/null +++ b/arch/x86/kvm/page_track.c @@ -0,0 +1,222 @@ +/* + * Support KVM gust page tracking + * + * This feature allows us to track page access in guest. Currently, only + * write access is tracked. + * + * Copyright(C) 2015 Intel Corporation. + * + * Author: + * Xiao Guangrong <guangrong.xiao@linux.intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_host.h> +#include <asm/kvm_page_track.h> + +#include "mmu.h" + +void kvm_page_track_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + int i; + + for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) + if (!dont || free->arch.gfn_track[i] != + dont->arch.gfn_track[i]) { + kvfree(free->arch.gfn_track[i]); + free->arch.gfn_track[i] = NULL; + } +} + +int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + int i; + + for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { + slot->arch.gfn_track[i] = kvm_kvzalloc(npages * + sizeof(*slot->arch.gfn_track[i])); + if (!slot->arch.gfn_track[i]) + goto track_free; + } + + return 0; + +track_free: + kvm_page_track_free_memslot(slot, NULL); + return -ENOMEM; +} + +static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode) +{ + if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX) + return false; + + return true; +} + +static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode, short count) +{ + int index, val; + + index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); + + val = slot->arch.gfn_track[mode][index]; + + if (WARN_ON(val + count < 0 || val + count > USHRT_MAX)) + return; + + slot->arch.gfn_track[mode][index] += count; +} + +/* + * add guest page to the tracking pool so that corresponding access on that + * page will be intercepted. + * + * It should be called under the protection both of mmu-lock and kvm->srcu + * or kvm->slots_lock. + * + * @kvm: the guest instance we are interested in. + * @slot: the @gfn belongs to. + * @gfn: the guest page. + * @mode: tracking mode, currently only write track is supported. + */ +void kvm_slot_page_track_add_page(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode) +{ + + if (WARN_ON(!page_track_mode_is_valid(mode))) + return; + + update_gfn_track(slot, gfn, mode, 1); + + /* + * new track stops large page mapping for the + * tracked page. 
+ */ + kvm_mmu_gfn_disallow_lpage(slot, gfn); + + if (mode == KVM_PAGE_TRACK_WRITE) + if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) + kvm_flush_remote_tlbs(kvm); +} + +/* + * remove the guest page from the tracking pool which stops the interception + * of corresponding access on that page. It is the opposed operation of + * kvm_slot_page_track_add_page(). + * + * It should be called under the protection both of mmu-lock and kvm->srcu + * or kvm->slots_lock. + * + * @kvm: the guest instance we are interested in. + * @slot: the @gfn belongs to. + * @gfn: the guest page. + * @mode: tracking mode, currently only write track is supported. + */ +void kvm_slot_page_track_remove_page(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode) +{ + if (WARN_ON(!page_track_mode_is_valid(mode))) + return; + + update_gfn_track(slot, gfn, mode, -1); + + /* + * allow large page mapping for the tracked page + * after the tracker is gone. + */ + kvm_mmu_gfn_allow_lpage(slot, gfn); +} + +/* + * check if the corresponding access on the specified guest page is tracked. + */ +bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode) +{ + struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + int index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); + + if (WARN_ON(!page_track_mode_is_valid(mode))) + return false; + + return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); +} + +void kvm_page_track_init(struct kvm *kvm) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + init_srcu_struct(&head->track_srcu); + INIT_HLIST_HEAD(&head->track_notifier_list); +} + +/* + * register the notifier so that event interception for the tracked guest + * pages can be received. + */ +void +kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + + spin_lock(&kvm->mmu_lock); + hlist_add_head_rcu(&n->node, &head->track_notifier_list); + spin_unlock(&kvm->mmu_lock); +} + +/* + * stop receiving the event interception. It is the opposed operation of + * kvm_page_track_register_notifier(). + */ +void +kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + + spin_lock(&kvm->mmu_lock); + hlist_del_rcu(&n->node); + spin_unlock(&kvm->mmu_lock); + synchronize_srcu(&head->track_srcu); +} + +/* + * Notify the node that write access is intercepted and write emulation is + * finished at this time. + * + * The node should figure out if the written page is the one that node is + * interested in by itself. 
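
The new page-track API above is consumed by registering a notifier node and then marking individual gfns for write tracking. A hypothetical consumer sketch (the my_* names are invented for illustration; only the kvm_page_track_* calls, KVM_PAGE_TRACK_WRITE and the track_write callback signature come from this patch):

    #include <linux/kvm_host.h>
    #include <asm/kvm_page_track.h>

    /* Hypothetical consumer, not part of this patch. */
    static void my_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                               const u8 *new, int bytes)
    {
            /* Invoked after write emulation finishes for any tracked page;
             * the consumer itself decides whether @gpa is one it cares about. */
    }

    static struct kvm_page_track_notifier_node my_node = {
            .track_write = my_track_write,
    };

    static void my_start_tracking(struct kvm *kvm, struct kvm_memory_slot *slot,
                                  gfn_t gfn)
    {
            kvm_page_track_register_notifier(kvm, &my_node);

            /* Per the comments above, the add/remove calls must run under
             * mmu-lock plus kvm->srcu or kvm->slots_lock; once added, write
             * faults on @gfn are intercepted and reach my_track_write(). */
            kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
    }
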
+ */ +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes) +{ + struct kvm_page_track_notifier_head *head; + struct kvm_page_track_notifier_node *n; + int idx; + + head = &vcpu->kvm->arch.track_notifier_head; + + if (hlist_empty(&head->track_notifier_list)) + return; + + idx = srcu_read_lock(&head->track_srcu); + hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) + if (n->track_write) + n->track_write(vcpu, gpa, new, bytes); + srcu_read_unlock(&head->track_srcu, idx); +} diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2ce4f05..e159a81 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -189,8 +189,11 @@ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | ACC_USER_MASK; #else - access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; - access &= ~(gpte >> PT64_NX_SHIFT); + BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); + BUILD_BUG_ON(ACC_EXEC_MASK != 1); + access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK); + /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */ + access ^= (gpte >> PT64_NX_SHIFT); #endif return access; @@ -702,24 +705,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); - if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu)); - if (likely(r != RET_MMIO_PF_INVALID)) - return r; - - /* - * page fault with PFEC.RSVD = 1 is caused by shadow - * page fault, should not be used to walk guest page - * table. - */ - error_code &= ~PFERR_RSVD_MASK; - }; - r = mmu_topup_memory_caches(vcpu); if (r) return r; /* + * If PFEC.RSVD is set, this is a shadow page fault. + * The bit needs to be cleared before walking guest page tables. + */ + error_code &= ~PFERR_RSVD_MASK; + + /* * Look up the guest pte for the faulting address. 
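
FNAME(gpte_access) above now derives execute permission from NX xor P instead of masking: because ACC_EXEC_MASK == PT_PRESENT_MASK == 1 (enforced by the BUILD_BUG_ONs), shifting the NX bit (bit 63) down to bit 0 and XOR-ing clears exec exactly when NX is set. A tiny worked check, with bit positions assumed from the standard x86 PTE layout:

    #include <assert.h>
    #include <stdint.h>

    #define PT_PRESENT_MASK  (1ull << 0)   /* assumed x86 PTE bit positions */
    #define PT_WRITABLE_MASK (1ull << 1)
    #define PT_USER_MASK     (1ull << 2)
    #define PT64_NX_SHIFT    63
    #define ACC_EXEC_MASK    1u            /* same bit as P, per the BUILD_BUG_ONs */

    static unsigned gpte_access(uint64_t gpte)
    {
            unsigned access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
            access ^= (gpte >> PT64_NX_SHIFT);  /* NX=1 flips P(=1) to 0: no exec */
            return access;
    }

    int main(void)
    {
            /* present, NX clear -> executable */
            assert(gpte_access(PT_PRESENT_MASK) & ACC_EXEC_MASK);
            /* present, NX set -> not executable */
            assert(!(gpte_access(PT_PRESENT_MASK | (1ull << 63)) & ACC_EXEC_MASK));
            return 0;
    }
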
*/ r = FNAME(walk_addr)(&walker, vcpu, addr, error_code); @@ -735,6 +731,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, return 0; } + if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) { + shadow_page_table_clear_flood(vcpu, addr); + return 1; + } + vcpu->arch.write_fault_to_shadow_pgtable = false; is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, @@ -945,7 +946,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, sizeof(pt_element_t))) - return -EINVAL; + return 0; if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { vcpu->kvm->tlbs_dirty++; @@ -977,7 +978,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) host_writable); } - return !nr_present; + return nr_present; } #undef pt_element_t diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 31aa2c8..06ce377 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -257,7 +257,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) { - if (vcpu->arch.apic) + if (lapic_in_kernel(vcpu)) kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c13a64b..9507038 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1858,8 +1858,7 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - kvm_emulate_hypercall(&svm->vcpu); - return 1; + return kvm_emulate_hypercall(&svm->vcpu); } static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ad9f6a2..2f1ea2f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -996,11 +996,13 @@ TRACE_EVENT(kvm_enter_smm, * Tracepoint for VT-d posted-interrupts. */ TRACE_EVENT(kvm_pi_irte_update, - TP_PROTO(unsigned int vcpu_id, unsigned int gsi, - unsigned int gvec, u64 pi_desc_addr, bool set), - TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set), + TP_PROTO(unsigned int host_irq, unsigned int vcpu_id, + unsigned int gsi, unsigned int gvec, + u64 pi_desc_addr, bool set), + TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set), TP_STRUCT__entry( + __field( unsigned int, host_irq ) __field( unsigned int, vcpu_id ) __field( unsigned int, gsi ) __field( unsigned int, gvec ) @@ -1009,6 +1011,7 @@ TRACE_EVENT(kvm_pi_irte_update, ), TP_fast_assign( + __entry->host_irq = host_irq; __entry->vcpu_id = vcpu_id; __entry->gsi = gsi; __entry->gvec = gvec; @@ -1016,9 +1019,10 @@ TRACE_EVENT(kvm_pi_irte_update, __entry->set = set; ), - TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, " + TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, " "gvec: 0x%x, pi_desc_addr: 0x%llx", __entry->set ? 
"enabled and being updated" : "disabled", + __entry->host_irq, __entry->vcpu_id, __entry->gsi, __entry->gvec, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0ff4537..5e45c27 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -863,7 +863,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); -static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, @@ -963,25 +962,36 @@ static const u32 vmx_msr_index[] = { MSR_EFER, MSR_TSC_AUX, MSR_STAR, }; -static inline bool is_page_fault(u32 intr_info) +static inline bool is_exception_n(u32 intr_info, u8 vector) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK); +} + +static inline bool is_debug(u32 intr_info) +{ + return is_exception_n(intr_info, DB_VECTOR); +} + +static inline bool is_breakpoint(u32 intr_info) +{ + return is_exception_n(intr_info, BP_VECTOR); +} + +static inline bool is_page_fault(u32 intr_info) +{ + return is_exception_n(intr_info, PF_VECTOR); } static inline bool is_no_device(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); + return is_exception_n(intr_info, NM_VECTOR); } static inline bool is_invalid_opcode(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); + return is_exception_n(intr_info, UD_VECTOR); } static inline bool is_external_interrupt(u32 intr_info) @@ -1813,6 +1823,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, return; } break; + case MSR_IA32_PEBS_ENABLE: + /* PEBS needs a quiescent period after being disabled (to write + * a record). Disabling PEBS through VMX MSR swapping doesn't + * provide that period, so a CPU could write host's record into + * guest's memory. + */ + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } for (i = 0; i < m->nr; ++i) @@ -1850,26 +1867,31 @@ static void reload_tss(void) static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { - u64 guest_efer; - u64 ignore_bits; + u64 guest_efer = vmx->vcpu.arch.efer; + u64 ignore_bits = 0; - guest_efer = vmx->vcpu.arch.efer; + if (!enable_ept) { + /* + * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing + * host CPUID is more efficient than testing guest CPUID + * or CR4. Host SMEP is anyway a requirement for guest SMEP. + */ + if (boot_cpu_has(X86_FEATURE_SMEP)) + guest_efer |= EFER_NX; + else if (!(guest_efer & EFER_NX)) + ignore_bits |= EFER_NX; + } /* - * NX is emulated; LMA and LME handled by hardware; SCE meaningless - * outside long mode + * LMA and LME handled by hardware; SCE meaningless outside long mode. 
*/ - ignore_bits = EFER_NX | EFER_SCE; + ignore_bits |= EFER_SCE; #ifdef CONFIG_X86_64 ignore_bits |= EFER_LMA | EFER_LME; /* SCE is meaningful only in long mode on Intel */ if (guest_efer & EFER_LMA) ignore_bits &= ~(u64)EFER_SCE; #endif - guest_efer &= ~ignore_bits; - guest_efer |= host_efer & ignore_bits; - vmx->guest_msrs[efer_offset].data = guest_efer; - vmx->guest_msrs[efer_offset].mask = ~ignore_bits; clear_atomic_switch_msr(vmx, MSR_EFER); @@ -1880,16 +1902,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) */ if (cpu_has_load_ia32_efer || (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { - guest_efer = vmx->vcpu.arch.efer; if (!(guest_efer & EFER_LMA)) guest_efer &= ~EFER_LME; if (guest_efer != host_efer) add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); return false; - } + } else { + guest_efer &= ~ignore_bits; + guest_efer |= host_efer & ignore_bits; - return true; + vmx->guest_msrs[efer_offset].data = guest_efer; + vmx->guest_msrs[efer_offset].mask = ~ignore_bits; + + return true; + } } static unsigned long segment_base(u16 selector) @@ -2588,7 +2615,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; /* We support free control of debug control saving. */ @@ -2609,7 +2636,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) VM_ENTRY_LOAD_IA32_PAT; vmx->nested.nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. 
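
In the user-return-MSR branch of update_transition_efer() above, any bit listed in ignore_bits is taken from the host rather than the guest: guest_efer = (guest_efer & ~ignore_bits) | (host_efer & ignore_bits), and ~ignore_bits is also stored as the compare mask. A quick numeric check of that composition, with illustrative (assumed) EFER bit positions:

    #include <assert.h>
    #include <stdint.h>

    #define EFER_SCE (1ull << 0)   /* assumed EFER bit positions */
    #define EFER_LME (1ull << 8)
    #define EFER_LMA (1ull << 10)
    #define EFER_NX  (1ull << 11)

    int main(void)
    {
            uint64_t host_efer   = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
            uint64_t guest_efer  = EFER_NX;                       /* guest cleared SCE */
            uint64_t ignore_bits = EFER_SCE | EFER_LMA | EFER_LME;

            guest_efer &= ~ignore_bits;
            guest_efer |= host_efer & ignore_bits;

            /* Ignored bits follow the host; the guest's NX choice is preserved. */
            assert(guest_efer == (EFER_SCE | EFER_LME | EFER_LMA | EFER_NX));
            return 0;
    }
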
*/ @@ -2853,7 +2880,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported()) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -2930,7 +2957,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported()) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; @@ -3403,7 +3430,7 @@ static void init_vmcs_shadow_fields(void) for (i = j = 0; i < max_shadow_read_write_fields; i++) { switch (shadow_read_write_fields[i]) { case GUEST_BNDCFGS: - if (!vmx_mpx_supported()) + if (!kvm_mpx_supported()) continue; break; default: @@ -5612,11 +5639,8 @@ static int handle_dr(struct kvm_vcpu *vcpu) } if (vcpu->guest_debug == 0) { - u32 cpu_based_vm_exec_control; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_MOV_DR_EXITING); /* * No more DR vmexits; force a reload of the debug registers @@ -5653,8 +5677,6 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { - u32 cpu_based_vm_exec_control; - get_debugreg(vcpu->arch.db[0], 0); get_debugreg(vcpu->arch.db[1], 1); get_debugreg(vcpu->arch.db[2], 2); @@ -5663,10 +5685,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING); } static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) @@ -5751,8 +5770,7 @@ static int handle_halt(struct kvm_vcpu *vcpu) static int handle_vmcall(struct kvm_vcpu *vcpu) { - kvm_emulate_hypercall(vcpu); - return 1; + return kvm_emulate_hypercall(vcpu); } static int handle_invd(struct kvm_vcpu *vcpu) @@ -6439,8 +6457,8 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { /* Recycle the least recently used VMCS. 
*/ - item = list_entry(vmx->nested.vmcs02_pool.prev, - struct vmcs02_list, list); + item = list_last_entry(&vmx->nested.vmcs02_pool, + struct vmcs02_list, list); item->vmptr = vmx->nested.current_vmptr; list_move(&item->list, &vmx->nested.vmcs02_pool); return &item->vmcs02; @@ -7756,6 +7774,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) else if (is_no_device(intr_info) && !(vmcs12->guest_cr0 & X86_CR0_TS)) return false; + else if (is_debug(intr_info) && + vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return false; + else if (is_breakpoint(intr_info) && + vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + return false; return vmcs12->exception_bitmap & (1u << (intr_info & INTR_INFO_VECTOR_MASK)); case EXIT_REASON_EXTERNAL_INTERRUPT: @@ -10260,7 +10285,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); if (nested_cpu_has_xsaves(vmcs12)) vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); @@ -10768,13 +10793,26 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, */ kvm_set_msi_irq(e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { + /* + * Make sure the IRTE is in remapped mode if + * we don't handle it in posted mode. + */ + ret = irq_set_vcpu_affinity(host_irq, NULL); + if (ret < 0) { + printk(KERN_INFO + "failed to back to remapped mode, irq: %u\n", + host_irq); + goto out; + } + continue; + } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; - trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi, + trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); if (set) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eaf6ee8..7236bd3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -123,6 +123,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); unsigned int __read_mostly lapic_timer_advance_ns = 0; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); +static bool __read_mostly vector_hashing = true; +module_param(vector_hashing, bool, S_IRUGO); + static bool __read_mostly backwards_tsc_observed = false; #define KVM_NR_SHARED_MSRS 16 @@ -1196,17 +1199,11 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) static uint32_t div_frac(uint32_t dividend, uint32_t divisor) { - uint32_t quotient, remainder; - - /* Don't try to replace with do_div(), this one calculates - * "(dividend << 32) / divisor" */ - __asm__ ( "divl %4" - : "=a" (quotient), "=d" (remainder) - : "0" (0), "1" (dividend), "r" (divisor) ); - return quotient; + do_shl32_div32(dividend, divisor); + return dividend; } -static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, +static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz, s8 *pshift, u32 *pmultiplier) { uint64_t scaled64; @@ -1214,8 +1211,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, uint64_t tps64; uint32_t tps32; - tps64 = base_khz * 1000LL; - scaled64 = scaled_khz * 1000LL; + tps64 = base_hz; + scaled64 = scaled_hz; while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) { tps64 >>= 1; shift--; @@ -1233,8 +1230,8 @@ static void 
kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, *pshift = shift; *pmultiplier = div_frac(scaled64, tps32); - pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n", - __func__, base_khz, scaled_khz, shift, *pmultiplier); + pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n", + __func__, base_hz, scaled_hz, shift, *pmultiplier); } #ifdef CONFIG_X86_64 @@ -1293,23 +1290,23 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) return 0; } -static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) +static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) { u32 thresh_lo, thresh_hi; int use_scaling = 0; /* tsc_khz can be zero if TSC calibration fails */ - if (this_tsc_khz == 0) { + if (user_tsc_khz == 0) { /* set tsc_scaling_ratio to a safe value */ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; return -1; } /* Compute a scale to convert nanoseconds in TSC cycles */ - kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, + kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, &vcpu->arch.virtual_tsc_shift, &vcpu->arch.virtual_tsc_mult); - vcpu->arch.virtual_tsc_khz = this_tsc_khz; + vcpu->arch.virtual_tsc_khz = user_tsc_khz; /* * Compute the variation in TSC rate which is acceptable @@ -1319,11 +1316,11 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) */ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); - if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) { - pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); + if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { + pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } - return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); + return set_tsc_khz(vcpu, user_tsc_khz, use_scaling); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) @@ -1716,7 +1713,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static int kvm_guest_time_update(struct kvm_vcpu *v) { - unsigned long flags, this_tsc_khz, tgt_tsc_khz; + unsigned long flags, tgt_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; s64 kernel_ns; @@ -1742,8 +1739,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - this_tsc_khz = __this_cpu_read(cpu_tsc_khz); - if (unlikely(this_tsc_khz == 0)) { + tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz); + if (unlikely(tgt_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); return 1; @@ -1778,13 +1775,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) if (!vcpu->pv_time_enabled) return 0; - if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { - tgt_tsc_khz = kvm_has_tsc_control ? 
- vcpu->virtual_tsc_khz : this_tsc_khz; - kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, + if (kvm_has_tsc_control) + tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz); + + if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, &vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_to_system_mul); - vcpu->hw_tsc_khz = this_tsc_khz; + vcpu->hw_tsc_khz = tgt_tsc_khz; } /* With all the info we got, fill in the values */ @@ -2987,7 +2985,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && - kvm_vcpu_has_lapic(vcpu)) + lapic_in_kernel(vcpu)) vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { @@ -3000,7 +2998,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; else vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; - if (kvm_vcpu_has_lapic(vcpu)) { + if (lapic_in_kernel(vcpu)) { if (events->smi.latched_init) set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); else @@ -3240,7 +3238,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_GET_LAPIC: { r = -EINVAL; - if (!vcpu->arch.apic) + if (!lapic_in_kernel(vcpu)) goto out; u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); @@ -3258,7 +3256,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } case KVM_SET_LAPIC: { r = -EINVAL; - if (!vcpu->arch.apic) + if (!lapic_in_kernel(vcpu)) goto out; u.lapic = memdup_user(argp, sizeof(*u.lapic)); if (IS_ERR(u.lapic)) @@ -3605,20 +3603,26 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) { - mutex_lock(&kvm->arch.vpit->pit_state.lock); - memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); - mutex_unlock(&kvm->arch.vpit->pit_state.lock); + struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state; + + BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels)); + + mutex_lock(&kps->lock); + memcpy(ps, &kps->channels, sizeof(*ps)); + mutex_unlock(&kps->lock); return 0; } static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) { int i; - mutex_lock(&kvm->arch.vpit->pit_state.lock); - memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); + struct kvm_pit *pit = kvm->arch.vpit; + + mutex_lock(&pit->pit_state.lock); + memcpy(&pit->pit_state.channels, ps, sizeof(*ps)); for (i = 0; i < 3; i++) - kvm_pit_load_count(kvm, i, ps->channels[i].count, 0); - mutex_unlock(&kvm->arch.vpit->pit_state.lock); + kvm_pit_load_count(pit, i, ps->channels[i].count, 0); + mutex_unlock(&pit->pit_state.lock); return 0; } @@ -3638,29 +3642,39 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) int start = 0; int i; u32 prev_legacy, cur_legacy; - mutex_lock(&kvm->arch.vpit->pit_state.lock); - prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; + struct kvm_pit *pit = kvm->arch.vpit; + + mutex_lock(&pit->pit_state.lock); + prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; if (!prev_legacy && cur_legacy) start = 1; - memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, - sizeof(kvm->arch.vpit->pit_state.channels)); - kvm->arch.vpit->pit_state.flags = ps->flags; + memcpy(&pit->pit_state.channels, &ps->channels, + sizeof(pit->pit_state.channels)); + 
pit->pit_state.flags = ps->flags; for (i = 0; i < 3; i++) - kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, + kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count, start && i == 0); - mutex_unlock(&kvm->arch.vpit->pit_state.lock); + mutex_unlock(&pit->pit_state.lock); return 0; } static int kvm_vm_ioctl_reinject(struct kvm *kvm, struct kvm_reinject_control *control) { - if (!kvm->arch.vpit) + struct kvm_pit *pit = kvm->arch.vpit; + + if (!pit) return -ENXIO; - mutex_lock(&kvm->arch.vpit->pit_state.lock); - kvm->arch.vpit->pit_state.reinject = control->pit_reinject; - mutex_unlock(&kvm->arch.vpit->pit_state.lock); + + /* pit->pit_state.lock was overloaded to prevent userspace from getting + * an inconsistent state after running multiple KVM_REINJECT_CONTROL + * ioctls in parallel. Use a separate lock if that ioctl isn't rare. + */ + mutex_lock(&pit->pit_state.lock); + kvm_pit_set_reinject(pit, control->pit_reinject); + mutex_unlock(&pit->pit_state.lock); + return 0; } @@ -4093,7 +4107,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, do { n = min(len, 8); - if (!(vcpu->arch.apic && + if (!(lapic_in_kernel(vcpu) && !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) break; @@ -4113,7 +4127,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) do { n = min(len, 8); - if (!(vcpu->arch.apic && + if (!(lapic_in_kernel(vcpu) && !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) @@ -4346,7 +4360,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes); if (ret < 0) return 0; - kvm_mmu_pte_write(vcpu, gpa, val, bytes); + kvm_page_track_write(vcpu, gpa, val, bytes); return 1; } @@ -4604,7 +4618,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, return X86EMUL_CMPXCHG_FAILED; kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); - kvm_mmu_pte_write(vcpu, gpa, new, bytes); + kvm_page_track_write(vcpu, gpa, new, bytes); return X86EMUL_CONTINUE; @@ -6010,7 +6024,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) if (!kvm_x86_ops->update_cr8_intercept) return; - if (!vcpu->arch.apic) + if (!lapic_in_kernel(vcpu)) return; if (vcpu->arch.apicv_active) @@ -7038,7 +7052,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (!kvm_vcpu_has_lapic(vcpu) && + if (!lapic_in_kernel(vcpu) && mp_state->mp_state != KVM_MP_STATE_RUNNABLE) return -EINVAL; @@ -7314,7 +7328,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) * Every 255 times fpu_counter rolls over to 0; a guest that uses * the FPU in bursts will revert to loading it on demand. 
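
div_frac() in the kvmclock code above was rewritten in terms of do_shl32_div32(), but the value it returns is unchanged: a 0.32 fixed-point fraction, i.e. (dividend << 32) / divisor, as the deleted comment described. A portable stand-in for checking the math (the in-kernel helper relies on a 32-bit divide, so dividend must be smaller than divisor):

    #include <stdint.h>
    #include <stdio.h>

    /* Portable equivalent of the semantics of div_frac()/do_shl32_div32(). */
    static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
    {
            return (uint32_t)(((uint64_t)dividend << 32) / divisor);
    }

    int main(void)
    {
            /* 1/3 as a 0.32 fixed-point fraction: 0x55555555 */
            printf("%08x\n", div_frac(1, 3));
            /* The kind of ratio kvm_get_time_scale() turns into the pvclock
             * tsc_to_system_mul multiplier, e.g. 1 GHz target / 2.6 GHz TSC. */
            printf("%08x\n", div_frac(1000000000u, 2600000000u));
            return 0;
    }
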
*/ - if (!vcpu->arch.eager_fpu) { + if (!use_eager_fpu()) { if (++vcpu->fpu_counter < 5) kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); } @@ -7593,6 +7607,7 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) } struct static_key kvm_no_apic_vcpu __read_mostly; +EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { @@ -7724,6 +7739,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); + kvm_page_track_init(kvm); + kvm_mmu_init_vm(kvm); + return 0; } @@ -7850,6 +7868,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(kvm->arch.vioapic); kvm_free_vcpus(kvm); kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); + kvm_mmu_uninit_vm(kvm); } void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, @@ -7871,6 +7890,8 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, free->arch.lpage_info[i - 1] = NULL; } } + + kvm_page_track_free_memslot(free, dont); } int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, @@ -7879,6 +7900,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, int i; for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { + struct kvm_lpage_info *linfo; unsigned long ugfn; int lpages; int level = i + 1; @@ -7893,15 +7915,16 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, if (i == 0) continue; - slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages * - sizeof(*slot->arch.lpage_info[i - 1])); - if (!slot->arch.lpage_info[i - 1]) + linfo = kvm_kvzalloc(lpages * sizeof(*linfo)); + if (!linfo) goto out_free; + slot->arch.lpage_info[i - 1] = linfo; + if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->arch.lpage_info[i - 1][0].write_count = 1; + linfo[0].disallow_lpage = 1; if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1; + linfo[lpages - 1].disallow_lpage = 1; ugfn = slot->userspace_addr >> PAGE_SHIFT; /* * If the gfn and userspace address are not aligned wrt each @@ -7913,10 +7936,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long j; for (j = 0; j < lpages; ++j) - slot->arch.lpage_info[i - 1][j].write_count = 1; + linfo[j].disallow_lpage = 1; } } + if (kvm_page_track_create_memslot(slot, npages)) + goto out_free; + return 0; out_free: @@ -8370,6 +8396,12 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); } +bool kvm_vector_hashing_enabled(void) +{ + return vector_hashing; +} +EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f2afa5f..007940f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -179,6 +179,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); +bool kvm_vector_hashing_enabled(void); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ @@ -192,4 +193,19 @@ extern unsigned int min_timer_period_us; extern unsigned int lapic_timer_advance_ns; extern struct 
static_key kvm_no_apic_vcpu; + +/* Same "calling convention" as do_div: + * - divide (n << 32) by base + * - put result in n + * - return remainder + */ +#define do_shl32_div32(n, base) \ + ({ \ + u32 __quot, __rem; \ + asm("divl %2" : "=a" (__quot), "=d" (__rem) \ + : "rm" (base), "0" (0), "1" ((u32) n)); \ + n = __quot; \ + __rem; \ + }) + #endif diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 4ba229a..fd57d3a 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1520,12 +1520,6 @@ __init void lguest_init(void) */ reserve_top_address(lguest_data.reserve_mem); - /* - * If we don't initialize the lock dependency checker now, it crashes - * atomic_notifier_chain_register, then paravirt_disable_iospace. - */ - lockdep_init(); - /* Hook in our special panic hypercall code. */ atomic_notifier_chain_register(&panic_notifier_list, &paniced); @@ -1535,7 +1529,7 @@ __init void lguest_init(void) */ cpu_detect(&new_cpu_data); /* head.S usually sets up the first capability word, so do it here. */ - new_cpu_data.x86_capability[0] = cpuid_edx(1); + new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); /* Math is always hard! */ set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index a2fe51b..65be7cf 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,5 @@ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> /* diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 422db00..5cc78bf 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -21,12 +21,16 @@ static inline int myisspace(u8 c) * @option: option string to look for * * Returns the position of that @option (starts counting with 1) - * or 0 on not found. + * or 0 on not found. @option will only be found if it is found + * as an entire word in @cmdline. For instance, if @option="car" + * then a cmdline which contains "cart" will not match. */ -int cmdline_find_option_bool(const char *cmdline, const char *option) +static int +__cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, + const char *option) { char c; - int len, pos = 0, wstart = 0; + int pos = 0, wstart = 0; const char *opptr = NULL; enum { st_wordstart = 0, /* Start of word/after whitespace */ @@ -37,11 +41,11 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) if (!cmdline) return -1; /* No command line */ - len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE); - if (!len) - return 0; - - while (len--) { + /* + * This 'pos' check ensures we do not overrun + * a non-NULL-terminated 'cmdline' + */ + while (pos < max_cmdline_size) { c = *(char *)cmdline++; pos++; @@ -58,18 +62,35 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) /* fall through */ case st_wordcmp: - if (!*opptr) + if (!*opptr) { + /* + * We matched all the way to the end of the + * option we were looking for. If the + * command-line has a space _or_ ends, then + * we matched! + */ if (!c || myisspace(c)) return wstart; - else - state = st_wordskip; - else if (!c) + /* + * We hit the end of the option, but _not_ + * the end of a word on the cmdline. Not + * a match. + */ + } else if (!c) { + /* + * Hit the NULL terminator on the end of + * cmdline. 
+ */ return 0; - else if (c != *opptr++) - state = st_wordskip; - else if (!len) /* last word and is matching */ - return wstart; - break; + } else if (c == *opptr++) { + /* + * We are currently matching, so continue + * to the next character on the cmdline. + */ + break; + } + state = st_wordskip; + /* fall through */ case st_wordskip: if (!c) @@ -82,3 +103,8 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) return 0; /* Buffer overrun */ } + +int cmdline_find_option_bool(const char *cmdline, const char *option) +{ + return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); +} diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 009f982..24ef1c2 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -1,7 +1,7 @@ /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> /* diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 27f89c7..2b0ef26 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -10,7 +10,7 @@ #include <asm/current.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/smap.h> diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index e912b2f..2f07c29 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -102,7 +102,7 @@ static void delay_mwaitx(unsigned long __loops) * Use cpu_tss as a cacheline-aligned, seldomly * accessed per-cpu variable as the monitor target. */ - __monitorx(this_cpu_ptr(&cpu_tss), 0, 0); + __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); /* * AMD, like Intel, supports the EAX hint and EAX=0xf diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 16698bb..cbb8ee5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -1,7 +1,7 @@ /* Copyright 2002 Andi Kleen */ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> /* @@ -177,3 +177,120 @@ ENTRY(memcpy_orig) .Lend: retq ENDPROC(memcpy_orig) + +#ifndef CONFIG_UML +/* + * memcpy_mcsafe - memory copy with machine check exception handling + * Note that we only catch machine checks when reading the source addresses. + * Writes to target are posted and don't generate machine checks. + */ +ENTRY(memcpy_mcsafe) + cmpl $8, %edx + /* Less than 8 bytes? 
Go to byte copy loop */ + jb .L_no_whole_words + + /* Check for bad alignment of source */ + testl $7, %esi + /* Already aligned */ + jz .L_8byte_aligned + + /* Copy one byte at a time until source is 8-byte aligned */ + movl %esi, %ecx + andl $7, %ecx + subl $8, %ecx + negl %ecx + subl %ecx, %edx +.L_copy_leading_bytes: + movb (%rsi), %al + movb %al, (%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz .L_copy_leading_bytes + +.L_8byte_aligned: + /* Figure out how many whole cache lines (64-bytes) to copy */ + movl %edx, %ecx + andl $63, %edx + shrl $6, %ecx + jz .L_no_whole_cache_lines + + /* Loop copying whole cache lines */ +.L_cache_w0: movq (%rsi), %r8 +.L_cache_w1: movq 1*8(%rsi), %r9 +.L_cache_w2: movq 2*8(%rsi), %r10 +.L_cache_w3: movq 3*8(%rsi), %r11 + movq %r8, (%rdi) + movq %r9, 1*8(%rdi) + movq %r10, 2*8(%rdi) + movq %r11, 3*8(%rdi) +.L_cache_w4: movq 4*8(%rsi), %r8 +.L_cache_w5: movq 5*8(%rsi), %r9 +.L_cache_w6: movq 6*8(%rsi), %r10 +.L_cache_w7: movq 7*8(%rsi), %r11 + movq %r8, 4*8(%rdi) + movq %r9, 5*8(%rdi) + movq %r10, 6*8(%rdi) + movq %r11, 7*8(%rdi) + leaq 64(%rsi), %rsi + leaq 64(%rdi), %rdi + decl %ecx + jnz .L_cache_w0 + + /* Are there any trailing 8-byte words? */ +.L_no_whole_cache_lines: + movl %edx, %ecx + andl $7, %edx + shrl $3, %ecx + jz .L_no_whole_words + + /* Copy trailing words */ +.L_copy_trailing_words: + movq (%rsi), %r8 + mov %r8, (%rdi) + leaq 8(%rsi), %rsi + leaq 8(%rdi), %rdi + decl %ecx + jnz .L_copy_trailing_words + + /* Any trailing bytes? */ +.L_no_whole_words: + andl %edx, %edx + jz .L_done_memcpy_trap + + /* Copy trailing bytes */ + movl %edx, %ecx +.L_copy_trailing_bytes: + movb (%rsi), %al + movb %al, (%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz .L_copy_trailing_bytes + + /* Copy successful. Return true */ +.L_done_memcpy_trap: + xorq %rax, %rax + ret +ENDPROC(memcpy_mcsafe) + + .section .fixup, "ax" + /* Return false for any failure */ +.L_memcpy_mcsafe_fail: + mov $1, %rax + ret + + .previous + + _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) +#endif diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index ca2afdd..90ce01b 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -6,7 +6,7 @@ * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> */ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> #undef memmove diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2661fad..c9c8122 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -1,7 +1,7 @@ /* Copyright 2002 Andi Kleen, SuSE Labs */ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> .weak memset diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 4a6f1d9..99bfb19 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -358,20
+358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, #define pgd_none(a) pud_none(__pud(pgd_val(a))) #endif -#ifdef CONFIG_X86_64 static inline bool is_hypervisor_range(int idx) { +#ifdef CONFIG_X86_64 /* * ffff800000000000 - ffff87ffffffffff is reserved for * the hypervisor. */ - return paravirt_enabled() && - (idx >= pgd_index(__PAGE_OFFSET) - 16) && - (idx < pgd_index(__PAGE_OFFSET)); -} + return (idx >= pgd_index(__PAGE_OFFSET) - 16) && + (idx < pgd_index(__PAGE_OFFSET)); #else -static inline bool is_hypervisor_range(int idx) { return false; } + return false; #endif +} static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, bool checkwx) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 903ec1e..9dd7e4b 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -3,6 +3,9 @@ #include <linux/sort.h> #include <asm/uaccess.h> +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *, int); + static inline unsigned long ex_insn_addr(const struct exception_table_entry *x) { @@ -13,11 +16,56 @@ ex_fixup_addr(const struct exception_table_entry *x) { return (unsigned long)&x->fixup + x->fixup; } +static inline ex_handler_t +ex_fixup_handler(const struct exception_table_entry *x) +{ + return (ex_handler_t)((unsigned long)&x->handler + x->handler); +} -int fixup_exception(struct pt_regs *regs) +bool ex_handler_default(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { - const struct exception_table_entry *fixup; - unsigned long new_ip; + regs->ip = ex_fixup_addr(fixup); + return true; +} +EXPORT_SYMBOL(ex_handler_default); + +bool ex_handler_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + regs->ip = ex_fixup_addr(fixup); + regs->ax = trapnr; + return true; +} +EXPORT_SYMBOL_GPL(ex_handler_fault); + +bool ex_handler_ext(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + /* Special hack for uaccess_err */ + current_thread_info()->uaccess_err = 1; + regs->ip = ex_fixup_addr(fixup); + return true; +} +EXPORT_SYMBOL(ex_handler_ext); + +bool ex_has_fault_handler(unsigned long ip) +{ + const struct exception_table_entry *e; + ex_handler_t handler; + + e = search_exception_tables(ip); + if (!e) + return false; + handler = ex_fixup_handler(e); + + return handler == ex_handler_fault; +} + +int fixup_exception(struct pt_regs *regs, int trapnr) +{ + const struct exception_table_entry *e; + ex_handler_t handler; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { @@ -33,42 +81,34 @@ int fixup_exception(struct pt_regs *regs) } #endif - fixup = search_exception_tables(regs->ip); - if (fixup) { - new_ip = ex_fixup_addr(fixup); - - if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) { - /* Special hack for uaccess_err */ - current_thread_info()->uaccess_err = 1; - new_ip -= 0x7ffffff0; - } - regs->ip = new_ip; - return 1; - } + e = search_exception_tables(regs->ip); + if (!e) + return 0; - return 0; + handler = ex_fixup_handler(e); + return handler(e, regs, trapnr); } /* Restricted version used during very early boot */ int __init early_fixup_exception(unsigned long *ip) { - const struct exception_table_entry *fixup; + const struct exception_table_entry *e; unsigned long new_ip; + ex_handler_t handler; - fixup = search_exception_tables(*ip); - if (fixup) { - new_ip = ex_fixup_addr(fixup); + e = search_exception_tables(*ip); + if (!e) + return 0; - if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 
4) { - /* uaccess handling not supported during early boot */ - return 0; - } + new_ip = ex_fixup_addr(e); + handler = ex_fixup_handler(e); - *ip = new_ip; - return 1; - } + /* special handling not supported during early boot */ + if (handler != ex_handler_default) + return 0; - return 0; + *ip = new_ip; + return 1; } /* @@ -133,6 +173,8 @@ void sort_extable(struct exception_table_entry *start, i += 4; p->fixup += i; i += 4; + p->handler += i; + i += 4; } sort(start, finish - start, sizeof(struct exception_table_entry), @@ -145,6 +187,8 @@ void sort_extable(struct exception_table_entry *start, i += 4; p->fixup -= i; i += 4; + p->handler -= i; + i += 4; } } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e830c71..03898ae 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -663,7 +663,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, int sig; /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) { + if (fixup_exception(regs, X86_TRAP_PF)) { /* * Any interrupt that takes a fault gets the fixup. This makes * the below recursive fault logic only apply to a faults from diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index d8a798d..f8d0b5e 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -131,7 +131,7 @@ static inline void get_head_page_multiple(struct page *page, int nr) { VM_BUG_ON_PAGE(page != compound_head(page), page); VM_BUG_ON_PAGE(page_count(page) == 0, page); - atomic_add(nr, &page->_count); + page_ref_add(page, nr); SetPageReferenced(page); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 493f541..9d56f27 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -150,13 +150,14 @@ static int page_size_mask; static void __init probe_page_size_mask(void) { -#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) +#if !defined(CONFIG_KMEMCHECK) /* - * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. + * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will + * use small pages. * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - if (cpu_has_pse) + if (cpu_has_pse && !debug_pagealloc_enabled()) page_size_mask |= 1 << PG_LEVEL_2M; #endif @@ -666,21 +667,22 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) * mark them not present - any buggy init-section access will * create a kernel page fault: */ -#ifdef CONFIG_DEBUG_PAGEALLOC - printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n", - begin, end - 1); - set_memory_np(begin, (end - begin) >> PAGE_SHIFT); -#else - /* - * We just marked the kernel text read only above, now that - * we are going to free part of that, we need to make that - * writeable and non-executable first. - */ - set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); - set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); + if (debug_pagealloc_enabled()) { + pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n", + begin, end - 1); + set_memory_np(begin, (end - begin) >> PAGE_SHIFT); + } else { + /* + * We just marked the kernel text read only above, now that + * we are going to free part of that, we need to make that + * writeable and non-executable first. 
+ */ + set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); + set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what); -#endif + free_reserved_area((void *)begin, (void *)end, + POISON_FREE_INITMEM, what); + } } void free_initmem(void) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cb4ef3d..bd7a9b9 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -388,7 +388,6 @@ repeat: } pte_t *kmap_pte; -pgprot_t kmap_prot; static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) { @@ -405,8 +404,6 @@ static void __init kmap_init(void) */ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; } #ifdef CONFIG_HIGHMEM @@ -871,7 +868,6 @@ static noinline int do_test_wp_bit(void) return flag; } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -960,5 +956,3 @@ void mark_rodata_ro(void) if (__supported_pte_mask & _PAGE_NX) debug_checkwx(); } -#endif - diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5488d21..214afda 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -53,6 +53,7 @@ #include <asm/numa.h> #include <asm/cacheflush.h> #include <asm/init.h> +#include <asm/uv/uv.h> #include <asm/setup.h> #include "mm_internal.h" @@ -1074,7 +1075,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -1166,8 +1166,6 @@ void mark_rodata_ro(void) debug_checkwx(); } -#endif - int kern_addr_valid(unsigned long addr) { unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; @@ -1206,26 +1204,13 @@ int kern_addr_valid(unsigned long addr) static unsigned long probe_memory_block_size(void) { - /* start from 2g */ - unsigned long bz = 1UL<<31; - - if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) { - pr_info("Using 2GB memory block size for large-memory system\n"); - return 2UL * 1024 * 1024 * 1024; - } - - /* less than 64g installed */ - if ((max_pfn << PAGE_SHIFT) < (16UL << 32)) - return MIN_MEMORY_BLOCK_SIZE; + unsigned long bz = MIN_MEMORY_BLOCK_SIZE; - /* get the tail size */ - while (bz > MIN_MEMORY_BLOCK_SIZE) { - if (!((max_pfn << PAGE_SHIFT) & (bz - 1))) - break; - bz >>= 1; - } + /* if system is UV or has 64GB of RAM or more, use large blocks */ + if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30))) + bz = 2UL << 30; /* 2GB */ - printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20); + pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); return bz; } diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index d470cf2..1b1110f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -120,11 +120,22 @@ void __init kasan_init(void) kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END); - memset(kasan_zero_page, 0, PAGE_SIZE); - load_cr3(init_level4_pgt); __flush_tlb_all(); - init_task.kasan_depth = 0; + /* + * kasan_zero_page has been used as early shadow memory, thus it may + * contain some garbage. Now we can clear and write protect it, since + * after the TLB flush no one should write to it. + */ + memset(kasan_zero_page, 0, PAGE_SIZE); + for (i = 0; i < PTRS_PER_PTE; i++) { + pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO); + set_pte(&kasan_zero_pte[i], pte); + } + /* Flush TLBs again to be sure that write protection applied. 
*/ + __flush_tlb_all(); + + init_task.kasan_depth = 0; pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 637ab34..ddb2244 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -33,7 +33,7 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; - unsigned long page; /* location of the fault page */ + unsigned long addr; /* the requested address */ pteval_t old_presence; /* page presence prior to arming */ bool armed; @@ -70,9 +70,16 @@ unsigned int kmmio_count; static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); -static struct list_head *kmmio_page_list(unsigned long page) +static struct list_head *kmmio_page_list(unsigned long addr) { - return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + + if (!pte) + return NULL; + addr &= page_level_mask(l); + + return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)]; } /* Accessed per-cpu */ @@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) } /* You must be holding RCU read lock. */ -static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr) { struct list_head *head; struct kmmio_fault_page *f; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); - page &= PAGE_MASK; - head = kmmio_page_list(page); + if (!pte) + return NULL; + addr &= page_level_mask(l); + head = kmmio_page_list(addr); list_for_each_entry_rcu(f, head, list) { - if (f->page == page) + if (f->addr == addr) return f; } return NULL; @@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) static int clear_page_presence(struct kmmio_fault_page *f, bool clear) { unsigned int level; - pte_t *pte = lookup_address(f->page, &level); + pte_t *pte = lookup_address(f->addr, &level); if (!pte) { - pr_err("no pte for page 0x%08lx\n", f->page); + pr_err("no pte for addr 0x%08lx\n", f->addr); return -1; } @@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one(f->page); + __flush_tlb_one(f->addr); return 0; } @@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) int ret; WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); + pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", + f->addr, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), - f->page); + WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), + f->addr); f->armed = true; return ret; } @@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { int ret = clear_page_presence(f, false); WARN_ONCE(ret < 0, - KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr); f->armed = false; } @@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) struct kmmio_context *ctx; struct kmmio_fault_page *faultpage; int ret = 0; /* default to fault not handled */ + unsigned long page_base = addr; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + if (!pte) + return -EINVAL; + page_base &= page_level_mask(l); /* * Preemption is now 
disabled to prevent process switch during @@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) preempt_disable(); rcu_read_lock(); - faultpage = get_kmmio_fault_page(addr); + faultpage = get_kmmio_fault_page(page_base); if (!faultpage) { /* * Either this page fault is not caused by kmmio, or @@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - if (addr == ctx->addr) { + if (page_base == ctx->addr) { /* * A second fault on the same page means some other * condition needs handling by do_page_fault(), the @@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx->active++; ctx->fpage = faultpage; - ctx->probe = get_kmmio_probe(addr); + ctx->probe = get_kmmio_probe(page_base); ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - ctx->addr = addr; + ctx->addr = page_base; if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); @@ -354,12 +371,11 @@ out: } /* You must be holding kmmio_lock. */ -static int add_kmmio_fault_page(unsigned long page) +static int add_kmmio_fault_page(unsigned long addr) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (f) { if (!f->count) arm_kmmio_fault_page(f); @@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page) return -1; f->count = 1; - f->page = page; + f->addr = addr; if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } - list_add_rcu(&f->list, kmmio_page_list(f->page)); + list_add_rcu(&f->list, kmmio_page_list(f->addr)); return 0; } /* You must be holding kmmio_lock. */ -static void release_kmmio_fault_page(unsigned long page, +static void release_kmmio_fault_page(unsigned long addr, struct kmmio_fault_page **release_list) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (!f) return; @@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p) int ret = 0; unsigned long size = 0; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); + unsigned int l; + pte_t *pte; spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { ret = -EEXIST; goto out; } + + pte = lookup_address(p->addr, &l); + if (!pte) { + ret = -EINVAL; + goto out; + } + kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) pr_err("Unable to set page fault.\n"); - size += PAGE_SIZE; + size += page_level_size(l); } out: spin_unlock_irqrestore(&kmmio_lock, flags); @@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p) const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; + unsigned int l; + pte_t *pte; + + pte = lookup_address(p->addr, &l); + if (!pte) + return; spin_lock_irqsave(&kmmio_lock, flags); while (size < size_lim) { release_kmmio_fault_page(p->addr + size, &release_list); - size += PAGE_SIZE; + size += page_level_size(l); } list_del_rcu(&p->list); kmmio_count--; diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 72bb52f..d2dc043 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd) } /* - * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 - * does, but not when emulating X86_32 - */ -static unsigned long mmap_legacy_base(unsigned 
long rnd) -{ - if (mmap_is_ia32()) - return TASK_UNMAPPED_BASE; - else - return TASK_UNMAPPED_BASE + rnd; -} - -/* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: */ @@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (current->flags & PF_RANDOMIZE) random_factor = arch_mmap_rnd(); - mm->mmap_legacy_base = mmap_legacy_base(random_factor); + mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor; if (mmap_is_legacy()) { mm->mmap_base = mm->mmap_legacy_base; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index d04f809..f70c1ff 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) return true; } +/* + * Mark all currently memblock-reserved physical memory (which covers the + * kernel's own memory ranges) as hot-unswappable. + */ static void __init numa_clear_kernel_node_hotplug(void) { - int i, nid; - nodemask_t numa_kernel_nodes = NODE_MASK_NONE; - phys_addr_t start, end; - struct memblock_region *r; + nodemask_t reserved_nodemask = NODE_MASK_NONE; + struct memblock_region *mb_region; + int i; /* + * We have to do some preprocessing of memblock regions, to + * make them suitable for reservation. + * * At this time, all memory regions reserved by memblock are - * used by the kernel. Set the nid in memblock.reserved will - * mark out all the nodes the kernel resides in. + * used by the kernel, but those regions are not split up + * along node boundaries yet, and don't necessarily have their + * node ID set yet either. + * + * So iterate over all memory known to the x86 architecture, + * and use those ranges to set the nid in memblock.reserved. + * This will split up the memblock regions along node + * boundaries and will set the node IDs as well. */ for (i = 0; i < numa_meminfo.nr_blks; i++) { - struct numa_memblk *mb = &numa_meminfo.blk[i]; + struct numa_memblk *mb = numa_meminfo.blk + i; + int ret; - memblock_set_node(mb->start, mb->end - mb->start, - &memblock.reserved, mb->nid); + ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid); + WARN_ON_ONCE(ret); } /* - * Mark all kernel nodes. + * Now go over all reserved memblock regions, to construct a + * node mask of all kernel reserved memory areas. * - * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo - * may not include all the memblock.reserved memory ranges because - * trim_snb_memory() reserves specific pages for Sandy Bridge graphics. + * [ Note, when booting with mem=nn[kMG] or in a kdump kernel, + * numa_meminfo might not include all memblock.reserved + * memory ranges, because quirks such as trim_snb_memory() + * reserve specific pages for Sandy Bridge graphics. ] */ - for_each_memblock(reserved, r) - if (r->nid != MAX_NUMNODES) - node_set(r->nid, numa_kernel_nodes); + for_each_memblock(reserved, mb_region) { + if (mb_region->nid != MAX_NUMNODES) + node_set(mb_region->nid, reserved_nodemask); + } - /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ + /* + * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory + * belonging to the reserved node mask. 
+ * + * Note that this will include memory regions that reside + * on nodes that contain kernel memory - entire nodes + * become hot-unpluggable: + */ for (i = 0; i < numa_meminfo.nr_blks; i++) { - nid = numa_meminfo.blk[i].nid; - if (!node_isset(nid, numa_kernel_nodes)) - continue; + struct numa_memblk *mb = numa_meminfo.blk + i; - start = numa_meminfo.blk[i].start; - end = numa_meminfo.blk[i].end; + if (!node_isset(mb->nid, reserved_nodemask)) + continue; - memblock_clear_hotplug(start, end - start); + memblock_clear_hotplug(mb->start, mb->end - mb->start); } } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 9cf96d8..4d0b262 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -106,12 +106,6 @@ static inline unsigned long highmap_end_pfn(void) #endif -#ifdef CONFIG_DEBUG_PAGEALLOC -# define debug_pagealloc 1 -#else -# define debug_pagealloc 0 -#endif - static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -283,7 +277,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, __pa_symbol(__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* * Once the kernel maps the text as RO (kernel_set_to_readonly is set), * kernel text mappings for the large page aligned text, rodata sections @@ -714,10 +708,10 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, { struct page *base; - if (!debug_pagealloc) + if (!debug_pagealloc_enabled()) spin_unlock(&cpa_lock); base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); - if (!debug_pagealloc) + if (!debug_pagealloc_enabled()) spin_lock(&cpa_lock); if (!base) return -ENOMEM; @@ -1128,8 +1122,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, /* * Ignore all non primary paths. 
*/ - if (!primary) + if (!primary) { + cpa->numpages = 1; return 0; + } /* * Ignore the NULL PTE for kernel identity mapping, as it is expected @@ -1337,10 +1333,10 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) cpa->numpages = 1; - if (!debug_pagealloc) + if (!debug_pagealloc_enabled()) spin_lock(&cpa_lock); ret = __change_page_attr(cpa, checkalias); - if (!debug_pagealloc) + if (!debug_pagealloc_enabled()) spin_unlock(&cpa_lock); if (ret) return ret; diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index f4ae536..04e2e71 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -943,7 +943,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, return -EINVAL; } - *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | cachemode2protval(pcm)); return 0; @@ -959,7 +959,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, /* Set prot based on lookup */ pcm = lookup_memtype(pfn_t_to_phys(pfn)); - *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | cachemode2protval(pcm)); return 0; diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 92e2eac..8bea847 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -4,6 +4,7 @@ #include <asm/pgtable.h> #include <asm/proto.h> +#include <asm/cpufeature.h> static int disable_nx; @@ -31,9 +32,8 @@ early_param("noexec", noexec_setup); void x86_configure_nx(void) { - if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) - __supported_pte_mask |= _PAGE_NX; - else + /* If disable_nx is set, clear NX on all new mappings going forward. */ + if (disable_nx) __supported_pte_mask &= ~_PAGE_NX; } diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 50d86c0..660a83c 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -24,7 +24,6 @@ #include <asm/nmi.h> #include <asm/apic.h> #include <asm/processor.h> -#include <asm/cpufeature.h> #include "op_x86_model.h" #include "op_counter.h" diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index d34b511..381a43c 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -12,7 +12,6 @@ #include <linux/dmi.h> #include <linux/slab.h> -#include <asm-generic/pci-bridge.h> #include <asm/acpi.h> #include <asm/segment.h> #include <asm/io.h> diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index e585655..b7de192 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -297,14 +297,14 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_r * * The standard boot ROM sequence for an x86 machine uses the BIOS * to select an initial video card for boot display. This boot video - * card will have it's BIOS copied to C0000 in system RAM. + * card will have its BIOS copied to 0xC0000 in system RAM. * IORESOURCE_ROM_SHADOW is used to associate the boot video * card with this copy. On laptops this copy has to be used since * the main ROM may be compressed or combined with another image. * See pci_map_rom() for use of this flag. Before marking the device * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set - * by either arch cde or vga-arbitration, if so only apply the fixup to this - * already determined primary video card. 
+ * by either arch code or vga-arbitration; if so only apply the fixup to this + * already-determined primary video card. */ static void pci_fixup_video(struct pci_dev *pdev) @@ -312,6 +312,7 @@ struct pci_dev *bridge; struct pci_bus *bus; u16 config; + struct resource *res; /* Is VGA routed to us? */ bus = pdev->bus; @@ -336,8 +337,18 @@ if (!vga_default_device() || pdev == vga_default_device()) { pci_read_config_word(pdev, PCI_COMMAND, &config); if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { - pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; - dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n"); + res = &pdev->resource[PCI_ROM_RESOURCE]; + + pci_disable_rom(pdev); + if (res->parent) + release_resource(res); + + res->start = 0xC0000; + res->end = res->start + 0x20000 - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", + res); } } } @@ -540,3 +551,10 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); + +static void pci_bdwep_bar(struct pci_dev *dev) +{ + dev->non_compliant_bars = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar); diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index d57e480..7792aba 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -503,6 +503,18 @@ static struct pci_ops vmd_ops = { .write = vmd_pci_write, }; +static void vmd_attach_resources(struct vmd_dev *vmd) +{ + vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; + vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2]; +} + +static void vmd_detach_resources(struct vmd_dev *vmd) +{ + vmd->dev->resource[VMD_MEMBAR1].child = NULL; + vmd->dev->resource[VMD_MEMBAR2].child = NULL; +} + /* * VMD domains start at 0x1000 to not clash with ACPI _SEG domains. */ @@ -527,11 +539,28 @@ static int vmd_enable_domain(struct vmd_dev *vmd) res = &vmd->dev->resource[VMD_CFGBAR]; vmd->resources[0] = (struct resource) { .name = "VMD CFGBAR", - .start = res->start, + .start = 0, .end = (resource_size(res) >> 20) - 1, .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED, }; + /* + * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can + * put 32-bit resources in the window. + * + * There's no hardware reason why a 64-bit window *couldn't* + * contain a 32-bit resource, but pbus_size_mem() computes the + * bridge window size assuming a 64-bit window will contain no + * 32-bit resources. __pci_assign_resource() enforces that + * artificial restriction to make sure everything will fit. + * + * The only way we could use a 64-bit non-prefetchable MEMBAR is + * if its address is <4GB so that we can convert it to a 32-bit + * resource. To be visible to the host OS, all VMD endpoints must + * be initially configured by platform BIOS, which includes setting + * up these resources. We can assume the device is configured + * according to the platform needs.
+ */ res = &vmd->dev->resource[VMD_MEMBAR1]; upper_bits = upper_32_bits(res->end); flags = res->flags & ~IORESOURCE_SIZEALIGN; @@ -542,6 +571,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd) .start = res->start, .end = res->end, .flags = flags, + .parent = res, }; res = &vmd->dev->resource[VMD_MEMBAR2]; @@ -554,6 +584,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd) .start = res->start + 0x2000, .end = res->end, .flags = flags, + .parent = res, }; sd->domain = vmd_find_free_domain(); @@ -578,6 +609,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd) return -ENODEV; } + vmd_attach_resources(vmd); vmd_setup_dma_ops(vmd); dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); pci_rescan_bus(vmd->bus); @@ -674,6 +706,7 @@ static void vmd_remove(struct pci_dev *dev) { struct vmd_dev *vmd = pci_get_drvdata(dev); + vmd_detach_resources(vmd); pci_set_drvdata(dev, NULL); sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); pci_stop_root_bus(vmd->bus); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 2d66db8..ed30e79 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -131,6 +131,27 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) EXPORT_SYMBOL_GPL(efi_query_variable_store); /* + * Helper function for efi_reserve_boot_services() to figure out if we + * can free regions in efi_free_boot_services(). + * + * Use this function to ensure we do not free regions owned by somebody + * else. We must only reserve (and then free) regions: + * + * - Not within any part of the kernel + * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc) + */ +static bool can_free_region(u64 start, u64 size) +{ + if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) + return false; + + if (!e820_all_mapped(start, start+size, E820_RAM)) + return false; + + return true; +} + +/* * The UEFI specification makes it clear that the operating system is free to do * whatever it wants with boot services code after ExitBootServices() has been * called. Ignoring this recommendation a significant bunch of EFI implementations @@ -147,26 +168,50 @@ void __init efi_reserve_boot_services(void) efi_memory_desc_t *md = p; u64 start = md->phys_addr; u64 size = md->num_pages << EFI_PAGE_SHIFT; + bool already_reserved; if (md->type != EFI_BOOT_SERVICES_CODE && md->type != EFI_BOOT_SERVICES_DATA) continue; - /* Only reserve where possible: - * - Not within any already allocated areas - * - Not over any memory area (really needed, if above?) - * - Not within any part of the kernel - * - Not the bios reserved area - */ - if ((start + size > __pa_symbol(_text) - && start <= __pa_symbol(_end)) || - !e820_all_mapped(start, start+size, E820_RAM) || - memblock_is_region_reserved(start, size)) { - /* Could not reserve, skip it */ - md->num_pages = 0; - memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", - start, start+size-1); - } else + + already_reserved = memblock_is_region_reserved(start, size); + + /* + * Because the following memblock_reserve() is paired + * with free_bootmem_late() for this region in + * efi_free_boot_services(), we must be extremely + * careful not to reserve, and subsequently free, + * critical regions of memory (like the kernel image) or + * those regions that somebody else has already + * reserved. 
+ * + * A good example of a critical region that must not be + * freed is page zero (first 4Kb of memory), which may + * contain boot services code/data but is marked + * E820_RESERVED by trim_bios_range(). + */ + if (!already_reserved) { memblock_reserve(start, size); + + /* + * If we are the first to reserve the region, no + * one else cares about it. We own it and can + * free it later. + */ + if (can_free_region(start, size)) + continue; + } + + /* + * We don't own the region. We must not free it. + * + * Setting this bit for a boot services region really + * doesn't make sense as far as the firmware is + * concerned, but it does provide us with a way to tag + * those regions that must not be paired with + * free_bootmem_late(). + */ + md->attribute |= EFI_MEMORY_RUNTIME; } } @@ -183,8 +228,8 @@ void __init efi_free_boot_services(void) md->type != EFI_BOOT_SERVICES_DATA) continue; - /* Could not reserve boot area */ - if (!size) + /* Do not free, someone else owns it: */ + if (md->attribute & EFI_MEMORY_RUNTIME) continue; free_bootmem_late(start, size); diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index 76b6632..1865c19 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c @@ -21,7 +21,7 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/leds.h> #include <linux/platform_device.h> #include <linux/gpio.h> @@ -35,6 +35,11 @@ #define BIOS_SIGNATURE_COREBOOT 0x500 #define BIOS_REGION_SIZE 0x10000 +/* + * This driver is not modular, but to keep back compatibility + * with existing use cases, continuing with module_param is + * the easiest way forward. + */ static bool force = 0; module_param(force, bool, 0444); /* FIXME: Award bios is not automatically detected as Alix platform */ @@ -192,9 +197,4 @@ static int __init alix_init(void) return 0; } - -module_init(alix_init); - -MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>"); -MODULE_DESCRIPTION("PCEngines ALIX System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(alix_init); diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c index aa733fb..4fcdb91 100644 --- a/arch/x86/platform/geode/geos.c +++ b/arch/x86/platform/geode/geos.c @@ -19,7 +19,6 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> #include <linux/leds.h> #include <linux/platform_device.h> #include <linux/gpio.h> @@ -120,9 +119,4 @@ static int __init geos_init(void) return 0; } - -module_init(geos_init); - -MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); -MODULE_DESCRIPTION("Traverse Technologies Geos System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(geos_init); diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 927e38c..a2f6b98 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -20,7 +20,6 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> #include <linux/leds.h> #include <linux/platform_device.h> #include <linux/gpio.h> @@ -146,9 +145,4 @@ static int __init net5501_init(void) return 0; } - -module_init(net5501_init); - -MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); -MODULE_DESCRIPTION("Soekris net5501 System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(net5501_init); diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index 
23381d2..1eb47b6 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c @@ -52,10 +52,7 @@ static unsigned long __init mfld_calibrate_tsc(void) /* mark tsc clocksource as reliable */ set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); - if (fast_calibrate) - return fast_calibrate; - - return 0; + return fast_calibrate; } static void __init penwell_arch_setup(void) diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c index aaca917..bd1adc6 100644 --- a/arch/x86/platform/intel-mid/mrfl.c +++ b/arch/x86/platform/intel-mid/mrfl.c @@ -81,10 +81,7 @@ static unsigned long __init tangier_calibrate_tsc(void) /* mark tsc clocksource as reliable */ set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); - if (fast_calibrate) - return fast_calibrate; - - return 0; + return fast_calibrate; } static void __init tangier_arch_setup(void) diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index bfadcd0..17d6d22 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -1,5 +1,5 @@ /** - * imr.c + * imr.c -- Intel Isolated Memory Region driver * * Copyright(c) 2013 Intel Corporation. * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> @@ -31,7 +31,6 @@ #include <linux/debugfs.h> #include <linux/init.h> #include <linux/mm.h> -#include <linux/module.h> #include <linux/types.h> struct imr_device { @@ -135,11 +134,9 @@ static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) * @idev: pointer to imr_device structure. * @imr_id: IMR entry to write. * @imr: IMR structure representing address and access masks. - * @lock: indicates if the IMR lock bit should be applied. * @return: 0 on success or error code passed from mbi_iosf on failure. */ -static int imr_write(struct imr_device *idev, u32 imr_id, - struct imr_regs *imr, bool lock) +static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) { unsigned long flags; u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; @@ -163,15 +160,6 @@ static int imr_write(struct imr_device *idev, u32 imr_id, if (ret) goto failed; - /* Lock bit must be set separately to addr_lo address bits. */ - if (lock) { - imr->addr_lo |= IMR_LOCK; - ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, - reg - IMR_NUM_REGS, imr->addr_lo); - if (ret) - goto failed; - } - local_irq_restore(flags); return 0; failed: @@ -270,17 +258,6 @@ static int imr_debugfs_register(struct imr_device *idev) } /** - * imr_debugfs_unregister - unregister debugfs hooks. - * - * @idev: pointer to imr_device structure. - * @return: - */ -static void imr_debugfs_unregister(struct imr_device *idev) -{ - debugfs_remove(idev->file); -} - -/** * imr_check_params - check passed address range IMR alignment and non-zero size * * @base: base address of intended IMR. @@ -334,11 +311,10 @@ static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr) * @size: physical size of region in bytes must be aligned to 1KiB. * @read_mask: read access mask. * @write_mask: write access mask. - * @lock: indicates whether or not to permanently lock this region. * @return: zero on success or negative value indicating error. 
  */
 int imr_add_range(phys_addr_t base, size_t size,
-		  unsigned int rmask, unsigned int wmask, bool lock)
+		  unsigned int rmask, unsigned int wmask)
 {
 	phys_addr_t end;
 	unsigned int i;
@@ -411,7 +387,7 @@ int imr_add_range(phys_addr_t base, size_t size,
 	imr.rmask = rmask;
 	imr.wmask = wmask;
 
-	ret = imr_write(idev, reg, &imr, lock);
+	ret = imr_write(idev, reg, &imr);
 	if (ret < 0) {
 		/*
 		 * In the highly unlikely event iosf_mbi_write failed
@@ -422,7 +398,7 @@ int imr_add_range(phys_addr_t base, size_t size,
 		imr.addr_hi = 0;
 		imr.rmask = IMR_READ_ACCESS_ALL;
 		imr.wmask = IMR_WRITE_ACCESS_ALL;
-		imr_write(idev, reg, &imr, false);
+		imr_write(idev, reg, &imr);
 	}
 failed:
 	mutex_unlock(&idev->lock);
@@ -518,7 +494,7 @@ static int __imr_remove_range(int reg, phys_addr_t base, size_t size)
 	imr.rmask = IMR_READ_ACCESS_ALL;
 	imr.wmask = IMR_WRITE_ACCESS_ALL;
 
-	ret = imr_write(idev, reg, &imr, false);
+	ret = imr_write(idev, reg, &imr);
 
 failed:
 	mutex_unlock(&idev->lock);
@@ -599,7 +575,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 	 * We don't round up @size since it is already PAGE_SIZE aligned.
 	 * See vmlinux.lds.S for details.
 	 */
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	if (ret < 0) {
 		pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
 			size / 1024, start, end);
@@ -614,7 +590,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = {
 	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
 	{}
 };
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
 
 /**
  * imr_init - entry point for IMR driver.
@@ -640,22 +615,4 @@ static int __init imr_init(void)
 	imr_fixup_memmap(idev);
 	return 0;
 }
-
-/**
- * imr_exit - exit point for IMR code.
- *
- * Deregisters debugfs, leave IMR state as-is.
- *
- * return:
- */
-static void __exit imr_exit(void)
-{
-	imr_debugfs_unregister(&imr_dev);
-}
-
-module_init(imr_init);
-module_exit(imr_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_init);
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 278e4da..f5bad40 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -1,5 +1,5 @@
 /**
- * imr_selftest.c
+ * imr_selftest.c -- Intel Isolated Memory Region self-test driver
  *
  * Copyright(c) 2013 Intel Corporation.
  * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -15,7 +15,6 @@
 #include <asm/imr.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 #include <linux/types.h>
 
 #define SELFTEST KBUILD_MODNAME ": "
@@ -61,30 +60,30 @@ static void __init imr_self_test(void)
 	int ret;
 
 	/* Test zero zero. */
-	ret = imr_add_range(0, 0, 0, 0, false);
+	ret = imr_add_range(0, 0, 0, 0);
 	imr_self_test_result(ret < 0, "zero sized IMR\n");
 
 	/* Test exact overlap. */
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test overlap with base inside of existing. */
 	base += size - IMR_ALIGN;
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test overlap with end inside of existing. */
 	base -= size + IMR_ALIGN * 2;
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
 	ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
-			    IMR_WRITE_ACCESS_ALL, false);
+			    IMR_WRITE_ACCESS_ALL);
 	imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
 
 	/* Test that a 1 KiB IMR @ zero with CPU only will work. */
-	ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
 	if (ret >= 0) {
 		ret = imr_remove_range(0, IMR_ALIGN);
@@ -93,8 +92,7 @@ static void __init imr_self_test(void)
 
 	/* Test 2 KiB works. */
 	size = IMR_ALIGN * 2;
-	ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
-			    IMR_WRITE_ACCESS_ALL, false);
+	ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL);
 	imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
 	if (ret >= 0) {
 		ret = imr_remove_range(0, size);
@@ -106,7 +104,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = {
 	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
 	{}
 };
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
 
 /**
  * imr_self_test_init - entry point for IMR driver.
@@ -125,13 +122,4 @@ static int __init imr_self_test_init(void)
  *
  * return:
  */
-static void __exit imr_self_test_exit(void)
-{
-}
-
-module_init(imr_self_test_init);
-module_exit(imr_self_test_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_self_test_init);
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 174781a..00c3190 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -3,7 +3,7 @@
 
 #include <asm/asm.h>
 #include <asm/segment.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cmpxchg.h>
 #include <asm/nops.h>
 
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 439c099..bfce503 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -25,11 +25,11 @@
 
 #define old_mmap sys_old_mmap
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_32.h>
 
 #undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index b74ea6c..f306413 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -35,14 +35,11 @@
 #define stub_execveat sys_execveat
 #define stub_rt_sigreturn sys_rt_sigreturn
 
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_64.h>
 
 #undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index ce7e360..470564b 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -9,14 +9,12 @@
 #include <asm/types.h>
 
 #ifdef __i386__
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_32.h>
 };
 #else
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_64.h>
 };
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d09e4c9..2c26108 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1654,7 +1654,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	cpu_detect(&new_cpu_data);
 	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
 	new_cpu_data.wp_works_ok = 1;
-	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 #endif
 
 	if (xen_start_info->mod_start) {
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 724a087..9466354 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -11,7 +11,7 @@
 #include "pmu.h"
 
 /* x86_pmu.handle_irq definition */
-#include "../kernel/cpu/perf_event.h"
+#include "../events/perf_event.h"
 
 #define XENPMU_IRQ_PROCESSING 1
 struct xenpmu {
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3f4ebf0..3c6d17f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -112,7 +112,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu)
 	xen_pvh_secondary_vcpu_init(cpu);
 #endif
 	cpu_bringup();
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 static void xen_smp_intr_free(unsigned int cpu)
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e9df156..7e9464b 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -413,8 +413,6 @@ config FORCE_MAX_ZONEORDER
 
 source "drivers/pcmcia/Kconfig"
 
-source "drivers/pci/hotplug/Kconfig"
-
 config PLATFORM_WANT_DEFAULT_MEM
 	def_bool n
 
diff --git a/arch/xtensa/include/asm/gpio.h b/arch/xtensa/include/asm/gpio.h
deleted file mode 100644
index b3799d8..0000000
--- a/arch/xtensa/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index e438a00..5d6bd93 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -55,9 +55,6 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
 
 #endif /* __KERNEL__ */
 
-/* Implement the pci_ DMA API in terms of the generic device dma_ one */
-#include <asm-generic/pci-dma-compat.h>
-
 /* Generic PCI */
 #include <asm-generic/pci.h>
 
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index 4d02e38..fc4ad21 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -157,7 +157,7 @@ void secondary_start_kernel(void)
 
 	complete(&cpu_running);
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 static void mx_cpu_start(void *p)
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index c9784c1..7f4a1fd 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -146,7 +146,7 @@ good_area:
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	if (flags & VM_FAULT_MAJOR)
 		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
-	else if (flags & VM_FAULT_MINOR)
+	else
 		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 
 	return;
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 70cb408..c54505d 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -28,10 +28,6 @@
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 
-#ifdef SERIAL_INLINE
-#define _INLINE_ inline
-#endif
-
 #define SERIAL_MAX_NUM_LINES 1
 #define SERIAL_TIMER_VALUE (HZ / 10)
 
diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c
index 8767896..5f4bd71 100644
--- a/arch/xtensa/platforms/xt2000/setup.c
+++ b/arch/xtensa/platforms/xt2000/setup.c
@@ -113,7 +113,7 @@ void platform_heartbeat(void)
 }
 
 //#define RS_TABLE_SIZE 2
-//#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST)
+//#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF|UPF_SKIP_TEST)
 
 #define _SERIAL_PORT(_base,_irq) \
 	{ \