diff options
415 files changed, 3722 insertions, 2384 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-swap b/Documentation/ABI/testing/sysfs-kernel-mm-swap index 587db52..9467201 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-swap +++ b/Documentation/ABI/testing/sysfs-kernel-mm-swap @@ -14,13 +14,3 @@ Description: Enable/disable VMA based swap readahead. still used for tmpfs etc. other users. If set to false, the global swap readahead algorithm will be used for all swappable pages. - -What: /sys/kernel/mm/swap/vma_ra_max_order -Date: August 2017 -Contact: Linux memory management mailing list <linux-mm@kvack.org> -Description: The max readahead size in order for VMA based swap readahead - - VMA based swap readahead algorithm will readahead at - most 1 << max_order pages for each readahead. The - real readahead size for each readahead will be scaled - according to the estimation algorithm. diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index 8282099..5da1018 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -352,44 +352,30 @@ Read-Copy Update (RCU) ---------------------- .. kernel-doc:: include/linux/rcupdate.h - :external: .. kernel-doc:: include/linux/rcupdate_wait.h - :external: .. kernel-doc:: include/linux/rcutree.h - :external: .. kernel-doc:: kernel/rcu/tree.c - :external: .. kernel-doc:: kernel/rcu/tree_plugin.h - :external: .. kernel-doc:: kernel/rcu/tree_exp.h - :external: .. kernel-doc:: kernel/rcu/update.c - :external: .. kernel-doc:: include/linux/srcu.h - :external: .. kernel-doc:: kernel/rcu/srcutree.c - :external: .. kernel-doc:: include/linux/rculist_bl.h - :external: .. kernel-doc:: include/linux/rculist.h - :external: .. kernel-doc:: include/linux/rculist_nulls.h - :external: .. kernel-doc:: include/linux/rcu_sync.h - :external: .. kernel-doc:: kernel/rcu/sync.c - :external: diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 4a0a746..32df07e 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -344,3 +344,4 @@ Version History (wrong raid10_copies/raid10_format sequence) 1.11.1 Add raid4/5/6 journal write-back support via journal_mode option 1.12.1 fix for MD deadlock between mddev_suspend() and md_write_start() available +1.13.0 Fix dev_health status at end of "recover" (was 'a', now 'A') diff --git a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt index b878a1e..ed1456f 100644 --- a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt +++ b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt @@ -16,11 +16,13 @@ Required Properties: - clocks: Array of clocks required for SDHC. - Require at least input clock for Xenon IP core. + Require at least input clock for Xenon IP core. For Armada AP806 and + CP110, the AXI clock is also mandatory. - clock-names: Array of names corresponding to clocks property. The input clock for Xenon IP core should be named as "core". + The input clock for the AXI bus must be named as "axi". - reg: * For "marvell,armada-3700-sdhci", two register areas. @@ -106,8 +108,8 @@ Example: compatible = "marvell,armada-ap806-sdhci"; reg = <0xaa0000 0x1000>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH> - clocks = <&emmc_clk>; - clock-names = "core"; + clocks = <&emmc_clk>,<&axi_clk>; + clock-names = "core", "axi"; bus-width = <4>; marvell,xenon-phy-slow-mode; marvell,xenon-tun-count = <11>; @@ -126,8 +128,8 @@ Example: interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH> vqmmc-supply = <&sd_vqmmc_regulator>; vmmc-supply = <&sd_vmmc_regulator>; - clocks = <&sdclk>; - clock-names = "core"; + clocks = <&sdclk>, <&axi_clk>; + clock-names = "core", "axi"; bus-width = <4>; marvell,xenon-tun-count = <9>; }; diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 36f528a..8caa607 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -210,8 +210,11 @@ path as another overlay mount and it may use a lower layer path that is beneath or above the path of another overlay lower layer path. Using an upper layer path and/or a workdir path that are already used by -another overlay mount is not allowed and will fail with EBUSY. Using +another overlay mount is not allowed and may fail with EBUSY. Using partially overlapping paths is not allowed but will not fail with EBUSY. +If files are accessed from two overlayfs mounts which share or overlap the +upper layer and/or workdir path the behavior of the overlay is undefined, +though it will not result in a crash or deadlock. Mounting an overlay using an upper layer path, where the upper layer path was previously used by another mounted overlay in combination with a diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index 0500193..d477024 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -36,6 +36,7 @@ Supported adapters: * Intel Gemini Lake (SOC) * Intel Cannon Lake-H (PCH) * Intel Cannon Lake-LP (PCH) + * Intel Cedar Fork (PCH) Datasheets: Publicly available at the Intel website On Intel Patsburg and later chipsets, both the normal host SMBus controller diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 57f52cd..9ba04c0 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -2387,7 +2387,7 @@ broadcast: Like active-backup, there is not much advantage to this and packet type ID), so in a "gatewayed" configuration, all outgoing traffic will generally use the same device. Incoming traffic may also end up on a single device, but that is - dependent upon the balancing policy of the peer's 8023.ad + dependent upon the balancing policy of the peer's 802.3ad implementation. In a "local" configuration, traffic will be distributed across the devices in the bond. diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 82fc399..61e43cc 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -25,6 +25,7 @@ Below are the essential guides that every developer should read. submitting-patches coding-style email-clients + kernel-enforcement-statement Other guides to the community that are of interest to most developers are: diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst new file mode 100644 index 0000000..1e23d42 --- /dev/null +++ b/Documentation/process/kernel-enforcement-statement.rst @@ -0,0 +1,147 @@ +Linux Kernel Enforcement Statement +---------------------------------- + +As developers of the Linux kernel, we have a keen interest in how our software +is used and how the license for our software is enforced. Compliance with the +reciprocal sharing obligations of GPL-2.0 is critical to the long-term +sustainability of our software and community. + +Although there is a right to enforce the separate copyright interests in the +contributions made to our community, we share an interest in ensuring that +individual enforcement actions are conducted in a manner that benefits our +community and do not have an unintended negative impact on the health and +growth of our software ecosystem. In order to deter unhelpful enforcement +actions, we agree that it is in the best interests of our development +community to undertake the following commitment to users of the Linux kernel +on behalf of ourselves and any successors to our copyright interests: + + Notwithstanding the termination provisions of the GPL-2.0, we agree that + it is in the best interests of our development community to adopt the + following provisions of GPL-3.0 as additional permissions under our + license with respect to any non-defensive assertion of rights under the + license. + + However, if you cease all violation of this License, then your license + from a particular copyright holder is reinstated (a) provisionally, + unless and until the copyright holder explicitly and finally + terminates your license, and (b) permanently, if the copyright holder + fails to notify you of the violation by some reasonable means prior to + 60 days after the cessation. + + Moreover, your license from a particular copyright holder is + reinstated permanently if the copyright holder notifies you of the + violation by some reasonable means, this is the first time you have + received notice of violation of this License (for any work) from that + copyright holder, and you cure the violation prior to 30 days after + your receipt of the notice. + +Our intent in providing these assurances is to encourage more use of the +software. We want companies and individuals to use, modify and distribute +this software. We want to work with users in an open and transparent way to +eliminate any uncertainty about our expectations regarding compliance or +enforcement that might limit adoption of our software. We view legal action +as a last resort, to be initiated only when other community efforts have +failed to resolve the problem. + +Finally, once a non-compliance issue is resolved, we hope the user will feel +welcome to join us in our efforts on this project. Working together, we will +be stronger. + +Except where noted below, we speak only for ourselves, and not for any company +we might work for today, have in the past, or will in the future. + + - Bjorn Andersson (Linaro) + - Andrea Arcangeli (Red Hat) + - Neil Armstrong + - Jens Axboe + - Pablo Neira Ayuso + - Khalid Aziz + - Ralf Baechle + - Felipe Balbi + - Arnd Bergmann + - Ard Biesheuvel + - Paolo Bonzini (Red Hat) + - Christian Borntraeger + - Mark Brown (Linaro) + - Paul Burton + - Javier Martinez Canillas + - Rob Clark + - Jonathan Corbet + - Vivien Didelot (Savoir-faire Linux) + - Hans de Goede (Red Hat) + - Mel Gorman (SUSE) + - Sven Eckelmann + - Alex Elder (Linaro) + - Fabio Estevam + - Larry Finger + - Bhumika Goyal + - Andy Gross + - Juergen Gross + - Shawn Guo + - Ulf Hansson + - Tejun Heo + - Rob Herring + - Masami Hiramatsu + - Michal Hocko + - Simon Horman + - Johan Hovold (Hovold Consulting AB) + - Christophe JAILLET + - Olof Johansson + - Lee Jones (Linaro) + - Heiner Kallweit + - Srinivas Kandagatla + - Jan Kara + - Shuah Khan (Samsung) + - David Kershner + - Jaegeuk Kim + - Namhyung Kim + - Colin Ian King + - Jeff Kirsher + - Greg Kroah-Hartman (Linux Foundation) + - Christian König + - Vinod Koul + - Krzysztof Kozlowski + - Viresh Kumar + - Aneesh Kumar K.V + - Julia Lawall + - Doug Ledford (Red Hat) + - Chuck Lever (Oracle) + - Daniel Lezcano + - Shaohua Li + - Xin Long (Red Hat) + - Tony Luck + - Mike Marshall + - Chris Mason + - Paul E. McKenney + - David S. Miller + - Ingo Molnar + - Kuninori Morimoto + - Borislav Petkov + - Jiri Pirko + - Josh Poimboeuf + - Sebastian Reichel (Collabora) + - Guenter Roeck + - Joerg Roedel + - Leon Romanovsky + - Steven Rostedt (VMware) + - Ivan Safonov + - Ivan Safonov + - Anna Schumaker + - Jes Sorensen + - K.Y. Srinivasan + - Heiko Stuebner + - Jiri Kosina (SUSE) + - Dmitry Torokhov + - Linus Torvalds + - Thierry Reding + - Rik van Riel + - Geert Uytterhoeven (Glider bvba) + - Daniel Vetter + - Linus Walleij + - Richard Weinberger + - Dan Williams + - Rafael J. Wysocki + - Arvind Yadav + - Masahiro Yamada + - Wei Yongjun + - Lv Zheng diff --git a/MAINTAINERS b/MAINTAINERS index f205662..e652a3e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5259,7 +5259,8 @@ S: Maintained F: drivers/iommu/exynos-iommu.c EZchip NPS platform support -M: Noam Camus <noamc@ezchip.com> +M: Elad Kanfi <eladkan@mellanox.com> +M: Vineet Gupta <vgupta@synopsys.com> S: Supported F: arch/arc/plat-eznps F: arch/arc/boot/dts/eznps.dts @@ -5345,9 +5346,7 @@ M: "J. Bruce Fields" <bfields@fieldses.org> L: linux-fsdevel@vger.kernel.org S: Maintained F: include/linux/fcntl.h -F: include/linux/fs.h F: include/uapi/linux/fcntl.h -F: include/uapi/linux/fs.h F: fs/fcntl.c F: fs/locks.c @@ -5356,6 +5355,8 @@ M: Alexander Viro <viro@zeniv.linux.org.uk> L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/* +F: include/linux/fs.h +F: include/uapi/linux/fs.h FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: Riku Voipio <riku.voipio@iki.fi> @@ -7570,7 +7571,7 @@ F: arch/mips/include/asm/kvm* F: arch/mips/kvm/ KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc) -M: Alexander Graf <agraf@suse.com> +M: Paul Mackerras <paulus@ozlabs.org> L: kvm-ppc@vger.kernel.org W: http://www.linux-kvm.org/ T: git git://github.com/agraf/linux-2.6.git @@ -9360,7 +9361,7 @@ NETWORK BLOCK DEVICE (NBD) M: Josef Bacik <jbacik@fb.com> S: Maintained L: linux-block@vger.kernel.org -L: nbd-general@lists.sourceforge.net +L: nbd@other.debian.org F: Documentation/blockdev/nbd.txt F: drivers/block/nbd.c F: include/uapi/linux/nbd.h @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* @@ -933,7 +933,11 @@ ifdef CONFIG_STACK_VALIDATION ifeq ($(has_libelf),1) objtool_target := tools/objtool FORCE else - $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel") + ifdef CONFIG_ORC_UNWINDER + $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") + else + $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") + endif SKIP_STACK_VALIDATION := 1 export SKIP_STACK_VALIDATION endif diff --git a/arch/Kconfig b/arch/Kconfig index 1aafb4e..d789a89 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -937,9 +937,6 @@ config STRICT_MODULE_RWX and non-text memory will be made non-executable. This provides protection against certain security exploits (e.g. writing to text) -config ARCH_WANT_RELAX_ORDER - bool - config ARCH_HAS_REFCOUNT bool help diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index a598641..c84e67f 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -24,7 +24,7 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select HAVE_FUTEX_CMPXCHG + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_KRETPROBES diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 3a4b52b..d37f49d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,8 +6,6 @@ # published by the Free Software Foundation. # -UTS_MACHINE := arc - ifeq ($(CROSS_COMPILE),) ifndef CONFIG_CPU_BIG_ENDIAN CROSS_COMPILE := arc-linux- diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 2367a67..e114000 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -44,7 +44,14 @@ mmcclk: mmcclk { compatible = "fixed-clock"; - clock-frequency = <50000000>; + /* + * DW sdio controller has external ciu clock divider + * controlled via register in SDIO IP. It divides + * sdio_ref_clk (which comes from CGU) by 16 for + * default. So default mmcclk clock (which comes + * to sdk_in) is 25000000 Hz. + */ + clock-frequency = <25000000>; #clock-cells = <0>; }; diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 229d13a..8adde1b 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -12,6 +12,7 @@ /dts-v1/; #include <dt-bindings/net/ti-dp83867.h> +#include <dt-bindings/reset/snps,hsdk-reset.h> / { model = "snps,hsdk"; @@ -57,10 +58,10 @@ }; }; - core_clk: core-clk { + input_clk: input-clk { #clock-cells = <0>; compatible = "fixed-clock"; - clock-frequency = <500000000>; + clock-frequency = <33333333>; }; cpu_intc: cpu-interrupt-controller { @@ -102,6 +103,19 @@ ranges = <0x00000000 0xf0000000 0x10000000>; + cgu_rst: reset-controller@8a0 { + compatible = "snps,hsdk-reset"; + #reset-cells = <1>; + reg = <0x8A0 0x4>, <0xFF0 0x4>; + }; + + core_clk: core-clk@0 { + compatible = "snps,hsdk-core-pll-clock"; + reg = <0x00 0x10>, <0x14B8 0x4>; + #clock-cells = <0>; + clocks = <&input_clk>; + }; + serial: serial@5000 { compatible = "snps,dw-apb-uart"; reg = <0x5000 0x100>; @@ -120,7 +134,17 @@ mmcclk_ciu: mmcclk-ciu { compatible = "fixed-clock"; - clock-frequency = <100000000>; + /* + * DW sdio controller has external ciu clock divider + * controlled via register in SDIO IP. Due to its + * unexpected default value (it should devide by 1 + * but it devides by 8) SDIO IP uses wrong clock and + * works unstable (see STAR 9001204800) + * So add temporary fix and change clock frequency + * from 100000000 to 12500000 Hz until we fix dw sdio + * driver itself. + */ + clock-frequency = <12500000>; #clock-cells = <0>; }; @@ -141,6 +165,8 @@ clocks = <&gmacclk>; clock-names = "stmmaceth"; phy-handle = <&phy0>; + resets = <&cgu_rst HSDK_ETH_RESET>; + reset-names = "stmmaceth"; mdio { #address-cells = <1>; diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 6980b96..ec7c849 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -105,7 +105,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 2233f57..63d3cf6 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -104,7 +104,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 30a3d4c..f613eca 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -107,7 +107,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 821a2e5..3507be2 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -84,5 +84,5 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 9a3fcf4..15f0f6b 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -63,6 +63,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set +CONFIG_RESET_HSDK=y CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y @@ -72,7 +73,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index c0d6a01..4fcf4f2 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -94,7 +94,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_SHIRQ=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 5c09717..7b71464 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -98,7 +98,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_SHIRQ=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index ba8e802..b1c56d3 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -98,6 +98,7 @@ /* Auxiliary registers */ #define AUX_IDENTITY 4 +#define AUX_EXEC_CTRL 8 #define AUX_INTR_VEC_BASE 0x25 #define AUX_VOL 0x5e @@ -135,12 +136,12 @@ struct bcr_identity { #endif }; -struct bcr_isa { +struct bcr_isa_arcv2 { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1, - pad1:11, atomic1:1, ver:8; + pad1:12, ver:8; #else - unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1, + unsigned int ver:8, pad1:12, be:1, atomic:1, unalign:1, ldd:1, pad2:4, div_rem:4; #endif }; @@ -263,13 +264,13 @@ struct cpuinfo_arc { struct cpuinfo_arc_mmu mmu; struct cpuinfo_arc_bpu bpu; struct bcr_identity core; - struct bcr_isa isa; + struct bcr_isa_arcv2 isa; const char *details, *name; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2, - fpu_sp:1, fpu_dp:1, pad2:6, + fpu_sp:1, fpu_dp:1, dual_iss_enb:1, dual_iss_exist:1, pad2:4, debug:1, ap:1, smart:1, rtt:1, pad3:4, timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; } extn; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 877cec8..fb83844 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -51,6 +51,7 @@ static const struct id_to_str arc_cpu_rel[] = { { 0x51, "R2.0" }, { 0x52, "R2.1" }, { 0x53, "R3.0" }, + { 0x54, "R4.0" }, #endif { 0x00, NULL } }; @@ -62,6 +63,7 @@ static const struct id_to_str arc_cpu_nm[] = { #else { 0x40, "ARC EM" }, { 0x50, "ARC HS38" }, + { 0x54, "ARC HS48" }, #endif { 0x00, "Unknown" } }; @@ -119,11 +121,11 @@ static void read_arc_build_cfg_regs(void) struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; const struct id_to_str *tbl; + struct bcr_isa_arcv2 isa; FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); - READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) { if (cpu->core.family == tbl->id) { @@ -133,7 +135,7 @@ static void read_arc_build_cfg_regs(void) } for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) { - if ((cpu->core.family & 0xF0) == tbl->id) + if ((cpu->core.family & 0xF4) == tbl->id) break; } cpu->name = tbl->str; @@ -192,6 +194,14 @@ static void read_arc_build_cfg_regs(void) cpu->bpu.full = bpu.ft; cpu->bpu.num_cache = 256 << bpu.bce; cpu->bpu.num_pred = 2048 << bpu.pte; + + if (cpu->core.family >= 0x54) { + unsigned int exec_ctrl; + + READ_BCR(AUX_EXEC_CTRL, exec_ctrl); + cpu->extn.dual_iss_exist = 1; + cpu->extn.dual_iss_enb = exec_ctrl & 1; + } } READ_BCR(ARC_REG_AP_BCR, bcr); @@ -205,18 +215,25 @@ static void read_arc_build_cfg_regs(void) cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt; + READ_BCR(ARC_REG_ISA_CFG_BCR, isa); + /* some hacks for lack of feature BCR info in old ARC700 cores */ if (is_isa_arcompact()) { - if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ + if (!isa.ver) /* ISA BCR absent, use Kconfig info */ cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); - else - cpu->isa.atomic = cpu->isa.atomic1; + else { + /* ARC700_BUILD only has 2 bits of isa info */ + struct bcr_generic bcr = *(struct bcr_generic *)&isa; + cpu->isa.atomic = bcr.info & 1; + } cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); /* there's no direct way to distinguish 750 vs. 770 */ if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3)) cpu->name = "ARC750"; + } else { + cpu->isa = isa; } } @@ -232,10 +249,11 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", core->family, core->cpu_id, core->chip_id); - n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s\n", + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n", cpu_id, cpu->name, cpu->details, is_isa_arcompact() ? "ARCompact" : "ARCv2", - IS_AVAIL1(cpu->isa.be, "[Big-Endian]")); + IS_AVAIL1(cpu->isa.be, "[Big-Endian]"), + IS_AVAIL3(cpu->extn.dual_iss_exist, cpu->extn.dual_iss_enb, " Dual-Issue")); n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ", IS_AVAIL1(cpu->extn.timer0, "Timer0 "), diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index f1ac679..cf14ebc 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -111,6 +111,13 @@ static void __init axs10x_early_init(void) axs10x_enable_gpio_intc_wire(); + /* + * Reset ethernet IP core. + * TODO: get rid of this quirk after axs10x reset driver (or simple + * reset driver) will be available in upstream. + */ + iowrite32((1 << 5), (void __iomem *) CREG_MB_SW_RESET); + scnprintf(mb, 32, "MainBoard v%d", mb_rev); axs10x_print_board_ver(CREG_MB_VER, mb); } diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index 5a6ed5a..bd08de4 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -6,4 +6,5 @@ # menuconfig ARC_SOC_HSDK - bool "ARC HS Development Kit SOC" + bool "ARC HS Development Kit SOC" + select CLK_HSDK diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index a2e7fd1..744e62e 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -38,6 +38,42 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define CREG_PAE (CREG_BASE + 0x180) #define CREG_PAE_UPDATE (CREG_BASE + 0x194) +#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8) +#define CREG_CORE_IF_CLK_DIV_2 0x1 +#define CGU_BASE ARC_PERIPHERAL_BASE +#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4) +#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0) +#define CGU_PLL_STATUS_LOCK BIT(0) +#define CGU_PLL_STATUS_ERR BIT(1) +#define CGU_PLL_CTRL_1GHZ 0x3A10 +#define HSDK_PLL_LOCK_TIMEOUT 500 + +#define HSDK_PLL_LOCKED() \ + !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK) + +#define HSDK_PLL_ERR() \ + !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR) + +static void __init hsdk_set_cpu_freq_1ghz(void) +{ + u32 timeout = HSDK_PLL_LOCK_TIMEOUT; + + /* + * As we set cpu clock which exceeds 500MHz, the divider for the interface + * clock must be programmed to div-by-2. + */ + iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV); + + /* Set cpu clock to 1GHz */ + iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL); + + while (!HSDK_PLL_LOCKED() && timeout--) + cpu_relax(); + + if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR()) + pr_err("Failed to setup CPU frequency to 1GHz!"); +} + static void __init hsdk_init_early(void) { /* @@ -52,6 +88,12 @@ static void __init hsdk_init_early(void) /* Really apply settings made above */ writel(1, (void __iomem *) CREG_PAE_UPDATE); + + /* + * Setup CPU frequency to 1GHz. + * TODO: remove it after smart hsdk pll driver will be introduced. + */ + hsdk_set_cpu_freq_1ghz(); } static const char *hsdk_compat[] __initconst = { diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 3585a5e..f7c4d21 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -95,16 +95,19 @@ #define KERNEL_END _end /* - * The size of the KASAN shadow region. This should be 1/8th of the - * size of the entire kernel virtual address space. + * KASAN requires 1/8th of the kernel virtual address space for the shadow + * region. KASAN can bloat the stack significantly, so double the (minimum) + * stack size when KASAN is in use. */ #ifdef CONFIG_KASAN #define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3)) +#define KASAN_THREAD_SHIFT 1 #else #define KASAN_SHADOW_SIZE (0) +#define KASAN_THREAD_SHIFT 0 #endif -#define MIN_THREAD_SHIFT 14 +#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) /* * VMAP'd stacks are allocated at page granularity, so we must ensure that such diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index f0e6d71..d06fbe4 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -649,4 +649,4 @@ static int __init armv8_deprecated_init(void) return 0; } -late_initcall(armv8_deprecated_init); +core_initcall(armv8_deprecated_init); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cd52d36..21e2c95 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1307,4 +1307,4 @@ static int __init enable_mrs_emulation(void) return 0; } -late_initcall(enable_mrs_emulation); +core_initcall(enable_mrs_emulation); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f444f37..5d547de 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -444,4 +444,4 @@ static int __init fpsimd_init(void) return 0; } -late_initcall(fpsimd_init); +core_initcall(fpsimd_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2069e9b..b64958b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -97,7 +97,7 @@ static void data_abort_decode(unsigned int esr) (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT, (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT); } else { - pr_alert(" ISV = 0, ISS = 0x%08lu\n", esr & ESR_ELx_ISS_MASK); + pr_alert(" ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK); } pr_alert(" CM = %lu, WnR = %lu\n", diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 903f3bf..7e25c5c 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -155,14 +155,16 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return __cmpxchg_small(ptr, old, new, size); case 4: - return __cmpxchg_asm("ll", "sc", (volatile u32 *)ptr, old, new); + return __cmpxchg_asm("ll", "sc", (volatile u32 *)ptr, + (u32)old, new); case 8: /* lld/scd are only available for MIPS64 */ if (!IS_ENABLED(CONFIG_64BIT)) return __cmpxchg_called_with_bad_pointer(); - return __cmpxchg_asm("lld", "scd", (volatile u64 *)ptr, old, new); + return __cmpxchg_asm("lld", "scd", (volatile u64 *)ptr, + (u64)old, new); default: return __cmpxchg_called_with_bad_pointer(); diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c index 100f23d..ac584c5 100644 --- a/arch/mips/loongson32/common/platform.c +++ b/arch/mips/loongson32/common/platform.c @@ -183,18 +183,20 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv) } static struct plat_stmmacenet_data ls1x_eth0_pdata = { - .bus_id = 0, - .phy_addr = -1, + .bus_id = 0, + .phy_addr = -1, #if defined(CONFIG_LOONGSON1_LS1B) - .interface = PHY_INTERFACE_MODE_MII, + .interface = PHY_INTERFACE_MODE_MII, #elif defined(CONFIG_LOONGSON1_LS1C) - .interface = PHY_INTERFACE_MODE_RMII, + .interface = PHY_INTERFACE_MODE_RMII, #endif - .mdio_bus_data = &ls1x_mdio_bus_data, - .dma_cfg = &ls1x_eth_dma_cfg, - .has_gmac = 1, - .tx_coe = 1, - .init = ls1x_eth_mux_init, + .mdio_bus_data = &ls1x_mdio_bus_data, + .dma_cfg = &ls1x_eth_dma_cfg, + .has_gmac = 1, + .tx_coe = 1, + .rx_queues_to_use = 1, + .tx_queues_to_use = 1, + .init = ls1x_eth_mux_init, }; static struct resource ls1x_eth0_resources[] = { @@ -222,14 +224,16 @@ struct platform_device ls1x_eth0_pdev = { #ifdef CONFIG_LOONGSON1_LS1B static struct plat_stmmacenet_data ls1x_eth1_pdata = { - .bus_id = 1, - .phy_addr = -1, - .interface = PHY_INTERFACE_MODE_MII, - .mdio_bus_data = &ls1x_mdio_bus_data, - .dma_cfg = &ls1x_eth_dma_cfg, - .has_gmac = 1, - .tx_coe = 1, - .init = ls1x_eth_mux_init, + .bus_id = 1, + .phy_addr = -1, + .interface = PHY_INTERFACE_MODE_MII, + .mdio_bus_data = &ls1x_mdio_bus_data, + .dma_cfg = &ls1x_eth_dma_cfg, + .has_gmac = 1, + .tx_coe = 1, + .rx_queues_to_use = 1, + .tx_queues_to_use = 1, + .init = ls1x_eth_mux_init, }; static struct resource ls1x_eth1_resources[] = { diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 192542d..16d9ef5 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -2558,7 +2558,6 @@ dcopuop: break; default: /* Reserved R6 ops */ - pr_err("Reserved MIPS R6 CMP.condn.S operation\n"); return SIGILL; } } @@ -2719,7 +2718,6 @@ dcopuop: break; default: /* Reserved R6 ops */ - pr_err("Reserved MIPS R6 CMP.condn.D operation\n"); return SIGILL; } } diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 7646891..01b7a87 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -667,7 +667,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, { int src, dst, r, td, ts, mem_off, b_off; bool need_swap, did_move, cmp_eq; - unsigned int target; + unsigned int target = 0; u64 t64; s64 t64s; int bpf_op = BPF_OP(insn->code); diff --git a/arch/mips/tools/generic-board-config.sh b/arch/mips/tools/generic-board-config.sh index 5c4f936..654d652 100755 --- a/arch/mips/tools/generic-board-config.sh +++ b/arch/mips/tools/generic-board-config.sh @@ -30,8 +30,6 @@ cfg="$4" boards_origin="$5" shift 5 -cd "${srctree}" - # Only print Skipping... lines if the user explicitly specified BOARDS=. In the # general case it only serves to obscure the useful output about what actually # was included. @@ -48,7 +46,7 @@ environment*) esac for board in $@; do - board_cfg="arch/mips/configs/generic/board-${board}.config" + board_cfg="${srctree}/arch/mips/configs/generic/board-${board}.config" if [ ! -f "${board_cfg}" ]; then echo "WARNING: Board config '${board_cfg}' not found" continue @@ -84,7 +82,7 @@ for board in $@; do done || continue # Merge this board config fragment into our final config file - ./scripts/kconfig/merge_config.sh \ + ${srctree}/scripts/kconfig/merge_config.sh \ -m -O ${objtree} ${cfg} ${board_cfg} \ | grep -Ev '^(#|Using)' done diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index c6d6272..7baa226 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -35,12 +35,12 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(__xchg8); EXPORT_SYMBOL(__xchg32); EXPORT_SYMBOL(__cmpxchg_u32); +EXPORT_SYMBOL(__cmpxchg_u64); #ifdef CONFIG_SMP EXPORT_SYMBOL(__atomic_hash); #endif #ifdef CONFIG_64BIT EXPORT_SYMBOL(__xchg64); -EXPORT_SYMBOL(__cmpxchg_u64); #endif #include <linux/uaccess.h> diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index a45a67d..30f9239 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -146,7 +146,7 @@ void machine_power_off(void) /* prevent soft lockup/stalled CPU messages for endless loop. */ rcu_sysrq_start(); - lockup_detector_suspend(); + lockup_detector_soft_poweroff(); for (;;); } diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 23de307..41e60a9 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -742,7 +742,7 @@ lws_compare_and_swap_2: 10: ldd 0(%r25), %r25 11: ldd 0(%r24), %r24 #else - /* Load new value into r22/r23 - high/low */ + /* Load old value into r22/r23 - high/low */ 10: ldw 0(%r25), %r22 11: ldw 4(%r25), %r23 /* Load new value into fr4 for atomic store later */ @@ -834,11 +834,11 @@ cas2_action: copy %r0, %r28 #else /* Compare first word */ -19: ldw,ma 0(%r26), %r29 +19: ldw 0(%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ -20: ldw,ma 4(%r26), %r29 +20: ldw 4(%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 2d956aa..8c0105a 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -253,7 +253,10 @@ static int __init init_cr16_clocksource(void) cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; for_each_online_cpu(cpu) { - if (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc) + if (cpu == 0) + continue; + if ((cpu0_loc != 0) && + (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) continue; clocksource_cr16.name = "cr16_unstable"; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 1df770e..7275fed 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -102,10 +102,10 @@ static void cpufeatures_flush_tlb(void) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: - __flush_tlb_power8(POWER8_TLB_SETS); + __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL); break; case PVR_POWER9: - __flush_tlb_power9(POWER9_TLB_SETS_HASH); + __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL); break; default: pr_err("unknown CPU version for boot TLB flush\n"); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 48da0f5..b82586c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -734,7 +734,29 @@ EXC_REAL(program_check, 0x700, 0x100) EXC_VIRT(program_check, 0x4700, 0x100, 0x700) TRAMP_KVM(PACA_EXGEN, 0x700) EXC_COMMON_BEGIN(program_check_common) - EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + /* + * It's possible to receive a TM Bad Thing type program check with + * userspace register values (in particular r1), but with SRR1 reporting + * that we came from the kernel. Normally that would confuse the bad + * stack logic, and we would report a bad kernel stack pointer. Instead + * we switch to the emergency stack if we're taking a TM Bad Thing from + * the kernel. + */ + li r10,MSR_PR /* Build a mask of MSR_PR .. */ + oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */ + and r10,r10,r12 /* Mask SRR1 with that. */ + srdi r10,r10,8 /* Shift it so we can compare */ + cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */ + bne 1f /* If != go to normal path. */ + + /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */ + andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */ + /* 3 in EXCEPTION_PROLOG_COMMON */ + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + b 3f /* Jump into the macro !! */ +1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b76ca19..72f153c6 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -624,5 +624,18 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) long __machine_check_early_realmode_p9(struct pt_regs *regs) { + /* + * On POWER9 DD2.1 and below, it's possible to get a machine check + * caused by a paste instruction where only DSISR bit 25 is set. This + * will result in the MCE handler seeing an unknown event and the kernel + * crashing. An MCE that occurs like this is spurious, so we don't need + * to do anything in terms of servicing it. If there is something that + * needs to be serviced, the CPU will raise the MCE again with the + * correct DSISR so that it can be serviced properly. So detect this + * case and mark it as handled. + */ + if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) + return 1; + return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 0ac741f..2e3bc16 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -904,9 +904,6 @@ void __init setup_arch(char **cmdline_p) #endif #endif -#ifdef CONFIG_PPC_64K_PAGES - init_mm.context.pte_frag = NULL; -#endif #ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(&init_mm); #endif diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c83c115..b2c0029 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -452,9 +452,20 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, if (MSR_TM_RESV(msr)) return -EINVAL; - /* pull in MSR TM from user context */ + /* pull in MSR TS bits from user context */ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + /* + * Ensure that TM is enabled in regs->msr before we leave the signal + * handler. It could be the case that (a) user disabled the TM bit + * through the manipulation of the MSR bits in uc_mcontext or (b) the + * TM bit was disabled because a sufficient number of context switches + * happened whilst in the signal handler and load_tm overflowed, + * disabling the TM bit. In either case we can end up with an illegal + * TM state leading to a TM Bad Thing when we return to userspace. + */ + regs->msr |= MSR_TM; + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index c98e90b..b4e2b71 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -181,34 +181,25 @@ _GLOBAL(ftrace_stub) * - we have no stack frame and can not allocate one * - LR points back to the original caller (in A) * - CTR holds the new NIP in C - * - r0 & r12 are free - * - * r0 can't be used as the base register for a DS-form load or store, so - * we temporarily shuffle r1 (stack pointer) into r0 and then put it back. + * - r0, r11 & r12 are free */ livepatch_handler: CURRENT_THREAD_INFO(r12, r1) - /* Save stack pointer into r0 */ - mr r0, r1 - /* Allocate 3 x 8 bytes */ - ld r1, TI_livepatch_sp(r12) - addi r1, r1, 24 - std r1, TI_livepatch_sp(r12) + ld r11, TI_livepatch_sp(r12) + addi r11, r11, 24 + std r11, TI_livepatch_sp(r12) /* Save toc & real LR on livepatch stack */ - std r2, -24(r1) + std r2, -24(r11) mflr r12 - std r12, -16(r1) + std r12, -16(r11) /* Store stack end marker */ lis r12, STACK_END_MAGIC@h ori r12, r12, STACK_END_MAGIC@l - std r12, -8(r1) - - /* Restore real stack pointer */ - mr r1, r0 + std r12, -8(r11) /* Put ctr in r12 for global entry and branch there */ mfctr r12 @@ -216,36 +207,30 @@ livepatch_handler: /* * Now we are returning from the patched function to the original - * caller A. We are free to use r0 and r12, and we can use r2 until we + * caller A. We are free to use r11, r12 and we can use r2 until we * restore it. */ CURRENT_THREAD_INFO(r12, r1) - /* Save stack pointer into r0 */ - mr r0, r1 - - ld r1, TI_livepatch_sp(r12) + ld r11, TI_livepatch_sp(r12) /* Check stack marker hasn't been trashed */ lis r2, STACK_END_MAGIC@h ori r2, r2, STACK_END_MAGIC@l - ld r12, -8(r1) + ld r12, -8(r11) 1: tdne r12, r2 EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 /* Restore LR & toc from livepatch stack */ - ld r12, -16(r1) + ld r12, -16(r11) mtlr r12 - ld r2, -24(r1) + ld r2, -24(r11) /* Pop livepatch stack frame */ - CURRENT_THREAD_INFO(r12, r0) - subi r1, r1, 24 - std r1, TI_livepatch_sp(r12) - - /* Restore real stack pointer */ - mr r1, r0 + CURRENT_THREAD_INFO(r12, r1) + subi r11, r11, 24 + std r11, TI_livepatch_sp(r12) /* Return to original caller of live patched function */ blr diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 2f6eadd..c702a89 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -310,9 +310,6 @@ static int start_wd_on_cpu(unsigned int cpu) if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) return 0; - if (watchdog_suspended) - return 0; - if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) return 0; @@ -358,36 +355,39 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_reconfigure(void) +void watchdog_nmi_stop(void) { int cpu; - watchdog_calc_timeouts(); - for_each_cpu(cpu, &wd_cpus_enabled) stop_wd_on_cpu(cpu); +} +void watchdog_nmi_start(void) +{ + int cpu; + + watchdog_calc_timeouts(); for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) start_wd_on_cpu(cpu); } /* - * This runs after lockup_detector_init() which sets up watchdog_cpumask. + * Invoked from core watchdog init. */ -static int __init powerpc_watchdog_init(void) +int __init watchdog_nmi_probe(void) { int err; - watchdog_calc_timeouts(); - - err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", - start_wd_on_cpu, stop_wd_on_cpu); - if (err < 0) + err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "powerpc/watchdog:online", + start_wd_on_cpu, stop_wd_on_cpu); + if (err < 0) { pr_warn("Watchdog could not be initialized"); - + return err; + } return 0; } -arch_initcall(powerpc_watchdog_init); static void handle_backtrace_ipi(struct pt_regs *regs) { diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1330462..bf45784 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -622,7 +622,7 @@ int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server, return -EINVAL; state = &sb->irq_state[idx]; arch_spin_lock(&sb->lock); - *server = state->guest_server; + *server = state->act_server; *priority = state->guest_priority; arch_spin_unlock(&sb->lock); @@ -1331,7 +1331,7 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr) xive->saved_src_count++; /* Convert saved state into something compatible with xics */ - val = state->guest_server; + val = state->act_server; prio = state->saved_scan_prio; if (prio == MASKED) { @@ -1507,7 +1507,6 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* First convert prio and mark interrupt as untargetted */ act_prio = xive_prio_from_guest(guest_prio); state->act_priority = MASKED; - state->guest_server = server; /* * We need to drop the lock due to the mutex below. Hopefully diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 5938f76..6ba63f8 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -35,7 +35,6 @@ struct kvmppc_xive_irq_state { struct xive_irq_data *pt_data; /* XIVE Pass-through associated data */ /* Targetting as set by guest */ - u32 guest_server; /* Current guest selected target */ u8 guest_priority; /* Guest set priority */ u8 saved_priority; /* Saved priority when masking */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5e8418c..f208f56 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1684,11 +1684,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, * Logical instructions */ case 26: /* cntlzw */ - op->val = __builtin_clz((unsigned int) regs->gpr[rd]); + val = (unsigned int) regs->gpr[rd]; + op->val = ( val ? __builtin_clz(val) : 32 ); goto logical_done; #ifdef __powerpc64__ case 58: /* cntlzd */ - op->val = __builtin_clzl(regs->gpr[rd]); + val = regs->gpr[rd]; + op->val = ( val ? __builtin_clzl(val) : 64 ); goto logical_done; #endif case 28: /* and */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b95c584..a51df9e 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1438,7 +1438,6 @@ out: int arch_update_cpu_topology(void) { - lockdep_assert_cpus_held(); return numa_update_cpu_topology(true); } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 65eda19..f6c7f54 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -361,9 +361,9 @@ static int change_page_attr(struct page *page, int numpages, pgprot_t prot) break; } wmb(); + local_irq_restore(flags); flush_tlb_kernel_range((unsigned long)page_address(start), (unsigned long)page_address(page)); - local_irq_restore(flags); return err; } diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ccac86..8812624 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -399,6 +399,20 @@ static void nest_imc_counters_release(struct perf_event *event) /* Take the mutex lock for this node and then decrement the reference count */ mutex_lock(&ref->lock); + if (ref->refc == 0) { + /* + * The scenario where this is true is, when perf session is + * started, followed by offlining of all cpus in a given node. + * + * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline() + * function set the ref->count to zero, if the cpu which is + * about to offline is the last cpu in a given node and make + * an OPAL call to disable the engine in that node. + * + */ + mutex_unlock(&ref->lock); + return; + } ref->refc--; if (ref->refc == 0) { rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, @@ -523,8 +537,8 @@ static int core_imc_mem_init(int cpu, int size) /* We need only vbase for core counters */ mem_info->vbase = page_address(alloc_pages_node(phys_id, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - get_order(size))); + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size))); if (!mem_info->vbase) return -ENOMEM; @@ -646,6 +660,20 @@ static void core_imc_counters_release(struct perf_event *event) return; mutex_lock(&ref->lock); + if (ref->refc == 0) { + /* + * The scenario where this is true is, when perf session is + * started, followed by offlining of all cpus in a given core. + * + * In the cpuhotplug offline path, ppc_core_imc_cpu_offline() + * function set the ref->count to zero, if the cpu which is + * about to offline is the last cpu in a given core and make + * an OPAL call to disable the engine in that core. + * + */ + mutex_unlock(&ref->lock); + return; + } ref->refc--; if (ref->refc == 0) { rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, @@ -763,8 +791,8 @@ static int thread_imc_mem_alloc(int cpu_id, int size) * free the memory in cpu offline path. */ local_mem = page_address(alloc_pages_node(phys_id, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - get_order(size))); + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size))); if (!local_mem) return -ENOMEM; @@ -1148,7 +1176,8 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) } /* Only free the attr_groups which are dynamically allocated */ - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); + if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); return; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 897aa14..bbb73aa 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -272,7 +272,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static unsigned long pnv_memory_block_size(void) { - return 256UL * 1024 * 1024; + /* + * We map the kernel linear region with 1GB large pages on radix. For + * memory hot unplug to work our memory block size must be at least + * this size. + */ + if (radix_enabled()) + return 1UL * 1024 * 1024 * 1024; + else + return 256UL * 1024 * 1024; } #endif diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index f387318..a3b8d7d 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1402,6 +1402,14 @@ void xive_teardown_cpu(void) if (xive_ops->teardown_cpu) xive_ops->teardown_cpu(cpu, xc); + +#ifdef CONFIG_SMP + /* Get rid of IPI */ + xive_cleanup_cpu_ipi(cpu, xc); +#endif + + /* Disable and free the queues */ + xive_cleanup_cpu_queues(cpu, xc); } void xive_kexec_teardown_cpu(int secondary) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f24a70b..d9c4c93 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -431,7 +431,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { + if (!xc->hw_ipi) + return; + xive_irq_bitmap_free(xc->hw_ipi); + xc->hw_ipi = 0; } #endif /* CONFIG_SMP */ diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index afa46a7..04e042e 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -27,6 +27,7 @@ CONFIG_NET=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y # CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_XPRAM is not set # CONFIG_DCSSBLK is not set # CONFIG_DASD is not set @@ -59,6 +60,7 @@ CONFIG_CONFIGFS_FS=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1cee675..495ff69 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -293,7 +293,10 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) lc->lpp = LPP_MAGIC; lc->current_pid = tsk->pid; lc->user_timer = tsk->thread.user_timer; + lc->guest_timer = tsk->thread.guest_timer; lc->system_timer = tsk->thread.system_timer; + lc->hardirq_timer = tsk->thread.hardirq_timer; + lc->softirq_timer = tsk->thread.softirq_timer; lc->steal_timer = 0; } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 0be3828..4e83f95 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -44,7 +44,6 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select LOCKDEP_SMALL if LOCKDEP - select ARCH_WANT_RELAX_ORDER config SPARC32 def_bool !64BIT diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 8a13d46..50e0d2b 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -176,7 +176,7 @@ /* * This is a sneaky trick to help the unwinder find pt_regs on the stack. The * frame pointer is replaced with an encoded pointer to pt_regs. The encoding - * is just setting the LSB, which makes it an invalid stack address and is also + * is just clearing the MSB, which makes it an invalid stack address and is also * a signal to the unwinder that it's a pt_regs pointer in disguise. * * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the @@ -185,7 +185,7 @@ .macro ENCODE_FRAME_POINTER #ifdef CONFIG_FRAME_POINTER mov %esp, %ebp - orl $0x1, %ebp + andl $0x7fffffff, %ebp #endif .endm diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 829e89c..9fb9a1f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4409,10 +4409,9 @@ static __init int fixup_ht_bug(void) return 0; } - if (lockup_detector_suspend() != 0) { - pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n"); - return 0; - } + cpus_read_lock(); + + hardlockup_detector_perf_stop(); x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); @@ -4420,9 +4419,7 @@ static __init int fixup_ht_bug(void) x86_pmu.commit_scheduling = NULL; x86_pmu.stop_scheduling = NULL; - lockup_detector_resume(); - - cpus_read_lock(); + hardlockup_detector_perf_restart(); for_each_online_cpu(c) free_excl_cntrs(c); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1a8eb55..a5db63f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -85,6 +85,8 @@ EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); +u32 hv_max_vp_index; + static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; @@ -93,6 +95,9 @@ static int hv_cpu_init(unsigned int cpu) hv_vp_index[smp_processor_id()] = msr_vp_index; + if (msr_vp_index > hv_max_vp_index) + hv_max_vp_index = msr_vp_index; + return 0; } diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 39e7f6e..9cc9e1c 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -36,9 +36,9 @@ struct hv_flush_pcpu_ex { /* Each gva in gva_list encodes up to 4096 pages to flush */ #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) -static struct hv_flush_pcpu __percpu *pcpu_flush; +static struct hv_flush_pcpu __percpu **pcpu_flush; -static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex; +static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex; /* * Fills in gva_list starting from offset. Returns the number of items added. @@ -76,6 +76,18 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush, { int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + /* valid_bank_mask can represent up to 64 banks */ + if (hv_max_vp_index / 64 >= 64) + return 0; + + /* + * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex + * structs are not cleared between calls, we risk flushing unneeded + * vCPUs otherwise. + */ + for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) + flush->hv_vp_set.bank_contents[vcpu_bank] = 0; + /* * Some banks may end up being empty but this is acceptable. */ @@ -83,11 +95,6 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush, vcpu = hv_cpu_number_to_vp_number(cpu); vcpu_bank = vcpu / 64; vcpu_offset = vcpu % 64; - - /* valid_bank_mask can represent up to 64 banks */ - if (vcpu_bank >= 64) - return 0; - __set_bit(vcpu_offset, (unsigned long *) &flush->hv_vp_set.bank_contents[vcpu_bank]); if (vcpu_bank >= nr_bank) @@ -102,6 +109,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, const struct flush_tlb_info *info) { int cpu, vcpu, gva_n, max_gvas; + struct hv_flush_pcpu **flush_pcpu; struct hv_flush_pcpu *flush; u64 status = U64_MAX; unsigned long flags; @@ -116,7 +124,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, local_irq_save(flags); - flush = this_cpu_ptr(pcpu_flush); + flush_pcpu = this_cpu_ptr(pcpu_flush); + + if (unlikely(!*flush_pcpu)) + *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + + flush = *flush_pcpu; + + if (unlikely(!flush)) { + local_irq_restore(flags); + goto do_native; + } if (info->mm) { flush->address_space = virt_to_phys(info->mm->pgd); @@ -173,6 +191,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, const struct flush_tlb_info *info) { int nr_bank = 0, max_gvas, gva_n; + struct hv_flush_pcpu_ex **flush_pcpu; struct hv_flush_pcpu_ex *flush; u64 status = U64_MAX; unsigned long flags; @@ -187,7 +206,17 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, local_irq_save(flags); - flush = this_cpu_ptr(pcpu_flush_ex); + flush_pcpu = this_cpu_ptr(pcpu_flush_ex); + + if (unlikely(!*flush_pcpu)) + *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + + flush = *flush_pcpu; + + if (unlikely(!flush)) { + local_irq_restore(flags); + goto do_native; + } if (info->mm) { flush->address_space = virt_to_phys(info->mm->pgd); @@ -222,18 +251,18 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; status = hv_do_rep_hypercall( HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, - 0, nr_bank + 2, flush, NULL); + 0, nr_bank, flush, NULL); } else if (info->end && ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { status = hv_do_rep_hypercall( HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, - 0, nr_bank + 2, flush, NULL); + 0, nr_bank, flush, NULL); } else { gva_n = fill_gva_list(flush->gva_list, nr_bank, info->start, info->end); status = hv_do_rep_hypercall( HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, - gva_n, nr_bank + 2, flush, NULL); + gva_n, nr_bank, flush, NULL); } local_irq_restore(flags); @@ -266,7 +295,7 @@ void hyper_alloc_mmu(void) return; if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) - pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); + pcpu_flush = alloc_percpu(struct hv_flush_pcpu *); else - pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); + pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *); } diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e7636ba..6c98821 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -62,8 +62,10 @@ #define new_len2 145f-144f /* - * max without conditionals. Idea adapted from: + * gas compatible max based on the idea from: * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + * + * The additional "-" is needed because gas uses a "true" value of -1. */ #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c096624..ccbe24e 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end) alt_end_marker ":\n" /* - * max without conditionals. Idea adapted from: + * gas compatible max based on the idea from: * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax * - * The additional "-" is needed because gas works with s32s. + * The additional "-" is needed because gas uses a "true" value of -1. */ -#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))" +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" /* * Pad the second replacement alternative with additional NOPs if it is diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index bc62e7c..59ad3d1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); void __init kvm_guest_init(void); -void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); @@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void) #else /* CONFIG_KVM_GUEST */ #define kvm_guest_init() do {} while (0) -#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wait(T, I) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 1812649..8edac1d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -187,7 +187,6 @@ struct mca_msr_regs { extern struct mce_vendor_flags mce_flags; -extern struct mca_config mca_cfg; extern struct mca_msr_regs msr_ops; enum mce_notifier_prios { diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c120b5d..3c856a1 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) DEBUG_LOCKS_WARN_ON(preemptible()); } -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ - int cpu = smp_processor_id(); - - if (cpumask_test_cpu(cpu, mm_cpumask(mm))) - cpumask_clear_cpu(cpu, mm_cpumask(mm)); -} +void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 738503e..530f448 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -289,6 +289,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, * to this information. */ extern u32 *hv_vp_index; +extern u32 hv_max_vp_index; /** * hv_cpu_number_to_vp_number() - Map CPU to VP. diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 4893abf..d362161 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -83,6 +83,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) #endif /* + * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point + * to init_mm when we switch to a kernel thread (e.g. the idle thread). If + * it's false, then we immediately switch CR3 when entering a kernel thread. + */ +DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode); + +/* * 6 because 6 should be plenty and struct tlb_state will fit in * two cache lines. */ @@ -105,6 +112,23 @@ struct tlb_state { u16 next_asid; /* + * We can be in one of several states: + * + * - Actively using an mm. Our CPU's bit will be set in + * mm_cpumask(loaded_mm) and is_lazy == false; + * + * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit + * will not be set in mm_cpumask(&init_mm) and is_lazy == false. + * + * - Lazily using a real mm. loaded_mm != &init_mm, our bit + * is set in mm_cpumask(loaded_mm), but is_lazy == true. + * We're heuristically guessing that the CR3 load we + * skipped more than makes up for the overhead added by + * lazy mode. + */ + bool is_lazy; + + /* * Access to this CR4 shadow and to H/W CR4 is protected by * disabling interrupts when modifying either one. */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d705c76..ff89177 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -573,11 +573,21 @@ static u32 bdx_deadline_rev(void) return ~0U; } +static u32 skx_deadline_rev(void) +{ + switch (boot_cpu_data.x86_mask) { + case 0x03: return 0x01000136; + case 0x04: return 0x02000014; + } + + return ~0U; +} + static const struct x86_cpu_id deadline_match[] = { DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X, 0x02000014), + DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20), @@ -600,7 +610,8 @@ static void apic_check_deadline_errata(void) const struct x86_cpu_id *m; u32 rev; - if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) || + boot_cpu_has(X86_FEATURE_HYPERVISOR)) return; m = x86_match_cpu(deadline_match); diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 098530a..debb974 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -1,3 +1,6 @@ +#ifndef __X86_MCE_INTERNAL_H__ +#define __X86_MCE_INTERNAL_H__ + #include <linux/device.h> #include <asm/mce.h> @@ -108,3 +111,7 @@ static inline void mce_work_trigger(void) { } static inline void mce_register_injector_chain(struct notifier_block *nb) { } static inline void mce_unregister_injector_chain(struct notifier_block *nb) { } #endif + +extern struct mca_config mca_cfg; + +#endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 40e28ed..486f640 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -28,6 +28,8 @@ #include <asm/msr.h> #include <asm/trace/irq_vectors.h> +#include "mce-internal.h" + #define NR_BLOCKS 5 #define THRESHOLD_MAX 0xFFF #define INT_TYPE_APIC 0x00020000 diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 86e8f0b..c4fa4a8 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (!have_cpuid_p()) - return *res; - /* * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not * completely accurate as xen pv guests don't see that CPUID bit set but @@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name) void __init load_ucode_bsp(void) { unsigned int cpuid_1_eax; + bool intel = true; - if (check_loader_disabled_bsp()) + if (!have_cpuid_p()) return; cpuid_1_eax = native_cpuid_eax(1); switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: - if (x86_family(cpuid_1_eax) >= 6) - load_ucode_intel_bsp(); + if (x86_family(cpuid_1_eax) < 6) + return; break; + case X86_VENDOR_AMD: - if (x86_family(cpuid_1_eax) >= 0x10) - load_ucode_amd_bsp(cpuid_1_eax); + if (x86_family(cpuid_1_eax) < 0x10) + return; + intel = false; break; + default: - break; + return; } + + if (check_loader_disabled_bsp()) + return; + + if (intel) + load_ucode_intel_bsp(); + else + load_ucode_amd_bsp(cpuid_1_eax); } static bool check_loader_disabled_ap(void) diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index db2182d..3fc0f9a 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -3,6 +3,15 @@ /* Kprobes and Optprobes common header */ +#include <asm/asm.h> + +#ifdef CONFIG_FRAME_POINTER +# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \ + " mov %" _ASM_SP ", %" _ASM_BP "\n" +#else +# define SAVE_RBP_STRING " push %" _ASM_BP "\n" +#endif + #ifdef CONFIG_X86_64 #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax. */ \ @@ -17,7 +26,7 @@ " pushq %r10\n" \ " pushq %r11\n" \ " pushq %rbx\n" \ - " pushq %rbp\n" \ + SAVE_RBP_STRING \ " pushq %r12\n" \ " pushq %r13\n" \ " pushq %r14\n" \ @@ -48,7 +57,7 @@ " pushl %es\n" \ " pushl %ds\n" \ " pushl %eax\n" \ - " pushl %ebp\n" \ + SAVE_RBP_STRING \ " pushl %edi\n" \ " pushl %esi\n" \ " pushl %edx\n" \ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f015371..0742491 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1080,8 +1080,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * raw stack chunk with redzones: */ __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); - regs->flags &= ~X86_EFLAGS_IF; - trace_hardirqs_off(); regs->ip = (unsigned long)(jp->entry); /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e675704..8bb9594 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, return NULL; } -void kvm_async_pf_task_wait(u32 token) +/* + * @interrupt_kernel: Is this called from a routine which interrupts the kernel + * (other than user space)? + */ +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; @@ -140,8 +144,10 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || preempt_count() > 1 || - rcu_preempt_depth(); + n.halted = is_idle_task(current) || + (IS_ENABLED(CONFIG_PREEMPT_COUNT) + ? preempt_count() > 1 || rcu_preempt_depth() + : interrupt_kernel); init_swait_queue_head(&n.wq); hlist_add_head(&n.link, &b->list); raw_spin_unlock(&b->lock); @@ -269,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ prev_state = exception_enter(); - kvm_async_pf_task_wait((u32)read_cr2()); + kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs)); exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 54180fa..add33f6 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -105,6 +105,10 @@ void __noreturn machine_real_restart(unsigned int type) load_cr3(initial_page_table); #else write_cr3(real_mode_header->trampoline_pgd); + + /* Exiting long mode will fail if CR4.PCIDE is set. */ + if (static_cpu_has(X86_FEATURE_PCID)) + cr4_clear_bits(X86_CR4_PCIDE); #endif /* Jump to the identity-mapped low memory code */ diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index d145a0b..3dc26f9 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -44,7 +44,8 @@ static void unwind_dump(struct unwind_state *state) state->stack_info.type, state->stack_info.next_sp, state->stack_mask, state->graph_idx); - for (sp = state->orig_sp; sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp; + sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { if (get_stack_info(sp, state->task, &stack_info, &visit_mask)) break; @@ -174,6 +175,7 @@ static bool is_last_task_frame(struct unwind_state *state) * This determines if the frame pointer actually contains an encoded pointer to * pt_regs on the stack. See ENCODE_FRAME_POINTER. */ +#ifdef CONFIG_X86_64 static struct pt_regs *decode_frame_pointer(unsigned long *bp) { unsigned long regs = (unsigned long)bp; @@ -183,6 +185,23 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) return (struct pt_regs *)(regs & ~0x1); } +#else +static struct pt_regs *decode_frame_pointer(unsigned long *bp) +{ + unsigned long regs = (unsigned long)bp; + + if (regs & 0x80000000) + return NULL; + + return (struct pt_regs *)(regs | 0x80000000); +} +#endif + +#ifdef CONFIG_X86_32 +#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long)) +#else +#define KERNEL_REGS_SIZE (sizeof(struct pt_regs)) +#endif static bool update_stack_state(struct unwind_state *state, unsigned long *next_bp) @@ -202,7 +221,7 @@ static bool update_stack_state(struct unwind_state *state, regs = decode_frame_pointer(next_bp); if (regs) { frame = (unsigned long *)regs; - len = regs_size(regs); + len = KERNEL_REGS_SIZE; state->got_irq = true; } else { frame = next_bp; @@ -226,6 +245,14 @@ static bool update_stack_state(struct unwind_state *state, frame < prev_frame_end) return false; + /* + * On 32-bit with user mode regs, make sure the last two regs are safe + * to access: + */ + if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) && + !on_stack(info, frame, len + 2*sizeof(long))) + return false; + /* Move state to the next frame: */ if (regs) { state->regs = regs; @@ -328,6 +355,13 @@ bad_address: state->regs->sp < (unsigned long)task_pt_regs(state->task)) goto the_end; + /* + * There are some known frame pointer issues on 32-bit. Disable + * unwinder warnings on 32-bit until it gets objtool support. + */ + if (IS_ENABLED(CONFIG_X86_32)) + goto the_end; + if (state->regs) { printk_deferred_once(KERN_WARNING "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 3ea6244..3c48bc8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -23,6 +23,7 @@ config KVM depends on HIGH_RES_TIMERS # for TASKSTATS/TASK_DELAY_ACCT: depends on NET && MULTIUSER + depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a36254c..d90cdc7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -425,8 +425,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); #op " %al \n\t" \ FOP_RET -asm(".global kvm_fastop_exception \n" - "kvm_fastop_exception: xor %esi, %esi; ret"); +asm(".pushsection .fixup, \"ax\"\n" + ".global kvm_fastop_exception \n" + "kvm_fastop_exception: xor %esi, %esi; ret\n" + ".popsection"); FOP_START(setcc) FOP_SETCC(seto) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca30c1..7a69cf0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, case KVM_PV_REASON_PAGE_NOT_PRESENT: vcpu->arch.apf.host_apf_reason = 0; local_irq_disable(); - kvm_async_pf_task_wait(fault_address); + kvm_async_pf_task_wait(fault_address, 0); local_irq_enable(); break; case KVM_PV_REASON_PAGE_READY: @@ -3974,19 +3974,19 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) { /* - * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set - * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means - * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. - */ - gpte |= level - PT_PAGE_TABLE_LEVEL - 1; - - /* * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. * If it is clear, there are no large pages at this level, so clear * PT_PAGE_SIZE_MASK in gpte if that is the case. */ gpte &= level - mmu->last_nonleaf_level; + /* + * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set + * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means + * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. + */ + gpte |= level - PT_PAGE_TABLE_LEVEL - 1; + return gpte & PT_PAGE_SIZE_MASK; } @@ -4555,6 +4555,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, update_permission_bitmask(vcpu, context, true); update_pkru_bitmask(vcpu, context, true); + update_last_nonleaf_level(vcpu, context); reset_rsvds_bits_mask_ept(vcpu, context, execonly); reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 86b68dc..f18d1f8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -334,10 +334,11 @@ retry_walk: --walker->level; index = PT_INDEX(addr, walker->level); - table_gfn = gpte_to_gfn(pte); offset = index * sizeof(pt_element_t); pte_gpa = gfn_to_gpa(table_gfn) + offset; + + BUG_ON(walker->level < 1); walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a2b804e..95a0160 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11297,7 +11297,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, /* Same as above - no reason to call set_cr4_guest_host_mask(). */ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); - kvm_set_cr4(vcpu, vmcs12->host_cr4); + vmx_set_cr4(vcpu, vmcs12->host_cr4); nested_ept_uninit_mmu_context(vcpu); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 72bf8c0..e1f0958 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,5 +1,12 @@ -# Kernel does not boot with instrumentation of tlb.c. -KCOV_INSTRUMENT_tlb.o := n +# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c +KCOV_INSTRUMENT_tlb.o := n +KCOV_INSTRUMENT_mem_encrypt.o := n + +KASAN_SANITIZE_mem_encrypt.o := n + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_mem_encrypt.o = -pg +endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o physaddr.o setup_nx.o tlb.o diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 49d9778..658bf00 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -30,6 +30,8 @@ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); +DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode); + static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, u16 *new_asid, bool *need_flush) { @@ -80,7 +82,7 @@ void leave_mm(int cpu) return; /* Warn if we're not lazy. */ - WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))); + WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy)); switch_mm(NULL, &init_mm, NULL); } @@ -142,45 +144,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, __flush_tlb_all(); } #endif + this_cpu_write(cpu_tlbstate.is_lazy, false); if (real_prev == next) { VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != next->context.ctx_id); - if (cpumask_test_cpu(cpu, mm_cpumask(next))) { - /* - * There's nothing to do: we weren't lazy, and we - * aren't changing our mm. We don't need to flush - * anything, nor do we need to update CR3, CR4, or - * LDTR. - */ - return; - } - - /* Resume remote flushes and then read tlb_gen. */ - cpumask_set_cpu(cpu, mm_cpumask(next)); - next_tlb_gen = atomic64_read(&next->context.tlb_gen); - - if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) < - next_tlb_gen) { - /* - * Ideally, we'd have a flush_tlb() variant that - * takes the known CR3 value as input. This would - * be faster on Xen PV and on hypothetical CPUs - * on which INVPCID is fast. - */ - this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen, - next_tlb_gen); - write_cr3(build_cr3(next, prev_asid)); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, - TLB_FLUSH_ALL); - } - /* - * We just exited lazy mode, which means that CR4 and/or LDTR - * may be stale. (Changes to the required CR4 and LDTR states - * are not reflected in tlb_gen.) + * We don't currently support having a real mm loaded without + * our cpu set in mm_cpumask(). We have all the bookkeeping + * in place to figure out whether we would need to flush + * if our cpu were cleared in mm_cpumask(), but we don't + * currently use it. */ + if (WARN_ON_ONCE(real_prev != &init_mm && + !cpumask_test_cpu(cpu, mm_cpumask(next)))) + cpumask_set_cpu(cpu, mm_cpumask(next)); + + return; } else { u16 new_asid; bool need_flush; @@ -199,10 +180,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } /* Stop remote flushes for the previous mm */ - if (cpumask_test_cpu(cpu, mm_cpumask(real_prev))) - cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); - - VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next))); + VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) && + real_prev != &init_mm); + cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); /* * Start remote flushes and then read tlb_gen. @@ -233,6 +213,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } /* + * enter_lazy_tlb() is a hint from the scheduler that we are entering a + * kernel thread or other context without an mm. Acceptable implementations + * include doing nothing whatsoever, switching to init_mm, or various clever + * lazy tricks to try to minimize TLB flushes. + * + * The scheduler reserves the right to call enter_lazy_tlb() several times + * in a row. It will notify us that we're going back to a real mm by + * calling switch_mm_irqs_off(). + */ +void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ + if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) + return; + + if (static_branch_unlikely(&tlb_use_lazy_mode)) { + /* + * There's a significant optimization that may be possible + * here. We have accurate enough TLB flush tracking that we + * don't need to maintain coherence of TLB per se when we're + * lazy. We do, however, need to maintain coherence of + * paging-structure caches. We could, in principle, leave our + * old mm loaded and only switch to init_mm when + * tlb_remove_page() happens. + */ + this_cpu_write(cpu_tlbstate.is_lazy, true); + } else { + switch_mm(NULL, &init_mm, NULL); + } +} + +/* * Call this when reinitializing a CPU. It fixes the following potential * problems: * @@ -303,16 +314,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, /* This code cannot presently handle being reentered. */ VM_WARN_ON(!irqs_disabled()); + if (unlikely(loaded_mm == &init_mm)) + return; + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) { + if (this_cpu_read(cpu_tlbstate.is_lazy)) { /* - * We're in lazy mode -- don't flush. We can get here on - * remote flushes due to races and on local flushes if a - * kernel thread coincidentally flushes the mm it's lazily - * still using. + * We're in lazy mode. We need to at least flush our + * paging-structure cache to avoid speculatively reading + * garbage into our TLB. Since switching to init_mm is barely + * slower than a minimal flush, just switch to init_mm. */ + switch_mm_irqs_off(NULL, &init_mm, NULL); return; } @@ -611,3 +626,57 @@ static int __init create_tlb_single_page_flush_ceiling(void) return 0; } late_initcall(create_tlb_single_page_flush_ceiling); + +static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[2]; + + buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0'; + buf[1] = '\n'; + + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t tlblazy_write_file(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + bool val; + + if (kstrtobool_from_user(user_buf, count, &val)) + return -EINVAL; + + if (val) + static_branch_enable(&tlb_use_lazy_mode); + else + static_branch_disable(&tlb_use_lazy_mode); + + return count; +} + +static const struct file_operations fops_tlblazy = { + .read = tlblazy_read_file, + .write = tlblazy_write_file, + .llseek = default_llseek, +}; + +static int __init init_tlb_use_lazy_mode(void) +{ + if (boot_cpu_has(X86_FEATURE_PCID)) { + /* + * Heuristic: with PCID on, switching to and from + * init_mm is reasonably fast, but remote flush IPIs + * as expensive as ever, so turn off lazy TLB mode. + * + * We can't do this in setup_pcid() because static keys + * haven't been initialized yet, and it would blow up + * badly. + */ + static_branch_disable(&tlb_use_lazy_mode); + } + + debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR, + arch_debugfs_dir, NULL, &fops_tlblazy); + return 0; +} +late_initcall(init_tlb_use_lazy_mode); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0e7ef69..d669e9d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -93,11 +93,11 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), int rc; rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE, - "x86/xen/hvm_guest:prepare", + "x86/xen/guest:prepare", cpu_up_prepare_cb, cpu_dead_cb); if (rc >= 0) { rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "x86/xen/hvm_guest:online", + "x86/xen/guest:online", xen_cpu_up_online, NULL); if (rc < 0) cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE); diff --git a/block/bio.c b/block/bio.c index b38e962..101c2a9 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1239,8 +1239,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q, */ bmd->is_our_pages = map_data ? 0 : 1; memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs); - iov_iter_init(&bmd->iter, iter->type, bmd->iov, - iter->nr_segs, iter->count); + bmd->iter = *iter; + bmd->iter.iov = bmd->iov; ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); @@ -1331,6 +1331,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, int ret, offset; struct iov_iter i; struct iovec iov; + struct bio_vec *bvec; iov_for_each(iov, i, *iter) { unsigned long uaddr = (unsigned long) iov.iov_base; @@ -1375,7 +1376,12 @@ struct bio *bio_map_user_iov(struct request_queue *q, ret = get_user_pages_fast(uaddr, local_nr_pages, (iter->type & WRITE) != WRITE, &pages[cur_page]); - if (ret < local_nr_pages) { + if (unlikely(ret < local_nr_pages)) { + for (j = cur_page; j < page_limit; j++) { + if (!pages[j]) + break; + put_page(pages[j]); + } ret = -EFAULT; goto out_unmap; } @@ -1383,6 +1389,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, offset = offset_in_page(uaddr); for (j = cur_page; j < page_limit; j++) { unsigned int bytes = PAGE_SIZE - offset; + unsigned short prev_bi_vcnt = bio->bi_vcnt; if (len <= 0) break; @@ -1397,6 +1404,13 @@ struct bio *bio_map_user_iov(struct request_queue *q, bytes) break; + /* + * check if vector was merged with previous + * drop page reference if needed + */ + if (bio->bi_vcnt == prev_bi_vcnt) + put_page(pages[j]); + len -= bytes; offset = 0; } @@ -1423,10 +1437,8 @@ struct bio *bio_map_user_iov(struct request_queue *q, return bio; out_unmap: - for (j = 0; j < nr_pages; j++) { - if (!pages[j]) - break; - put_page(pages[j]); + bio_for_each_segment_all(bvec, bio, j) { + put_page(bvec->bv_page); } out: kfree(pages); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 980e730..de294d7 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -815,10 +815,14 @@ int blk_mq_debugfs_register(struct request_queue *q) goto err; /* - * blk_mq_init_hctx() attempted to do this already, but q->debugfs_dir + * blk_mq_init_sched() attempted to do this already, but q->debugfs_dir * didn't exist yet (because we don't know what to name the directory * until the queue is registered to a gendisk). */ + if (q->elevator && !q->sched_debugfs_dir) + blk_mq_debugfs_register_sched(q); + + /* Similarly, blk_mq_init_hctx() couldn't do this previously. */ queue_for_each_hw_ctx(q, hctx, i) { if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx)) goto err; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 0fea76a..17816a0 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1911,11 +1911,11 @@ static void throtl_upgrade_state(struct throtl_data *td) tg->disptime = jiffies - 1; throtl_select_dispatch(sq); - throtl_schedule_next_dispatch(sq, false); + throtl_schedule_next_dispatch(sq, true); } rcu_read_unlock(); throtl_select_dispatch(&td->service_queue); - throtl_schedule_next_dispatch(&td->service_queue, false); + throtl_schedule_next_dispatch(&td->service_queue, true); queue_work(kthrotld_workqueue, &td->dispatch_work); } diff --git a/block/bsg-lib.c b/block/bsg-lib.c index dbddff8..15d25cc 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -207,20 +207,34 @@ static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) struct bsg_job *job = blk_mq_rq_to_pdu(req); struct scsi_request *sreq = &job->sreq; + /* called right after the request is allocated for the request_queue */ + + sreq->sense = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp); + if (!sreq->sense) + return -ENOMEM; + + return 0; +} + +static void bsg_initialize_rq(struct request *req) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + void *sense = sreq->sense; + + /* called right before the request is given to the request_queue user */ + memset(job, 0, sizeof(*job)); scsi_req_init(sreq); + + sreq->sense = sense; sreq->sense_len = SCSI_SENSE_BUFFERSIZE; - sreq->sense = kzalloc(sreq->sense_len, gfp); - if (!sreq->sense) - return -ENOMEM; job->req = req; - job->reply = sreq->sense; + job->reply = sense; job->reply_len = sreq->sense_len; job->dd_data = job + 1; - - return 0; } static void bsg_exit_rq(struct request_queue *q, struct request *req) @@ -251,6 +265,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, q->cmd_size = sizeof(struct bsg_job) + dd_job_size; q->init_rq_fn = bsg_init_rq; q->exit_rq_fn = bsg_exit_rq; + q->initialize_rq_fn = bsg_initialize_rq; q->request_fn = bsg_request_fn; ret = blk_init_allocated_queue(q); diff --git a/crypto/shash.c b/crypto/shash.c index 5e31c8d..325a14d 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -41,7 +41,7 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, int err; absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); - buffer = kmalloc(absize, GFP_KERNEL); + buffer = kmalloc(absize, GFP_ATOMIC); if (!buffer) return -ENOMEM; @@ -275,12 +275,14 @@ static int shash_async_finup(struct ahash_request *req) int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) { - struct scatterlist *sg = req->src; - unsigned int offset = sg->offset; unsigned int nbytes = req->nbytes; + struct scatterlist *sg; + unsigned int offset; int err; - if (nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset)) { + if (nbytes && + (sg = req->src, offset = sg->offset, + nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) { void *data; data = kmap_atomic(sg_page(sg)); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 4faa0fd..d5692e3 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -426,14 +426,9 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) static int skcipher_walk_first(struct skcipher_walk *walk) { - walk->nbytes = 0; - if (WARN_ON_ONCE(in_irq())) return -EDEADLK; - if (unlikely(!walk->total)) - return 0; - walk->buffer = NULL; if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { int err = skcipher_copy_iv(walk); @@ -452,10 +447,15 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + walk->total = req->cryptlen; + walk->nbytes = 0; + + if (unlikely(!walk->total)) + return 0; + scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); - walk->total = req->cryptlen; walk->iv = req->iv; walk->oiv = req->iv; @@ -509,6 +509,11 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, struct crypto_aead *tfm = crypto_aead_reqtfm(req); int err; + walk->nbytes = 0; + + if (unlikely(!walk->total)) + return 0; + walk->flags &= ~SKCIPHER_WALK_PHYS; scatterwalk_start(&walk->in, req->src); diff --git a/crypto/xts.c b/crypto/xts.c index d86c11a..e31828e 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -554,8 +554,10 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) ctx->name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; + "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) { + err = -ENAMETOOLONG; + goto err_drop_spawn; + } } else goto err_drop_spawn; diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 9565d57..de56394 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1178,12 +1178,44 @@ dev_put: return ret; } +static bool __init iort_enable_acs(struct acpi_iort_node *iort_node) +{ + if (iort_node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) { + struct acpi_iort_node *parent; + struct acpi_iort_id_mapping *map; + int i; + + map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, iort_node, + iort_node->mapping_offset); + + for (i = 0; i < iort_node->mapping_count; i++, map++) { + if (!map->output_reference) + continue; + + parent = ACPI_ADD_PTR(struct acpi_iort_node, + iort_table, map->output_reference); + /* + * If we detect a RC->SMMU mapping, make sure + * we enable ACS on the system. + */ + if ((parent->type == ACPI_IORT_NODE_SMMU) || + (parent->type == ACPI_IORT_NODE_SMMU_V3)) { + pci_request_acs(); + return true; + } + } + } + + return false; +} + static void __init iort_init_platform_devices(void) { struct acpi_iort_node *iort_node, *iort_end; struct acpi_table_iort *iort; struct fwnode_handle *fwnode; int i, ret; + bool acs_enabled = false; /* * iort_table and iort both point to the start of IORT table, but @@ -1203,6 +1235,9 @@ static void __init iort_init_platform_devices(void) return; } + if (!acs_enabled) + acs_enabled = iort_enable_acs(iort_node); + if ((iort_node->type == ACPI_IORT_NODE_SMMU) || (iort_node->type == ACPI_IORT_NODE_SMMU_V3)) { diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 3fb8ff5..e26ea20 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -571,10 +571,9 @@ static int acpi_data_get_property_array(const struct acpi_device_data *data, * } * } * - * Calling this function with index %2 return %-ENOENT and with index %3 - * returns the last entry. If the property does not contain any more values - * %-ENODATA is returned. The NULL entry must be single integer and - * preferably contain value %0. + * Calling this function with index %2 or index %3 return %-ENOENT. If the + * property does not contain any more values %-ENOENT is returned. The NULL + * entry must be single integer and preferably contain value %0. * * Return: %0 on success, negative error code on failure. */ @@ -590,11 +589,11 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, data = acpi_device_data_of_node(fwnode); if (!data) - return -EINVAL; + return -ENOENT; ret = acpi_data_get_property(data, propname, ACPI_TYPE_ANY, &obj); if (ret) - return ret; + return ret == -EINVAL ? -ENOENT : -EINVAL; /* * The simplest case is when the value is a single reference. Just @@ -606,7 +605,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, ret = acpi_bus_get_device(obj->reference.handle, &device); if (ret) - return ret; + return ret == -ENODEV ? -EINVAL : ret; args->adev = device; args->nargs = 0; @@ -622,8 +621,10 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, * The index argument is then used to determine which reference * the caller wants (along with the arguments). */ - if (obj->type != ACPI_TYPE_PACKAGE || index >= obj->package.count) - return -EPROTO; + if (obj->type != ACPI_TYPE_PACKAGE) + return -EINVAL; + if (index >= obj->package.count) + return -ENOENT; element = obj->package.elements; end = element + obj->package.count; @@ -635,7 +636,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, ret = acpi_bus_get_device(element->reference.handle, &device); if (ret) - return -ENODEV; + return -EINVAL; nargs = 0; element++; @@ -649,11 +650,11 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, else if (type == ACPI_TYPE_LOCAL_REFERENCE) break; else - return -EPROTO; + return -EINVAL; } if (nargs > MAX_ACPI_REFERENCE_ARGS) - return -EPROTO; + return -EINVAL; if (idx == index) { args->adev = device; @@ -670,13 +671,13 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, return -ENOENT; element++; } else { - return -EPROTO; + return -EINVAL; } idx++; } - return -ENODATA; + return -ENOENT; } EXPORT_SYMBOL_GPL(__acpi_node_get_property_reference); diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ab34239..0621a95 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2582,6 +2582,48 @@ static bool binder_proc_transaction(struct binder_transaction *t, return true; } +/** + * binder_get_node_refs_for_txn() - Get required refs on node for txn + * @node: struct binder_node for which to get refs + * @proc: returns @node->proc if valid + * @error: if no @proc then returns BR_DEAD_REPLY + * + * User-space normally keeps the node alive when creating a transaction + * since it has a reference to the target. The local strong ref keeps it + * alive if the sending process dies before the target process processes + * the transaction. If the source process is malicious or has a reference + * counting bug, relying on the local strong ref can fail. + * + * Since user-space can cause the local strong ref to go away, we also take + * a tmpref on the node to ensure it survives while we are constructing + * the transaction. We also need a tmpref on the proc while we are + * constructing the transaction, so we take that here as well. + * + * Return: The target_node with refs taken or NULL if no @node->proc is NULL. + * Also sets @proc if valid. If the @node->proc is NULL indicating that the + * target proc has died, @error is set to BR_DEAD_REPLY + */ +static struct binder_node *binder_get_node_refs_for_txn( + struct binder_node *node, + struct binder_proc **procp, + uint32_t *error) +{ + struct binder_node *target_node = NULL; + + binder_node_inner_lock(node); + if (node->proc) { + target_node = node; + binder_inc_node_nilocked(node, 1, 0, NULL); + binder_inc_node_tmpref_ilocked(node); + node->proc->tmp_ref++; + *procp = node->proc; + } else + *error = BR_DEAD_REPLY; + binder_node_inner_unlock(node); + + return target_node; +} + static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, struct binder_transaction_data *tr, int reply, @@ -2685,43 +2727,35 @@ static void binder_transaction(struct binder_proc *proc, ref = binder_get_ref_olocked(proc, tr->target.handle, true); if (ref) { - binder_inc_node(ref->node, 1, 0, NULL); - target_node = ref->node; - } - binder_proc_unlock(proc); - if (target_node == NULL) { + target_node = binder_get_node_refs_for_txn( + ref->node, &target_proc, + &return_error); + } else { binder_user_error("%d:%d got transaction to invalid handle\n", - proc->pid, thread->pid); + proc->pid, thread->pid); return_error = BR_FAILED_REPLY; - return_error_param = -EINVAL; - return_error_line = __LINE__; - goto err_invalid_target_handle; } + binder_proc_unlock(proc); } else { mutex_lock(&context->context_mgr_node_lock); target_node = context->binder_context_mgr_node; - if (target_node == NULL) { + if (target_node) + target_node = binder_get_node_refs_for_txn( + target_node, &target_proc, + &return_error); + else return_error = BR_DEAD_REPLY; - mutex_unlock(&context->context_mgr_node_lock); - return_error_line = __LINE__; - goto err_no_context_mgr_node; - } - binder_inc_node(target_node, 1, 0, NULL); mutex_unlock(&context->context_mgr_node_lock); } - e->to_node = target_node->debug_id; - binder_node_lock(target_node); - target_proc = target_node->proc; - if (target_proc == NULL) { - binder_node_unlock(target_node); - return_error = BR_DEAD_REPLY; + if (!target_node) { + /* + * return_error is set above + */ + return_error_param = -EINVAL; return_error_line = __LINE__; goto err_dead_binder; } - binder_inner_proc_lock(target_proc); - target_proc->tmp_ref++; - binder_inner_proc_unlock(target_proc); - binder_node_unlock(target_node); + e->to_node = target_node->debug_id; if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { return_error = BR_FAILED_REPLY; @@ -3071,6 +3105,8 @@ static void binder_transaction(struct binder_proc *proc, if (target_thread) binder_thread_dec_tmpref(target_thread); binder_proc_dec_tmpref(target_proc); + if (target_node) + binder_dec_node_tmpref(target_node); /* * write barrier to synchronize with initialization * of log entry @@ -3090,6 +3126,8 @@ err_bad_parent: err_copy_data_failed: trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); + if (target_node) + binder_dec_node_tmpref(target_node); target_node = NULL; t->buffer->transaction = NULL; binder_alloc_free_buf(&target_proc->alloc, t->buffer); @@ -3104,13 +3142,14 @@ err_bad_call_stack: err_empty_call_stack: err_dead_binder: err_invalid_target_handle: -err_no_context_mgr_node: if (target_thread) binder_thread_dec_tmpref(target_thread); if (target_proc) binder_proc_dec_tmpref(target_proc); - if (target_node) + if (target_node) { binder_dec_node(target_node, 1, 0); + binder_dec_node_tmpref(target_node); + } binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n", diff --git a/drivers/base/node.c b/drivers/base/node.c index 3855902..aae2402 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -27,13 +27,21 @@ static struct bus_type node_subsys = { static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf) { + ssize_t n; + cpumask_var_t mask; struct node *node_dev = to_node(dev); - const struct cpumask *mask = cpumask_of_node(node_dev->dev.id); /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); - return cpumap_print_to_pagebuf(list, buf, mask); + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return 0; + + cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); + n = cpumap_print_to_pagebuf(list, buf, mask); + free_cpumask_var(mask); + + return n; } static inline ssize_t node_read_cpumask(struct device *dev, diff --git a/drivers/base/property.c b/drivers/base/property.c index d0b65bb..7ed99c1 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -21,6 +21,7 @@ #include <linux/phy.h> struct property_set { + struct device *dev; struct fwnode_handle fwnode; const struct property_entry *properties; }; @@ -682,6 +683,10 @@ EXPORT_SYMBOL_GPL(fwnode_property_match_string); * Caller is responsible to call fwnode_handle_put() on the returned * args->fwnode pointer. * + * Returns: %0 on success + * %-ENOENT when the index is out of bounds, the index has an empty + * reference or the property was not found + * %-EINVAL on parse error */ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, @@ -891,6 +896,7 @@ static struct property_set *pset_copy_set(const struct property_set *pset) void device_remove_properties(struct device *dev) { struct fwnode_handle *fwnode; + struct property_set *pset; fwnode = dev_fwnode(dev); if (!fwnode) @@ -900,16 +906,16 @@ void device_remove_properties(struct device *dev) * the pset. If there is no real firmware node (ACPI/DT) primary * will hold the pset. */ - if (is_pset_node(fwnode)) { + pset = to_pset_node(fwnode); + if (pset) { set_primary_fwnode(dev, NULL); - pset_free_set(to_pset_node(fwnode)); } else { - fwnode = fwnode->secondary; - if (!IS_ERR(fwnode) && is_pset_node(fwnode)) { + pset = to_pset_node(fwnode->secondary); + if (pset && dev == pset->dev) set_secondary_fwnode(dev, NULL); - pset_free_set(to_pset_node(fwnode)); - } } + if (pset && dev == pset->dev) + pset_free_set(pset); } EXPORT_SYMBOL_GPL(device_remove_properties); @@ -938,6 +944,7 @@ int device_add_properties(struct device *dev, p->fwnode.ops = &pset_fwnode_ops; set_secondary_fwnode(dev, &p->fwnode); + p->dev = dev; return 0; } EXPORT_SYMBOL_GPL(device_add_properties); diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 4a438b8..2dfe99b 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -17,7 +17,7 @@ if BLK_DEV config BLK_DEV_NULL_BLK tristate "Null test block driver" - depends on CONFIGFS_FS + select CONFIGFS_FS config BLK_DEV_FD tristate "Normal floppy disk support" diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3684e21..baebbdf 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -243,7 +243,6 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, struct nbd_config *config = nbd->config; config->blksize = blocksize; config->bytesize = blocksize * nr_blocks; - nbd_size_update(nbd); } static void nbd_complete_rq(struct request *req) @@ -820,9 +819,13 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, * appropriate. */ ret = nbd_handle_cmd(cmd, hctx->queue_num); + if (ret < 0) + ret = BLK_STS_IOERR; + else if (!ret) + ret = BLK_STS_OK; complete(&cmd->send_complete); - return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK; + return ret; } static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, @@ -1090,6 +1093,7 @@ static int nbd_start_device(struct nbd_device *nbd) args->index = i; queue_work(recv_workqueue, &args->work); } + nbd_size_update(nbd); return error; } diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 7cedb42..64d0fc1 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -2604,7 +2604,7 @@ static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s, return NULL; *dma_handle = dma_map_single(dev, buf, s->size, dir); if (dma_mapping_error(dev, *dma_handle)) { - kfree(buf); + kmem_cache_free(s, buf); buf = NULL; } return buf; diff --git a/drivers/clk/clk-bulk.c b/drivers/clk/clk-bulk.c index c834f5a..4c10456 100644 --- a/drivers/clk/clk-bulk.c +++ b/drivers/clk/clk-bulk.c @@ -105,6 +105,7 @@ err: return ret; } +EXPORT_SYMBOL_GPL(clk_bulk_prepare); #endif /* CONFIG_HAVE_CLK_PREPARE */ diff --git a/drivers/clk/rockchip/clk-rk3128.c b/drivers/clk/rockchip/clk-rk3128.c index 62d7854..5970a50 100644 --- a/drivers/clk/rockchip/clk-rk3128.c +++ b/drivers/clk/rockchip/clk-rk3128.c @@ -315,13 +315,13 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { RK2928_CLKGATE_CON(10), 8, GFLAGS), GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, - RK2928_CLKGATE_CON(10), 8, GFLAGS), + RK2928_CLKGATE_CON(10), 0, GFLAGS), GATE(SCLK_PVTM_GPU, "clk_pvtm_gpu", "xin24m", 0, - RK2928_CLKGATE_CON(10), 8, GFLAGS), + RK2928_CLKGATE_CON(10), 1, GFLAGS), GATE(SCLK_PVTM_FUNC, "clk_pvtm_func", "xin24m", 0, - RK2928_CLKGATE_CON(10), 8, GFLAGS), + RK2928_CLKGATE_CON(10), 2, GFLAGS), GATE(SCLK_MIPI_24M, "clk_mipi_24m", "xin24m", CLK_IGNORE_UNUSED, - RK2928_CLKGATE_CON(10), 8, GFLAGS), + RK2928_CLKGATE_CON(2), 15, GFLAGS), COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0, RK2928_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, @@ -541,7 +541,7 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { GATE(0, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS), GATE(0, "pclk_mipiphy", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 0, GFLAGS), - GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 2, GFLAGS), + GATE(0, "pclk_pmu", "pclk_pmu_pre", 0, RK2928_CLKGATE_CON(9), 2, GFLAGS), GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 3, GFLAGS), /* PD_MMC */ @@ -577,6 +577,8 @@ static const char *const rk3128_critical_clocks[] __initconst = { "aclk_peri", "hclk_peri", "pclk_peri", + "pclk_pmu", + "sclk_timer5", }; static struct rockchip_clk_provider *__init rk3128_common_clk_init(struct device_node *np) diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index e40b775..d8d3cb6 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -294,6 +294,18 @@ static const struct samsung_clk_reg_dump src_mask_suspend_e4210[] = { #define PLL_ENABLED (1 << 31) #define PLL_LOCKED (1 << 29) +static void exynos4_clk_enable_pll(u32 reg) +{ + u32 pll_con = readl(reg_base + reg); + pll_con |= PLL_ENABLED; + writel(pll_con, reg_base + reg); + + while (!(pll_con & PLL_LOCKED)) { + cpu_relax(); + pll_con = readl(reg_base + reg); + } +} + static void exynos4_clk_wait_for_pll(u32 reg) { u32 pll_con; @@ -315,6 +327,9 @@ static int exynos4_clk_suspend(void) samsung_clk_save(reg_base, exynos4_save_pll, ARRAY_SIZE(exynos4_clk_pll_regs)); + exynos4_clk_enable_pll(EPLL_CON0); + exynos4_clk_enable_pll(VPLL_CON0); + if (exynos4_soc == EXYNOS4210) { samsung_clk_save(reg_base, exynos4_save_soc, ARRAY_SIZE(exynos4210_clk_save)); diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index d9fbbf0..0f9754e 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -349,8 +349,6 @@ struct artpec6_crypto_aead_req_ctx { /* The crypto framework makes it hard to avoid this global. */ static struct device *artpec6_crypto_dev; -static struct dentry *dbgfs_root; - #ifdef CONFIG_FAULT_INJECTION static DECLARE_FAULT_ATTR(artpec6_crypto_fail_status_read); static DECLARE_FAULT_ATTR(artpec6_crypto_fail_dma_array_full); @@ -2984,6 +2982,8 @@ struct dbgfs_u32 { char *desc; }; +static struct dentry *dbgfs_root; + static void artpec6_crypto_init_debugfs(void) { dbgfs_root = debugfs_create_dir("artpec6_crypto", NULL); diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index b585ce5..4835dd4 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -553,9 +553,9 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct scatterlist sg[1], *tsg; - int err = 0, len = 0, reg, ncp; + int err = 0, len = 0, reg, ncp = 0; unsigned int i; - const u32 *buffer = (const u32 *)rctx->buffer; + u32 *buffer = (void *)rctx->buffer; rctx->sg = hdev->req->src; rctx->total = hdev->req->nbytes; @@ -620,10 +620,13 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) reg |= HASH_CR_DMAA; stm32_hash_write(hdev, HASH_CR, reg); - for (i = 0; i < DIV_ROUND_UP(ncp, sizeof(u32)); i++) - stm32_hash_write(hdev, HASH_DIN, buffer[i]); - - stm32_hash_set_nblw(hdev, ncp); + if (ncp) { + memset(buffer + ncp, 0, + DIV_ROUND_UP(ncp, sizeof(u32)) - ncp); + writesl(hdev->io_base + HASH_DIN, buffer, + DIV_ROUND_UP(ncp, sizeof(u32))); + } + stm32_hash_set_nblw(hdev, DIV_ROUND_UP(ncp, sizeof(u32))); reg = stm32_hash_read(hdev, HASH_STR); reg |= HASH_STR_DCAL; stm32_hash_write(hdev, HASH_STR, reg); diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 66fb40d..0383063 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -383,7 +383,7 @@ err_put_fd: return err; } -static void sync_fill_fence_info(struct dma_fence *fence, +static int sync_fill_fence_info(struct dma_fence *fence, struct sync_fence_info *info) { strlcpy(info->obj_name, fence->ops->get_timeline_name(fence), @@ -399,6 +399,8 @@ static void sync_fill_fence_info(struct dma_fence *fence, test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ? ktime_to_ns(fence->timestamp) : ktime_set(0, 0); + + return info->status; } static long sync_file_ioctl_fence_info(struct sync_file *sync_file, @@ -424,8 +426,12 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, * sync_fence_info and return the actual number of fences on * info->num_fences. */ - if (!info.num_fences) + if (!info.num_fences) { + info.status = dma_fence_is_signaled(sync_file->fence); goto no_fences; + } else { + info.status = 1; + } if (info.num_fences < num_fences) return -EINVAL; @@ -435,8 +441,10 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (!fence_info) return -ENOMEM; - for (i = 0; i < num_fences; i++) - sync_fill_fence_info(fences[i], &fence_info[i]); + for (i = 0; i < num_fences; i++) { + int status = sync_fill_fence_info(fences[i], &fence_info[i]); + info.status = info.status <= 0 ? info.status : status; + } if (copy_to_user(u64_to_user_ptr(info.sync_fence_info), fence_info, size)) { @@ -446,7 +454,6 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, no_fences: sync_file_get_name(sync_file, info.name, sizeof(info.name)); - info.status = dma_fence_is_signaled(sync_file->fence); info.num_fences = num_fences; if (copy_to_user((void __user *)arg, &info, sizeof(info))) diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c index 32905d5..339186f 100644 --- a/drivers/dma/altera-msgdma.c +++ b/drivers/dma/altera-msgdma.c @@ -212,11 +212,12 @@ struct msgdma_device { static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev) { struct msgdma_sw_desc *desc; + unsigned long flags; - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node); list_del(&desc->node); - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); INIT_LIST_HEAD(&desc->tx_list); @@ -306,13 +307,14 @@ static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx) struct msgdma_device *mdev = to_mdev(tx->chan); struct msgdma_sw_desc *new; dma_cookie_t cookie; + unsigned long flags; new = tx_to_desc(tx); - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); cookie = dma_cookie_assign(tx); list_add_tail(&new->node, &mdev->pending_list); - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); return cookie; } @@ -336,17 +338,18 @@ msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, struct msgdma_extended_desc *desc; size_t copy; u32 desc_cnt; + unsigned long irqflags; desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN); - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, irqflags); if (desc_cnt > mdev->desc_free_cnt) { spin_unlock_bh(&mdev->lock); dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); return NULL; } mdev->desc_free_cnt -= desc_cnt; - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, irqflags); do { /* Allocate and populate the descriptor */ @@ -397,18 +400,19 @@ msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, u32 desc_cnt = 0, i; struct scatterlist *sg; u32 stride; + unsigned long irqflags; for_each_sg(sgl, sg, sg_len, i) desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN); - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, irqflags); if (desc_cnt > mdev->desc_free_cnt) { spin_unlock_bh(&mdev->lock); dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); return NULL; } mdev->desc_free_cnt -= desc_cnt; - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, irqflags); avail = sg_dma_len(sgl); @@ -566,10 +570,11 @@ static void msgdma_start_transfer(struct msgdma_device *mdev) static void msgdma_issue_pending(struct dma_chan *chan) { struct msgdma_device *mdev = to_mdev(chan); + unsigned long flags; - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); msgdma_start_transfer(mdev); - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); } /** @@ -634,10 +639,11 @@ static void msgdma_free_descriptors(struct msgdma_device *mdev) static void msgdma_free_chan_resources(struct dma_chan *dchan) { struct msgdma_device *mdev = to_mdev(dchan); + unsigned long flags; - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); msgdma_free_descriptors(mdev); - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); kfree(mdev->sw_desq); } @@ -682,8 +688,9 @@ static void msgdma_tasklet(unsigned long data) u32 count; u32 __maybe_unused size; u32 __maybe_unused status; + unsigned long flags; - spin_lock(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); /* Read number of responses that are available */ count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL); @@ -698,13 +705,13 @@ static void msgdma_tasklet(unsigned long data) * bits. So we need to just drop these values. */ size = ioread32(mdev->resp + MSGDMA_RESP_BYTES_TRANSFERRED); - status = ioread32(mdev->resp - MSGDMA_RESP_STATUS); + status = ioread32(mdev->resp + MSGDMA_RESP_STATUS); msgdma_complete_descriptor(mdev); msgdma_chan_desc_cleanup(mdev); } - spin_unlock(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); } /** diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 3879f80..a7ea20e 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1143,11 +1143,24 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( struct edma_desc *edesc; struct device *dev = chan->device->dev; struct edma_chan *echan = to_edma_chan(chan); - unsigned int width, pset_len; + unsigned int width, pset_len, array_size; if (unlikely(!echan || !len)) return NULL; + /* Align the array size (acnt block) with the transfer properties */ + switch (__ffs((src | dest | len))) { + case 0: + array_size = SZ_32K - 1; + break; + case 1: + array_size = SZ_32K - 2; + break; + default: + array_size = SZ_32K - 4; + break; + } + if (len < SZ_64K) { /* * Transfer size less than 64K can be handled with one paRAM @@ -1169,7 +1182,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( * When the full_length is multibple of 32767 one slot can be * used to complete the transfer. */ - width = SZ_32K - 1; + width = array_size; pset_len = rounddown(len, width); /* One slot is enough for lengths multiple of (SZ_32K -1) */ if (unlikely(pset_len == len)) @@ -1217,7 +1230,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( } dest += pset_len; src += pset_len; - pset_len = width = len % (SZ_32K - 1); + pset_len = width = len % array_size; ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1, width, pset_len, DMA_MEM_TO_MEM); diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 2f65a8f..f1d04b7 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -262,13 +262,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_lock(&xbar->mutex); map->xbar_out = find_first_zero_bit(xbar->dma_inuse, xbar->dma_requests); - mutex_unlock(&xbar->mutex); if (map->xbar_out == xbar->dma_requests) { + mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); + mutex_unlock(&xbar->mutex); map->xbar_in = (u16)dma_spec->args[0]; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3388d54..3f80f16 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -453,7 +453,8 @@ config GPIO_TS4800 config GPIO_THUNDERX tristate "Cavium ThunderX/OCTEON-TX GPIO" depends on ARCH_THUNDER || (64BIT && COMPILE_TEST) - depends on PCI_MSI && IRQ_DOMAIN_HIERARCHY + depends on PCI_MSI + select IRQ_DOMAIN_HIERARCHY select IRQ_FASTEOI_HIERARCHY_HANDLERS help Say yes here to support the on-chip GPIO lines on the ThunderX diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index dbf869f..3233b72 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -518,7 +518,13 @@ static int omap_gpio_irq_type(struct irq_data *d, unsigned type) if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) irq_set_handler_locked(d, handle_level_irq); else if (type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) - irq_set_handler_locked(d, handle_edge_irq); + /* + * Edge IRQs are already cleared/acked in irq_handler and + * not need to be masked, as result handle_edge_irq() + * logic is excessed here and may cause lose of interrupts. + * So just use handle_simple_irq. + */ + irq_set_handler_locked(d, handle_simple_irq); return 0; @@ -678,7 +684,7 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset) static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) { void __iomem *isr_reg = NULL; - u32 isr; + u32 enabled, isr, level_mask; unsigned int bit; struct gpio_bank *bank = gpiobank; unsigned long wa_lock_flags; @@ -691,23 +697,21 @@ static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) pm_runtime_get_sync(bank->chip.parent); while (1) { - u32 isr_saved, level_mask = 0; - u32 enabled; - raw_spin_lock_irqsave(&bank->lock, lock_flags); enabled = omap_get_gpio_irqbank_mask(bank); - isr_saved = isr = readl_relaxed(isr_reg) & enabled; + isr = readl_relaxed(isr_reg) & enabled; if (bank->level_mask) level_mask = bank->level_mask & enabled; + else + level_mask = 0; /* clear edge sensitive interrupts before handler(s) are called so that we don't miss any interrupt occurred while executing them */ - omap_disable_gpio_irqbank(bank, isr_saved & ~level_mask); - omap_clear_gpio_irqbank(bank, isr_saved & ~level_mask); - omap_enable_gpio_irqbank(bank, isr_saved & ~level_mask); + if (isr & ~level_mask) + omap_clear_gpio_irqbank(bank, isr & ~level_mask); raw_spin_unlock_irqrestore(&bank->lock, lock_flags); @@ -1010,7 +1014,7 @@ static void omap_gpio_set(struct gpio_chip *chip, unsigned offset, int value) /*---------------------------------------------------------------------*/ -static void __init omap_gpio_show_rev(struct gpio_bank *bank) +static void omap_gpio_show_rev(struct gpio_bank *bank) { static bool called; u32 rev; diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 4d21135..eb4528c 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -203,7 +203,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, if (pin <= 255) { char ev_name[5]; - sprintf(ev_name, "_%c%02X", + sprintf(ev_name, "_%c%02hhX", agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7ef6c28..bc74613 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -834,7 +834,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) placement.busy_placement = &placements; placements.fpfn = 0; placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; - placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + placements.flags = bo->mem.placement | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp, true, false); if (unlikely(r)) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 4e53aae..0028591 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -2960,6 +2960,7 @@ out: drm_modeset_backoff(&ctx); } + drm_atomic_state_put(state); drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 19404c9..af289d3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3013,10 +3013,15 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct drm_i915_gem_request *request) { + unsigned long flags; + GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); - i915_gem_request_submit(request); + + spin_lock_irqsave(&request->engine->timeline->lock, flags); + __i915_gem_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); + spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } static void engine_set_wedged(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index d805b6e..27743be 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -606,11 +606,6 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, connector->encoder->base.id, connector->encoder->name); - /* ELD Conn_Type */ - connector->eld[5] &= ~(3 << 2); - if (intel_crtc_has_dp_encoder(crtc_state)) - connector->eld[5] |= (1 << 2); - connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; if (dev_priv->display.audio_codec_enable) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 183e87e..5d4cd3d 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1163,6 +1163,13 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, is_hdmi = is_dvi && (child->common.device_type & DEVICE_TYPE_NOT_HDMI_OUTPUT) == 0; is_edp = is_dp && (child->common.device_type & DEVICE_TYPE_INTERNAL_CONNECTOR); + if (port == PORT_A && is_dvi) { + DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", + is_hdmi ? "/HDMI" : ""); + is_dvi = false; + is_hdmi = false; + } + info->supports_dvi = is_dvi; info->supports_hdmi = is_hdmi; info->supports_dp = is_dp; @@ -1233,7 +1240,7 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, { enum port port; - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; if (!dev_priv->vbt.child_dev_num) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index ff9ecd2..b8315bc 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -74,7 +74,7 @@ #define I9XX_CSC_COEFF_1_0 \ ((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) -static bool crtc_state_is_legacy(struct drm_crtc_state *state) +static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) { return !state->degamma_lut && !state->ctm && @@ -288,7 +288,7 @@ static void cherryview_load_csc_matrix(struct drm_crtc_state *state) } mode = (state->ctm ? CGM_PIPE_MODE_CSC : 0); - if (!crtc_state_is_legacy(state)) { + if (!crtc_state_is_legacy_gamma(state)) { mode |= (state->degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | (state->gamma_lut ? CGM_PIPE_MODE_GAMMA : 0); } @@ -469,7 +469,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state) struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(state->crtc)->pipe; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -529,7 +529,7 @@ static void glk_load_luts(struct drm_crtc_state *state) glk_load_degamma_lut(state); - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -551,7 +551,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) uint32_t i, lut_size; uint32_t word0, word1; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { /* Turn off degamma/gamma on CGM block. */ I915_WRITE(CGM_PIPE_MODE(pipe), (state->ctm ? CGM_PIPE_MODE_CSC : 0)); @@ -632,12 +632,10 @@ int intel_color_check(struct drm_crtc *crtc, return 0; /* - * We also allow no degamma lut and a gamma lut at the legacy + * We also allow no degamma lut/ctm and a gamma lut at the legacy * size (256 entries). */ - if (!crtc_state->degamma_lut && - crtc_state->gamma_lut && - crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH) + if (crtc_state_is_legacy_gamma(crtc_state)) return 0; return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 965988f..92c1f8e 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -216,7 +216,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) mask = DC_STATE_DEBUG_MASK_MEMORY_UP; - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_DEBUG_MASK_CORES; /* The below bit doesn't need to be cleared ever afterwards */ diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 4b4fd1f..476681d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1655,7 +1655,8 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, out: if (ret && IS_GEN9_LP(dev_priv)) { tmp = I915_READ(BXT_PHY_CTL(port)); - if ((tmp & (BXT_PHY_LANE_POWERDOWN_ACK | + if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | + BXT_PHY_LANE_POWERDOWN_ACK | BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED) DRM_ERROR("Port %c enabled but PHY powered down? " "(PHY_CTL %08x)\n", port_name(port), tmp); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 00cd17c..5c7828c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10245,13 +10245,10 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder; struct drm_display_mode *mode; struct intel_crtc_state *pipe_config; - int htot = I915_READ(HTOTAL(cpu_transcoder)); - int hsync = I915_READ(HSYNC(cpu_transcoder)); - int vtot = I915_READ(VTOTAL(cpu_transcoder)); - int vsync = I915_READ(VSYNC(cpu_transcoder)); + u32 htot, hsync, vtot, vsync; enum pipe pipe = intel_crtc->pipe; mode = kzalloc(sizeof(*mode), GFP_KERNEL); @@ -10279,6 +10276,13 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, i9xx_crtc_clock_get(intel_crtc, pipe_config); mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; + + cpu_transcoder = pipe_config->cpu_transcoder; + htot = I915_READ(HTOTAL(cpu_transcoder)); + hsync = I915_READ(HSYNC(cpu_transcoder)); + vtot = I915_READ(VTOTAL(cpu_transcoder)); + vsync = I915_READ(VSYNC(cpu_transcoder)); + mode->hdisplay = (htot & 0xffff) + 1; mode->htotal = ((htot & 0xffff0000) >> 16) + 1; mode->hsync_start = (hsync & 0xffff) + 1; @@ -12359,7 +12363,6 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; - bool hw_check = intel_state->modeset; u64 put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; @@ -12376,7 +12379,6 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (needs_modeset(new_crtc_state) || to_intel_crtc_state(new_crtc_state)->update_pipe) { - hw_check = true; put_domains[to_intel_crtc(crtc)->pipe] = modeset_get_crtc_power_domains(crtc, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6413494..2031986 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2307,8 +2307,8 @@ static void edp_panel_off(struct intel_dp *intel_dp) I915_WRITE(pp_ctrl_reg, pp); POSTING_READ(pp_ctrl_reg); - intel_dp->panel_power_off_time = ktime_get_boottime(); wait_panel_off(intel_dp); + intel_dp->panel_power_off_time = ktime_get_boottime(); /* We got a reference when we enabled the VDD. */ intel_display_power_put(dev_priv, intel_dp->aux_power_domain); @@ -5273,7 +5273,7 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, * seems sufficient to avoid this problem. */ if (dev_priv->quirks & QUIRK_INCREASE_T12_DELAY) { - vbt.t11_t12 = max_t(u16, vbt.t11_t12, 900 * 10); + vbt.t11_t12 = max_t(u16, vbt.t11_t12, 1300 * 10); DRM_DEBUG_KMS("Increasing T12 panel delay as per the quirk to %d\n", vbt.t11_t12); } diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 09b6709..de38d01 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -208,12 +208,6 @@ static const struct bxt_ddi_phy_info glk_ddi_phy_info[] = { }, }; -static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info) -{ - return (phy_info->dual_channel * BIT(phy_info->channel[DPIO_CH1].port)) | - BIT(phy_info->channel[DPIO_CH0].port); -} - static const struct bxt_ddi_phy_info * bxt_get_phy_list(struct drm_i915_private *dev_priv, int *count) { @@ -313,7 +307,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - enum port port; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -335,19 +328,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, return false; } - for_each_port_masked(port, bxt_phy_port_mask(phy_info)) { - u32 tmp = I915_READ(BXT_PHY_CTL(port)); - - if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { - DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane " - "for port %c powered down " - "(PHY_CTL %08x)\n", - phy, port_name(port), tmp); - - return false; - } - } - return true; } diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c index 951e834..28a778b 100644 --- a/drivers/gpu/drm/i915/intel_modes.c +++ b/drivers/gpu/drm/i915/intel_modes.c @@ -30,6 +30,21 @@ #include "intel_drv.h" #include "i915_drv.h" +static void intel_connector_update_eld_conn_type(struct drm_connector *connector) +{ + u8 conn_type; + + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + conn_type = DRM_ELD_CONN_TYPE_DP; + } else { + conn_type = DRM_ELD_CONN_TYPE_HDMI; + } + + connector->eld[DRM_ELD_SAD_COUNT_CONN_TYPE] &= ~DRM_ELD_CONN_TYPE_MASK; + connector->eld[DRM_ELD_SAD_COUNT_CONN_TYPE] |= conn_type; +} + /** * intel_connector_update_modes - update connector from edid * @connector: DRM connector device to use @@ -44,6 +59,8 @@ int intel_connector_update_modes(struct drm_connector *connector, ret = drm_add_edid_modes(connector, edid); drm_edid_to_eld(connector, edid); + intel_connector_update_eld_conn_type(connector); + return ret; } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index b66d8e1..49577eb 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -368,7 +368,7 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, { enum i915_power_well_id id = power_well->id; bool wait_fuses = power_well->hsw.has_fuses; - enum skl_power_gate pg; + enum skl_power_gate uninitialized_var(pg); u32 val; if (wait_fuses) { @@ -2782,6 +2782,9 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume /* 6. Enable DBUF */ gen9_dbuf_enable(dev_priv); + + if (resume && dev_priv->csr.dmc_payload) + intel_csr_load_program(dev_priv); } #undef CNL_PROCMON_IDX diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index dbb31a0..deaf869 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -248,7 +248,7 @@ disable_clks: clk_disable_unprepare(ahb_clk); disable_gdsc: regulator_disable(gdsc_reg); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); put_clk: clk_put(ahb_clk); put_gdsc: diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index c2bdad8..824067d 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -83,6 +83,8 @@ const struct mdp5_cfg_hw msm8x74v1_config = { .caps = MDP_LM_CAP_WB }, }, .nb_stages = 5, + .max_width = 2048, + .max_height = 0xFFFF, }, .dspp = { .count = 3, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 6fcb58a..4409776 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -804,8 +804,6 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); - pm_runtime_put_autosuspend(&pdev->dev); - set_cursor: ret = mdp5_ctl_set_cursor(ctl, pipeline, 0, cursor_enable); if (ret) { diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index f15821a0..ea5bb0e 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -610,17 +610,6 @@ int msm_gem_sync_object(struct drm_gem_object *obj, struct dma_fence *fence; int i, ret; - if (!exclusive) { - /* NOTE: _reserve_shared() must happen before _add_shared_fence(), - * which makes this a slightly strange place to call it. OTOH this - * is a convenient can-fail point to hook it in. (And similar to - * how etnaviv and nouveau handle this.) - */ - ret = reservation_object_reserve_shared(msm_obj->resv); - if (ret) - return ret; - } - fobj = reservation_object_get_list(msm_obj->resv); if (!fobj || (fobj->shared_count == 0)) { fence = reservation_object_get_excl(msm_obj->resv); @@ -1045,10 +1034,10 @@ static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size, } vaddr = msm_gem_get_vaddr(obj); - if (!vaddr) { + if (IS_ERR(vaddr)) { msm_gem_put_iova(obj, aspace); drm_gem_object_unreference(obj); - return ERR_PTR(-ENOMEM); + return ERR_CAST(vaddr); } if (bo) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5d0a75d..93535ca 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -221,7 +221,7 @@ fail: return ret; } -static int submit_fence_sync(struct msm_gem_submit *submit) +static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) { int i, ret = 0; @@ -229,6 +229,20 @@ static int submit_fence_sync(struct msm_gem_submit *submit) struct msm_gem_object *msm_obj = submit->bos[i].obj; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; + if (!write) { + /* NOTE: _reserve_shared() must happen before + * _add_shared_fence(), which makes this a slightly + * strange place to call it. OTOH this is a + * convenient can-fail point to hook it in. + */ + ret = reservation_object_reserve_shared(msm_obj->resv); + if (ret) + return ret; + } + + if (no_implicit) + continue; + ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); if (ret) break; @@ -451,11 +465,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out; - if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) { - ret = submit_fence_sync(submit); - if (ret) - goto out; - } + ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT)); + if (ret) + goto out; ret = submit_pin_objects(submit); if (ret) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index ffbff27..6a88703 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -718,7 +718,8 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_gem_put_iova(gpu->rb->bo, gpu->aspace); msm_ringbuffer_destroy(gpu->rb); } - if (gpu->aspace) { + + if (!IS_ERR_OR_NULL(gpu->aspace)) { gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, NULL, 0); msm_gem_address_space_put(gpu->aspace); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 0366b80..ec56794 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -111,10 +111,14 @@ static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + /* Note that smp_load_acquire() is not strictly required + * as CIRC_SPACE_TO_END() does not access the tail more + * than once. + */ n = min(sz, circ_space_to_end(&rd->fifo)); memcpy(fptr, ptr, n); - fifo->head = (fifo->head + n) & (BUF_SZ - 1); + smp_store_release(&fifo->head, (fifo->head + n) & (BUF_SZ - 1)); sz -= n; ptr += n; @@ -145,13 +149,17 @@ static ssize_t rd_read(struct file *file, char __user *buf, if (ret) goto out; + /* Note that smp_load_acquire() is not strictly required + * as CIRC_CNT_TO_END() does not access the head more than + * once. + */ n = min_t(int, sz, circ_count_to_end(&rd->fifo)); if (copy_to_user(buf, fptr, n)) { ret = -EFAULT; goto out; } - fifo->tail = (fifo->tail + n) & (BUF_SZ - 1); + smp_store_release(&fifo->tail, (fifo->tail + n) & (BUF_SZ - 1)); *ppos += n; wake_up_all(&rd->fifo_event); diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index 9ea6cd5..3cf1a69 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -302,26 +302,29 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master, hdmi->mod_clk = devm_clk_get(dev, "mod"); if (IS_ERR(hdmi->mod_clk)) { dev_err(dev, "Couldn't get the HDMI mod clock\n"); - return PTR_ERR(hdmi->mod_clk); + ret = PTR_ERR(hdmi->mod_clk); + goto err_disable_bus_clk; } clk_prepare_enable(hdmi->mod_clk); hdmi->pll0_clk = devm_clk_get(dev, "pll-0"); if (IS_ERR(hdmi->pll0_clk)) { dev_err(dev, "Couldn't get the HDMI PLL 0 clock\n"); - return PTR_ERR(hdmi->pll0_clk); + ret = PTR_ERR(hdmi->pll0_clk); + goto err_disable_mod_clk; } hdmi->pll1_clk = devm_clk_get(dev, "pll-1"); if (IS_ERR(hdmi->pll1_clk)) { dev_err(dev, "Couldn't get the HDMI PLL 1 clock\n"); - return PTR_ERR(hdmi->pll1_clk); + ret = PTR_ERR(hdmi->pll1_clk); + goto err_disable_mod_clk; } ret = sun4i_tmds_create(hdmi); if (ret) { dev_err(dev, "Couldn't create the TMDS clock\n"); - return ret; + goto err_disable_mod_clk; } writel(SUN4I_HDMI_CTRL_ENABLE, hdmi->base + SUN4I_HDMI_CTRL_REG); @@ -362,7 +365,7 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master, ret = sun4i_hdmi_i2c_create(dev, hdmi); if (ret) { dev_err(dev, "Couldn't create the HDMI I2C adapter\n"); - return ret; + goto err_disable_mod_clk; } drm_encoder_helper_add(&hdmi->encoder, @@ -422,6 +425,10 @@ err_cleanup_connector: drm_encoder_cleanup(&hdmi->encoder); err_del_i2c_adapter: i2c_del_adapter(hdmi->i2c); +err_disable_mod_clk: + clk_disable_unprepare(hdmi->mod_clk); +err_disable_bus_clk: + clk_disable_unprepare(hdmi->bus_clk); return ret; } @@ -434,6 +441,8 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master, drm_connector_cleanup(&hdmi->connector); drm_encoder_cleanup(&hdmi->encoder); i2c_del_adapter(hdmi->i2c); + clk_disable_unprepare(hdmi->mod_clk); + clk_disable_unprepare(hdmi->bus_clk); } static const struct component_ops sun4i_hdmi_ops = { diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 6a573d2..658fa2d 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -405,6 +405,14 @@ int ipu_idmac_lock_enable(struct ipuv3_channel *channel, int num_bursts) return -EINVAL; } + /* + * IPUv3EX / i.MX51 has a different register layout, and on IPUv3M / + * i.MX53 channel arbitration locking doesn't seem to work properly. + * Allow enabling the lock feature on IPUv3H / i.MX6 only. + */ + if (bursts && ipu->ipu_type != IPUV3H) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(idmac_lock_en_info); i++) { if (channel->num == idmac_lock_en_info[i].chnum) break; diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index c35f74c..c860a79 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -73,6 +73,14 @@ #define IPU_PRE_STORE_ENG_CTRL_WR_NUM_BYTES(v) ((v & 0x7) << 1) #define IPU_PRE_STORE_ENG_CTRL_OUTPUT_ACTIVE_BPP(v) ((v & 0x3) << 4) +#define IPU_PRE_STORE_ENG_STATUS 0x120 +#define IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_X_MASK 0xffff +#define IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_X_SHIFT 0 +#define IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_Y_MASK 0x3fff +#define IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_Y_SHIFT 16 +#define IPU_PRE_STORE_ENG_STATUS_STORE_FIFO_FULL (1 << 30) +#define IPU_PRE_STORE_ENG_STATUS_STORE_FIELD (1 << 31) + #define IPU_PRE_STORE_ENG_SIZE 0x130 #define IPU_PRE_STORE_ENG_SIZE_INPUT_WIDTH(v) ((v & 0xffff) << 0) #define IPU_PRE_STORE_ENG_SIZE_INPUT_HEIGHT(v) ((v & 0xffff) << 16) @@ -93,6 +101,7 @@ struct ipu_pre { dma_addr_t buffer_paddr; void *buffer_virt; bool in_use; + unsigned int safe_window_end; }; static DEFINE_MUTEX(ipu_pre_list_mutex); @@ -160,6 +169,9 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, u32 active_bpp = info->cpp[0] >> 1; u32 val; + /* calculate safe window for ctrl register updates */ + pre->safe_window_end = height - 2; + writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF); writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); @@ -199,7 +211,24 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr) { + unsigned long timeout = jiffies + msecs_to_jiffies(5); + unsigned short current_yblock; + u32 val; + writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + + do { + if (time_after(jiffies, timeout)) { + dev_warn(pre->dev, "timeout waiting for PRE safe window\n"); + return; + } + + val = readl(pre->regs + IPU_PRE_STORE_ENG_STATUS); + current_yblock = + (val >> IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_Y_SHIFT) & + IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_Y_MASK; + } while (current_yblock == 0 || current_yblock >= pre->safe_window_end); + writel(IPU_PRE_CTRL_SDW_UPDATE, pre->regs + IPU_PRE_CTRL_SET); } diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c index ecc9ea4..0013ca9 100644 --- a/drivers/gpu/ipu-v3/ipu-prg.c +++ b/drivers/gpu/ipu-v3/ipu-prg.c @@ -14,6 +14,7 @@ #include <drm/drm_fourcc.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/iopoll.h> #include <linux/mfd/syscon.h> #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> #include <linux/module.h> @@ -329,6 +330,12 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan, val = IPU_PRG_REG_UPDATE_REG_UPDATE; writel(val, prg->regs + IPU_PRG_REG_UPDATE); + /* wait for both double buffers to be filled */ + readl_poll_timeout(prg->regs + IPU_PRG_STATUS, val, + (val & IPU_PRG_STATUS_BUFFER0_READY(prg_chan)) && + (val & IPU_PRG_STATUS_BUFFER1_READY(prg_chan)), + 5, 1000); + clk_disable_unprepare(prg->clk_ipg); chan->enabled = true; diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 0a3117c..374301f 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -281,6 +281,7 @@ config HID_ELECOM Support for ELECOM devices: - BM084 Bluetooth Mouse - DEFT Trackball (Wired and wireless) + - HUGE Trackball (Wired and wireless) config HID_ELO tristate "ELO USB 4000/4500 touchscreen" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 9bc9116..330ca98 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2032,6 +2032,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_HUGE_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_HUGE_WIRELESS) }, #endif #if IS_ENABLED(CONFIG_HID_ELO) { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) }, diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index e2c7465..54aeea5 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -3,6 +3,7 @@ * Copyright (c) 2010 Richard Nauber <Richard.Nauber@gmail.com> * Copyright (c) 2016 Yuxuan Shui <yshuiv7@gmail.com> * Copyright (c) 2017 Diego Elio Pettenò <flameeyes@flameeyes.eu> + * Copyright (c) 2017 Alex Manoussakis <amanou@gnu.org> */ /* @@ -32,9 +33,11 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, break; case USB_DEVICE_ID_ELECOM_DEFT_WIRED: case USB_DEVICE_ID_ELECOM_DEFT_WIRELESS: - /* The DEFT trackball has eight buttons, but its descriptor only - * reports five, disabling the three Fn buttons on the top of - * the mouse. + case USB_DEVICE_ID_ELECOM_HUGE_WIRED: + case USB_DEVICE_ID_ELECOM_HUGE_WIRELESS: + /* The DEFT/HUGE trackball has eight buttons, but its descriptor + * only reports five, disabling the three Fn buttons on the top + * of the mouse. * * Apply the following diff to the descriptor: * @@ -62,7 +65,7 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, * End Collection, End Collection, */ if (*rsize == 213 && rdesc[13] == 5 && rdesc[21] == 5) { - hid_info(hdev, "Fixing up Elecom DEFT Fn buttons\n"); + hid_info(hdev, "Fixing up Elecom DEFT/HUGE Fn buttons\n"); rdesc[13] = 8; /* Button/Variable Report Count */ rdesc[21] = 8; /* Button/Variable Usage Maximum */ rdesc[29] = 0; /* Button/Constant Report Count */ @@ -76,6 +79,8 @@ static const struct hid_device_id elecom_devices[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_HUGE_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_HUGE_WIRELESS) }, { } }; MODULE_DEVICE_TABLE(hid, elecom_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index a989191..be2e005 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -368,6 +368,8 @@ #define USB_DEVICE_ID_ELECOM_BM084 0x0061 #define USB_DEVICE_ID_ELECOM_DEFT_WIRED 0x00fe #define USB_DEVICE_ID_ELECOM_DEFT_WIRELESS 0x00ff +#define USB_DEVICE_ID_ELECOM_HUGE_WIRED 0x010c +#define USB_DEVICE_ID_ELECOM_HUGE_WIRELESS 0x010d #define USB_VENDOR_ID_DREAM_CHEEKY 0x1d34 #define USB_DEVICE_ID_DREAM_CHEEKY_WN 0x0004 diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 089bad8..045b5da 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -975,6 +975,8 @@ static int usbhid_parse(struct hid_device *hid) unsigned int rsize = 0; char *rdesc; int ret, n; + int num_descriptors; + size_t offset = offsetof(struct hid_descriptor, desc); quirks = usbhid_lookup_quirk(le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); @@ -997,10 +999,18 @@ static int usbhid_parse(struct hid_device *hid) return -ENODEV; } + if (hdesc->bLength < sizeof(struct hid_descriptor)) { + dbg_hid("hid descriptor is too short\n"); + return -EINVAL; + } + hid->version = le16_to_cpu(hdesc->bcdHID); hid->country = hdesc->bCountryCode; - for (n = 0; n < hdesc->bNumDescriptors; n++) + num_descriptors = min_t(int, hdesc->bNumDescriptors, + (hdesc->bLength - offset) / sizeof(struct hid_class_descriptor)); + + for (n = 0; n < num_descriptors; n++) if (hdesc->desc[n].bDescriptorType == HID_DT_REPORT) rsize = le16_to_cpu(hdesc->desc[n].wDescriptorLength); diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index efd5db7..894b67a 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -640,6 +640,7 @@ void vmbus_close(struct vmbus_channel *channel) */ return; } + mutex_lock(&vmbus_connection.channel_mutex); /* * Close all the sub-channels first and then close the * primary channel. @@ -648,16 +649,15 @@ void vmbus_close(struct vmbus_channel *channel) cur_channel = list_entry(cur, struct vmbus_channel, sc_list); vmbus_close_internal(cur_channel); if (cur_channel->rescind) { - mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(cur_channel, + hv_process_channel_removal( cur_channel->offermsg.child_relid); - mutex_unlock(&vmbus_connection.channel_mutex); } } /* * Now close the primary. */ vmbus_close_internal(channel); + mutex_unlock(&vmbus_connection.channel_mutex); } EXPORT_SYMBOL_GPL(vmbus_close); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index bcbb031..018d2e0 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -159,7 +159,7 @@ static void vmbus_rescind_cleanup(struct vmbus_channel *channel) spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); - + channel->rescind = true; list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) { @@ -381,14 +381,21 @@ static void vmbus_release_relid(u32 relid) true); } -void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) +void hv_process_channel_removal(u32 relid) { unsigned long flags; - struct vmbus_channel *primary_channel; + struct vmbus_channel *primary_channel, *channel; - BUG_ON(!channel->rescind); BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + /* + * Make sure channel is valid as we may have raced. + */ + channel = relid2channel(relid); + if (!channel) + return; + + BUG_ON(!channel->rescind); if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -515,6 +522,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) if (!fnew) { if (channel->sc_creation_callback != NULL) channel->sc_creation_callback(newchannel); + newchannel->probe_done = true; return; } @@ -834,7 +842,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) { struct vmbus_channel_rescind_offer *rescind; struct vmbus_channel *channel; - unsigned long flags; struct device *dev; rescind = (struct vmbus_channel_rescind_offer *)hdr; @@ -873,16 +880,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) return; } - spin_lock_irqsave(&channel->lock, flags); - channel->rescind = true; - spin_unlock_irqrestore(&channel->lock, flags); - - /* - * Now that we have posted the rescind state, perform - * rescind related cleanup. - */ - vmbus_rescind_cleanup(channel); - /* * Now wait for offer handling to complete. */ @@ -901,6 +898,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) if (channel->device_obj) { if (channel->chn_rescind_callback) { channel->chn_rescind_callback(channel); + vmbus_rescind_cleanup(channel); return; } /* @@ -909,6 +907,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) */ dev = get_device(&channel->device_obj->device); if (dev) { + vmbus_rescind_cleanup(channel); vmbus_device_unregister(channel->device_obj); put_device(dev); } @@ -921,16 +920,16 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * 1. Close all sub-channels first * 2. Then close the primary channel. */ + mutex_lock(&vmbus_connection.channel_mutex); + vmbus_rescind_cleanup(channel); if (channel->state == CHANNEL_OPEN_STATE) { /* * The channel is currently not open; * it is safe for us to cleanup the channel. */ - mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(channel, - channel->offermsg.child_relid); - mutex_unlock(&vmbus_connection.channel_mutex); + hv_process_channel_removal(rescind->child_relid); } + mutex_unlock(&vmbus_connection.channel_mutex); } } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a9d49f6..937801a 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -768,8 +768,7 @@ static void vmbus_device_release(struct device *device) struct vmbus_channel *channel = hv_dev->channel; mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(channel, - channel->offermsg.child_relid); + hv_process_channel_removal(channel->offermsg.child_relid); mutex_unlock(&vmbus_connection.channel_mutex); kfree(hv_dev); diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index 9c0dbb8..e1be610 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -630,7 +630,7 @@ static int xgene_hwmon_probe(struct platform_device *pdev) sizeof(struct slimpro_resp_msg) * ASYNC_MSG_FIFO_SIZE, GFP_KERNEL); if (rc) - goto out_mbox_free; + return -ENOMEM; INIT_WORK(&ctx->workq, xgene_hwmon_evt_work); @@ -646,7 +646,8 @@ static int xgene_hwmon_probe(struct platform_device *pdev) if (IS_ERR(ctx->mbox_chan)) { dev_err(&pdev->dev, "SLIMpro mailbox channel request failed\n"); - return -ENODEV; + rc = -ENODEV; + goto out_mbox_free; } } else { struct acpi_pcct_hw_reduced *cppc_ss; @@ -654,7 +655,8 @@ static int xgene_hwmon_probe(struct platform_device *pdev) if (device_property_read_u32(&pdev->dev, "pcc-channel", &ctx->mbox_idx)) { dev_err(&pdev->dev, "no pcc-channel property\n"); - return -ENODEV; + rc = -ENODEV; + goto out_mbox_free; } cl->rx_callback = xgene_hwmon_pcc_rx_cb; @@ -662,7 +664,8 @@ static int xgene_hwmon_probe(struct platform_device *pdev) if (IS_ERR(ctx->mbox_chan)) { dev_err(&pdev->dev, "PPC channel request failed\n"); - return -ENODEV; + rc = -ENODEV; + goto out_mbox_free; } /* @@ -675,13 +678,13 @@ static int xgene_hwmon_probe(struct platform_device *pdev) if (!cppc_ss) { dev_err(&pdev->dev, "PPC subspace not found\n"); rc = -ENODEV; - goto out_mbox_free; + goto out; } if (!ctx->mbox_chan->mbox->txdone_irq) { dev_err(&pdev->dev, "PCC IRQ not supported\n"); rc = -ENODEV; - goto out_mbox_free; + goto out; } /* @@ -696,14 +699,14 @@ static int xgene_hwmon_probe(struct platform_device *pdev) } else { dev_err(&pdev->dev, "Failed to get PCC comm region\n"); rc = -ENODEV; - goto out_mbox_free; + goto out; } if (!ctx->pcc_comm_addr) { dev_err(&pdev->dev, "Failed to ioremap PCC comm region\n"); rc = -ENOMEM; - goto out_mbox_free; + goto out; } /* diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index c06dce2..45a3f3ca 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -131,6 +131,7 @@ config I2C_I801 Gemini Lake (SOC) Cannon Lake-H (PCH) Cannon Lake-LP (PCH) + Cedar Fork (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e114e4e..9e12a53 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -68,6 +68,7 @@ * Gemini Lake (SOC) 0x31d4 32 hard yes yes yes * Cannon Lake-H (PCH) 0xa323 32 hard yes yes yes * Cannon Lake-LP (PCH) 0x9da3 32 hard yes yes yes + * Cedar Fork (PCH) 0x18df 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -204,6 +205,7 @@ /* Older devices have their ID defined in <linux/pci_ids.h> */ #define PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS 0x0f12 +#define PCI_DEVICE_ID_INTEL_CDF_SMBUS 0x18df #define PCI_DEVICE_ID_INTEL_DNV_SMBUS 0x19df #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 @@ -1025,6 +1027,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BRASWELL_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) }, @@ -1513,6 +1516,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS: case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS: case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS: + case PCI_DEVICE_ID_INTEL_CDF_SMBUS: case PCI_DEVICE_ID_INTEL_DNV_SMBUS: case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS: priv->features |= FEATURE_I2C_BLOCK_READ; diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 22e08ae..25fcc3c 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -627,6 +627,7 @@ static const struct dev_pm_ops sprd_i2c_pm_ops = { static const struct of_device_id sprd_i2c_of_match[] = { { .compatible = "sprd,sc9860-i2c", }, + {}, }; static struct platform_driver sprd_i2c_driver = { diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 47c67b0..d4a6e9c 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -215,7 +215,7 @@ struct stm32f7_i2c_dev { unsigned int msg_num; unsigned int msg_id; struct stm32f7_i2c_msg f7_msg; - struct stm32f7_i2c_setup *setup; + struct stm32f7_i2c_setup setup; struct stm32f7_i2c_timings timing; }; @@ -265,7 +265,7 @@ static struct stm32f7_i2c_spec i2c_specs[] = { }, }; -struct stm32f7_i2c_setup stm32f7_setup = { +static const struct stm32f7_i2c_setup stm32f7_setup = { .rise_time = STM32F7_I2C_RISE_TIME_DEFAULT, .fall_time = STM32F7_I2C_FALL_TIME_DEFAULT, .dnf = STM32F7_I2C_DNF_DEFAULT, @@ -537,7 +537,7 @@ static void stm32f7_i2c_hw_config(struct stm32f7_i2c_dev *i2c_dev) writel_relaxed(timing, i2c_dev->base + STM32F7_I2C_TIMINGR); /* Enable I2C */ - if (i2c_dev->setup->analog_filter) + if (i2c_dev->setup.analog_filter) stm32f7_i2c_clr_bits(i2c_dev->base + STM32F7_I2C_CR1, STM32F7_I2C_CR1_ANFOFF); else @@ -887,22 +887,19 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) } setup = of_device_get_match_data(&pdev->dev); - i2c_dev->setup->rise_time = setup->rise_time; - i2c_dev->setup->fall_time = setup->fall_time; - i2c_dev->setup->dnf = setup->dnf; - i2c_dev->setup->analog_filter = setup->analog_filter; + i2c_dev->setup = *setup; ret = device_property_read_u32(i2c_dev->dev, "i2c-scl-rising-time-ns", &rise_time); if (!ret) - i2c_dev->setup->rise_time = rise_time; + i2c_dev->setup.rise_time = rise_time; ret = device_property_read_u32(i2c_dev->dev, "i2c-scl-falling-time-ns", &fall_time); if (!ret) - i2c_dev->setup->fall_time = fall_time; + i2c_dev->setup.fall_time = fall_time; - ret = stm32f7_i2c_setup_timing(i2c_dev, i2c_dev->setup); + ret = stm32f7_i2c_setup_timing(i2c_dev, &i2c_dev->setup); if (ret) goto clk_free; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 01b2adf..eaf39e5 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1451,6 +1451,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, if (hwif_init(hwif) == 0) { printk(KERN_INFO "%s: failed to initialize IDE " "interface\n", hwif->name); + device_unregister(hwif->portdev); device_unregister(&hwif->gendev); ide_disable_port(hwif); continue; diff --git a/drivers/ide/ide-scan-pci.c b/drivers/ide/ide-scan-pci.c index 86aa88a..acf8748 100644 --- a/drivers/ide/ide-scan-pci.c +++ b/drivers/ide/ide-scan-pci.c @@ -56,6 +56,7 @@ static int __init ide_scan_pcidev(struct pci_dev *dev) { struct list_head *l; struct pci_driver *d; + int ret; list_for_each(l, &ide_pci_drivers) { d = list_entry(l, struct pci_driver, node); @@ -63,10 +64,14 @@ static int __init ide_scan_pcidev(struct pci_dev *dev) const struct pci_device_id *id = pci_match_id(d->id_table, dev); - if (id != NULL && d->probe(dev, id) >= 0) { - dev->driver = d; - pci_dev_get(dev); - return 1; + if (id != NULL) { + pci_assign_irq(dev); + ret = d->probe(dev, id); + if (ret >= 0) { + dev->driver = d; + pci_dev_get(dev); + return 1; + } } } } diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 112d2fe..fdc8e81 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -179,6 +179,7 @@ EXPORT_SYMBOL_GPL(ide_setup_pci_noise); /** * ide_pci_enable - do PCI enables * @dev: PCI device + * @bars: PCI BARs mask * @d: IDE port info * * Enable the IDE PCI device. We attempt to enable the device in full @@ -189,9 +190,10 @@ EXPORT_SYMBOL_GPL(ide_setup_pci_noise); * Returns zero on success or an error code */ -static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d) +static int ide_pci_enable(struct pci_dev *dev, int bars, + const struct ide_port_info *d) { - int ret, bars; + int ret; if (pci_enable_device(dev)) { ret = pci_enable_device_io(dev); @@ -216,18 +218,6 @@ static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d) goto out; } - if (d->host_flags & IDE_HFLAG_SINGLE) - bars = (1 << 2) - 1; - else - bars = (1 << 4) - 1; - - if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) { - if (d->host_flags & IDE_HFLAG_CS5520) - bars |= (1 << 2); - else - bars |= (1 << 4); - } - ret = pci_request_selected_regions(dev, bars, d->name); if (ret < 0) printk(KERN_ERR "%s %s: can't reserve resources\n", @@ -403,6 +393,7 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d) /** * ide_setup_pci_controller - set up IDE PCI * @dev: PCI device + * @bars: PCI BARs mask * @d: IDE port info * @noisy: verbose flag * @@ -411,7 +402,7 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d) * and enables it if need be */ -static int ide_setup_pci_controller(struct pci_dev *dev, +static int ide_setup_pci_controller(struct pci_dev *dev, int bars, const struct ide_port_info *d, int noisy) { int ret; @@ -420,7 +411,7 @@ static int ide_setup_pci_controller(struct pci_dev *dev, if (noisy) ide_setup_pci_noise(dev, d); - ret = ide_pci_enable(dev, d); + ret = ide_pci_enable(dev, bars, d); if (ret < 0) goto out; @@ -428,16 +419,20 @@ static int ide_setup_pci_controller(struct pci_dev *dev, if (ret < 0) { printk(KERN_ERR "%s %s: error accessing PCI regs\n", d->name, pci_name(dev)); - goto out; + goto out_free_bars; } if (!(pcicmd & PCI_COMMAND_IO)) { /* is device disabled? */ ret = ide_pci_configure(dev, d); if (ret < 0) - goto out; + goto out_free_bars; printk(KERN_INFO "%s %s: device enabled (Linux)\n", d->name, pci_name(dev)); } + goto out; + +out_free_bars: + pci_release_selected_regions(dev, bars); out: return ret; } @@ -540,13 +535,28 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, { struct pci_dev *pdev[] = { dev1, dev2 }; struct ide_host *host; - int ret, i, n_ports = dev2 ? 4 : 2; + int ret, i, n_ports = dev2 ? 4 : 2, bars; struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL }; + if (d->host_flags & IDE_HFLAG_SINGLE) + bars = (1 << 2) - 1; + else + bars = (1 << 4) - 1; + + if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) { + if (d->host_flags & IDE_HFLAG_CS5520) + bars |= (1 << 2); + else + bars |= (1 << 4); + } + for (i = 0; i < n_ports / 2; i++) { - ret = ide_setup_pci_controller(pdev[i], d, !i); - if (ret < 0) + ret = ide_setup_pci_controller(pdev[i], bars, d, !i); + if (ret < 0) { + if (i == 1) + pci_release_selected_regions(pdev[0], bars); goto out; + } ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]); } @@ -554,7 +564,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, host = ide_host_alloc(d, hws, n_ports); if (host == NULL) { ret = -ENOMEM; - goto out; + goto out_free_bars; } host->dev[0] = &dev1->dev; @@ -576,7 +586,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, * do_ide_setup_pci_device() on the first device! */ if (ret < 0) - goto out; + goto out_free_bars; /* fixup IRQ */ if (ide_pci_is_in_compatibility_mode(pdev[i])) { @@ -589,6 +599,13 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, ret = ide_host_register(host, d, hws); if (ret) ide_host_free(host); + else + goto out; + +out_free_bars: + i = n_ports / 2; + while (i--) + pci_release_selected_regions(pdev[i], bars); out: return ret; } diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 30825bb..8861c05 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -100,6 +100,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) if (ret) goto pid_query_error; + nlmsg_end(skb, nlh); + pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n", __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name); @@ -170,6 +172,8 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) &pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR); if (ret) goto add_mapping_error; + + nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); @@ -246,6 +250,8 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) &pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR); if (ret) goto query_mapping_error; + + nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); @@ -308,6 +314,8 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) if (ret) goto remove_mapping_error; + nlmsg_end(skb, nlh); + ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index c81c559..3c4faad 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -597,6 +597,9 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) &mapping_num, IWPM_NLA_MAPINFO_SEND_NUM); if (ret) goto mapinfo_num_error; + + nlmsg_end(skb, nlh); + ret = rdma_nl_unicast(skb, iwpm_pid); if (ret) { skb = NULL; @@ -678,6 +681,8 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) if (ret) goto send_mapping_info_unlock; + nlmsg_end(skb, nlh); + iwpm_print_sockaddr(&map_info->local_sockaddr, "send_mapping_info: Local sockaddr:"); iwpm_print_sockaddr(&map_info->mapped_sockaddr, diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index d1f5345..42ca534 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -48,7 +48,7 @@ * @wqe: cqp wqe for header * @header: header for the cqp wqe */ -static inline void i40iw_insert_wqe_hdr(u64 *wqe, u64 header) +void i40iw_insert_wqe_hdr(u64 *wqe, u64 header) { wmb(); /* make sure WQE is populated before polarity is set */ set_64bit_val(wqe, 24, header); diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index e217a12..5498ad0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -59,6 +59,8 @@ enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp, struct i40iw_fast_reg_stag_info *info, bool post_sq); +void i40iw_insert_wqe_hdr(u64 *wqe, u64 header); + /* HMC/FPM functions */ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_id); diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index c2cab20..59f7067 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -123,12 +123,11 @@ static void i40iw_puda_post_recvbuf(struct i40iw_puda_rsrc *rsrc, u32 wqe_idx, get_64bit_val(wqe, 24, &offset24); offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID); - set_64bit_val(wqe, 24, offset24); set_64bit_val(wqe, 0, buf->mem.pa); set_64bit_val(wqe, 8, LS_64(buf->mem.size, I40IWQPSQ_FRAG_LEN)); - set_64bit_val(wqe, 24, offset24); + i40iw_insert_wqe_hdr(wqe, offset24); } /** @@ -409,9 +408,7 @@ enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp, set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN)); set_64bit_val(wqe, 16, header[0]); - /* Ensure all data is written before writing valid bit */ - wmb(); - set_64bit_val(wqe, 24, header[1]); + i40iw_insert_wqe_hdr(wqe, header[1]); i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32); i40iw_qp_post_wr(&qp->qp_uk); @@ -539,7 +536,7 @@ static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct LS_64(2, I40IW_CQPSQ_QP_NEXTIWSTATE) | LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); - set_64bit_val(wqe, 24, header); + i40iw_insert_wqe_hdr(wqe, header); i40iw_debug_buf(cqp->dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, 32); i40iw_sc_cqp_post_sq(cqp); @@ -655,7 +652,7 @@ static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) | LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) | LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); - set_64bit_val(wqe, 24, header); + i40iw_insert_wqe_hdr(wqe, header); i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, I40IW_CQP_WQE_SIZE * 8); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 28b3d02..62be0a4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -826,12 +826,14 @@ static int i40iw_query_qp(struct ib_qp *ibqp, attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + attr->port_num = 1; init_attr->event_handler = iwqp->ibqp.event_handler; init_attr->qp_context = iwqp->ibqp.qp_context; init_attr->send_cq = iwqp->ibqp.send_cq; init_attr->recv_cq = iwqp->ibqp.recv_cq; init_attr->srq = iwqp->ibqp.srq; init_attr->cap = attr->cap; + init_attr->port_num = 1; return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d6fbad8..552f7bd 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4174,9 +4174,9 @@ err_bfreg: err_uar_page: mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); -err_cnt: - mlx5_ib_cleanup_cong_debugfs(dev); err_cong: + mlx5_ib_cleanup_cong_debugfs(dev); +err_cnt: if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index b2bb42e..254083b 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -387,7 +387,7 @@ struct qedr_qp { u8 wqe_size; u8 smac[ETH_ALEN]; - u16 vlan_id; + u16 vlan; int rc; } *rqe_wr_id; diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 4689e80..ad89653 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -105,7 +105,7 @@ void qedr_ll2_complete_rx_packet(void *cxt, qp->rqe_wr_id[qp->rq.gsi_cons].rc = data->u.data_length_error ? -EINVAL : 0; - qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = data->vlan; + qp->rqe_wr_id[qp->rq.gsi_cons].vlan = data->vlan; /* note: length stands for data length i.e. GRH is excluded */ qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length = data->length.data_length; @@ -694,6 +694,7 @@ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct qedr_cq *cq = get_qedr_cq(ibcq); struct qedr_qp *qp = dev->gsi_qp; unsigned long flags; + u16 vlan_id; int i = 0; spin_lock_irqsave(&cq->cq_lock, flags); @@ -712,9 +713,14 @@ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) wc[i].wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK; ether_addr_copy(wc[i].smac, qp->rqe_wr_id[qp->rq.cons].smac); wc[i].wc_flags |= IB_WC_WITH_SMAC; - if (qp->rqe_wr_id[qp->rq.cons].vlan_id) { + + vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan & + VLAN_VID_MASK; + if (vlan_id) { wc[i].wc_flags |= IB_WC_WITH_VLAN; - wc[i].vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan_id; + wc[i].vlan_id = vlan_id; + wc[i].sl = (qp->rqe_wr_id[qp->rq.cons].vlan & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } qedr_inc_sw_cons(&qp->rq); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 51f8215..8e8874d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2773,14 +2773,16 @@ int __init amd_iommu_init_api(void) int __init amd_iommu_init_dma_ops(void) { - swiotlb = iommu_pass_through ? 1 : 0; + swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0; iommu_detected = 1; /* * In case we don't initialize SWIOTLB (actually the common case - * when AMD IOMMU is enabled), make sure there are global - * dma_ops set as a fall-back for devices not handled by this - * driver (for example non-PCI devices). + * when AMD IOMMU is enabled and SME is not active), make sure there + * are global dma_ops set as a fall-back for devices not handled by + * this driver (for example non-PCI devices). When SME is active, + * make sure that swiotlb variable remains set so the global dma_ops + * continue to be SWIOTLB. */ if (!swiotlb) dma_ops = &nommu_dma_ops; @@ -3046,6 +3048,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, mutex_unlock(&domain->api_lock); domain_flush_tlb_pde(domain); + domain_flush_complete(domain); return unmap_size; } diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index f596fcc..25c2c75 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -709,7 +709,7 @@ static const struct dev_pm_ops sysmmu_pm_ops = { pm_runtime_force_resume) }; -static const struct of_device_id sysmmu_of_match[] __initconst = { +static const struct of_device_id sysmmu_of_match[] = { { .compatible = "samsung,exynos-sysmmu", }, { }, }; diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 7d5286b..1841d03 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -64,7 +64,7 @@ EXPORT_SYMBOL(closure_put); void __closure_wake_up(struct closure_waitlist *wait_list) { struct llist_node *list; - struct closure *cl; + struct closure *cl, *t; struct llist_node *reverse = NULL; list = llist_del_all(&wait_list->list); @@ -73,7 +73,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list) reverse = llist_reverse_order(list); /* Then do the wakeups */ - llist_for_each_entry(cl, reverse, list) { + llist_for_each_entry_safe(cl, t, reverse, list) { closure_set_waiting(cl, 0); closure_sub(cl, CLOSURE_WAITING + 1); } diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 24eddbd..2031447 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -149,5 +149,6 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen extern atomic_t dm_global_event_nr; extern wait_queue_head_t dm_global_eventq; +void dm_issue_global_event(void); #endif diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a55ffd4..96ab465 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2466,6 +2466,7 @@ static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key kfree(cipher_api); return ret; } + kfree(cipher_api); return 0; bad_mem: @@ -2584,6 +2585,10 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar ti->error = "Invalid feature value for sector_size"; return -EINVAL; } + if (ti->len & ((cc->sector_size >> SECTOR_SHIFT) - 1)) { + ti->error = "Device size is not multiple of sector_size feature"; + return -EINVAL; + } cc->sector_shift = __ffs(cc->sector_size) - SECTOR_SHIFT; } else if (!strcasecmp(opt_string, "iv_large_sectors")) set_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 8756a68..e52676f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -477,9 +477,13 @@ static int remove_all(struct file *filp, struct dm_ioctl *param, size_t param_si * Round up the ptr to an 8-byte boundary. */ #define ALIGN_MASK 7 +static inline size_t align_val(size_t val) +{ + return (val + ALIGN_MASK) & ~ALIGN_MASK; +} static inline void *align_ptr(void *ptr) { - return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK); + return (void *)align_val((size_t)ptr); } /* @@ -505,7 +509,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ struct hash_cell *hc; size_t len, needed = 0; struct gendisk *disk; - struct dm_name_list *nl, *old_nl = NULL; + struct dm_name_list *orig_nl, *nl, *old_nl = NULL; uint32_t *event_nr; down_write(&_hash_lock); @@ -516,17 +520,15 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ */ for (i = 0; i < NUM_BUCKETS; i++) { list_for_each_entry (hc, _name_buckets + i, name_list) { - needed += sizeof(struct dm_name_list); - needed += strlen(hc->name) + 1; - needed += ALIGN_MASK; - needed += (sizeof(uint32_t) + ALIGN_MASK) & ~ALIGN_MASK; + needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); + needed += align_val(sizeof(uint32_t)); } } /* * Grab our output buffer. */ - nl = get_result_buffer(param, param_size, &len); + nl = orig_nl = get_result_buffer(param, param_size, &len); if (len < needed) { param->flags |= DM_BUFFER_FULL_FLAG; goto out; @@ -549,11 +551,16 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ strcpy(nl->name, hc->name); old_nl = nl; - event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1); + event_nr = align_ptr(nl->name + strlen(hc->name) + 1); *event_nr = dm_get_event_nr(hc->md); nl = align_ptr(event_nr + 1); } } + /* + * If mismatch happens, security may be compromised due to buffer + * overflow, so it's better to crash. + */ + BUG_ON((char *)nl - (char *)orig_nl != needed); out: up_write(&_hash_lock); @@ -1621,7 +1628,8 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para * which has a variable size, is not used by the function processing * the ioctl. */ -#define IOCTL_FLAGS_NO_PARAMS 1 +#define IOCTL_FLAGS_NO_PARAMS 1 +#define IOCTL_FLAGS_ISSUE_GLOBAL_EVENT 2 /*----------------------------------------------------------------- * Implementation of open/close/ioctl on the special char @@ -1635,12 +1643,12 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) ioctl_fn fn; } _ioctls[] = { {DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */ - {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all}, + {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, remove_all}, {DM_LIST_DEVICES_CMD, 0, list_devices}, - {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create}, - {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove}, - {DM_DEV_RENAME_CMD, 0, dev_rename}, + {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_create}, + {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_remove}, + {DM_DEV_RENAME_CMD, IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_rename}, {DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend}, {DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status}, {DM_DEV_WAIT_CMD, 0, dev_wait}, @@ -1869,6 +1877,9 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS)) DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd); + if (!r && ioctl_flags & IOCTL_FLAGS_ISSUE_GLOBAL_EVENT) + dm_issue_global_event(); + /* * Copy the results back to userland. */ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 1ac58c5..2245d06 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3297,11 +3297,10 @@ static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev, static sector_t rs_get_progress(struct raid_set *rs, sector_t resync_max_sectors, bool *array_in_sync) { - sector_t r, recovery_cp, curr_resync_completed; + sector_t r, curr_resync_completed; struct mddev *mddev = &rs->md; curr_resync_completed = mddev->curr_resync_completed ?: mddev->recovery_cp; - recovery_cp = mddev->recovery_cp; *array_in_sync = false; if (rs_is_raid0(rs)) { @@ -3330,9 +3329,11 @@ static sector_t rs_get_progress(struct raid_set *rs, } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) r = curr_resync_completed; else - r = recovery_cp; + r = mddev->recovery_cp; - if (r == MaxSector) { + if ((r == MaxSector) || + (test_bit(MD_RECOVERY_DONE, &mddev->recovery) && + (mddev->curr_resync_completed == resync_max_sectors))) { /* * Sync complete. */ @@ -3892,7 +3893,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 12, 1}, + .version = {1, 13, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6e54145..4be8532 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -52,6 +52,12 @@ static struct workqueue_struct *deferred_remove_workqueue; atomic_t dm_global_event_nr = ATOMIC_INIT(0); DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq); +void dm_issue_global_event(void) +{ + atomic_inc(&dm_global_event_nr); + wake_up(&dm_global_eventq); +} + /* * One of these is allocated per bio. */ @@ -1865,9 +1871,8 @@ static void event_callback(void *context) dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); - atomic_inc(&dm_global_event_nr); wake_up(&md->eventq); - wake_up(&dm_global_eventq); + dm_issue_global_event(); } /* @@ -2283,6 +2288,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) } map = __bind(md, table, &limits); + dm_issue_global_event(); out: mutex_unlock(&md->suspend_lock); diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index eed6c39..f8a808d 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1797,12 +1797,19 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, */ switch (msg->msg[1]) { case CEC_MSG_GET_CEC_VERSION: - case CEC_MSG_GIVE_DEVICE_VENDOR_ID: case CEC_MSG_ABORT: case CEC_MSG_GIVE_DEVICE_POWER_STATUS: - case CEC_MSG_GIVE_PHYSICAL_ADDR: case CEC_MSG_GIVE_OSD_NAME: + /* + * These messages reply with a directed message, so ignore if + * the initiator is Unregistered. + */ + if (!adap->passthrough && from_unregistered) + return 0; + /* Fall through */ + case CEC_MSG_GIVE_DEVICE_VENDOR_ID: case CEC_MSG_GIVE_FEATURES: + case CEC_MSG_GIVE_PHYSICAL_ADDR: /* * Skip processing these messages if the passthrough mode * is on. @@ -1810,7 +1817,7 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, if (adap->passthrough) goto skip_processing; /* Ignore if addressing is wrong */ - if (is_broadcast || from_unregistered) + if (is_broadcast) return 0; break; diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 2fcba16..9139d01 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -141,22 +141,39 @@ struct dvb_frontend_private { static void dvb_frontend_invoke_release(struct dvb_frontend *fe, void (*release)(struct dvb_frontend *fe)); -static void dvb_frontend_free(struct kref *ref) +static void __dvb_frontend_free(struct dvb_frontend *fe) { - struct dvb_frontend *fe = - container_of(ref, struct dvb_frontend, refcount); struct dvb_frontend_private *fepriv = fe->frontend_priv; + if (!fepriv) + return; + dvb_free_device(fepriv->dvbdev); dvb_frontend_invoke_release(fe, fe->ops.release); kfree(fepriv); + fe->frontend_priv = NULL; +} + +static void dvb_frontend_free(struct kref *ref) +{ + struct dvb_frontend *fe = + container_of(ref, struct dvb_frontend, refcount); + + __dvb_frontend_free(fe); } static void dvb_frontend_put(struct dvb_frontend *fe) { - kref_put(&fe->refcount, dvb_frontend_free); + /* + * Check if the frontend was registered, as otherwise + * kref was not initialized yet. + */ + if (fe->frontend_priv) + kref_put(&fe->refcount, dvb_frontend_free); + else + __dvb_frontend_free(fe); } static void dvb_frontend_get(struct dvb_frontend *fe) diff --git a/drivers/media/dvb-frontends/dib3000mc.c b/drivers/media/dvb-frontends/dib3000mc.c index 224283f..4d086a7 100644 --- a/drivers/media/dvb-frontends/dib3000mc.c +++ b/drivers/media/dvb-frontends/dib3000mc.c @@ -55,29 +55,57 @@ struct dib3000mc_state { static u16 dib3000mc_read_word(struct dib3000mc_state *state, u16 reg) { - u8 wb[2] = { (reg >> 8) | 0x80, reg & 0xff }; - u8 rb[2]; struct i2c_msg msg[2] = { - { .addr = state->i2c_addr >> 1, .flags = 0, .buf = wb, .len = 2 }, - { .addr = state->i2c_addr >> 1, .flags = I2C_M_RD, .buf = rb, .len = 2 }, + { .addr = state->i2c_addr >> 1, .flags = 0, .len = 2 }, + { .addr = state->i2c_addr >> 1, .flags = I2C_M_RD, .len = 2 }, }; + u16 word; + u8 *b; + + b = kmalloc(4, GFP_KERNEL); + if (!b) + return 0; + + b[0] = (reg >> 8) | 0x80; + b[1] = reg; + b[2] = 0; + b[3] = 0; + + msg[0].buf = b; + msg[1].buf = b + 2; if (i2c_transfer(state->i2c_adap, msg, 2) != 2) dprintk("i2c read error on %d\n",reg); - return (rb[0] << 8) | rb[1]; + word = (b[2] << 8) | b[3]; + kfree(b); + + return word; } static int dib3000mc_write_word(struct dib3000mc_state *state, u16 reg, u16 val) { - u8 b[4] = { - (reg >> 8) & 0xff, reg & 0xff, - (val >> 8) & 0xff, val & 0xff, - }; struct i2c_msg msg = { - .addr = state->i2c_addr >> 1, .flags = 0, .buf = b, .len = 4 + .addr = state->i2c_addr >> 1, .flags = 0, .len = 4 }; - return i2c_transfer(state->i2c_adap, &msg, 1) != 1 ? -EREMOTEIO : 0; + int rc; + u8 *b; + + b = kmalloc(4, GFP_KERNEL); + if (!b) + return -ENOMEM; + + b[0] = reg >> 8; + b[1] = reg; + b[2] = val >> 8; + b[3] = val; + + msg.buf = b; + + rc = i2c_transfer(state->i2c_adap, &msg, 1) != 1 ? -EREMOTEIO : 0; + kfree(b); + + return rc; } static int dib3000mc_identify(struct dib3000mc_state *state) diff --git a/drivers/media/dvb-frontends/dvb-pll.c b/drivers/media/dvb-frontends/dvb-pll.c index 7bec3e0..5553b89 100644 --- a/drivers/media/dvb-frontends/dvb-pll.c +++ b/drivers/media/dvb-frontends/dvb-pll.c @@ -753,13 +753,19 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr, struct i2c_adapter *i2c, unsigned int pll_desc_id) { - u8 b1 [] = { 0 }; - struct i2c_msg msg = { .addr = pll_addr, .flags = I2C_M_RD, - .buf = b1, .len = 1 }; + u8 *b1; + struct i2c_msg msg = { .addr = pll_addr, .flags = I2C_M_RD, .len = 1 }; struct dvb_pll_priv *priv = NULL; int ret; const struct dvb_pll_desc *desc; + b1 = kmalloc(1, GFP_KERNEL); + if (!b1) + return NULL; + + b1[0] = 0; + msg.buf = b1; + if ((id[dvb_pll_devcount] > DVB_PLL_UNDEFINED) && (id[dvb_pll_devcount] < ARRAY_SIZE(pll_list))) pll_desc_id = id[dvb_pll_devcount]; @@ -773,15 +779,19 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr, fe->ops.i2c_gate_ctrl(fe, 1); ret = i2c_transfer (i2c, &msg, 1); - if (ret != 1) + if (ret != 1) { + kfree(b1); return NULL; + } if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); } priv = kzalloc(sizeof(struct dvb_pll_priv), GFP_KERNEL); - if (priv == NULL) + if (!priv) { + kfree(b1); return NULL; + } priv->pll_i2c_address = pll_addr; priv->i2c = i2c; @@ -811,6 +821,8 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr, "insmod option" : "autodetected"); } + kfree(b1); + return fe; } EXPORT_SYMBOL(dvb_pll_attach); diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 7e7cc49..3c4f7fa 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -112,7 +112,7 @@ config VIDEO_PXA27x config VIDEO_QCOM_CAMSS tristate "Qualcomm 8x16 V4L2 Camera Subsystem driver" - depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API + depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API && HAS_DMA depends on (ARCH_QCOM && IOMMU_DMA) || COMPILE_TEST select VIDEOBUF2_DMA_SG select V4L2_FWNODE diff --git a/drivers/media/platform/qcom/camss-8x16/camss-vfe.c b/drivers/media/platform/qcom/camss-8x16/camss-vfe.c index b21b3c2..b22d2df 100644 --- a/drivers/media/platform/qcom/camss-8x16/camss-vfe.c +++ b/drivers/media/platform/qcom/camss-8x16/camss-vfe.c @@ -2660,7 +2660,7 @@ static int vfe_get_selection(struct v4l2_subdev *sd, * * Return -EINVAL or zero on success */ -int vfe_set_selection(struct v4l2_subdev *sd, +static int vfe_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_selection *sel) { diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c index 68933d2..9b2a401 100644 --- a/drivers/media/platform/qcom/venus/helpers.c +++ b/drivers/media/platform/qcom/venus/helpers.c @@ -682,6 +682,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q) hfi_session_abort(inst); load_scale_clocks(core); + INIT_LIST_HEAD(&inst->registeredbufs); } venus_helper_buffers_done(inst, VB2_BUF_STATE_ERROR); diff --git a/drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c b/drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c index 1edf667..146ae6f 100644 --- a/drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c +++ b/drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c @@ -172,7 +172,8 @@ u32 s5p_cec_get_status(struct s5p_cec_dev *cec) { u32 status = 0; - status = readb(cec->reg + S5P_CEC_STATUS_0); + status = readb(cec->reg + S5P_CEC_STATUS_0) & 0xf; + status |= (readb(cec->reg + S5P_CEC_TX_STAT1) & 0xf) << 4; status |= readb(cec->reg + S5P_CEC_STATUS_1) << 8; status |= readb(cec->reg + S5P_CEC_STATUS_2) << 16; status |= readb(cec->reg + S5P_CEC_STATUS_3) << 24; diff --git a/drivers/media/platform/s5p-cec/s5p_cec.c b/drivers/media/platform/s5p-cec/s5p_cec.c index 58d200e..8837e26 100644 --- a/drivers/media/platform/s5p-cec/s5p_cec.c +++ b/drivers/media/platform/s5p-cec/s5p_cec.c @@ -92,7 +92,10 @@ static irqreturn_t s5p_cec_irq_handler(int irq, void *priv) dev_dbg(cec->dev, "irq received\n"); if (status & CEC_STATUS_TX_DONE) { - if (status & CEC_STATUS_TX_ERROR) { + if (status & CEC_STATUS_TX_NACK) { + dev_dbg(cec->dev, "CEC_STATUS_TX_NACK set\n"); + cec->tx = STATE_NACK; + } else if (status & CEC_STATUS_TX_ERROR) { dev_dbg(cec->dev, "CEC_STATUS_TX_ERROR set\n"); cec->tx = STATE_ERROR; } else { @@ -135,6 +138,12 @@ static irqreturn_t s5p_cec_irq_handler_thread(int irq, void *priv) cec_transmit_done(cec->adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); cec->tx = STATE_IDLE; break; + case STATE_NACK: + cec_transmit_done(cec->adap, + CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_NACK, + 0, 1, 0, 0); + cec->tx = STATE_IDLE; + break; case STATE_ERROR: cec_transmit_done(cec->adap, CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_ERROR, diff --git a/drivers/media/platform/s5p-cec/s5p_cec.h b/drivers/media/platform/s5p-cec/s5p_cec.h index 8bcd8dc..86ded522 100644 --- a/drivers/media/platform/s5p-cec/s5p_cec.h +++ b/drivers/media/platform/s5p-cec/s5p_cec.h @@ -35,6 +35,7 @@ #define CEC_STATUS_TX_TRANSFERRING (1 << 1) #define CEC_STATUS_TX_DONE (1 << 2) #define CEC_STATUS_TX_ERROR (1 << 3) +#define CEC_STATUS_TX_NACK (1 << 4) #define CEC_STATUS_TX_BYTES (0xFF << 8) #define CEC_STATUS_RX_RUNNING (1 << 16) #define CEC_STATUS_RX_RECEIVING (1 << 17) @@ -55,6 +56,7 @@ enum cec_state { STATE_IDLE, STATE_BUSY, STATE_DONE, + STATE_NACK, STATE_ERROR }; diff --git a/drivers/media/tuners/mt2060.c b/drivers/media/tuners/mt2060.c index 2e487f9..4983eeb 100644 --- a/drivers/media/tuners/mt2060.c +++ b/drivers/media/tuners/mt2060.c @@ -38,41 +38,74 @@ MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off)."); static int mt2060_readreg(struct mt2060_priv *priv, u8 reg, u8 *val) { struct i2c_msg msg[2] = { - { .addr = priv->cfg->i2c_address, .flags = 0, .buf = ®, .len = 1 }, - { .addr = priv->cfg->i2c_address, .flags = I2C_M_RD, .buf = val, .len = 1 }, + { .addr = priv->cfg->i2c_address, .flags = 0, .len = 1 }, + { .addr = priv->cfg->i2c_address, .flags = I2C_M_RD, .len = 1 }, }; + int rc = 0; + u8 *b; + + b = kmalloc(2, GFP_KERNEL); + if (!b) + return -ENOMEM; + + b[0] = reg; + b[1] = 0; + + msg[0].buf = b; + msg[1].buf = b + 1; if (i2c_transfer(priv->i2c, msg, 2) != 2) { printk(KERN_WARNING "mt2060 I2C read failed\n"); - return -EREMOTEIO; + rc = -EREMOTEIO; } - return 0; + *val = b[1]; + kfree(b); + + return rc; } // Writes a single register static int mt2060_writereg(struct mt2060_priv *priv, u8 reg, u8 val) { - u8 buf[2] = { reg, val }; struct i2c_msg msg = { - .addr = priv->cfg->i2c_address, .flags = 0, .buf = buf, .len = 2 + .addr = priv->cfg->i2c_address, .flags = 0, .len = 2 }; + u8 *buf; + int rc = 0; + + buf = kmalloc(2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = reg; + buf[1] = val; + + msg.buf = buf; if (i2c_transfer(priv->i2c, &msg, 1) != 1) { printk(KERN_WARNING "mt2060 I2C write failed\n"); - return -EREMOTEIO; + rc = -EREMOTEIO; } - return 0; + kfree(buf); + return rc; } // Writes a set of consecutive registers static int mt2060_writeregs(struct mt2060_priv *priv,u8 *buf, u8 len) { int rem, val_len; - u8 xfer_buf[16]; + u8 *xfer_buf; + int rc = 0; struct i2c_msg msg = { - .addr = priv->cfg->i2c_address, .flags = 0, .buf = xfer_buf + .addr = priv->cfg->i2c_address, .flags = 0 }; + xfer_buf = kmalloc(16, GFP_KERNEL); + if (!xfer_buf) + return -ENOMEM; + + msg.buf = xfer_buf; + for (rem = len - 1; rem > 0; rem -= priv->i2c_max_regs) { val_len = min_t(int, rem, priv->i2c_max_regs); msg.len = 1 + val_len; @@ -81,11 +114,13 @@ static int mt2060_writeregs(struct mt2060_priv *priv,u8 *buf, u8 len) if (i2c_transfer(priv->i2c, &msg, 1) != 1) { printk(KERN_WARNING "mt2060 I2C write failed (len=%i)\n", val_len); - return -EREMOTEIO; + rc = -EREMOTEIO; + break; } } - return 0; + kfree(xfer_buf); + return rc; } // Initialisation sequences diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index 5dba23c..dc9bc18 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -219,8 +219,17 @@ int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) down_read(&mm->mmap_sem); - for (dar = addr; dar < addr + size; dar += page_size) { - if (!vma || dar < vma->vm_start || dar > vma->vm_end) { + vma = find_vma(mm, addr); + if (!vma) { + pr_err("Can't find vma for addr %016llx\n", addr); + rc = -EFAULT; + goto out; + } + /* get the size of the pages allocated */ + page_size = vma_kernel_pagesize(vma); + + for (dar = (addr & ~(page_size - 1)); dar < (addr + size); dar += page_size) { + if (dar < vma->vm_start || dar >= vma->vm_end) { vma = find_vma(mm, addr); if (!vma) { pr_err("Can't find vma for addr %016llx\n", addr); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index c8307e8..0ccccba 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -127,6 +127,8 @@ #define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ #define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ +#define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ + #define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ #define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 4ff40d3..78b3172 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -93,6 +93,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, @@ -226,12 +228,15 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; /* - * For not wake-able HW runtime pm framework - * can't be used on pci device level. - * Use domain runtime pm callbacks instead. - */ - if (!pci_dev_run_wake(pdev)) - mei_me_set_pm_domain(dev); + * ME maps runtime suspend/resume to D0i states, + * hence we need to go around native PCI runtime service which + * eventually brings the device into D3cold/hot state, + * but the mei device cannot wake up from D3 unlike from D0i3. + * To get around the PCI device native runtime pm, + * ME uses runtime pm domain handlers which take precedence + * over the driver's pm handlers. + */ + mei_me_set_pm_domain(dev); if (mei_pg_is_enabled(dev)) pm_runtime_put_noidle(&pdev->dev); @@ -271,8 +276,7 @@ static void mei_me_shutdown(struct pci_dev *pdev) dev_dbg(&pdev->dev, "shutdown\n"); mei_stop(dev); - if (!pci_dev_run_wake(pdev)) - mei_me_unset_pm_domain(dev); + mei_me_unset_pm_domain(dev); mei_disable_interrupts(dev); free_irq(pdev->irq, dev); @@ -300,8 +304,7 @@ static void mei_me_remove(struct pci_dev *pdev) dev_dbg(&pdev->dev, "stop\n"); mei_stop(dev); - if (!pci_dev_run_wake(pdev)) - mei_me_unset_pm_domain(dev); + mei_me_unset_pm_domain(dev); mei_disable_interrupts(dev); diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c index e38a5f1..0566f9b 100644 --- a/drivers/misc/mei/pci-txe.c +++ b/drivers/misc/mei/pci-txe.c @@ -144,12 +144,14 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; /* - * For not wake-able HW runtime pm framework - * can't be used on pci device level. - * Use domain runtime pm callbacks instead. - */ - if (!pci_dev_run_wake(pdev)) - mei_txe_set_pm_domain(dev); + * TXE maps runtime suspend/resume to own power gating states, + * hence we need to go around native PCI runtime service which + * eventually brings the device into D3cold/hot state. + * But the TXE device cannot wake up from D3 unlike from own + * power gating. To get around PCI device native runtime pm, + * TXE uses runtime pm domain handlers which take precedence. + */ + mei_txe_set_pm_domain(dev); pm_runtime_put_noidle(&pdev->dev); @@ -186,8 +188,7 @@ static void mei_txe_shutdown(struct pci_dev *pdev) dev_dbg(&pdev->dev, "shutdown\n"); mei_stop(dev); - if (!pci_dev_run_wake(pdev)) - mei_txe_unset_pm_domain(dev); + mei_txe_unset_pm_domain(dev); mei_disable_interrupts(dev); free_irq(pdev->irq, dev); @@ -215,8 +216,7 @@ static void mei_txe_remove(struct pci_dev *pdev) mei_stop(dev); - if (!pci_dev_run_wake(pdev)) - mei_txe_unset_pm_domain(dev); + mei_txe_unset_pm_domain(dev); mei_disable_interrupts(dev); free_irq(pdev->irq, dev); @@ -318,15 +318,7 @@ static int mei_txe_pm_runtime_suspend(struct device *device) else ret = -EAGAIN; - /* - * If everything is okay we're about to enter PCI low - * power state (D3) therefor we need to disable the - * interrupts towards host. - * However if device is not wakeable we do not enter - * D-low state and we need to keep the interrupt kicking - */ - if (!ret && pci_dev_run_wake(pdev)) - mei_disable_interrupts(dev); + /* keep irq on we are staying in D0 */ dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 29fc1e6..2ad7b5c 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1634,8 +1634,6 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, } mqrq->areq.mrq = &brq->mrq; - - mmc_queue_bounce_pre(mqrq); } static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, @@ -1829,7 +1827,6 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) brq = &mq_rq->brq; old_req = mmc_queue_req_to_req(mq_rq); type = rq_data_dir(old_req) == READ ? MMC_BLK_READ : MMC_BLK_WRITE; - mmc_queue_bounce_post(mq_rq); switch (status) { case MMC_BLK_SUCCESS: diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a7eb623..36217ad 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1286,6 +1286,23 @@ out_err: return err; } +static void mmc_select_driver_type(struct mmc_card *card) +{ + int card_drv_type, drive_strength, drv_type; + + card_drv_type = card->ext_csd.raw_driver_strength | + mmc_driver_type_mask(0); + + drive_strength = mmc_select_drive_strength(card, + card->ext_csd.hs200_max_dtr, + card_drv_type, &drv_type); + + card->drive_strength = drive_strength; + + if (drv_type) + mmc_set_driver_type(card->host, drv_type); +} + static int mmc_select_hs400es(struct mmc_card *card) { struct mmc_host *host = card->host; @@ -1341,6 +1358,8 @@ static int mmc_select_hs400es(struct mmc_card *card) goto out_err; } + mmc_select_driver_type(card); + /* Switch card to HS400 */ val = EXT_CSD_TIMING_HS400 | card->drive_strength << EXT_CSD_DRV_STR_SHIFT; @@ -1374,23 +1393,6 @@ out_err: return err; } -static void mmc_select_driver_type(struct mmc_card *card) -{ - int card_drv_type, drive_strength, drv_type; - - card_drv_type = card->ext_csd.raw_driver_strength | - mmc_driver_type_mask(0); - - drive_strength = mmc_select_drive_strength(card, - card->ext_csd.hs200_max_dtr, - card_drv_type, &drv_type); - - card->drive_strength = drive_strength; - - if (drv_type) - mmc_set_driver_type(card->host, drv_type); -} - /* * For device supporting HS200 mode, the following sequence * should be done before executing the tuning process. diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 74c663b..0a4e77a 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -23,8 +23,6 @@ #include "core.h" #include "card.h" -#define MMC_QUEUE_BOUNCESZ 65536 - /* * Prepare a MMC request. This just filters out odd stuff. */ @@ -150,26 +148,6 @@ static void mmc_queue_setup_discard(struct request_queue *q, queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q); } -static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host) -{ - unsigned int bouncesz = MMC_QUEUE_BOUNCESZ; - - if (host->max_segs != 1 || (host->caps & MMC_CAP_NO_BOUNCE_BUFF)) - return 0; - - if (bouncesz > host->max_req_size) - bouncesz = host->max_req_size; - if (bouncesz > host->max_seg_size) - bouncesz = host->max_seg_size; - if (bouncesz > host->max_blk_count * 512) - bouncesz = host->max_blk_count * 512; - - if (bouncesz <= 512) - return 0; - - return bouncesz; -} - /** * mmc_init_request() - initialize the MMC-specific per-request data * @q: the request queue @@ -184,26 +162,9 @@ static int mmc_init_request(struct request_queue *q, struct request *req, struct mmc_card *card = mq->card; struct mmc_host *host = card->host; - if (card->bouncesz) { - mq_rq->bounce_buf = kmalloc(card->bouncesz, gfp); - if (!mq_rq->bounce_buf) - return -ENOMEM; - if (card->bouncesz > 512) { - mq_rq->sg = mmc_alloc_sg(1, gfp); - if (!mq_rq->sg) - return -ENOMEM; - mq_rq->bounce_sg = mmc_alloc_sg(card->bouncesz / 512, - gfp); - if (!mq_rq->bounce_sg) - return -ENOMEM; - } - } else { - mq_rq->bounce_buf = NULL; - mq_rq->bounce_sg = NULL; - mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp); - if (!mq_rq->sg) - return -ENOMEM; - } + mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp); + if (!mq_rq->sg) + return -ENOMEM; return 0; } @@ -212,13 +173,6 @@ static void mmc_exit_request(struct request_queue *q, struct request *req) { struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req); - /* It is OK to kfree(NULL) so this will be smooth */ - kfree(mq_rq->bounce_sg); - mq_rq->bounce_sg = NULL; - - kfree(mq_rq->bounce_buf); - mq_rq->bounce_buf = NULL; - kfree(mq_rq->sg); mq_rq->sg = NULL; } @@ -242,12 +196,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT; - /* - * mmc_init_request() depends on card->bouncesz so it must be calculated - * before blk_init_allocated_queue() starts allocating requests. - */ - card->bouncesz = mmc_queue_calc_bouncesz(host); - mq->card = card; mq->queue = blk_alloc_queue(GFP_KERNEL); if (!mq->queue) @@ -271,17 +219,11 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, if (mmc_can_erase(card)) mmc_queue_setup_discard(mq->queue, card); - if (card->bouncesz) { - blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512); - blk_queue_max_segments(mq->queue, card->bouncesz / 512); - blk_queue_max_segment_size(mq->queue, card->bouncesz); - } else { - blk_queue_bounce_limit(mq->queue, limit); - blk_queue_max_hw_sectors(mq->queue, - min(host->max_blk_count, host->max_req_size / 512)); - blk_queue_max_segments(mq->queue, host->max_segs); - blk_queue_max_segment_size(mq->queue, host->max_seg_size); - } + blk_queue_bounce_limit(mq->queue, limit); + blk_queue_max_hw_sectors(mq->queue, + min(host->max_blk_count, host->max_req_size / 512)); + blk_queue_max_segments(mq->queue, host->max_segs); + blk_queue_max_segment_size(mq->queue, host->max_seg_size); sema_init(&mq->thread_sem, 1); @@ -370,56 +312,7 @@ void mmc_queue_resume(struct mmc_queue *mq) */ unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq) { - unsigned int sg_len; - size_t buflen; - struct scatterlist *sg; struct request *req = mmc_queue_req_to_req(mqrq); - int i; - - if (!mqrq->bounce_buf) - return blk_rq_map_sg(mq->queue, req, mqrq->sg); - - sg_len = blk_rq_map_sg(mq->queue, req, mqrq->bounce_sg); - - mqrq->bounce_sg_len = sg_len; - - buflen = 0; - for_each_sg(mqrq->bounce_sg, sg, sg_len, i) - buflen += sg->length; - - sg_init_one(mqrq->sg, mqrq->bounce_buf, buflen); - - return 1; -} - -/* - * If writing, bounce the data to the buffer before the request - * is sent to the host driver - */ -void mmc_queue_bounce_pre(struct mmc_queue_req *mqrq) -{ - if (!mqrq->bounce_buf) - return; - - if (rq_data_dir(mmc_queue_req_to_req(mqrq)) != WRITE) - return; - - sg_copy_to_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len, - mqrq->bounce_buf, mqrq->sg[0].length); -} - -/* - * If reading, bounce the data from the buffer after the request - * has been handled by the host driver - */ -void mmc_queue_bounce_post(struct mmc_queue_req *mqrq) -{ - if (!mqrq->bounce_buf) - return; - - if (rq_data_dir(mmc_queue_req_to_req(mqrq)) != READ) - return; - sg_copy_from_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len, - mqrq->bounce_buf, mqrq->sg[0].length); + return blk_rq_map_sg(mq->queue, req, mqrq->sg); } diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 04fc893..f18d3f6 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -49,9 +49,6 @@ enum mmc_drv_op { struct mmc_queue_req { struct mmc_blk_request brq; struct scatterlist *sg; - char *bounce_buf; - struct scatterlist *bounce_sg; - unsigned int bounce_sg_len; struct mmc_async_req areq; enum mmc_drv_op drv_op; int drv_op_result; @@ -81,11 +78,8 @@ extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); - extern unsigned int mmc_queue_map_sg(struct mmc_queue *, struct mmc_queue_req *); -extern void mmc_queue_bounce_pre(struct mmc_queue_req *); -extern void mmc_queue_bounce_post(struct mmc_queue_req *); extern int mmc_access_rpmb(struct mmc_queue *); diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c index 27fb625..fbd29f0 100644 --- a/drivers/mmc/host/cavium.c +++ b/drivers/mmc/host/cavium.c @@ -1038,7 +1038,7 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host) */ mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | MMC_CAP_ERASE | MMC_CAP_CMD23 | MMC_CAP_POWER_OFF_CARD | - MMC_CAP_3_3V_DDR | MMC_CAP_NO_BOUNCE_BUFF; + MMC_CAP_3_3V_DDR; if (host->use_sg) mmc->max_segs = 16; diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index c885c2d..85745ef 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -531,8 +531,7 @@ static int meson_mmc_clk_init(struct meson_host *host) div->shift = __ffs(CLK_DIV_MASK); div->width = __builtin_popcountl(CLK_DIV_MASK); div->hw.init = &init; - div->flags = (CLK_DIVIDER_ONE_BASED | - CLK_DIVIDER_ROUND_CLOSEST); + div->flags = CLK_DIVIDER_ONE_BASED; clk = devm_clk_register(host->dev, &div->hw); if (WARN_ON(IS_ERR(clk))) @@ -717,6 +716,22 @@ static int meson_mmc_clk_phase_tuning(struct mmc_host *mmc, u32 opcode, static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct meson_host *host = mmc_priv(mmc); + int ret; + + /* + * If this is the initial tuning, try to get a sane Rx starting + * phase before doing the actual tuning. + */ + if (!mmc->doing_retune) { + ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk); + + if (ret) + return ret; + } + + ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->tx_clk); + if (ret) + return ret; return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk); } @@ -746,6 +761,11 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) case MMC_POWER_UP: if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); + + /* Reset phases */ + clk_set_phase(host->rx_clk, 0); + clk_set_phase(host->tx_clk, 270); + break; case MMC_POWER_ON: @@ -759,8 +779,6 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->vqmmc_enabled = true; } - /* Reset rx phase */ - clk_set_phase(host->rx_clk, 0); break; } diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 59ab194..c763b40 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -702,11 +702,7 @@ static int pxamci_probe(struct platform_device *pdev) pxamci_init_ocr(host); - /* - * This architecture used to disable bounce buffers through its - * defconfig, now it is done at runtime as a host property. - */ - mmc->caps = MMC_CAP_NO_BOUNCE_BUFF; + mmc->caps = 0; host->cmdat = 0; if (!cpu_is_pxa25x()) { mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 2eec2e6..0842bbc 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -466,6 +466,7 @@ static int xenon_probe(struct platform_device *pdev) { struct sdhci_pltfm_host *pltfm_host; struct sdhci_host *host; + struct xenon_priv *priv; int err; host = sdhci_pltfm_init(pdev, &sdhci_xenon_pdata, @@ -474,6 +475,7 @@ static int xenon_probe(struct platform_device *pdev) return PTR_ERR(host); pltfm_host = sdhci_priv(host); + priv = sdhci_pltfm_priv(pltfm_host); /* * Link Xenon specific mmc_host_ops function, @@ -491,9 +493,20 @@ static int xenon_probe(struct platform_device *pdev) if (err) goto free_pltfm; + priv->axi_clk = devm_clk_get(&pdev->dev, "axi"); + if (IS_ERR(priv->axi_clk)) { + err = PTR_ERR(priv->axi_clk); + if (err == -EPROBE_DEFER) + goto err_clk; + } else { + err = clk_prepare_enable(priv->axi_clk); + if (err) + goto err_clk; + } + err = mmc_of_parse(host->mmc); if (err) - goto err_clk; + goto err_clk_axi; sdhci_get_of_property(pdev); @@ -502,11 +515,11 @@ static int xenon_probe(struct platform_device *pdev) /* Xenon specific dt parse */ err = xenon_probe_dt(pdev); if (err) - goto err_clk; + goto err_clk_axi; err = xenon_sdhc_prepare(host); if (err) - goto err_clk; + goto err_clk_axi; pm_runtime_get_noresume(&pdev->dev); pm_runtime_set_active(&pdev->dev); @@ -527,6 +540,8 @@ remove_sdhc: pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); xenon_sdhc_unprepare(host); +err_clk_axi: + clk_disable_unprepare(priv->axi_clk); err_clk: clk_disable_unprepare(pltfm_host->clk); free_pltfm: @@ -538,6 +553,7 @@ static int xenon_remove(struct platform_device *pdev) { struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); pm_runtime_get_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); @@ -546,7 +562,7 @@ static int xenon_remove(struct platform_device *pdev) sdhci_remove_host(host, 0); xenon_sdhc_unprepare(host); - + clk_disable_unprepare(priv->axi_clk); clk_disable_unprepare(pltfm_host->clk); sdhci_pltfm_free(pdev); diff --git a/drivers/mmc/host/sdhci-xenon.h b/drivers/mmc/host/sdhci-xenon.h index 2bc0510..9994995 100644 --- a/drivers/mmc/host/sdhci-xenon.h +++ b/drivers/mmc/host/sdhci-xenon.h @@ -83,6 +83,7 @@ struct xenon_priv { unsigned char bus_width; unsigned char timing; unsigned int clock; + struct clk *axi_clk; int phy_type; /* diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 49b80da..805ab45 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -565,8 +565,10 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, return true; default: bpf_warn_invalid_xdp_action(action); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(nic->netdev, prog, action); + /* fall through */ case XDP_DROP: /* Check if it's a recycled page, if not * unmap the DMA mapping. diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 523f9d0..8a32eb7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -175,31 +175,9 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) **/ static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) { -#ifndef CONFIG_SPARC - u32 regval; - u32 i; -#endif s32 ret_val; ret_val = ixgbe_start_hw_generic(hw); - -#ifndef CONFIG_SPARC - /* Disable relaxed ordering */ - for (i = 0; ((i < hw->mac.max_tx_queues) && - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { - regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); - regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval); - } - - for (i = 0; ((i < hw->mac.max_rx_queues) && - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { - regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); - regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | - IXGBE_DCA_RXCTRL_HEAD_WRO_EN); - IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); - } -#endif if (ret_val) return ret_val; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 2c19070..6e6ab6f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -366,25 +366,6 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw) } IXGBE_WRITE_FLUSH(hw); -#ifndef CONFIG_ARCH_WANT_RELAX_ORDER - /* Disable relaxed ordering */ - for (i = 0; i < hw->mac.max_tx_queues; i++) { - u32 regval; - - regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i)); - regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval); - } - - for (i = 0; i < hw->mac.max_rx_queues; i++) { - u32 regval; - - regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); - regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | - IXGBE_DCA_RXCTRL_HEAD_WRO_EN); - IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); - } -#endif return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 72c5657..c3e7a81 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1048,7 +1048,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev, { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_ring *temp_ring; - int i, err = 0; + int i, j, err = 0; u32 new_rx_count, new_tx_count; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) @@ -1085,8 +1085,8 @@ static int ixgbe_set_ringparam(struct net_device *netdev, } /* allocate temporary buffer to store rings in */ - i = max_t(int, adapter->num_tx_queues, adapter->num_rx_queues); - i = max_t(int, i, adapter->num_xdp_queues); + i = max_t(int, adapter->num_tx_queues + adapter->num_xdp_queues, + adapter->num_rx_queues); temp_ring = vmalloc(i * sizeof(struct ixgbe_ring)); if (!temp_ring) { @@ -1118,8 +1118,8 @@ static int ixgbe_set_ringparam(struct net_device *netdev, } } - for (i = 0; i < adapter->num_xdp_queues; i++) { - memcpy(&temp_ring[i], adapter->xdp_ring[i], + for (j = 0; j < adapter->num_xdp_queues; j++, i++) { + memcpy(&temp_ring[i], adapter->xdp_ring[j], sizeof(struct ixgbe_ring)); temp_ring[i].count = new_tx_count; @@ -1139,10 +1139,10 @@ static int ixgbe_set_ringparam(struct net_device *netdev, memcpy(adapter->tx_ring[i], &temp_ring[i], sizeof(struct ixgbe_ring)); } - for (i = 0; i < adapter->num_xdp_queues; i++) { - ixgbe_free_tx_resources(adapter->xdp_ring[i]); + for (j = 0; j < adapter->num_xdp_queues; j++, i++) { + ixgbe_free_tx_resources(adapter->xdp_ring[j]); - memcpy(adapter->xdp_ring[i], &temp_ring[i], + memcpy(adapter->xdp_ring[j], &temp_ring[i], sizeof(struct ixgbe_ring)); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d962368..4d76afd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4881,7 +4881,7 @@ static void ixgbe_clear_udp_tunnel_port(struct ixgbe_adapter *adapter, u32 mask) IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))) return; - vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) && ~mask; + vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) & ~mask; IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, vxlanctrl); if (mask & IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK) @@ -8529,6 +8529,10 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) return ixgbe_ptp_set_ts_config(adapter, req); case SIOCGHWTSTAMP: return ixgbe_ptp_get_ts_config(adapter, req); + case SIOCGMIIPHY: + if (!adapter->hw.phy.ops.read_reg) + return -EOPNOTSUPP; + /* fall through */ default: return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 032089e..c16718d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3505,20 +3505,6 @@ static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib) { - struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; - struct mlxsw_sp_lpm_tree *lpm_tree; - - /* Aggregate prefix lengths across all virtual routers to make - * sure we only have used prefix lengths in the LPM tree. - */ - mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, - fib->proto); - if (IS_ERR(lpm_tree)) - goto err_tree_get; - mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); - -err_tree_get: if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) return; mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index c3f77e3..e365866 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1339,7 +1339,17 @@ ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) static int ppp_dev_init(struct net_device *dev) { + struct ppp *ppp; + netdev_lockdep_set_classes(dev); + + ppp = netdev_priv(dev); + /* Let the netdevice take a reference on the ppp file. This ensures + * that ppp_destroy_interface() won't run before the device gets + * unregistered. + */ + atomic_inc(&ppp->file.refcnt); + return 0; } @@ -1362,6 +1372,15 @@ static void ppp_dev_uninit(struct net_device *dev) wake_up_interruptible(&ppp->file.rwait); } +static void ppp_dev_priv_destructor(struct net_device *dev) +{ + struct ppp *ppp; + + ppp = netdev_priv(dev); + if (atomic_dec_and_test(&ppp->file.refcnt)) + ppp_destroy_interface(ppp); +} + static const struct net_device_ops ppp_netdev_ops = { .ndo_init = ppp_dev_init, .ndo_uninit = ppp_dev_uninit, @@ -1387,6 +1406,7 @@ static void ppp_setup(struct net_device *dev) dev->tx_queue_len = 3; dev->type = ARPHRD_PPP; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->priv_destructor = ppp_dev_priv_destructor; netif_keep_dst(dev); } diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 29c7e2e..52ea80b 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -560,6 +560,7 @@ static const struct driver_info wwan_info = { #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 #define MICROSOFT_VENDOR_ID 0x045e +#define UBLOX_VENDOR_ID 0x1546 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -869,6 +870,18 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&zte_cdc_info, }, { + /* U-blox TOBY-L2 */ + USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1143, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { + /* U-blox SARA-U2 */ + USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1104, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bb2aad0..5a14cc7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2136,7 +2136,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, struct nvme_ns *ns = nvme_get_ns_from_dev(dev); if (a == &dev_attr_uuid.attr) { - if (uuid_is_null(&ns->uuid) || + if (uuid_is_null(&ns->uuid) && !memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) return 0; } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cb73bc8..3f5a04c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -94,7 +94,7 @@ struct nvme_dev { struct mutex shutdown_lock; bool subsystem; void __iomem *cmb; - dma_addr_t cmb_dma_addr; + pci_bus_addr_t cmb_bus_addr; u64 cmb_size; u32 cmbsz; u32 cmbloc; @@ -1226,7 +1226,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth), dev->ctrl.page_size); - nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset; + nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset; nvmeq->sq_cmds_io = dev->cmb + offset; } else { nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), @@ -1527,7 +1527,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) resource_size_t bar_size; struct pci_dev *pdev = to_pci_dev(dev->dev); void __iomem *cmb; - dma_addr_t dma_addr; + int bar; dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!(NVME_CMB_SZ(dev->cmbsz))) @@ -1540,7 +1540,8 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); size = szu * NVME_CMB_SZ(dev->cmbsz); offset = szu * NVME_CMB_OFST(dev->cmbloc); - bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc)); + bar = NVME_CMB_BIR(dev->cmbloc); + bar_size = pci_resource_len(pdev, bar); if (offset > bar_size) return NULL; @@ -1553,12 +1554,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) if (size > bar_size - offset) size = bar_size - offset; - dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset; - cmb = ioremap_wc(dma_addr, size); + cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size); if (!cmb) return NULL; - dev->cmb_dma_addr = dma_addr; + dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset; dev->cmb_size = size; return cmb; } diff --git a/drivers/of/base.c b/drivers/of/base.c index 260d33c..6389753 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1781,8 +1781,12 @@ bool of_console_check(struct device_node *dn, char *name, int index) { if (!dn || dn != of_stdout || console_set_on_cmdline) return false; - return !add_preferred_console(name, index, - kstrdup(of_stdout_options, GFP_KERNEL)); + + /* + * XXX: cast `options' to char pointer to suppress complication + * warnings: printk, UART and console drivers expect char pointer. + */ + return !add_preferred_console(name, index, (char *)of_stdout_options); } EXPORT_SYMBOL_GPL(of_console_check); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index d507c35..32771c2 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -25,7 +25,7 @@ #include <linux/sort.h> #include <linux/slab.h> -#define MAX_RESERVED_REGIONS 16 +#define MAX_RESERVED_REGIONS 32 static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS]; static int reserved_mem_count; diff --git a/drivers/of/property.c b/drivers/of/property.c index fbb7211..264c355 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -954,7 +954,7 @@ of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode) struct device_node *np; /* Get the parent of the port */ - np = of_get_next_parent(to_of_node(fwnode)); + np = of_get_parent(to_of_node(fwnode)); if (!np) return NULL; diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index 89f4e3d..26ed0c0 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -935,6 +935,8 @@ static int advk_pcie_probe(struct platform_device *pdev) bridge->sysdata = pcie; bridge->busnr = 0; bridge->ops = &advk_pcie_ops; + bridge->map_irq = of_irq_parse_and_map_pci; + bridge->swizzle_irq = pci_common_swizzle; ret = pci_scan_root_bus_bridge(bridge); if (ret < 0) { diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 9c40da5..1987fec 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -233,6 +233,7 @@ struct tegra_msi { struct msi_controller chip; DECLARE_BITMAP(used, INT_PCI_MSI_NR); struct irq_domain *domain; + unsigned long pages; struct mutex lock; u64 phys; int irq; @@ -1529,22 +1530,9 @@ static int tegra_pcie_enable_msi(struct tegra_pcie *pcie) goto err; } - /* - * The PCI host bridge on Tegra contains some logic that intercepts - * MSI writes, which means that the MSI target address doesn't have - * to point to actual physical memory. Rather than allocating one 4 - * KiB page of system memory that's never used, we can simply pick - * an arbitrary address within an area reserved for system memory - * in the FPCI address map. - * - * However, in order to avoid confusion, we pick an address that - * doesn't map to physical memory. The FPCI address map reserves a - * 1012 GiB region for system memory and memory-mapped I/O. Since - * none of the Tegra SoCs that contain this PCI host bridge can - * address more than 16 GiB of system memory, the last 4 KiB of - * these 1012 GiB is a good candidate. - */ - msi->phys = 0xfcfffff000; + /* setup AFI/FPCI range */ + msi->pages = __get_free_pages(GFP_KERNEL, 0); + msi->phys = virt_to_phys((void *)msi->pages); afi_writel(pcie, msi->phys >> soc->msi_base_shift, AFI_MSI_FPCI_BAR_ST); afi_writel(pcie, msi->phys, AFI_MSI_AXI_BAR_ST); @@ -1596,6 +1584,8 @@ static int tegra_pcie_disable_msi(struct tegra_pcie *pcie) afi_writel(pcie, 0, AFI_MSI_EN_VEC6); afi_writel(pcie, 0, AFI_MSI_EN_VEC7); + free_pages(msi->pages, 0); + if (msi->irq > 0) free_irq(msi->irq, pcie); diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 1778cf4..82cd8b0 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -100,6 +100,7 @@ config PINCTRL_AMD tristate "AMD GPIO pin control" depends on GPIOLIB select GPIOLIB_IRQCHIP + select PINMUX select PINCONF select GENERIC_PINCONF help diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 0944310..ff78244 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -373,16 +373,12 @@ static void bcm2835_gpio_irq_handle_bank(struct bcm2835_pinctrl *pc, unsigned long events; unsigned offset; unsigned gpio; - unsigned int type; events = bcm2835_gpio_rd(pc, GPEDS0 + bank * 4); events &= mask; events &= pc->enabled_irq_map[bank]; for_each_set_bit(offset, &events, 32) { gpio = (32 * bank) + offset; - /* FIXME: no clue why the code looks up the type here */ - type = pc->irq_type[gpio]; - generic_handle_irq(irq_linear_revmap(pc->gpio_chip.irqdomain, gpio)); } diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 04e929f..fadbca9 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1577,6 +1577,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) struct gpio_chip *chip = &pctrl->chip; bool need_valid_mask = !dmi_check_system(chv_no_valid_mask); int ret, i, offset; + int irq_base; *chip = chv_gpio_chip; @@ -1622,7 +1623,18 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); - ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0, + if (!need_valid_mask) { + irq_base = devm_irq_alloc_descs(pctrl->dev, -1, 0, + chip->ngpio, NUMA_NO_NODE); + if (irq_base < 0) { + dev_err(pctrl->dev, "Failed to allocate IRQ numbers\n"); + return irq_base; + } + } else { + irq_base = 0; + } + + ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, irq_base, handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(pctrl->dev, "failed to add IRQ chip\n"); diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index d0e5d6e..e2c1988 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -523,7 +523,7 @@ int __init parse_cec_param(char *str) if (*str == '=') str++; - if (!strncmp(str, "cec_disable", 7)) + if (!strcmp(str, "cec_disable")) ce_arr.disabled = 1; else return 0; diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index df63e44..bf04479 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -109,6 +109,7 @@ config QCOM_Q6V5_PIL depends on OF && ARCH_QCOM depends on QCOM_SMEM depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n) + depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n select MFD_SYSCON select QCOM_RPROC_COMMON select QCOM_SCM @@ -120,6 +121,7 @@ config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n) + depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SMEM select QCOM_MDT_LOADER select QCOM_RPROC_COMMON diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 612d914..633268e 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -264,15 +264,14 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, if (!(att->flags & ATT_OWN)) continue; - if (b > IMX7D_RPROC_MEM_MAX) + if (b >= IMX7D_RPROC_MEM_MAX) break; priv->mem[b].cpu_addr = devm_ioremap(&pdev->dev, att->sa, att->size); - if (IS_ERR(priv->mem[b].cpu_addr)) { + if (!priv->mem[b].cpu_addr) { dev_err(dev, "devm_ioremap_resource failed\n"); - err = PTR_ERR(priv->mem[b].cpu_addr); - return err; + return -ENOMEM; } priv->mem[b].sys_addr = att->sa; priv->mem[b].size = att->size; @@ -296,7 +295,7 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, return err; } - if (b > IMX7D_RPROC_MEM_MAX) + if (b >= IMX7D_RPROC_MEM_MAX) break; priv->mem[b].cpu_addr = devm_ioremap_resource(&pdev->dev, &res); diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 5a5e927..5dcc9bf 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -635,19 +635,18 @@ qcom_glink_alloc_intent(struct qcom_glink *glink, unsigned long flags; intent = kzalloc(sizeof(*intent), GFP_KERNEL); - if (!intent) return NULL; intent->data = kzalloc(size, GFP_KERNEL); if (!intent->data) - return NULL; + goto free_intent; spin_lock_irqsave(&channel->intent_lock, flags); ret = idr_alloc_cyclic(&channel->liids, intent, 1, -1, GFP_ATOMIC); if (ret < 0) { spin_unlock_irqrestore(&channel->intent_lock, flags); - return NULL; + goto free_data; } spin_unlock_irqrestore(&channel->intent_lock, flags); @@ -656,6 +655,12 @@ qcom_glink_alloc_intent(struct qcom_glink *glink, intent->reuse = reuseable; return intent; + +free_data: + kfree(intent->data); +free_intent: + kfree(intent); + return NULL; } static void qcom_glink_handle_rx_done(struct qcom_glink *glink, @@ -1197,7 +1202,7 @@ static int qcom_glink_request_intent(struct qcom_glink *glink, ret = qcom_glink_tx(glink, &cmd, sizeof(cmd), NULL, 0, true); if (ret) - return ret; + goto unlock; ret = wait_for_completion_timeout(&channel->intent_req_comp, 10 * HZ); if (!ret) { @@ -1207,6 +1212,7 @@ static int qcom_glink_request_intent(struct qcom_glink *glink, ret = channel->intent_req_result ? 0 : -ECANCELED; } +unlock: mutex_unlock(&channel->intent_req_lock); return ret; } diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 785fb42..2799a6b 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3767,7 +3767,7 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd) */ if ((vscsi->flags & (CLIENT_FAILED | RESPONSE_Q_DOWN))) { pr_err("write_pending failed since: %d\n", vscsi->flags); - return 0; + return -EIO; } rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 5203258..31d31aa 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -383,11 +383,11 @@ static void fc_rport_work(struct work_struct *work) fc_rport_enter_flogi(rdata); mutex_unlock(&rdata->rp_mutex); } else { + mutex_unlock(&rdata->rp_mutex); FC_RPORT_DBG(rdata, "work delete\n"); mutex_lock(&lport->disc.disc_mutex); list_del_rcu(&rdata->peers); mutex_unlock(&lport->disc.disc_mutex); - mutex_unlock(&rdata->rp_mutex); kref_put(&rdata->kref, fc_rport_destroy); } } else { diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index bd4605a..f8dc160 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1728,7 +1728,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { reason = FAILURE_SESSION_IN_RECOVERY; - sc->result = DID_REQUEUE; + sc->result = DID_REQUEUE << 16; goto fault; } @@ -2851,9 +2851,6 @@ EXPORT_SYMBOL_GPL(iscsi_session_setup); /** * iscsi_session_teardown - destroy session, host, and cls_session * @cls_session: iscsi session - * - * The driver must have called iscsi_remove_session before - * calling this. */ void iscsi_session_teardown(struct iscsi_cls_session *cls_session) { @@ -2863,6 +2860,8 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session) iscsi_pool_free(&session->cmdpool); + iscsi_remove_session(cls_session); + kfree(session->password); kfree(session->password_in); kfree(session->username); @@ -2877,7 +2876,8 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session) kfree(session->portal_type); kfree(session->discovery_parent_type); - iscsi_destroy_session(cls_session); + iscsi_free_session(cls_session); + iscsi_host_dec_session_cnt(shost); module_put(owner); } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5b2437a..9372098 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3175,6 +3175,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->can_queue, base_vha->req, base_vha->mgmt_svr_loop_id, host->sg_tablesize); + INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); + if (ha->mqenable) { bool mq = false; bool startit = false; @@ -3223,7 +3225,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ qla2xxx_wake_dpc(base_vha); - INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error); if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) { diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index e7818af..15590a0 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -956,6 +956,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (*bflags & BLIST_NO_DIF) sdev->no_dif = 1; + if (*bflags & BLIST_UNMAP_LIMIT_WS) + sdev->unmap_limit_for_ws = 1; + sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; if (*bflags & BLIST_TRY_VPD_PAGES) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index bf53356..f796bd6 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1376,13 +1376,19 @@ static void __scsi_remove_target(struct scsi_target *starget) spin_lock_irqsave(shost->host_lock, flags); restart: list_for_each_entry(sdev, &shost->__devices, siblings) { + /* + * We cannot call scsi_device_get() here, as + * we might've been called from rmmod() causing + * scsi_device_get() to fail the module_is_live() + * check. + */ if (sdev->channel != starget->channel || sdev->id != starget->id || - scsi_device_get(sdev)) + !get_device(&sdev->sdev_gendev)) continue; spin_unlock_irqrestore(shost->host_lock, flags); scsi_remove_device(sdev); - scsi_device_put(sdev); + put_device(&sdev->sdev_gendev); spin_lock_irqsave(shost->host_lock, flags); goto restart; } diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index cbd4495..8c46a6d 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3320,6 +3320,9 @@ int fc_block_scsi_eh(struct scsi_cmnd *cmnd) { struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); + if (WARN_ON_ONCE(!rport)) + return FAST_IO_FAIL; + return fc_block_rport(rport); } EXPORT_SYMBOL(fc_block_scsi_eh); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0190aef..7404d26 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2211,22 +2211,6 @@ void iscsi_free_session(struct iscsi_cls_session *session) EXPORT_SYMBOL_GPL(iscsi_free_session); /** - * iscsi_destroy_session - destroy iscsi session - * @session: iscsi_session - * - * Can be called by a LLD or iscsi_transport. There must not be - * any running connections. - */ -int iscsi_destroy_session(struct iscsi_cls_session *session) -{ - iscsi_remove_session(session); - ISCSI_DBG_TRANS_SESSION(session, "Completing session destruction\n"); - iscsi_free_session(session); - return 0; -} -EXPORT_SYMBOL_GPL(iscsi_destroy_session); - -/** * iscsi_create_conn - create iscsi class connection * @session: iscsi cls session * @dd_size: private driver data size diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fb9f8b5..d175c5c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -715,13 +715,21 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) break; case SD_LBP_WS16: - max_blocks = min_not_zero(sdkp->max_ws_blocks, - (u32)SD_MAX_WS16_BLOCKS); + if (sdkp->device->unmap_limit_for_ws) + max_blocks = sdkp->max_unmap_blocks; + else + max_blocks = sdkp->max_ws_blocks; + + max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS16_BLOCKS); break; case SD_LBP_WS10: - max_blocks = min_not_zero(sdkp->max_ws_blocks, - (u32)SD_MAX_WS10_BLOCKS); + if (sdkp->device->unmap_limit_for_ws) + max_blocks = sdkp->max_unmap_blocks; + else + max_blocks = sdkp->max_ws_blocks; + + max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS10_BLOCKS); break; case SD_LBP_ZERO: @@ -3099,8 +3107,6 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_security(sdkp, buffer); } - sdkp->first_scan = 0; - /* * We now have all cache related info, determine how we deal * with flush requests. @@ -3115,7 +3121,7 @@ static int sd_revalidate_disk(struct gendisk *disk) q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max); /* - * Use the device's preferred I/O size for reads and writes + * Determine the device's preferred I/O size for reads and writes * unless the reported value is unreasonably small, large, or * garbage. */ @@ -3129,8 +3135,19 @@ static int sd_revalidate_disk(struct gendisk *disk) rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), (sector_t)BLK_DEF_MAX_SECTORS); - /* Combine with controller limits */ - q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q)); + /* Do not exceed controller limit */ + rw_max = min(rw_max, queue_max_hw_sectors(q)); + + /* + * Only update max_sectors if previously unset or if the current value + * exceeds the capabilities of the hardware. + */ + if (sdkp->first_scan || + q->limits.max_sectors > q->limits.max_dev_sectors || + q->limits.max_sectors > q->limits.max_hw_sectors) + q->limits.max_sectors = rw_max; + + sdkp->first_scan = 0; set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp); diff --git a/drivers/staging/media/imx/imx-media-dev.c b/drivers/staging/media/imx/imx-media-dev.c index d96f451..b55e5eb 100644 --- a/drivers/staging/media/imx/imx-media-dev.c +++ b/drivers/staging/media/imx/imx-media-dev.c @@ -400,10 +400,10 @@ static int imx_media_create_pad_vdev_lists(struct imx_media_dev *imxmd) struct media_link, list); ret = imx_media_add_vdev_to_pad(imxmd, vdev, link->source); if (ret) - break; + return ret; } - return ret; + return 0; } /* async subdev complete notifier */ diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 2fe216b..84a8ac2 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -694,10 +694,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { - if (!WARN_ON(disc == N_TTY)) { - tty_ldisc_put(tty->ldisc); - tty->ldisc = NULL; - } + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; } return retval; } @@ -752,8 +750,9 @@ void tty_ldisc_hangup(struct tty_struct *tty, bool reinit) if (tty->ldisc) { if (reinit) { - if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0) - tty_ldisc_reinit(tty, N_TTY); + if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0 && + tty_ldisc_reinit(tty, N_TTY) < 0) + WARN_ON(tty_ldisc_reinit(tty, N_NULL) < 0); } else tty_ldisc_kill(tty); } diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index dd74c99..5d061b3 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2026,6 +2026,8 @@ static DEVICE_ATTR_RO(suspended); static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) { struct usb_composite_dev *cdev = get_gadget_data(gadget); + struct usb_gadget_strings *gstr = cdev->driver->strings[0]; + struct usb_string *dev_str = gstr->strings; /* composite_disconnect() must already have been called * by the underlying peripheral controller driver! @@ -2045,6 +2047,9 @@ static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) composite_dev_cleanup(cdev); + if (dev_str[USB_GADGET_MANUFACTURER_IDX].s == cdev->def_manufacturer) + dev_str[USB_GADGET_MANUFACTURER_IDX].s = ""; + kfree(cdev->def_manufacturer); kfree(cdev); set_gadget_data(gadget, NULL); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index a22a892..aeb9f3c 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1143,11 +1143,12 @@ static struct configfs_attribute *interf_grp_attrs[] = { NULL }; -int usb_os_desc_prepare_interf_dir(struct config_group *parent, - int n_interf, - struct usb_os_desc **desc, - char **names, - struct module *owner) +struct config_group *usb_os_desc_prepare_interf_dir( + struct config_group *parent, + int n_interf, + struct usb_os_desc **desc, + char **names, + struct module *owner) { struct config_group *os_desc_group; struct config_item_type *os_desc_type, *interface_type; @@ -1159,7 +1160,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, char *vlabuf = kzalloc(vla_group_size(data_chunk), GFP_KERNEL); if (!vlabuf) - return -ENOMEM; + return ERR_PTR(-ENOMEM); os_desc_group = vla_ptr(vlabuf, data_chunk, os_desc_group); os_desc_type = vla_ptr(vlabuf, data_chunk, os_desc_type); @@ -1184,7 +1185,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, configfs_add_default_group(&d->group, os_desc_group); } - return 0; + return os_desc_group; } EXPORT_SYMBOL(usb_os_desc_prepare_interf_dir); diff --git a/drivers/usb/gadget/configfs.h b/drivers/usb/gadget/configfs.h index 36c468c..540d5e9 100644 --- a/drivers/usb/gadget/configfs.h +++ b/drivers/usb/gadget/configfs.h @@ -5,11 +5,12 @@ void unregister_gadget_item(struct config_item *item); -int usb_os_desc_prepare_interf_dir(struct config_group *parent, - int n_interf, - struct usb_os_desc **desc, - char **names, - struct module *owner); +struct config_group *usb_os_desc_prepare_interf_dir( + struct config_group *parent, + int n_interf, + struct usb_os_desc **desc, + char **names, + struct module *owner); static inline struct usb_os_desc *to_usb_os_desc(struct config_item *item) { diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index e1d5853..c7c5b3c 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -908,6 +908,7 @@ static void rndis_free_inst(struct usb_function_instance *f) free_netdev(opts->net); } + kfree(opts->rndis_interf_group); /* single VLA chunk */ kfree(opts); } @@ -916,6 +917,7 @@ static struct usb_function_instance *rndis_alloc_inst(void) struct f_rndis_opts *opts; struct usb_os_desc *descs[1]; char *names[1]; + struct config_group *rndis_interf_group; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) @@ -940,8 +942,14 @@ static struct usb_function_instance *rndis_alloc_inst(void) names[0] = "rndis"; config_group_init_type_name(&opts->func_inst.group, "", &rndis_func_type); - usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, - names, THIS_MODULE); + rndis_interf_group = + usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, + names, THIS_MODULE); + if (IS_ERR(rndis_interf_group)) { + rndis_free_inst(&opts->func_inst); + return ERR_CAST(rndis_interf_group); + } + opts->rndis_interf_group = rndis_interf_group; return &opts->func_inst; } diff --git a/drivers/usb/gadget/function/u_rndis.h b/drivers/usb/gadget/function/u_rndis.h index a35ee3c..efdb7ac 100644 --- a/drivers/usb/gadget/function/u_rndis.h +++ b/drivers/usb/gadget/function/u_rndis.h @@ -26,6 +26,7 @@ struct f_rndis_opts { bool bound; bool borrowed_net; + struct config_group *rndis_interf_group; struct usb_os_desc rndis_os_desc; char rndis_ext_compat_id[16]; diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index b17618a..f04e91e 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -419,6 +419,7 @@ static void set_link_state_by_speed(struct dummy_hcd *dum_hcd) static void set_link_state(struct dummy_hcd *dum_hcd) { struct dummy *dum = dum_hcd->dum; + unsigned int power_bit; dum_hcd->active = 0; if (dum->pullup) @@ -429,17 +430,19 @@ static void set_link_state(struct dummy_hcd *dum_hcd) return; set_link_state_by_speed(dum_hcd); + power_bit = (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 ? + USB_SS_PORT_STAT_POWER : USB_PORT_STAT_POWER); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) == 0 || dum_hcd->active) dum_hcd->resuming = 0; /* Currently !connected or in reset */ - if ((dum_hcd->port_status & USB_PORT_STAT_CONNECTION) == 0 || + if ((dum_hcd->port_status & power_bit) == 0 || (dum_hcd->port_status & USB_PORT_STAT_RESET) != 0) { - unsigned disconnect = USB_PORT_STAT_CONNECTION & + unsigned int disconnect = power_bit & dum_hcd->old_status & (~dum_hcd->port_status); - unsigned reset = USB_PORT_STAT_RESET & + unsigned int reset = USB_PORT_STAT_RESET & (~dum_hcd->old_status) & dum_hcd->port_status; /* Report reset and disconnect events to the driver */ diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index eee82ca..b3fc602 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -202,12 +202,13 @@ found: return tmp; } - if (in) { + if (in) dev->in_pipe = usb_rcvbulkpipe(udev, in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); + if (out) dev->out_pipe = usb_sndbulkpipe(udev, out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); - } + if (iso_in) { dev->iso_in = &iso_in->desc; dev->in_iso_pipe = usb_rcvisocpipe(udev, @@ -1964,6 +1965,9 @@ test_queue(struct usbtest_dev *dev, struct usbtest_param_32 *param, int status = 0; struct urb *urbs[param->sglen]; + if (!param->sglen || param->iterations > UINT_MAX / param->sglen) + return -EINVAL; + memset(&context, 0, sizeof(context)); context.count = param->iterations * param->sglen; context.dev = dev; @@ -2087,6 +2091,8 @@ usbtest_do_ioctl(struct usb_interface *intf, struct usbtest_param_32 *param) if (param->iterations <= 0) return -EINVAL; + if (param->sglen > MAX_SGLEN) + return -EINVAL; /* * Just a bunch of test cases that every HCD is expected to handle. * diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c index 5fe4a57..ccc2bf5 100644 --- a/drivers/usb/phy/phy-tegra-usb.c +++ b/drivers/usb/phy/phy-tegra-usb.c @@ -329,6 +329,14 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy) unsigned long val; void __iomem *base = phy->regs; + /* + * The USB driver may have already initiated the phy clock + * disable so wait to see if the clock turns off and if not + * then proceed with gating the clock. + */ + if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID, 0) == 0) + return; + if (phy->is_legacy_phy) { val = readl(base + USB_SUSP_CTRL); val |= USB_SUSP_SET; @@ -351,6 +359,15 @@ static void utmi_phy_clk_enable(struct tegra_usb_phy *phy) unsigned long val; void __iomem *base = phy->regs; + /* + * The USB driver may have already initiated the phy clock + * enable so wait to see if the clock turns on and if not + * then proceed with ungating the clock. + */ + if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID, + USB_PHY_CLK_VALID) == 0) + return; + if (phy->is_legacy_phy) { val = readl(base + USB_SUSP_CTRL); val |= USB_SUSP_CLR; diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 68f2690..50285b0 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -857,9 +857,9 @@ static void xfer_work(struct work_struct *work) fifo->name, usbhs_pipe_number(pipe), pkt->length, pkt->zero); usbhs_pipe_running(pipe, 1); - usbhsf_dma_start(pipe, fifo); usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->trans); dma_async_issue_pending(chan); + usbhsf_dma_start(pipe, fifo); usbhs_pipe_enable(pipe); xfer_work_end: diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index fdf8980..43a862a 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -186,6 +186,7 @@ static int usb_console_setup(struct console *co, char *options) tty_kref_put(tty); reset_open_count: port->port.count = 0; + info->port = NULL; usb_autopm_put_interface(serial->interface); error_get_interface: usb_serial_put(serial); @@ -265,7 +266,7 @@ static struct console usbcons = { void usb_serial_console_disconnect(struct usb_serial *serial) { - if (serial->port[0] == usbcons_info.port) { + if (serial->port[0] && serial->port[0] == usbcons_info.port) { usb_serial_console_exit(); usb_serial_put(serial); } diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 2d945c9..412f812 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -177,6 +177,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ @@ -352,6 +353,7 @@ static struct usb_serial_driver * const serial_drivers[] = { #define CP210X_PARTNUM_CP2104 0x04 #define CP210X_PARTNUM_CP2105 0x05 #define CP210X_PARTNUM_CP2108 0x08 +#define CP210X_PARTNUM_UNKNOWN 0xFF /* CP210X_GET_COMM_STATUS returns these 0x13 bytes */ struct cp210x_comm_status { @@ -1491,8 +1493,11 @@ static int cp210x_attach(struct usb_serial *serial) result = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, CP210X_GET_PARTNUM, &priv->partnum, sizeof(priv->partnum)); - if (result < 0) - goto err_free_priv; + if (result < 0) { + dev_warn(&serial->interface->dev, + "querying part number failed\n"); + priv->partnum = CP210X_PARTNUM_UNKNOWN; + } usb_set_serial_data(serial, priv); @@ -1505,10 +1510,6 @@ static int cp210x_attach(struct usb_serial *serial) } return 0; -err_free_priv: - kfree(priv); - - return result; } static void cp210x_disconnect(struct usb_serial *serial) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1cec037..49d1b2d 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1015,6 +1015,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, + { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 4fcf1ce..f9d15bd 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -610,6 +610,13 @@ #define ADI_GNICEPLUS_PID 0xF001 /* + * Cypress WICED USB UART + */ +#define CYPRESS_VID 0x04B4 +#define CYPRESS_WICED_BT_USB_PID 0x009B +#define CYPRESS_WICED_WL_USB_PID 0xF900 + +/* * Microchip Technology, Inc. * * MICROCHIP_VID (0x04D8) and MICROCHIP_USB_BOARD_PID (0x000A) are diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 54bfef1..ba672cf 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -522,6 +522,7 @@ static void option_instat_callback(struct urb *urb); /* TP-LINK Incorporated products */ #define TPLINK_VENDOR_ID 0x2357 +#define TPLINK_PRODUCT_LTE 0x000D #define TPLINK_PRODUCT_MA180 0x0201 /* Changhong products */ @@ -2011,6 +2012,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) }, { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, + { USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, TPLINK_PRODUCT_LTE, 0xff, 0x00, 0x00) }, /* TP-Link LTE Module */ { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index ebc0bee..eb99289 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -174,6 +174,10 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */ {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */ + {DEVICE_SWI(0x413c, 0x81cf)}, /* Dell Wireless 5819 */ + {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ + {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ + {DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index adaf6f6..e1cbdfd 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -310,9 +310,13 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping, p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); - if (unlikely(copied < len && !PageUptodate(page))) { - copied = 0; - goto out; + if (!PageUptodate(page)) { + if (unlikely(copied < len)) { + copied = 0; + goto out; + } else if (len == PAGE_SIZE) { + SetPageUptodate(page); + } } /* * No need to use i_size_read() here, the i_size diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 2a46762..a7c5a98 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -596,7 +596,7 @@ static void bm_evict_inode(struct inode *inode) { Node *e = inode->i_private; - if (e->flags & MISC_FMT_OPEN_FILE) + if (e && e->flags & MISC_FMT_OPEN_FILE) filp_close(e->interp_file, NULL); clear_inode(inode); diff --git a/fs/block_dev.c b/fs/block_dev.c index 93d088f..789f55e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -716,10 +716,12 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); - if (result) + if (result) { end_page_writeback(page); - else + } else { + clean_page_buffers(page); unlock_page(page); + } blk_queue_exit(bdev->bd_queue); return result; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 899ddae..8fc6903 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -722,7 +722,7 @@ struct btrfs_delayed_root; * Indicate that a whole-filesystem exclusive operation is running * (device replace, resize, device add/delete, balance) */ -#define BTRFS_FS_EXCL_OP 14 +#define BTRFS_FS_EXCL_OP 16 struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 12ab19a..970190c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2801,7 +2801,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, } } - bio = btrfs_bio_alloc(bdev, sector << 9); + bio = btrfs_bio_alloc(bdev, (u64)sector << 9); bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 84edfc6..f23c820 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -734,12 +734,13 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode = req->r_inode; ihold(inode); } else { - /* req->r_dentry is non-null for LSSNAP request. - * fall-thru */ - WARN_ON_ONCE(!req->r_dentry); + /* req->r_dentry is non-null for LSSNAP request */ + rcu_read_lock(); + inode = get_nonsnap_parent(req->r_dentry); + rcu_read_unlock(); + dout("__choose_mds using snapdir's parent %p\n", inode); } - } - if (!inode && req->r_dentry) { + } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ struct dentry *parent; struct inode *dir; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 1ffc8b4..7fc0b85 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -374,12 +374,10 @@ static int build_snap_context(struct ceph_snap_realm *realm, realm->ino, realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); - if (realm->cached_context) { - ceph_put_snap_context(realm->cached_context); - /* queue realm for cap_snap creation */ - list_add_tail(&realm->dirty_item, dirty_realms); - } + ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; + /* queue realm for cap_snap creation */ + list_add_tail(&realm->dirty_item, dirty_realms); return 0; fail: diff --git a/fs/direct-io.c b/fs/direct-io.c index 62cf812..b53e66d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -45,6 +45,12 @@ #define DIO_PAGES 64 /* + * Flags for dio_complete() + */ +#define DIO_COMPLETE_ASYNC 0x01 /* This is async IO */ +#define DIO_COMPLETE_INVALIDATE 0x02 /* Can invalidate pages */ + +/* * This code generally works in units of "dio_blocks". A dio_block is * somewhere between the hard sector size and the filesystem block size. it * is determined on a per-invocation basis. When talking to the filesystem @@ -225,7 +231,7 @@ static inline struct page *dio_get_page(struct dio *dio, * filesystems can use it to hold additional state between get_block calls and * dio_complete. */ -static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) +static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) { loff_t offset = dio->iocb->ki_pos; ssize_t transferred = 0; @@ -259,14 +265,27 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if (ret == 0) ret = transferred; + if (dio->end_io) { + // XXX: ki_pos?? + err = dio->end_io(dio->iocb, offset, ret, dio->private); + if (err) + ret = err; + } + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source * of the write was an mmap'ed region of the file we're writing. Either * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. */ - if (ret > 0 && dio->op == REQ_OP_WRITE && + if (flags & DIO_COMPLETE_INVALIDATE && + ret > 0 && dio->op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages) { err = invalidate_inode_pages2_range(dio->inode->i_mapping, offset >> PAGE_SHIFT, @@ -274,18 +293,10 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) WARN_ON_ONCE(err); } - if (dio->end_io) { - - // XXX: ki_pos?? - err = dio->end_io(dio->iocb, offset, ret, dio->private); - if (err) - ret = err; - } - if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); - if (is_async) { + if (flags & DIO_COMPLETE_ASYNC) { /* * generic_write_sync expects ki_pos to have been updated * already, but the submission path only does this for @@ -306,7 +317,7 @@ static void dio_aio_complete_work(struct work_struct *work) { struct dio *dio = container_of(work, struct dio, complete_work); - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE); } static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); @@ -348,7 +359,7 @@ static void dio_bio_end_aio(struct bio *bio) queue_work(dio->inode->i_sb->s_dio_done_wq, &dio->complete_work); } else { - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC); } } } @@ -866,7 +877,8 @@ out: */ if (sdio->boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); - dio_bio_submit(dio, sdio); + if (sdio->bio) + dio_bio_submit(dio, sdio); put_page(sdio->cur_page); sdio->cur_page = NULL; } @@ -1359,7 +1371,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio_await_completion(dio); if (drop_refcount(dio) == 0) { - retval = dio_complete(dio, retval, false); + retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE); } else BUG_ON(retval != -EIOCBQUEUED); @@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; + membarrier_execve(current); acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9a7c903..4b4a72f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2525,7 +2525,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 621b9b3..c695ff4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1210,11 +1210,11 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) { __issue_discard_cmd(sbi, false); __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, false); + __wait_discard_cmd(sbi, !umount); } static void mark_discard_range_all(struct f2fs_sb_info *sbi) @@ -2244,7 +2244,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) } /* It's time to issue all the filed discards */ mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi); + f2fs_wait_discard_bios(sbi, false); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 89f61eb..933c3d5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -801,7 +801,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bios(sbi); + f2fs_wait_discard_bios(sbi, true); if (f2fs_discard_en(sbi) && !sbi->discard_blks) { struct cp_control cpc = { @@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) { struct kiocb *iocb = dio->iocb; struct inode *inode = file_inode(iocb->ki_filp); + loff_t offset = iocb->ki_pos; ssize_t ret; - /* - * Try again to invalidate clean pages which might have been cached by - * non-direct readahead, or faulted in by get_user_pages() if the source - * of the write was an mmap'ed region of the file we're writing. Either - * one is a pretty crazy thing to do, so we don't support it 100%. If - * this invalidation fails, tough, the write still worked... - */ - if (!dio->error && - (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { - ret = invalidate_inode_pages2_range(inode->i_mapping, - iocb->ki_pos >> PAGE_SHIFT, - (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - if (dio->end_io) { ret = dio->end_io(iocb, dio->error ? dio->error : dio->size, @@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) if (likely(!ret)) { ret = dio->size; /* check for short read */ - if (iocb->ki_pos + ret > dio->i_size && + if (offset + ret > dio->i_size && !(dio->flags & IOMAP_DIO_WRITE)) - ret = dio->i_size - iocb->ki_pos; + ret = dio->i_size - offset; iocb->ki_pos += ret; } + /* + * Try again to invalidate clean pages which might have been cached by + * non-direct readahead, or faulted in by get_user_pages() if the source + * of the write was an mmap'ed region of the file we're writing. Either + * one is a pretty crazy thing to do, so we don't support it 100%. If + * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. + */ + if (!dio->error && + (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { + int err; + err = invalidate_inode_pages2_range(inode->i_mapping, + offset >> PAGE_SHIFT, + (offset + dio->size - 1) >> PAGE_SHIFT); + WARN_ON_ONCE(err); + } + inode_dio_end(file_inode(iocb->ki_filp)); kfree(dio); @@ -468,6 +468,16 @@ static void clean_buffers(struct page *page, unsigned first_unmapped) try_to_free_buffers(page); } +/* + * For situations where we want to clean all buffers attached to a page. + * We don't need to calculate how many buffers are attached to the page, + * we just need to specify a number larger than the maximum number of buffers. + */ +void clean_page_buffers(struct page *page) +{ + clean_buffers(page, ~0U); +} + static int __mpage_writepage(struct page *page, struct writeback_control *wbc, void *data) { @@ -605,10 +615,8 @@ alloc_new: if (bio == NULL) { if (first_unmapped == blocks_per_page) { if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9), - page, wbc)) { - clean_buffers(page, first_unmapped); + page, wbc)) goto out; - } } bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH); diff --git a/fs/namespace.c b/fs/namespace.c index 54059b1..3b601f1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -468,7 +468,9 @@ static inline int may_write_real(struct file *file) /* File refers to upper, writable layer? */ upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); - if (upperdentry && file_inode(file) == d_inode(upperdentry)) + if (upperdentry && + (file_inode(file) == d_inode(upperdentry) || + file_inode(file) == d_inode(dentry))) return 0; /* Lower layer: can't write to real file, sorry... */ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index efebe6c..22880ef 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -218,7 +218,6 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) static void pnfs_init_server(struct nfs_server *server) { rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); - rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC"); } #else @@ -888,6 +887,7 @@ struct nfs_server *nfs_alloc_server(void) ida_init(&server->openowner_id); ida_init(&server->lockowner_id); pnfs_init_server(server); + rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC"); return server; } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 44c638b..508126e 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -745,7 +745,8 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); dprintk("--> %s\n", __func__); - nfs4_fl_put_deviceid(fl->dsaddr); + if (fl->dsaddr != NULL) + nfs4_fl_put_deviceid(fl->dsaddr); /* This assumes a single RW lseg */ if (lseg->pls_range.iomode == IOMODE_RW) { struct nfs4_filelayout *flo; diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index dd5d27d..30426c1 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -274,7 +274,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, ssize_t ret; ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); - if (ret <= 0) + if (ret < 0) return ERR_PTR(ret); rkey = request_key(&key_type_id_resolver, desc, ""); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6c61e2b..f90090e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8399,8 +8399,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, lo = NFS_I(inode)->layout; /* If the open stateid was bad, then recover it. */ if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || - nfs4_stateid_match_other(&lgp->args.stateid, - &lgp->args.ctx->state->stateid)) { + !nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) { spin_unlock(&inode->i_lock); exception->state = lgp->args.ctx->state; exception->stateid = &lgp->args.stateid; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 37c8af0..14ed979 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1842,8 +1842,8 @@ static void encode_create_session(struct xdr_stream *xdr, * Assumes OPEN is the biggest non-idempotent compound. * 2 is the verifier. */ - max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + - RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT; + max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + 2) + * XDR_UNIT + RPC_MAX_AUTH_SIZE; encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); p = reserve_space(xdr, 16 + 2*28 + 20 + clnt->cl_nodelen + 12); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3c69db7..8487486 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -927,6 +927,13 @@ nfsd4_secinfo_release(union nfsd4_op_u *u) exp_put(u->secinfo.si_exp); } +static void +nfsd4_secinfo_no_name_release(union nfsd4_op_u *u) +{ + if (u->secinfo_no_name.sin_exp) + exp_put(u->secinfo_no_name.sin_exp); +} + static __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -2375,7 +2382,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = nfsd4_secinfo_no_name, - .op_release = nfsd4_secinfo_release, + .op_release = nfsd4_secinfo_no_name_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", .op_rsize_bop = nfsd4_secinfo_rsize, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index aad97b3..c441f93 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -561,10 +561,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) c->tmpfile = true; err = ovl_copy_up_locked(c); } else { - err = -EIO; - if (lock_rename(c->workdir, c->destdir) != NULL) { - pr_err("overlayfs: failed to lock workdir+upperdir\n"); - } else { + err = ovl_lock_rename_workdir(c->workdir, c->destdir); + if (!err) { err = ovl_copy_up_locked(c); unlock_rename(c->workdir, c->destdir); } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 3309b19..cc961a3 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -216,26 +216,6 @@ out_unlock: return err; } -static int ovl_lock_rename_workdir(struct dentry *workdir, - struct dentry *upperdir) -{ - /* Workdir should not be the same as upperdir */ - if (workdir == upperdir) - goto err; - - /* Workdir should not be subdir of upperdir and vice versa */ - if (lock_rename(workdir, upperdir) != NULL) - goto err_unlock; - - return 0; - -err_unlock: - unlock_rename(workdir, upperdir); -err: - pr_err("overlayfs: failed to lock workdir+upperdir\n"); - return -EIO; -} - static struct dentry *ovl_clear_empty(struct dentry *dentry, struct list_head *list) { diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index c3addd1..654bea1 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -506,6 +506,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); if (IS_ERR(index)) { + err = PTR_ERR(index); pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d4e8c1a..c706a6f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -235,6 +235,7 @@ bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry, bool *locked); void ovl_nlink_end(struct dentry *dentry, bool locked); +int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); static inline bool ovl_is_impuredir(struct dentry *dentry) { diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 878a750..25d9b5a 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -37,6 +37,9 @@ struct ovl_fs { bool noxattr; /* sb common to all layers */ struct super_block *same_sb; + /* Did we take the inuse lock? */ + bool upperdir_locked; + bool workdir_locked; }; /* private information held for every overlayfs dentry */ diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 62e9b22..0f85ee9 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -988,6 +988,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, struct path *lowerstack, unsigned int numlower) { int err; + struct dentry *index = NULL; struct inode *dir = dentry->d_inode; struct path path = { .mnt = mnt, .dentry = dentry }; LIST_HEAD(list); @@ -1007,8 +1008,6 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, inode_lock_nested(dir, I_MUTEX_PARENT); list_for_each_entry(p, &list, l_node) { - struct dentry *index; - if (p->name[0] == '.') { if (p->len == 1) continue; @@ -1018,6 +1017,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, index = lookup_one_len(p->name, dentry, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); + index = NULL; break; } err = ovl_verify_index(index, lowerstack, numlower); @@ -1029,7 +1029,9 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, break; } dput(index); + index = NULL; } + dput(index); inode_unlock(dir); out: ovl_cache_free(&list); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index fd5ea4f..092d150 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -211,9 +211,10 @@ static void ovl_put_super(struct super_block *sb) dput(ufs->indexdir); dput(ufs->workdir); - ovl_inuse_unlock(ufs->workbasedir); + if (ufs->workdir_locked) + ovl_inuse_unlock(ufs->workbasedir); dput(ufs->workbasedir); - if (ufs->upper_mnt) + if (ufs->upper_mnt && ufs->upperdir_locked) ovl_inuse_unlock(ufs->upper_mnt->mnt_root); mntput(ufs->upper_mnt); for (i = 0; i < ufs->numlower; i++) @@ -881,9 +882,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_put_upperpath; err = -EBUSY; - if (!ovl_inuse_trylock(upperpath.dentry)) { - pr_err("overlayfs: upperdir is in-use by another mount\n"); + if (ovl_inuse_trylock(upperpath.dentry)) { + ufs->upperdir_locked = true; + } else if (ufs->config.index) { + pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n"); goto out_put_upperpath; + } else { + pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); } err = ovl_mount_dir(ufs->config.workdir, &workpath); @@ -901,9 +906,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } err = -EBUSY; - if (!ovl_inuse_trylock(workpath.dentry)) { - pr_err("overlayfs: workdir is in-use by another mount\n"); + if (ovl_inuse_trylock(workpath.dentry)) { + ufs->workdir_locked = true; + } else if (ufs->config.index) { + pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n"); goto out_put_workpath; + } else { + pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); } ufs->workbasedir = workpath.dentry; @@ -1156,11 +1165,13 @@ out_put_lowerpath: out_free_lowertmp: kfree(lowertmp); out_unlock_workdentry: - ovl_inuse_unlock(workpath.dentry); + if (ufs->workdir_locked) + ovl_inuse_unlock(workpath.dentry); out_put_workpath: path_put(&workpath); out_unlock_upperdentry: - ovl_inuse_unlock(upperpath.dentry); + if (ufs->upperdir_locked) + ovl_inuse_unlock(upperpath.dentry); out_put_upperpath: path_put(&upperpath); out_free_config: diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 1177945..b9b239f 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -430,7 +430,7 @@ void ovl_inuse_unlock(struct dentry *dentry) } } -/* Called must hold OVL_I(inode)->oi_lock */ +/* Caller must hold OVL_I(inode)->lock */ static void ovl_cleanup_index(struct dentry *dentry) { struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; @@ -469,6 +469,9 @@ static void ovl_cleanup_index(struct dentry *dentry) err = PTR_ERR(index); if (!IS_ERR(index)) err = ovl_cleanup(dir, index); + else + index = NULL; + inode_unlock(dir); if (err) goto fail; @@ -557,3 +560,22 @@ void ovl_nlink_end(struct dentry *dentry, bool locked) mutex_unlock(&OVL_I(d_inode(dentry))->lock); } } + +int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir) +{ + /* Workdir should not be the same as upperdir */ + if (workdir == upperdir) + goto err; + + /* Workdir should not be subdir of upperdir and vice versa */ + if (lock_rename(workdir, upperdir) != NULL) + goto err_unlock; + + return 0; + +err_unlock: + unlock_rename(workdir, upperdir); +err: + pr_err("overlayfs: failed to lock workdir+upperdir\n"); + return -EIO; +} diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 50b0556..52ad151 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1297,21 +1297,18 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, spin_lock(&dquot->dq_dqb_lock); if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - goto add; + goto finish; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace + space + rsv_space; - if (flags & DQUOT_SPACE_NOFAIL) - goto add; - if (dquot->dq_dqb.dqb_bhardlimit && tspace > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); ret = -EDQUOT; - goto out; + goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1322,7 +1319,7 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); ret = -EDQUOT; - goto out; + goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1338,13 +1335,21 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, * be always printed */ ret = -EDQUOT; - goto out; + goto finish; } } -add: - dquot->dq_dqb.dqb_rsvspace += rsv_space; - dquot->dq_dqb.dqb_curspace += space; -out: +finish: + /* + * We have to be careful and go through warning generation & grace time + * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it + * only here... + */ + if (flags & DQUOT_SPACE_NOFAIL) + ret = 0; + if (!ret) { + dquot->dq_dqb.dqb_rsvspace += rsv_space; + dquot->dq_dqb.dqb_curspace += space; + } spin_unlock(&dquot->dq_dqb_lock); return ret; } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 744dcae..f965ce8 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1584,6 +1584,10 @@ xfs_alloc_ag_vextent_small( bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto error0; + } xfs_trans_binval(args->tp, bp); } args->len = 1; @@ -2141,6 +2145,10 @@ xfs_alloc_fix_freelist( if (error) goto out_agbp_relse; bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto out_agbp_relse; + } xfs_trans_binval(tp, bp); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 044a363..8926379 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1477,14 +1477,14 @@ xfs_bmap_isaeof( int is_empty; int error; - bma->aeof = 0; + bma->aeof = false; error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, &is_empty); if (error) return error; if (is_empty) { - bma->aeof = 1; + bma->aeof = true; return 0; } @@ -3852,6 +3852,17 @@ xfs_trim_extent( } } +/* trim extent to within eof */ +void +xfs_trim_extent_eof( + struct xfs_bmbt_irec *irec, + struct xfs_inode *ip) + +{ + xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, + i_size_read(VFS_I(ip)))); +} + /* * Trim the returned map to the required bounds */ diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 851982a..502e0d8 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); +void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 988bb3f..dfd6439 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1962,7 +1962,7 @@ xfs_difree_inobt( if (!(mp->m_flags & XFS_MOUNT_IKEEP) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { - xic->deleted = 1; + xic->deleted = true; xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); xic->alloc = xfs_inobt_irec_to_allocmask(&rec); @@ -1989,7 +1989,7 @@ xfs_difree_inobt( xfs_difree_inode_chunk(mp, agno, &rec, dfops); } else { - xic->deleted = 0; + xic->deleted = false; error = xfs_inobt_update(cur, &rec); if (error) { diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 8372e9b..71de185 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -270,6 +270,7 @@ typedef struct xfs_inode_log_format { uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ + uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ @@ -280,29 +281,17 @@ typedef struct xfs_inode_log_format { int32_t ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_t; -typedef struct xfs_inode_log_format_32 { - uint16_t ilf_type; /* inode log item type */ - uint16_t ilf_size; /* size of this item */ - uint32_t ilf_fields; /* flags for fields logged */ - uint16_t ilf_asize; /* size of attr d/ext/root */ - uint16_t ilf_dsize; /* size of data/ext/root */ - uint64_t ilf_ino; /* inode number */ - union { - uint32_t ilfu_rdev; /* rdev value for dev inode*/ - uuid_t ilfu_uuid; /* mount point value */ - } ilf_u; - int64_t ilf_blkno; /* blkno of inode buffer */ - int32_t ilf_len; /* len of inode buffer */ - int32_t ilf_boffset; /* off of inode in buffer */ -} __attribute__((packed)) xfs_inode_log_format_32_t; - -typedef struct xfs_inode_log_format_64 { +/* + * Old 32 bit systems will log in this format without the 64 bit + * alignment padding. Recovery will detect this and convert it to the + * correct format. + */ +struct xfs_inode_log_format_32 { uint16_t ilf_type; /* inode log item type */ uint16_t ilf_size; /* size of this item */ uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ - uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ @@ -311,7 +300,7 @@ typedef struct xfs_inode_log_format_64 { int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ int32_t ilf_boffset; /* off of inode in buffer */ -} xfs_inode_log_format_64_t; +} __attribute__((packed)); /* diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 7034e17..3354140 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -247,6 +247,8 @@ xfs_set_mode(struct inode *inode, umode_t mode) int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + umode_t mode; + bool set_mode = false; int error = 0; if (!acl) @@ -257,16 +259,24 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return error; if (type == ACL_TYPE_ACCESS) { - umode_t mode; - error = posix_acl_update_mode(inode, &mode, &acl); if (error) return error; - error = xfs_set_mode(inode, mode); - if (error) - return error; + set_mode = true; } set_acl: - return __xfs_set_acl(inode, acl, type); + error = __xfs_set_acl(inode, acl, type); + if (error) + return error; + + /* + * We set the mode after successfully updating the ACL xattr because the + * xattr update can fail at ENOSPC and we don't want to change the mode + * if the ACL update hasn't been applied. + */ + if (set_mode) + error = xfs_set_mode(inode, mode); + + return error; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f18e593..a3eeaba 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -446,6 +446,19 @@ xfs_imap_valid( { offset >>= inode->i_blkbits; + /* + * We have to make sure the cached mapping is within EOF to protect + * against eofblocks trimming on file release leaving us with a stale + * mapping. Otherwise, a page for a subsequent file extending buffered + * write could get picked up by this writeback cycle and written to the + * wrong blocks. + * + * Note that what we really want here is a generic mapping invalidation + * mechanism to protect us from arbitrary extent modifying contexts, not + * just eofblocks. + */ + xfs_trim_extent_eof(imap, XFS_I(inode)); + return offset >= imap->br_startoff && offset < imap->br_startoff + imap->br_blockcount; } @@ -735,6 +748,14 @@ xfs_vm_invalidatepage( { trace_xfs_invalidatepage(page->mapping->host, page, offset, length); + + /* + * If we are invalidating the entire page, clear the dirty state from it + * so that we can check for attempts to release dirty cached pages in + * xfs_vm_releasepage(). + */ + if (offset == 0 && length >= PAGE_SIZE) + cancel_dirty_page(page); block_invalidatepage(page, offset, length); } @@ -1190,25 +1211,27 @@ xfs_vm_releasepage( * mm accommodates an old ext3 case where clean pages might not have had * the dirty bit cleared. Thus, it can send actual dirty pages to * ->releasepage() via shrink_active_list(). Conversely, - * block_invalidatepage() can send pages that are still marked dirty - * but otherwise have invalidated buffers. + * block_invalidatepage() can send pages that are still marked dirty but + * otherwise have invalidated buffers. * * We want to release the latter to avoid unnecessary buildup of the - * LRU, skip the former and warn if we've left any lingering - * delalloc/unwritten buffers on clean pages. Skip pages with delalloc - * or unwritten buffers and warn if the page is not dirty. Otherwise - * try to release the buffers. + * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages + * that are entirely invalidated and need to be released. Hence the + * only time we should get dirty pages here is through + * shrink_active_list() and so we can simply skip those now. + * + * warn if we've left any lingering delalloc/unwritten buffers on clean + * or invalidated pages we are about to release. */ + if (PageDirty(page)) + return 0; + xfs_count_page_state(page, &delalloc, &unwritten); - if (delalloc) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(delalloc)) return 0; - } - if (unwritten) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(unwritten)) return 0; - } return try_to_free_buffers(page); } diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index ebd66b1..e3a950e 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -302,6 +302,8 @@ xfs_attr3_node_inactive( &bp, XFS_ATTR_FORK); if (error) return error; + node = bp->b_addr; + btree = dp->d_ops->node_tree_p(node); child_fsb = be32_to_cpu(btree[i + 1].before); xfs_trans_brelse(*trans, bp); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index bc6c6e1..6503cfa 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -84,6 +84,7 @@ xfs_zero_extent( GFP_NOFS, 0); } +#ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ @@ -190,6 +191,7 @@ xfs_bmap_rtalloc( } return 0; } +#endif /* CONFIG_XFS_RT */ /* * Check if the endoff is outside the last extent. If so the caller will grow @@ -2122,11 +2124,31 @@ xfs_swap_extents( ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; + } + + /* Swap the cow forks. */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + xfs_extnum_t extnum; + + ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS); + ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS); + + extnum = ip->i_cnextents; + ip->i_cnextents = tip->i_cnextents; + tip->i_cnextents = extnum; + cowfp = ip->i_cowfp; ip->i_cowfp = tip->i_cowfp; tip->i_cowfp = cowfp; - xfs_inode_set_cowblocks_tag(ip); - xfs_inode_set_cowblocks_tag(tip); + + if (ip->i_cowfp && ip->i_cnextents) + xfs_inode_set_cowblocks_tag(ip); + else + xfs_inode_clear_cowblocks_tag(ip); + if (tip->i_cowfp && tip->i_cnextents) + xfs_inode_set_cowblocks_tag(tip); + else + xfs_inode_clear_cowblocks_tag(tip); } xfs_trans_log_inode(tp, ip, src_log_flags); diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 0eaa81d..7d330b3 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -28,7 +28,20 @@ struct xfs_mount; struct xfs_trans; struct xfs_bmalloca; +#ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); +#else /* !CONFIG_XFS_RT */ +/* + * Attempts to allocate RT extents when RT is disable indicates corruption and + * should trigger a shutdown. + */ +static inline int +xfs_bmap_rtalloc(struct xfs_bmalloca *ap) +{ + return -EFSCORRUPTED; +} +#endif /* CONFIG_XFS_RT */ + int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 309e26c..56d0e52 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -764,7 +764,7 @@ xfs_file_fallocate( enum xfs_prealloc_flags flags = 0; uint iolock = XFS_IOLOCK_EXCL; loff_t new_size = 0; - bool do_file_insert = 0; + bool do_file_insert = false; if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -825,7 +825,7 @@ xfs_file_fallocate( error = -EINVAL; goto out_unlock; } - do_file_insert = 1; + do_file_insert = true; } else { flags |= XFS_PREALLOC_SET; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 814ed729..43cfc07 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper( return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); } -/* Transform a rtbitmap "record" into a fsmap */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap_helper( - struct xfs_trans *tp, - struct xfs_rtalloc_rec *rec, - void *priv) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_getfsmap_info *info = priv; - struct xfs_rmap_irec irec; - xfs_daddr_t rec_daddr; - - rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); - - irec.rm_startblock = rec->ar_startblock; - irec.rm_blockcount = rec->ar_blockcount; - irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ - irec.rm_offset = 0; - irec.rm_flags = 0; - - return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); -} - /* Transform a bnobt irec into a fsmap */ STATIC int xfs_getfsmap_datadev_bnobt_helper( @@ -475,6 +452,30 @@ xfs_getfsmap_logdev( return xfs_getfsmap_helper(tp, info, &rmap, 0); } +#ifdef CONFIG_XFS_RT +/* Transform a rtbitmap "record" into a fsmap */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_helper( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_daddr_t rec_daddr; + + rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); + + irec.rm_startblock = rec->ar_startblock; + irec.rm_blockcount = rec->ar_blockcount; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); +} + /* Execute a getfsmap query against the realtime device. */ STATIC int __xfs_getfsmap_rtdev( @@ -561,6 +562,7 @@ xfs_getfsmap_rtdev_rtbitmap( return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query, info); } +#endif /* CONFIG_XFS_RT */ /* Execute a getfsmap query against the regular data device. */ STATIC int @@ -795,7 +797,15 @@ xfs_getfsmap_check_keys( return false; } +/* + * There are only two devices if we didn't configure RT devices at build time. + */ +#ifdef CONFIG_XFS_RT #define XFS_GETFSMAP_DEVS 3 +#else +#define XFS_GETFSMAP_DEVS 2 +#endif /* CONFIG_XFS_RT */ + /* * Get filesystem's extents as described in head, and format for * output. Calls formatter to fill the user's buffer until all @@ -853,10 +863,12 @@ xfs_getfsmap( handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); handlers[1].fn = xfs_getfsmap_logdev; } +#ifdef CONFIG_XFS_RT if (mp->m_rtdev_targp) { handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; } +#endif /* CONFIG_XFS_RT */ xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev), xfs_getfsmap_dev_compare); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index a705f34..9bbc2d7 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -364,6 +364,9 @@ xfs_inode_to_log_dinode( to->di_dmstate = from->di_dmstate; to->di_flags = from->di_flags; + /* log a dummy value to ensure log structure is fully initialised */ + to->di_next_unlinked = NULLAGINO; + if (from->di_version == 3) { to->di_changecount = inode->i_version; to->di_crtime.t_sec = from->di_crtime.t_sec; @@ -404,6 +407,11 @@ xfs_inode_item_format_core( * the second with the on-disk inode structure, and a possible third and/or * fourth with the inode data/extents/b-tree root and inode attributes * data/extents/b-tree root. + * + * Note: Always use the 64 bit inode log format structure so we don't + * leave an uninitialised hole in the format item on 64 bit systems. Log + * recovery on 32 bit systems handles this just fine, so there's no reason + * for not using an initialising the properly padded structure all the time. */ STATIC void xfs_inode_item_format( @@ -412,8 +420,8 @@ xfs_inode_item_format( { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - struct xfs_inode_log_format *ilf; struct xfs_log_iovec *vecp = NULL; + struct xfs_inode_log_format *ilf; ASSERT(ip->i_d.di_version > 1); @@ -425,7 +433,17 @@ xfs_inode_item_format( ilf->ilf_boffset = ip->i_imap.im_boffset; ilf->ilf_fields = XFS_ILOG_CORE; ilf->ilf_size = 2; /* format + core */ - xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); + + /* + * make sure we don't leak uninitialised data into the log in the case + * when we don't log every field in the inode. + */ + ilf->ilf_dsize = 0; + ilf->ilf_asize = 0; + ilf->ilf_pad = 0; + uuid_copy(&ilf->ilf_u.ilfu_uuid, &uuid_null); + + xlog_finish_iovec(lv, vecp, sizeof(*ilf)); xfs_inode_item_format_core(ip, lv, &vecp); xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); @@ -855,44 +873,29 @@ xfs_istale_done( } /* - * convert an xfs_inode_log_format struct from either 32 or 64 bit versions - * (which can have different field alignments) to the native version + * convert an xfs_inode_log_format struct from the old 32 bit version + * (which can have different field alignments) to the native 64 bit version */ int xfs_inode_item_format_convert( - xfs_log_iovec_t *buf, - xfs_inode_log_format_t *in_f) + struct xfs_log_iovec *buf, + struct xfs_inode_log_format *in_f) { - if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { - xfs_inode_log_format_32_t *in_f32 = buf->i_addr; - - in_f->ilf_type = in_f32->ilf_type; - in_f->ilf_size = in_f32->ilf_size; - in_f->ilf_fields = in_f32->ilf_fields; - in_f->ilf_asize = in_f32->ilf_asize; - in_f->ilf_dsize = in_f32->ilf_dsize; - in_f->ilf_ino = in_f32->ilf_ino; - /* copy biggest field of ilf_u */ - uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); - in_f->ilf_blkno = in_f32->ilf_blkno; - in_f->ilf_len = in_f32->ilf_len; - in_f->ilf_boffset = in_f32->ilf_boffset; - return 0; - } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ - xfs_inode_log_format_64_t *in_f64 = buf->i_addr; - - in_f->ilf_type = in_f64->ilf_type; - in_f->ilf_size = in_f64->ilf_size; - in_f->ilf_fields = in_f64->ilf_fields; - in_f->ilf_asize = in_f64->ilf_asize; - in_f->ilf_dsize = in_f64->ilf_dsize; - in_f->ilf_ino = in_f64->ilf_ino; - /* copy biggest field of ilf_u */ - uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid); - in_f->ilf_blkno = in_f64->ilf_blkno; - in_f->ilf_len = in_f64->ilf_len; - in_f->ilf_boffset = in_f64->ilf_boffset; - return 0; - } - return -EFSCORRUPTED; + struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; + + if (buf->i_len != sizeof(*in_f32)) + return -EFSCORRUPTED; + + in_f->ilf_type = in_f32->ilf_type; + in_f->ilf_size = in_f32->ilf_size; + in_f->ilf_fields = in_f32->ilf_fields; + in_f->ilf_asize = in_f32->ilf_asize; + in_f->ilf_dsize = in_f32->ilf_dsize; + in_f->ilf_ino = in_f32->ilf_ino; + /* copy biggest field of ilf_u */ + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); + in_f->ilf_blkno = in_f32->ilf_blkno; + in_f->ilf_len = in_f32->ilf_len; + in_f->ilf_boffset = in_f32->ilf_boffset; + return 0; } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c5107c7..dc95a49 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2515,7 +2515,7 @@ next_lv: if (lv) vecp = lv->lv_iovecp; } - if (record_cnt == 0 && ordered == false) { + if (record_cnt == 0 && !ordered) { if (!lv) return 0; break; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index ea7d4b4..e9727d0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -704,7 +704,7 @@ xfs_mountfs( xfs_set_maxicount(mp); /* enable fail_at_unmount as default */ - mp->m_fail_unmount = 1; + mp->m_fail_unmount = true; error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); if (error) diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 0c381d7..0492436 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -134,7 +134,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28); XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52); - XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64, 56); + XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20); XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 3246815..37e603b 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -736,7 +736,13 @@ xfs_reflink_end_cow( /* If there is a hole at end_fsb - 1 go to the previous extent */ if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) || got.br_startoff > end_fsb) { - ASSERT(idx > 0); + /* + * In case of racing, overlapping AIO writes no COW extents + * might be left by the time I/O completes for the loser of + * the race. In that case we are done. + */ + if (idx <= 0) + goto out_cancel; xfs_iext_get_extent(ifp, --idx, &got); } @@ -809,6 +815,7 @@ next_extent: out_defer: xfs_defer_cancel(&dfops); +out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8390859..f1af7d6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -368,6 +368,11 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } +static inline int bpf_obj_get_user(const char __user *pathname) +{ + return -EOPNOTSUPP; +} + static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key) { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c8dae555..446b24c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -232,6 +232,7 @@ int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); +void clean_page_buffers(struct page *page); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c458d7b..6431087 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1403,7 +1403,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); +void hv_process_channel_removal(u32 relid); void vmbus_setevent(struct vmbus_channel *channel); /* diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 0ad4c30..91189bb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -44,6 +44,12 @@ #define STACK_MAGIC 0xdeadbeef +/** + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) /* @a is a power of 2 value */ @@ -57,6 +63,10 @@ #define READ 0 #define WRITE 1 +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) #define u64_to_user_ptr(x) ( \ @@ -76,7 +86,15 @@ #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) +/** + * FIELD_SIZEOF - get the size of a struct's field + * @t: the target struct + * @f: the target struct's field + * Return: the size of @f in the struct definition without having a + * declared instance of @t. + */ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) + #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP #define DIV_ROUND_DOWN_ULL(ll, d) \ @@ -107,7 +125,7 @@ /* * Divide positive or negative dividend by positive or negative divisor * and round to closest integer. Result is undefined for negative - * divisors if he dividend variable type is unsigned and for negative + * divisors if the dividend variable type is unsigned and for negative * dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ @@ -247,13 +265,13 @@ extern int _cond_resched(void); * @ep_ro: right open interval endpoint * * Perform a "reciprocal multiplication" in order to "scale" a value into - * range [0, ep_ro), where the upper interval endpoint is right-open. + * range [0, @ep_ro), where the upper interval endpoint is right-open. * This is useful, e.g. for accessing a index of an array containing - * ep_ro elements, for example. Think of it as sort of modulus, only that + * @ep_ro elements, for example. Think of it as sort of modulus, only that * the result isn't that of modulo. ;) Note that if initial input is a * small value, then result will return 0. * - * Return: a result based on val in interval [0, ep_ro). + * Return: a result based on @val in interval [0, @ep_ro). */ static inline u32 reciprocal_scale(u32 val, u32 ep_ro) { @@ -618,8 +636,8 @@ do { \ * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing * - * Note: __trace_printk is an internal function for trace_printk and - * the @ip is passed in via the trace_printk macro. + * Note: __trace_printk is an internal function for trace_printk() and + * the @ip is passed in via the trace_printk() macro. * * This function allows a kernel developer to debug fast path sections * that printk is not appropriate for. By scattering in various @@ -629,7 +647,7 @@ do { \ * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_printks scattered around in * your code. (Extra memory is used for special buffers that are - * allocated when trace_printk() is used) + * allocated when trace_printk() is used.) * * A little optization trick is done here. If there's only one * argument, there's no need to scan the string for printf formats. @@ -681,7 +699,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); * the @ip is passed in via the trace_puts macro. * * This is similar to trace_printk() but is made for those really fast - * paths that a developer wants the least amount of "Heisenbug" affects, + * paths that a developer wants the least amount of "Heisenbug" effects, * where the processing of the print format is still too much. * * This function allows a kernel developer to debug fast path sections @@ -692,7 +710,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_puts scattered around in * your code. (Extra memory is used for special buffers that are - * allocated when trace_puts() is used) + * allocated when trace_puts() is used.) * * Returns: 0 if nothing was written, positive # if string was. * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) @@ -771,6 +789,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } t2 min2 = (y); \ (void) (&min1 == &min2); \ min1 < min2 ? min1 : min2; }) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ #define min(x, y) \ __min(typeof(x), typeof(y), \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ @@ -781,12 +805,31 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } t2 max2 = (y); \ (void) (&max1 == &max2); \ max1 > max2 ? max1 : max2; }) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ #define max(x, y) \ __max(typeof(x), typeof(y), \ __UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \ x, y) +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ #define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ #define max3(x, y, z) max((typeof(x))max(x, y), z) /** @@ -805,8 +848,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @lo: lowest allowable value * @hi: highest allowable value * - * This macro does strict typechecking of lo/hi to make sure they are of the - * same type as val. See the unnecessary pointer comparisons. + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. */ #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) @@ -816,11 +859,24 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * * Or not use min/max/clamp at all, of course. */ + +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ #define min_t(type, x, y) \ __min(type, type, \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ x, y) +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ #define max_t(type, x, y) \ __max(type, type, \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ @@ -834,7 +890,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of type - * 'type' to make all the comparisons. + * @type to make all the comparisons. */ #define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) @@ -845,15 +901,17 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of whatever - * type the input argument 'val' is. This is useful when val is an unsigned - * type and min and max are literals that will otherwise be assigned a signed + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed * integer type. */ #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) -/* - * swap - swap value of @a and @b +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value */ #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 46f4ecf5..1861ea8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -445,6 +445,9 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ +#ifdef CONFIG_MEMBARRIER + atomic_t membarrier_state; +#endif #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f3f2d07..9a43763 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -316,7 +316,7 @@ struct mmc_host { #define MMC_CAP_UHS_SDR50 (1 << 18) /* Host supports UHS SDR50 mode */ #define MMC_CAP_UHS_SDR104 (1 << 19) /* Host supports UHS SDR104 mode */ #define MMC_CAP_UHS_DDR50 (1 << 20) /* Host supports UHS DDR50 mode */ -#define MMC_CAP_NO_BOUNCE_BUFF (1 << 21) /* Disable bounce buffers on host */ +/* (1 << 21) is free for reuse */ #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2c2a551..528b24c 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -108,9 +108,10 @@ struct ebt_table { #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ ~(__alignof__(struct _xt_align)-1)) -extern struct ebt_table *ebt_register_table(struct net *net, - const struct ebt_table *table, - const struct nf_hook_ops *); +extern int ebt_register_table(struct net *net, + const struct ebt_table *table, + const struct nf_hook_ops *ops, + struct ebt_table **res); extern void ebt_unregister_table(struct net *net, struct ebt_table *table, const struct nf_hook_ops *); extern unsigned int ebt_do_table(struct sk_buff *skb, diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a36abe2..27e249e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -12,11 +12,31 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); +void lockup_detector_soft_poweroff(void); +void lockup_detector_cleanup(void); +bool is_hardlockup(void); + +extern int watchdog_user_enabled; +extern int nmi_watchdog_user_enabled; +extern int soft_watchdog_user_enabled; +extern int watchdog_thresh; +extern unsigned long watchdog_enabled; + +extern struct cpumask watchdog_cpumask; +extern unsigned long *watchdog_cpumask_bits; +#ifdef CONFIG_SMP +extern int sysctl_softlockup_all_cpu_backtrace; +extern int sysctl_hardlockup_all_cpu_backtrace; #else -static inline void lockup_detector_init(void) -{ -} -#endif +#define sysctl_softlockup_all_cpu_backtrace 0 +#define sysctl_hardlockup_all_cpu_backtrace 0 +#endif /* !CONFIG_SMP */ + +#else /* CONFIG_LOCKUP_DETECTOR */ +static inline void lockup_detector_init(void) { } +static inline void lockup_detector_soft_poweroff(void) { } +static inline void lockup_detector_cleanup(void) { } +#endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); @@ -24,29 +44,17 @@ extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -extern int soft_watchdog_enabled; -extern atomic_t watchdog_park_in_progress; #else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} +static inline void touch_softlockup_watchdog_sched(void) { } +static inline void touch_softlockup_watchdog(void) { } +static inline void touch_softlockup_watchdog_sync(void) { } +static inline void touch_all_softlockup_watchdogs(void) { } #endif #ifdef CONFIG_DETECT_HUNG_TASK void reset_hung_task_detector(void); #else -static inline void reset_hung_task_detector(void) -{ -} +static inline void reset_hung_task_detector(void) { } #endif /* @@ -54,12 +62,12 @@ static inline void reset_hung_task_detector(void) * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. * - * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' - * are variables that are only used as an 'interface' between the parameters - * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The - * 'watchdog_thresh' variable is handled differently because its value is not - * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' - * is equal zero. + * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and + * 'soft_watchdog_user_enabled' are variables that are only used as an + * 'interface' between the parameters in /proc/sys/kernel and the internal + * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is + * handled differently because its value is not boolean, and the lockup + * detectors are 'suspended' while 'watchdog_thresh' is equal zero. */ #define NMI_WATCHDOG_ENABLED_BIT 0 #define SOFT_WATCHDOG_ENABLED_BIT 1 @@ -73,17 +81,41 @@ extern unsigned int hardlockup_panic; static inline void hardlockup_detector_disable(void) {} #endif +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +# define NMI_WATCHDOG_SYSCTL_PERM 0644 +#else +# define NMI_WATCHDOG_SYSCTL_PERM 0444 +#endif + #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); +extern void hardlockup_detector_perf_stop(void); +extern void hardlockup_detector_perf_restart(void); +extern void hardlockup_detector_perf_disable(void); +extern void hardlockup_detector_perf_enable(void); +extern void hardlockup_detector_perf_cleanup(void); +extern int hardlockup_detector_perf_init(void); #else -#if !defined(CONFIG_HAVE_NMI_WATCHDOG) +static inline void hardlockup_detector_perf_stop(void) { } +static inline void hardlockup_detector_perf_restart(void) { } +static inline void hardlockup_detector_perf_disable(void) { } +static inline void hardlockup_detector_perf_enable(void) { } +static inline void hardlockup_detector_perf_cleanup(void) { } +# if !defined(CONFIG_HAVE_NMI_WATCHDOG) +static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } static inline void arch_touch_nmi_watchdog(void) {} +# else +static inline int hardlockup_detector_perf_init(void) { return 0; } +# endif #endif -#endif + +void watchdog_nmi_stop(void); +void watchdog_nmi_start(void); +int watchdog_nmi_probe(void); /** * touch_nmi_watchdog - restart NMI watchdog timeout. - * + * * If the architecture supports the NMI watchdog, touch_nmi_watchdog() * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. @@ -153,22 +185,6 @@ static inline bool trigger_single_cpu_backtrace(int cpu) u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif -#ifdef CONFIG_LOCKUP_DETECTOR -extern int nmi_watchdog_enabled; -extern int watchdog_user_enabled; -extern int watchdog_thresh; -extern unsigned long watchdog_enabled; -extern struct cpumask watchdog_cpumask; -extern unsigned long *watchdog_cpumask_bits; -extern int __read_mostly watchdog_suspended; -#ifdef CONFIG_SMP -extern int sysctl_softlockup_all_cpu_backtrace; -extern int sysctl_hardlockup_all_cpu_backtrace; -#else -#define sysctl_softlockup_all_cpu_backtrace 0 -#define sysctl_hardlockup_all_cpu_backtrace 0 -#endif - #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ defined(CONFIG_HARDLOCKUP_DETECTOR) void watchdog_update_hrtimer_threshold(u64 period); @@ -176,7 +192,6 @@ void watchdog_update_hrtimer_threshold(u64 period); static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif -extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); @@ -188,18 +203,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int proc_watchdog_cpumask(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int lockup_detector_suspend(void); -extern void lockup_detector_resume(void); -#else -static inline int lockup_detector_suspend(void) -{ - return 0; -} - -static inline void lockup_detector_resume(void) -{ -} -#endif #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include <asm/nmi.h> diff --git a/include/linux/of.h b/include/linux/of.h index cfc34117..b240ed6 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -734,6 +734,16 @@ static inline struct device_node *of_get_cpu_node(int cpu, return NULL; } +static inline int of_n_addr_cells(struct device_node *np) +{ + return 0; + +} +static inline int of_n_size_cells(struct device_node *np) +{ + return 0; +} + static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { diff --git a/include/linux/rculist.h b/include/linux/rculist.h index b1fd8bf..2bea1d5 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -276,7 +276,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, #define list_entry_rcu(ptr, type, member) \ container_of(lockless_dereference(ptr), type, member) -/** +/* * Where are list_empty_rcu() and list_first_entry_rcu()? * * Implementing those functions following their counterparts list_empty() and diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de50d8a..1a9f70d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -523,7 +523,7 @@ static inline void rcu_preempt_sleep_check(void) { } * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the READ_ONCE(). This * is useful in cases where update-side locks prevent the value of the - * pointer from changing. Please note that this primitive does -not- + * pointer from changing. Please note that this primitive does *not* * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. @@ -568,7 +568,7 @@ static inline void rcu_preempt_sleep_check(void) { } * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to * kill_dependency(). It could be used as follows: - * + * `` * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); @@ -579,6 +579,7 @@ static inline void rcu_preempt_sleep_check(void) { } * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); + *`` */ #define rcu_pointer_handoff(p) (p) @@ -778,18 +779,21 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_INIT_POINTER() - initialize an RCU protected pointer + * @p: The pointer to be initialized. + * @v: The value to initialized the pointer to. * * Initialize an RCU-protected pointer in special cases where readers * do not need ordering constraints on the CPU or the compiler. These * special cases are: * - * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- + * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer *or* * 2. The caller has taken whatever steps are required to prevent - * RCU readers from concurrently accessing this pointer -or- + * RCU readers from concurrently accessing this pointer *or* * 3. The referenced data structure has already been exposed to - * readers either at compile time or via rcu_assign_pointer() -and- - * a. You have not made -any- reader-visible changes to - * this structure since then -or- + * readers either at compile time or via rcu_assign_pointer() *and* + * + * a. You have not made *any* reader-visible changes to + * this structure since then *or* * b. It is OK for readers accessing this structure from its * new location to see the old state of the structure. (For * example, the changes were to statistical counters or to @@ -805,7 +809,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * by a single external-to-structure RCU-protected pointer, then you may * use RCU_INIT_POINTER() to initialize the internal RCU-protected * pointers, but you must use rcu_assign_pointer() to initialize the - * external-to-structure pointer -after- you have completely initialized + * external-to-structure pointer *after* you have completely initialized * the reader-accessible portions of the linked structure. * * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no @@ -819,6 +823,8 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer + * @p: The pointer to be initialized. + * @v: The value to initialized the pointer to. * * GCC-style initialization for an RCU-protected pointer in a structure field. */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index ae53e41..ab9bf7b 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC) | flags; } +#ifdef CONFIG_MEMBARRIER +enum { + MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), + MEMBARRIER_STATE_SWITCH_MM = (1U << 1), +}; + +static inline void membarrier_execve(struct task_struct *t) +{ + atomic_set(&t->mm->membarrier_state, 0); +} +#else +static inline void membarrier_execve(struct task_struct *t) +{ +} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index d7b6dab..7d065ab 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -71,14 +71,6 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; - - /* - * Some variables from the most recent sd_lb_stats for this domain, - * used by wake_affine(). - */ - unsigned long nr_running; - unsigned long load; - unsigned long capacity; }; struct sched_domain { diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index 12910cf..c149aa7 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -55,7 +55,7 @@ smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) } void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); -int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *); +void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, + const struct cpumask *); #endif diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 39af9bc..62be896 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -78,6 +78,7 @@ void synchronize_srcu(struct srcu_struct *sp); /** * srcu_read_lock_held - might we be in SRCU read-side critical section? + * @sp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 905d769..5f7eeab 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -42,7 +42,7 @@ enum { #define THREAD_ALIGN THREAD_SIZE #endif -#ifdef CONFIG_DEBUG_STACK_USAGE +#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ __GFP_ZERO) #else diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 82e93ee..67c5a9f 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -192,6 +192,7 @@ struct scsi_device { unsigned no_dif:1; /* T10 PI (DIF) should be disabled */ unsigned broken_fua:1; /* Don't set FUA bit */ unsigned lun_in_cdb:1; /* Store LUN bits in CDB[1] */ + unsigned unmap_limit_for_ws:1; /* Use the UNMAP limit for WRITE SAME */ atomic_t disk_events_disable_depth; /* disable depth for disk events */ diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 9592570..36b0301 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -29,5 +29,6 @@ #define BLIST_TRY_VPD_PAGES 0x10000000 /* Attempt to read VPD pages */ #define BLIST_NO_RSOC 0x20000000 /* don't try to issue RSOC */ #define BLIST_MAX_1024 0x40000000 /* maximum 1024 sector cdb length */ +#define BLIST_UNMAP_LIMIT_WS 0x80000000 /* Use UNMAP limit for WRITE SAME */ #endif diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 6183d20..b266d2a 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -434,7 +434,6 @@ extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost, unsigned int target_id); extern void iscsi_remove_session(struct iscsi_cls_session *session); extern void iscsi_free_session(struct iscsi_cls_session *session); -extern int iscsi_destroy_session(struct iscsi_cls_session *session); extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess, int dd_size, uint32_t cid); extern int iscsi_destroy_conn(struct iscsi_cls_conn *conn); diff --git a/include/sound/control.h b/include/sound/control.h index bd7246d..a1f1152 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -248,6 +248,9 @@ int snd_ctl_add_vmaster_hook(struct snd_kcontrol *kctl, void *private_data); void snd_ctl_sync_vmaster(struct snd_kcontrol *kctl, bool hook_only); #define snd_ctl_sync_vmaster_hook(kctl) snd_ctl_sync_vmaster(kctl, true) +int snd_ctl_apply_vmaster_slaves(struct snd_kcontrol *kctl, + int (*func)(struct snd_kcontrol *, void *), + void *arg); /* * Helper functions for jack-detection controls diff --git a/include/sound/hda_verbs.h b/include/sound/hda_verbs.h index d0509db..f89cd5e 100644 --- a/include/sound/hda_verbs.h +++ b/include/sound/hda_verbs.h @@ -95,6 +95,7 @@ enum { #define AC_VERB_SET_EAPD_BTLENABLE 0x70c #define AC_VERB_SET_DIGI_CONVERT_1 0x70d #define AC_VERB_SET_DIGI_CONVERT_2 0x70e +#define AC_VERB_SET_DIGI_CONVERT_3 0x73e #define AC_VERB_SET_VOLUME_KNOB_CONTROL 0x70f #define AC_VERB_SET_GPIO_DATA 0x715 #define AC_VERB_SET_GPIO_MASK 0x716 diff --git a/include/sound/seq_virmidi.h b/include/sound/seq_virmidi.h index a03acd0..695257a 100644 --- a/include/sound/seq_virmidi.h +++ b/include/sound/seq_virmidi.h @@ -60,6 +60,7 @@ struct snd_virmidi_dev { int port; /* created/attached port */ unsigned int flags; /* SNDRV_VIRMIDI_* */ rwlock_t filelist_lock; + struct rw_semaphore filelist_sem; struct list_head filelist; }; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 412c06a..ccaea52 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -269,9 +269,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 36 +#define DM_VERSION_MINOR 37 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2017-06-09)" +#define DM_VERSION_EXTRA "-ioctl (2017-09-20)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h index 6d47b32..4e01ad7 100644 --- a/include/uapi/linux/membarrier.h +++ b/include/uapi/linux/membarrier.h @@ -52,21 +52,30 @@ * (non-running threads are de facto in such a * state). This only covers threads from the * same processes as the caller thread. This - * command returns 0. The "expedited" commands - * complete faster than the non-expedited ones, - * they never block, but have the downside of - * causing extra overhead. + * command returns 0 on success. The + * "expedited" commands complete faster than + * the non-expedited ones, they never block, + * but have the downside of causing extra + * overhead. A process needs to register its + * intent to use the private expedited command + * prior to using it, otherwise this command + * returns -EPERM. + * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + * Register the process intent to use + * MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always + * returns 0. * * Command to be passed to the membarrier system call. The commands need to * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to * the value 0. */ enum membarrier_cmd { - MEMBARRIER_CMD_QUERY = 0, - MEMBARRIER_CMD_SHARED = (1 << 0), + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ - MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4), }; #endif /* _UAPI_LINUX_MEMBARRIER_H */ diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index b97725a..da161b5 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -23,6 +23,7 @@ enum xt_bpf_modes { XT_BPF_MODE_FD_PINNED, XT_BPF_MODE_FD_ELF, }; +#define XT_BPF_MODE_PATH_PINNED XT_BPF_MODE_FD_PINNED struct xt_bpf_info_v1 { __u16 mode; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index e833ed9..be1dde9 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -363,6 +363,7 @@ out: putname(pname); return ret; } +EXPORT_SYMBOL_GPL(bpf_obj_get_user); static void bpf_evict_inode(struct inode *inode) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b914fbe..8b8d6ba 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -653,6 +653,10 @@ static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno) { struct bpf_verifier_state *parent = state->parent; + if (regno == BPF_REG_FP) + /* We don't need to worry about FP liveness because it's read-only */ + return; + while (parent) { /* if read wasn't screened by an earlier write ... */ if (state->regs[regno].live & REG_LIVE_WRITTEN) @@ -2345,6 +2349,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * copy register state to dest reg */ regs[insn->dst_reg] = regs[insn->src_reg]; + regs[insn->dst_reg].live |= REG_LIVE_WRITTEN; } else { /* R1 = (u32) R2 */ if (is_pointer_value(env, insn->src_reg)) { diff --git a/kernel/cpu.c b/kernel/cpu.c index 8de11a2..d851df2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -24,6 +24,7 @@ #include <linux/lockdep.h> #include <linux/tick.h> #include <linux/irq.h> +#include <linux/nmi.h> #include <linux/smpboot.h> #include <linux/relay.h> #include <linux/slab.h> @@ -897,6 +898,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, out: cpus_write_unlock(); + /* + * Do post unplug cleanup. This is still protected against + * concurrent CPU hotplug via cpu_add_remove_lock. + */ + lockup_detector_cleanup(); return ret; } diff --git a/kernel/exit.c b/kernel/exit.c index f2cd53e..cf28528 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1610,6 +1610,9 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, if (!infop) return err; + if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) + goto Efault; + user_access_begin(); unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); @@ -1735,6 +1738,9 @@ COMPAT_SYSCALL_DEFINE5(waitid, if (!infop) return err; + if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) + goto Efault; + user_access_begin(); unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); diff --git a/kernel/fork.c b/kernel/fork.c index e702cb9..07cc743 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -215,6 +215,10 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; +#ifdef CONFIG_DEBUG_KMEMLEAK + /* Clear stale pointers from reused stack. */ + memset(s->addr, 0, THREAD_SIZE); +#endif tsk->stack_vm_area = s; return s->addr; } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6fc89fd..5a2ef92c 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -265,8 +265,8 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force) irq_setup_affinity(desc); break; case IRQ_STARTUP_MANAGED: + irq_do_set_affinity(d, aff, false); ret = __irq_startup(desc); - irq_set_affinity_locked(d, aff, false); break; case IRQ_STARTUP_ABORT: return 0; diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 638eb9c..9eb09ae 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -18,8 +18,34 @@ static inline bool irq_needs_fixup(struct irq_data *d) { const struct cpumask *m = irq_data_get_effective_affinity_mask(d); + unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(smp_processor_id(), m); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + /* + * The cpumask_empty() check is a workaround for interrupt chips, + * which do not implement effective affinity, but the architecture has + * enabled the config switch. Use the general affinity mask instead. + */ + if (cpumask_empty(m)) + m = irq_data_get_affinity_mask(d); + + /* + * Sanity check. If the mask is not empty when excluding the outgoing + * CPU then it must contain at least one online CPU. The outgoing CPU + * has been removed from the online mask already. + */ + if (cpumask_any_but(m, cpu) < nr_cpu_ids && + cpumask_any_and(m, cpu_online_mask) >= nr_cpu_ids) { + /* + * If this happens then there was a missed IRQ fixup at some + * point. Warn about it and enforce fixup. + */ + pr_warn("Eff. affinity %*pbl of IRQ %u contains only offline CPUs after offlining CPU %u\n", + cpumask_pr_args(m), d->irq, cpu); + return true; + } +#endif + return cpumask_test_cpu(cpu, m); } static bool migrate_one_irq(struct irq_desc *desc) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d00132b..4bff6a1 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -168,6 +168,19 @@ void irq_set_thread_affinity(struct irq_desc *desc) set_bit(IRQTF_AFFINITY, &action->thread_flags); } +static void irq_validate_effective_affinity(struct irq_data *data) +{ +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + const struct cpumask *m = irq_data_get_effective_affinity_mask(data); + struct irq_chip *chip = irq_data_get_irq_chip(data); + + if (!cpumask_empty(m)) + return; + pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n", + chip->name, data->irq); +#endif +} + int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -175,12 +188,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_chip *chip = irq_data_get_irq_chip(data); int ret; + if (!chip || !chip->irq_set_affinity) + return -EINVAL; + ret = chip->irq_set_affinity(data, mask, force); switch (ret) { case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: cpumask_copy(desc->irq_common_data.affinity, mask); case IRQ_SET_MASK_OK_NOCOPY: + irq_validate_effective_affinity(data); irq_set_thread_affinity(desc); ret = 0; } diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index b9628e4..bf8c8fd 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -830,6 +830,41 @@ int klp_register_patch(struct klp_patch *patch) } EXPORT_SYMBOL_GPL(klp_register_patch); +/* + * Remove parts of patches that touch a given kernel module. The list of + * patches processed might be limited. When limit is NULL, all patches + * will be handled. + */ +static void klp_cleanup_module_patches_limited(struct module *mod, + struct klp_patch *limit) +{ + struct klp_patch *patch; + struct klp_object *obj; + + list_for_each_entry(patch, &klp_patches, list) { + if (patch == limit) + break; + + klp_for_each_object(patch, obj) { + if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) + continue; + + /* + * Only unpatch the module if the patch is enabled or + * is in transition. + */ + if (patch->enabled || patch == klp_transition_patch) { + pr_notice("reverting patch '%s' on unloading module '%s'\n", + patch->mod->name, obj->mod->name); + klp_unpatch_object(obj); + } + + klp_free_object_loaded(obj); + break; + } + } +} + int klp_module_coming(struct module *mod) { int ret; @@ -894,7 +929,7 @@ err: pr_warn("patch '%s' failed for module '%s', refusing to load module '%s'\n", patch->mod->name, obj->mod->name, obj->mod->name); mod->klp_alive = false; - klp_free_object_loaded(obj); + klp_cleanup_module_patches_limited(mod, patch); mutex_unlock(&klp_mutex); return ret; @@ -902,9 +937,6 @@ err: void klp_module_going(struct module *mod) { - struct klp_patch *patch; - struct klp_object *obj; - if (WARN_ON(mod->state != MODULE_STATE_GOING && mod->state != MODULE_STATE_COMING)) return; @@ -917,25 +949,7 @@ void klp_module_going(struct module *mod) */ mod->klp_alive = false; - list_for_each_entry(patch, &klp_patches, list) { - klp_for_each_object(patch, obj) { - if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) - continue; - - /* - * Only unpatch the module if the patch is enabled or - * is in transition. - */ - if (patch->enabled || patch == klp_transition_patch) { - pr_notice("reverting patch '%s' on unloading module '%s'\n", - patch->mod->name, obj->mod->name); - klp_unpatch_object(obj); - } - - klp_free_object_loaded(obj); - break; - } - } + klp_cleanup_module_patches_limited(mod, NULL); mutex_unlock(&klp_mutex); } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 44c8d0d..e36e652 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1873,10 +1873,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, struct held_lock *next, int distance, struct stack_trace *trace, int (*save)(struct stack_trace *trace)) { + struct lock_list *uninitialized_var(target_entry); struct lock_list *entry; - int ret; struct lock_list this; - struct lock_list *uninitialized_var(target_entry); + int ret; /* * Prove that the new <prev> -> <next> dependency would not @@ -1890,8 +1890,17 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, this.class = hlock_class(next); this.parent = NULL; ret = check_noncircular(&this, hlock_class(prev), &target_entry); - if (unlikely(!ret)) + if (unlikely(!ret)) { + if (!trace->entries) { + /* + * If @save fails here, the printing might trigger + * a WARN but because of the !nr_entries it should + * not do bad things. + */ + save(trace); + } return print_circular_bug(&this, target_entry, next, prev, trace); + } else if (unlikely(ret < 0)) return print_bfs_bug(ret); @@ -1938,7 +1947,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return print_bfs_bug(ret); - if (save && !save(trace)) + if (!trace->entries && !save(trace)) return 0; /* @@ -1958,20 +1967,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (!ret) return 0; - /* - * Debugging printouts: - */ - if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { - graph_unlock(); - printk("\n new dependency: "); - print_lock_name(hlock_class(prev)); - printk(KERN_CONT " => "); - print_lock_name(hlock_class(next)); - printk(KERN_CONT "\n"); - dump_stack(); - if (!graph_lock()) - return 0; - } return 2; } @@ -1986,8 +1981,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) { int depth = curr->lockdep_depth; struct held_lock *hlock; - struct stack_trace trace; - int (*save)(struct stack_trace *trace) = save_trace; + struct stack_trace trace = { + .nr_entries = 0, + .max_entries = 0, + .entries = NULL, + .skip = 0, + }; /* * Debugging checks. @@ -2018,18 +2017,11 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) */ if (hlock->read != 2 && hlock->check) { int ret = check_prev_add(curr, hlock, next, - distance, &trace, save); + distance, &trace, save_trace); if (!ret) return 0; /* - * Stop saving stack_trace if save_trace() was - * called at least once: - */ - if (save && ret == 2) - save = NULL; - - /* * Stop after the first non-trylock entry, * as non-trylock entries have added their * own direct dependencies already, so this diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 3e2b4f5..ccd2d20 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -120,22 +120,26 @@ static void s2idle_loop(void) * frozen processes + suspended devices + idle processors. * Thus s2idle_enter() should be called right after * all devices have been suspended. + * + * Wakeups during the noirq suspend of devices may be spurious, + * so prevent them from terminating the loop right away. */ error = dpm_noirq_suspend_devices(PMSG_SUSPEND); if (!error) s2idle_enter(); + else if (error == -EBUSY && pm_wakeup_pending()) + error = 0; - dpm_noirq_resume_devices(PMSG_RESUME); - if (error && (error != -EBUSY || !pm_wakeup_pending())) { - dpm_noirq_end(); - break; - } - - if (s2idle_ops && s2idle_ops->wake) + if (!error && s2idle_ops && s2idle_ops->wake) s2idle_ops->wake(); + dpm_noirq_resume_devices(PMSG_RESUME); + dpm_noirq_end(); + if (error) + break; + if (s2idle_ops && s2idle_ops->sync) s2idle_ops->sync(); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 729a870..6d58800 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -854,7 +854,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, /** * call_srcu() - Queue a callback for invocation after an SRCU grace period * @sp: srcu_struct in queue the callback - * @head: structure to be used for queueing the SRCU callback. + * @rhp: structure to be used for queueing the SRCU callback. * @func: function to be invoked after the SRCU grace period * * The callback function will be invoked some time after a full SRCU diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 50d1861..3f943ef 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -85,6 +85,9 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) } /** + * rcu_sync_enter_start - Force readers onto slow path for multiple updates + * @rsp: Pointer to rcu_sync structure to use for synchronization + * * Must be called after rcu_sync_init() and before first use. * * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}() @@ -142,7 +145,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) /** * rcu_sync_func() - Callback function managing reader access to fastpath - * @rsp: Pointer to rcu_sync structure to use for synchronization + * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization * * This function is passed to one of the call_rcu() functions by * rcu_sync_exit(), so that it is invoked after a grace period following the @@ -158,9 +161,9 @@ void rcu_sync_enter(struct rcu_sync *rsp) * rcu_sync_exit(). Otherwise, set all state back to idle so that readers * can again use their fastpaths. */ -static void rcu_sync_func(struct rcu_head *rcu) +static void rcu_sync_func(struct rcu_head *rhp) { - struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head); + struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); unsigned long flags; BUG_ON(rsp->gp_state != GP_PASSED); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b0ad62b..3e3650e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3097,9 +3097,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, * read-side critical sections have completed. call_rcu_sched() assumes * that the read-side critical sections end on enabling of preemption * or on voluntary preemption. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR - * - anything that disables preemption. + * RCU read-side critical sections are delimited by: + * + * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR + * - anything that disables preemption. * * These may be nested. * @@ -3124,11 +3125,12 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); * handler. This means that read-side critical sections in process * context must not be interrupted by softirqs. This interface is to be * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. - * OR - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. - * These may be nested. + * RCU read-side critical sections are delimited by: + * + * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR + * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. + * + * These may be nested. * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 70ba32e..d3f3094 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5356,91 +5356,62 @@ static int wake_wide(struct task_struct *p) return 1; } -struct llc_stats { - unsigned long nr_running; - unsigned long load; - unsigned long capacity; - int has_capacity; -}; +/* + * The purpose of wake_affine() is to quickly determine on which CPU we can run + * soonest. For the purpose of speed we only consider the waking and previous + * CPU. + * + * wake_affine_idle() - only considers 'now', it check if the waking CPU is (or + * will be) idle. + * + * wake_affine_weight() - considers the weight to reflect the average + * scheduling latency of the CPUs. This seems to work + * for the overloaded case. + */ -static bool get_llc_stats(struct llc_stats *stats, int cpu) +static bool +wake_affine_idle(struct sched_domain *sd, struct task_struct *p, + int this_cpu, int prev_cpu, int sync) { - struct sched_domain_shared *sds = rcu_dereference(per_cpu(sd_llc_shared, cpu)); - - if (!sds) - return false; + if (idle_cpu(this_cpu)) + return true; - stats->nr_running = READ_ONCE(sds->nr_running); - stats->load = READ_ONCE(sds->load); - stats->capacity = READ_ONCE(sds->capacity); - stats->has_capacity = stats->nr_running < per_cpu(sd_llc_size, cpu); + if (sync && cpu_rq(this_cpu)->nr_running == 1) + return true; - return true; + return false; } -/* - * Can a task be moved from prev_cpu to this_cpu without causing a load - * imbalance that would trigger the load balancer? - * - * Since we're running on 'stale' values, we might in fact create an imbalance - * but recomputing these values is expensive, as that'd mean iteration 2 cache - * domains worth of CPUs. - */ static bool -wake_affine_llc(struct sched_domain *sd, struct task_struct *p, - int this_cpu, int prev_cpu, int sync) +wake_affine_weight(struct sched_domain *sd, struct task_struct *p, + int this_cpu, int prev_cpu, int sync) { - struct llc_stats prev_stats, this_stats; s64 this_eff_load, prev_eff_load; unsigned long task_load; - if (!get_llc_stats(&prev_stats, prev_cpu) || - !get_llc_stats(&this_stats, this_cpu)) - return false; + this_eff_load = target_load(this_cpu, sd->wake_idx); + prev_eff_load = source_load(prev_cpu, sd->wake_idx); - /* - * If sync wakeup then subtract the (maximum possible) - * effect of the currently running task from the load - * of the current LLC. - */ if (sync) { unsigned long current_load = task_h_load(current); - /* in this case load hits 0 and this LLC is considered 'idle' */ - if (current_load > this_stats.load) + if (current_load > this_eff_load) return true; - this_stats.load -= current_load; + this_eff_load -= current_load; } - /* - * The has_capacity stuff is not SMT aware, but by trying to balance - * the nr_running on both ends we try and fill the domain at equal - * rates, thereby first consuming cores before siblings. - */ - - /* if the old cache has capacity, stay there */ - if (prev_stats.has_capacity && prev_stats.nr_running < this_stats.nr_running+1) - return false; - - /* if this cache has capacity, come here */ - if (this_stats.has_capacity && this_stats.nr_running+1 < prev_stats.nr_running) - return true; - - /* - * Check to see if we can move the load without causing too much - * imbalance. - */ task_load = task_h_load(p); - this_eff_load = 100; - this_eff_load *= prev_stats.capacity; - - prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2; - prev_eff_load *= this_stats.capacity; + this_eff_load += task_load; + if (sched_feat(WA_BIAS)) + this_eff_load *= 100; + this_eff_load *= capacity_of(prev_cpu); - this_eff_load *= this_stats.load + task_load; - prev_eff_load *= prev_stats.load - task_load; + prev_eff_load -= task_load; + if (sched_feat(WA_BIAS)) + prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2; + prev_eff_load *= capacity_of(this_cpu); return this_eff_load <= prev_eff_load; } @@ -5449,22 +5420,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int prev_cpu, int sync) { int this_cpu = smp_processor_id(); - bool affine; + bool affine = false; - /* - * Default to no affine wakeups; wake_affine() should not effect a task - * placement the load-balancer feels inclined to undo. The conservative - * option is therefore to not move tasks when they wake up. - */ - affine = false; + if (sched_feat(WA_IDLE) && !affine) + affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync); - /* - * If the wakeup is across cache domains, try to evaluate if movement - * makes sense, otherwise rely on select_idle_siblings() to do - * placement inside the cache domain. - */ - if (!cpus_share_cache(prev_cpu, this_cpu)) - affine = wake_affine_llc(sd, p, this_cpu, prev_cpu, sync); + if (sched_feat(WA_WEIGHT) && !affine) + affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync); schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts); if (affine) { @@ -7600,7 +7562,6 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq) */ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds) { - struct sched_domain_shared *shared = env->sd->shared; struct sched_domain *child = env->sd->child; struct sched_group *sg = env->sd->groups; struct sg_lb_stats *local = &sds->local_stat; @@ -7672,22 +7633,6 @@ next_group: if (env->dst_rq->rd->overload != overload) env->dst_rq->rd->overload = overload; } - - if (!shared) - return; - - /* - * Since these are sums over groups they can contain some CPUs - * multiple times for the NUMA domains. - * - * Currently only wake_affine_llc() and find_busiest_group() - * uses these numbers, only the last is affected by this problem. - * - * XXX fix that. - */ - WRITE_ONCE(shared->nr_running, sds->total_running); - WRITE_ONCE(shared->load, sds->total_load); - WRITE_ONCE(shared->capacity, sds->total_capacity); } /** @@ -8098,6 +8043,13 @@ static int should_we_balance(struct lb_env *env) int cpu, balance_cpu = -1; /* + * Ensure the balancing environment is consistent; can happen + * when the softirq triggers 'during' hotplug. + */ + if (!cpumask_test_cpu(env->dst_cpu, env->cpus)) + return 0; + + /* * In the newly idle case, we will allow all the cpu's * to do the newly idle load balance. */ diff --git a/kernel/sched/features.h b/kernel/sched/features.h index d3fb155..319ed0e 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -81,3 +81,6 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) SCHED_FEAT(ATTACH_AGE_LOAD, true) +SCHED_FEAT(WA_IDLE, true) +SCHED_FEAT(WA_WEIGHT, true) +SCHED_FEAT(WA_BIAS, true) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index a92fddc..dd79087 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -18,6 +18,7 @@ #include <linux/membarrier.h> #include <linux/tick.h> #include <linux/cpumask.h> +#include <linux/atomic.h> #include "sched.h" /* for cpu_rq(). */ @@ -26,21 +27,26 @@ * except MEMBARRIER_CMD_QUERY. */ #define MEMBARRIER_CMD_BITMASK \ - (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED) + (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED) static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ } -static void membarrier_private_expedited(void) +static int membarrier_private_expedited(void) { int cpu; bool fallback = false; cpumask_var_t tmpmask; + if (!(atomic_read(¤t->mm->membarrier_state) + & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)) + return -EPERM; + if (num_online_cpus() == 1) - return; + return 0; /* * Matches memory barriers around rq->curr modification in @@ -94,6 +100,24 @@ static void membarrier_private_expedited(void) * rq->curr modification in scheduler. */ smp_mb(); /* exit from system call is not a mb */ + return 0; +} + +static void membarrier_register_private_expedited(void) +{ + struct task_struct *p = current; + struct mm_struct *mm = p->mm; + + /* + * We need to consider threads belonging to different thread + * groups, which use the same mm. (CLONE_VM but not + * CLONE_THREAD). + */ + if (atomic_read(&mm->membarrier_state) + & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY) + return; + atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, + &mm->membarrier_state); } /** @@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) synchronize_sched(); return 0; case MEMBARRIER_CMD_PRIVATE_EXPEDITED: - membarrier_private_expedited(); + return membarrier_private_expedited(); + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + membarrier_register_private_expedited(); return 0; default: return -EINVAL; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index bb3a380..0ae832e 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -473,7 +473,7 @@ static long seccomp_attach_filter(unsigned int flags, return 0; } -void __get_seccomp_filter(struct seccomp_filter *filter) +static void __get_seccomp_filter(struct seccomp_filter *filter) { /* Reference count is bounded by the number of total processes. */ refcount_inc(&filter->usage); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 1d71c05..5043e74 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -344,39 +344,30 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); * by the client, but only by calling this function. * This function can only be called on a registered smp_hotplug_thread. */ -int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *new) +void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, + const struct cpumask *new) { struct cpumask *old = plug_thread->cpumask; - cpumask_var_t tmp; + static struct cpumask tmp; unsigned int cpu; - if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) - return -ENOMEM; - - get_online_cpus(); + lockdep_assert_cpus_held(); mutex_lock(&smpboot_threads_lock); /* Park threads that were exclusively enabled on the old mask. */ - cpumask_andnot(tmp, old, new); - for_each_cpu_and(cpu, tmp, cpu_online_mask) + cpumask_andnot(&tmp, old, new); + for_each_cpu_and(cpu, &tmp, cpu_online_mask) smpboot_park_thread(plug_thread, cpu); /* Unpark threads that are exclusively enabled on the new mask. */ - cpumask_andnot(tmp, new, old); - for_each_cpu_and(cpu, tmp, cpu_online_mask) + cpumask_andnot(&tmp, new, old); + for_each_cpu_and(cpu, &tmp, cpu_online_mask) smpboot_unpark_thread(plug_thread, cpu); cpumask_copy(old, new); mutex_unlock(&smpboot_threads_lock); - put_online_cpus(); - - free_cpumask_var(tmp); - - return 0; } -EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread); static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4da9e62..d9c31bc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -872,9 +872,9 @@ static struct ctl_table kern_table[] = { #if defined(CONFIG_LOCKUP_DETECTOR) { .procname = "watchdog", - .data = &watchdog_user_enabled, - .maxlen = sizeof (int), - .mode = 0644, + .data = &watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = proc_watchdog, .extra1 = &zero, .extra2 = &one, @@ -890,16 +890,12 @@ static struct ctl_table kern_table[] = { }, { .procname = "nmi_watchdog", - .data = &nmi_watchdog_enabled, - .maxlen = sizeof (int), - .mode = 0644, + .data = &nmi_watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = NMI_WATCHDOG_SYSCTL_PERM, .proc_handler = proc_nmi_watchdog, .extra1 = &zero, -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) .extra2 = &one, -#else - .extra2 = &zero, -#endif }, { .procname = "watchdog_cpumask", @@ -911,9 +907,9 @@ static struct ctl_table kern_table[] = { #ifdef CONFIG_SOFTLOCKUP_DETECTOR { .procname = "soft_watchdog", - .data = &soft_watchdog_enabled, - .maxlen = sizeof (int), - .mode = 0644, + .data = &soft_watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = proc_soft_watchdog, .extra1 = &zero, .extra2 = &one, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f5d5202..6bcb854 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,20 +29,29 @@ #include <linux/kvm_para.h> #include <linux/kthread.h> -/* Watchdog configuration */ -static DEFINE_MUTEX(watchdog_proc_mutex); - -int __read_mostly nmi_watchdog_enabled; +static DEFINE_MUTEX(watchdog_mutex); #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) -unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED | - NMI_WATCHDOG_ENABLED; +# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED) +# define NMI_WATCHDOG_DEFAULT 1 #else -unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; +# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED) +# define NMI_WATCHDOG_DEFAULT 0 #endif +unsigned long __read_mostly watchdog_enabled; +int __read_mostly watchdog_user_enabled = 1; +int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT; +int __read_mostly soft_watchdog_user_enabled = 1; +int __read_mostly watchdog_thresh = 10; +int __read_mostly nmi_watchdog_available; + +struct cpumask watchdog_allowed_mask __read_mostly; + +struct cpumask watchdog_cpumask __read_mostly; +unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); + #ifdef CONFIG_HARDLOCKUP_DETECTOR -/* boot commands */ /* * Should we panic when a soft-lockup or hard-lockup occurs: */ @@ -56,9 +65,9 @@ unsigned int __read_mostly hardlockup_panic = * kernel command line parameters are parsed, because otherwise it is not * possible to override this in hardlockup_panic_setup(). */ -void hardlockup_detector_disable(void) +void __init hardlockup_detector_disable(void) { - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; + nmi_watchdog_user_enabled = 0; } static int __init hardlockup_panic_setup(char *str) @@ -68,48 +77,24 @@ static int __init hardlockup_panic_setup(char *str) else if (!strncmp(str, "nopanic", 7)) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; + nmi_watchdog_user_enabled = 0; else if (!strncmp(str, "1", 1)) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; + nmi_watchdog_user_enabled = 1; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); -#endif - -#ifdef CONFIG_SOFTLOCKUP_DETECTOR -int __read_mostly soft_watchdog_enabled; -#endif - -int __read_mostly watchdog_user_enabled; -int __read_mostly watchdog_thresh = 10; - -#ifdef CONFIG_SMP -int __read_mostly sysctl_softlockup_all_cpu_backtrace; +# ifdef CONFIG_SMP int __read_mostly sysctl_hardlockup_all_cpu_backtrace; -#endif -struct cpumask watchdog_cpumask __read_mostly; -unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); -/* - * The 'watchdog_running' variable is set to 1 when the watchdog threads - * are registered/started and is set to 0 when the watchdog threads are - * unregistered/stopped, so it is an indicator whether the threads exist. - */ -static int __read_mostly watchdog_running; -/* - * If a subsystem has a need to deactivate the watchdog temporarily, it - * can use the suspend/resume interface to achieve this. The content of - * the 'watchdog_suspended' variable reflects this state. Existing threads - * are parked/unparked by the lockup_detector_{suspend|resume} functions - * (see comment blocks pertaining to those functions for further details). - * - * 'watchdog_suspended' also prevents threads from being registered/started - * or unregistered/stopped via parameters in /proc/sys/kernel, so the state - * of 'watchdog_running' cannot change while the watchdog is deactivated - * temporarily (see related code in 'proc' handlers). - */ -int __read_mostly watchdog_suspended; +static int __init hardlockup_all_cpu_backtrace_setup(char *str) +{ + sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); + return 1; +} +__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); +# endif /* CONFIG_SMP */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR */ /* * These functions can be overridden if an architecture implements its @@ -121,36 +106,68 @@ int __read_mostly watchdog_suspended; */ int __weak watchdog_nmi_enable(unsigned int cpu) { + hardlockup_detector_perf_enable(); return 0; } + void __weak watchdog_nmi_disable(unsigned int cpu) { + hardlockup_detector_perf_disable(); } -/* - * watchdog_nmi_reconfigure can be implemented to be notified after any - * watchdog configuration change. The arch hardlockup watchdog should - * respond to the following variables: - * - nmi_watchdog_enabled +/* Return 0, if a NMI watchdog is available. Error code otherwise */ +int __weak __init watchdog_nmi_probe(void) +{ + return hardlockup_detector_perf_init(); +} + +/** + * watchdog_nmi_stop - Stop the watchdog for reconfiguration + * + * The reconfiguration steps are: + * watchdog_nmi_stop(); + * update_variables(); + * watchdog_nmi_start(); + */ +void __weak watchdog_nmi_stop(void) { } + +/** + * watchdog_nmi_start - Start the watchdog after reconfiguration + * + * Counterpart to watchdog_nmi_stop(). + * + * The following variables have been updated in update_variables() and + * contain the currently valid configuration: + * - watchdog_enabled * - watchdog_thresh * - watchdog_cpumask - * - sysctl_hardlockup_all_cpu_backtrace - * - hardlockup_panic - * - watchdog_suspended */ -void __weak watchdog_nmi_reconfigure(void) +void __weak watchdog_nmi_start(void) { } + +/** + * lockup_detector_update_enable - Update the sysctl enable bit + * + * Caller needs to make sure that the NMI/perf watchdogs are off, so this + * can't race with watchdog_nmi_disable(). + */ +static void lockup_detector_update_enable(void) { + watchdog_enabled = 0; + if (!watchdog_user_enabled) + return; + if (nmi_watchdog_available && nmi_watchdog_user_enabled) + watchdog_enabled |= NMI_WATCHDOG_ENABLED; + if (soft_watchdog_user_enabled) + watchdog_enabled |= SOFT_WATCHDOG_ENABLED; } - #ifdef CONFIG_SOFTLOCKUP_DETECTOR -/* Helper for online, unparked cpus. */ -#define for_each_watchdog_cpu(cpu) \ - for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) - -atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); +/* Global variables, exported for sysctl */ +unsigned int __read_mostly softlockup_panic = + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; +static bool softlockup_threads_initialized __read_mostly; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -164,50 +181,40 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static unsigned long soft_lockup_nmi_warn; -unsigned int __read_mostly softlockup_panic = - CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; - static int __init softlockup_panic_setup(char *str) { softlockup_panic = simple_strtoul(str, NULL, 0); - return 1; } __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); static int __init nosoftlockup_setup(char *str) { - watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED; + soft_watchdog_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); #ifdef CONFIG_SMP +int __read_mostly sysctl_softlockup_all_cpu_backtrace; + static int __init softlockup_all_cpu_backtrace_setup(char *str) { - sysctl_softlockup_all_cpu_backtrace = - !!simple_strtol(str, NULL, 0); + sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); return 1; } __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static int __init hardlockup_all_cpu_backtrace_setup(char *str) -{ - sysctl_hardlockup_all_cpu_backtrace = - !!simple_strtol(str, NULL, 0); - return 1; -} -__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); -#endif #endif +static void __lockup_detector_cleanup(void); + /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- * lockups can have false positives under extreme conditions. So we generally @@ -278,11 +285,15 @@ void touch_all_softlockup_watchdogs(void) int cpu; /* - * this is done lockless - * do we care if a 0 races with a timestamp? - * all it means is the softlock check starts one cycle later + * watchdog_mutex cannpt be taken here, as this might be called + * from (soft)interrupt context, so the access to + * watchdog_allowed_cpumask might race with a concurrent update. + * + * The watchdog time stamp can race against a concurrent real + * update as well, the only side effect might be a cycle delay for + * the softlockup check. */ - for_each_watchdog_cpu(cpu) + for_each_cpu(cpu, &watchdog_allowed_mask) per_cpu(watchdog_touch_ts, cpu) = 0; wq_watchdog_touch(-1); } @@ -322,9 +333,6 @@ static void watchdog_interrupt_count(void) __this_cpu_inc(hrtimer_interrupts); } -static int watchdog_enable_all_cpus(void); -static void watchdog_disable_all_cpus(void); - /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { @@ -333,7 +341,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; - if (atomic_read(&watchdog_park_in_progress) != 0) + if (!watchdog_enabled) return HRTIMER_NORESTART; /* kick the hardlockup detector */ @@ -447,32 +455,38 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio) static void watchdog_enable(unsigned int cpu) { - struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); + struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); - /* kick off the timer for the hardlockup detector */ + /* + * Start the timer first to prevent the NMI watchdog triggering + * before the timer has a chance to fire. + */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; - - /* Enable the perf event */ - watchdog_nmi_enable(cpu); - - /* done here because hrtimer_start can only pin to smp_processor_id() */ hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL_PINNED); - /* initialize timestamp */ - watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); + /* Initialize timestamp */ __touch_watchdog(); + /* Enable the perf event */ + if (watchdog_enabled & NMI_WATCHDOG_ENABLED) + watchdog_nmi_enable(cpu); + + watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); } static void watchdog_disable(unsigned int cpu) { - struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); + struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); watchdog_set_prio(SCHED_NORMAL, 0); - hrtimer_cancel(hrtimer); - /* disable the perf event */ + /* + * Disable the perf event first. That prevents that a large delay + * between disabling the timer and disabling the perf event causes + * the perf NMI to detect a false positive. + */ watchdog_nmi_disable(cpu); + hrtimer_cancel(hrtimer); } static void watchdog_cleanup(unsigned int cpu, bool online) @@ -499,21 +513,6 @@ static void watchdog(unsigned int cpu) __this_cpu_write(soft_lockup_hrtimer_cnt, __this_cpu_read(hrtimer_interrupts)); __touch_watchdog(); - - /* - * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the - * failure path. Check for failures that can occur asynchronously - - * for example, when CPUs are on-lined - and shut down the hardware - * perf event on each CPU accordingly. - * - * The only non-obvious place this bit can be cleared is through - * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a - * pr_info here would be too noisy as it would result in a message - * every few seconds if the hardlockup was disabled but the softlockup - * enabled. - */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - watchdog_nmi_disable(cpu); } static struct smp_hotplug_thread watchdog_threads = { @@ -527,295 +526,174 @@ static struct smp_hotplug_thread watchdog_threads = { .unpark = watchdog_enable, }; -/* - * park all watchdog threads that are specified in 'watchdog_cpumask' - * - * This function returns an error if kthread_park() of a watchdog thread - * fails. In this situation, the watchdog threads of some CPUs can already - * be parked and the watchdog threads of other CPUs can still be runnable. - * Callers are expected to handle this special condition as appropriate in - * their context. - * - * This function may only be called in a context that is protected against - * races with CPU hotplug - for example, via get_online_cpus(). - */ -static int watchdog_park_threads(void) +static void softlockup_update_smpboot_threads(void) { - int cpu, ret = 0; + lockdep_assert_held(&watchdog_mutex); - atomic_set(&watchdog_park_in_progress, 1); + if (!softlockup_threads_initialized) + return; - for_each_watchdog_cpu(cpu) { - ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); - if (ret) - break; - } - - atomic_set(&watchdog_park_in_progress, 0); - - return ret; + smpboot_update_cpumask_percpu_thread(&watchdog_threads, + &watchdog_allowed_mask); } -/* - * unpark all watchdog threads that are specified in 'watchdog_cpumask' - * - * This function may only be called in a context that is protected against - * races with CPU hotplug - for example, via get_online_cpus(). - */ -static void watchdog_unpark_threads(void) +/* Temporarily park all watchdog threads */ +static void softlockup_park_all_threads(void) { - int cpu; - - for_each_watchdog_cpu(cpu) - kthread_unpark(per_cpu(softlockup_watchdog, cpu)); + cpumask_clear(&watchdog_allowed_mask); + softlockup_update_smpboot_threads(); } -static int update_watchdog_all_cpus(void) +/* Unpark enabled threads */ +static void softlockup_unpark_threads(void) { - int ret; - - ret = watchdog_park_threads(); - if (ret) - return ret; - - watchdog_unpark_threads(); - - return 0; + cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); + softlockup_update_smpboot_threads(); } -static int watchdog_enable_all_cpus(void) +static void lockup_detector_reconfigure(void) { - int err = 0; - - if (!watchdog_running) { - err = smpboot_register_percpu_thread_cpumask(&watchdog_threads, - &watchdog_cpumask); - if (err) - pr_err("Failed to create watchdog threads, disabled\n"); - else - watchdog_running = 1; - } else { - /* - * Enable/disable the lockup detectors or - * change the sample period 'on the fly'. - */ - err = update_watchdog_all_cpus(); - - if (err) { - watchdog_disable_all_cpus(); - pr_err("Failed to update lockup detectors, disabled\n"); - } - } - - if (err) - watchdog_enabled = 0; - - return err; + cpus_read_lock(); + watchdog_nmi_stop(); + softlockup_park_all_threads(); + set_sample_period(); + lockup_detector_update_enable(); + if (watchdog_enabled && watchdog_thresh) + softlockup_unpark_threads(); + watchdog_nmi_start(); + cpus_read_unlock(); + /* + * Must be called outside the cpus locked section to prevent + * recursive locking in the perf code. + */ + __lockup_detector_cleanup(); } -static void watchdog_disable_all_cpus(void) +/* + * Create the watchdog thread infrastructure and configure the detector(s). + * + * The threads are not unparked as watchdog_allowed_mask is empty. When + * the threads are sucessfully initialized, take the proper locks and + * unpark the threads in the watchdog_cpumask if the watchdog is enabled. + */ +static __init void lockup_detector_setup(void) { - if (watchdog_running) { - watchdog_running = 0; - smpboot_unregister_percpu_thread(&watchdog_threads); - } -} + int ret; -#ifdef CONFIG_SYSCTL -static int watchdog_update_cpus(void) -{ - return smpboot_update_cpumask_percpu_thread( - &watchdog_threads, &watchdog_cpumask); -} -#endif + /* + * If sysctl is off and watchdog got disabled on the command line, + * nothing to do here. + */ + lockup_detector_update_enable(); -#else /* SOFTLOCKUP */ -static int watchdog_park_threads(void) -{ - return 0; -} + if (!IS_ENABLED(CONFIG_SYSCTL) && + !(watchdog_enabled && watchdog_thresh)) + return; -static void watchdog_unpark_threads(void) -{ -} + ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads, + &watchdog_allowed_mask); + if (ret) { + pr_err("Failed to initialize soft lockup detector threads\n"); + return; + } -static int watchdog_enable_all_cpus(void) -{ - return 0; + mutex_lock(&watchdog_mutex); + softlockup_threads_initialized = true; + lockup_detector_reconfigure(); + mutex_unlock(&watchdog_mutex); } -static void watchdog_disable_all_cpus(void) +#else /* CONFIG_SOFTLOCKUP_DETECTOR */ +static inline int watchdog_park_threads(void) { return 0; } +static inline void watchdog_unpark_threads(void) { } +static inline int watchdog_enable_all_cpus(void) { return 0; } +static inline void watchdog_disable_all_cpus(void) { } +static void lockup_detector_reconfigure(void) { + cpus_read_lock(); + watchdog_nmi_stop(); + lockup_detector_update_enable(); + watchdog_nmi_start(); + cpus_read_unlock(); } - -#ifdef CONFIG_SYSCTL -static int watchdog_update_cpus(void) +static inline void lockup_detector_setup(void) { - return 0; + lockup_detector_reconfigure(); } -#endif +#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ -static void set_sample_period(void) +static void __lockup_detector_cleanup(void) { + lockdep_assert_held(&watchdog_mutex); + hardlockup_detector_perf_cleanup(); } -#endif /* SOFTLOCKUP */ -/* - * Suspend the hard and soft lockup detector by parking the watchdog threads. +/** + * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes + * + * Caller must not hold the cpu hotplug rwsem. */ -int lockup_detector_suspend(void) +void lockup_detector_cleanup(void) { - int ret = 0; - - get_online_cpus(); - mutex_lock(&watchdog_proc_mutex); - /* - * Multiple suspend requests can be active in parallel (counted by - * the 'watchdog_suspended' variable). If the watchdog threads are - * running, the first caller takes care that they will be parked. - * The state of 'watchdog_running' cannot change while a suspend - * request is active (see related code in 'proc' handlers). - */ - if (watchdog_running && !watchdog_suspended) - ret = watchdog_park_threads(); - - if (ret == 0) - watchdog_suspended++; - else { - watchdog_disable_all_cpus(); - pr_err("Failed to suspend lockup detectors, disabled\n"); - watchdog_enabled = 0; - } - - watchdog_nmi_reconfigure(); - - mutex_unlock(&watchdog_proc_mutex); - - return ret; + mutex_lock(&watchdog_mutex); + __lockup_detector_cleanup(); + mutex_unlock(&watchdog_mutex); } -/* - * Resume the hard and soft lockup detector by unparking the watchdog threads. +/** + * lockup_detector_soft_poweroff - Interface to stop lockup detector(s) + * + * Special interface for parisc. It prevents lockup detector warnings from + * the default pm_poweroff() function which busy loops forever. */ -void lockup_detector_resume(void) +void lockup_detector_soft_poweroff(void) { - mutex_lock(&watchdog_proc_mutex); - - watchdog_suspended--; - /* - * The watchdog threads are unparked if they were previously running - * and if there is no more active suspend request. - */ - if (watchdog_running && !watchdog_suspended) - watchdog_unpark_threads(); - - watchdog_nmi_reconfigure(); - - mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + watchdog_enabled = 0; } #ifdef CONFIG_SYSCTL -/* - * Update the run state of the lockup detectors. - */ -static int proc_watchdog_update(void) +/* Propagate any changes to the watchdog threads */ +static void proc_watchdog_update(void) { - int err = 0; - - /* - * Watchdog threads won't be started if they are already active. - * The 'watchdog_running' variable in watchdog_*_all_cpus() takes - * care of this. If those threads are already active, the sample - * period will be updated and the lockup detectors will be enabled - * or disabled 'on the fly'. - */ - if (watchdog_enabled && watchdog_thresh) - err = watchdog_enable_all_cpus(); - else - watchdog_disable_all_cpus(); - - watchdog_nmi_reconfigure(); - - return err; - + /* Remove impossible cpus to keep sysctl output clean. */ + cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); + lockup_detector_reconfigure(); } /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * - * caller | table->data points to | 'which' contains the flag(s) - * -------------------|-----------------------|----------------------------- - * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed - * | | with SOFT_WATCHDOG_ENABLED - * -------------------|-----------------------|----------------------------- - * proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED - * -------------------|-----------------------|----------------------------- - * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED + * caller | table->data points to | 'which' + * -------------------|----------------------------|-------------------------- + * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED | + * | | SOFT_WATCHDOG_ENABLED + * -------------------|----------------------------|-------------------------- + * proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED + * -------------------|----------------------------|-------------------------- + * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED */ static int proc_watchdog_common(int which, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int err, old, new; - int *watchdog_param = (int *)table->data; + int err, old, *param = table->data; - get_online_cpus(); - mutex_lock(&watchdog_proc_mutex); + mutex_lock(&watchdog_mutex); - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } - - /* - * If the parameter is being read return the state of the corresponding - * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the - * run state of the lockup detectors. - */ if (!write) { - *watchdog_param = (watchdog_enabled & which) != 0; + /* + * On read synchronize the userspace interface. This is a + * racy snapshot. + */ + *param = (watchdog_enabled & which) != 0; err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); } else { + old = READ_ONCE(*param); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (err) - goto out; - - /* - * There is a race window between fetching the current value - * from 'watchdog_enabled' and storing the new value. During - * this race window, watchdog_nmi_enable() can sneak in and - * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'. - * The 'cmpxchg' detects this race and the loop retries. - */ - do { - old = watchdog_enabled; - /* - * If the parameter value is not zero set the - * corresponding bit(s), else clear it(them). - */ - if (*watchdog_param) - new = old | which; - else - new = old & ~which; - } while (cmpxchg(&watchdog_enabled, old, new) != old); - - /* - * Update the run state of the lockup detectors. There is _no_ - * need to check the value returned by proc_watchdog_update() - * and to restore the previous value of 'watchdog_enabled' as - * both lockup detectors are disabled if proc_watchdog_update() - * returns an error. - */ - if (old == new) - goto out; - - err = proc_watchdog_update(); + if (!err && old != READ_ONCE(*param)) + proc_watchdog_update(); } -out: - mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + mutex_unlock(&watchdog_mutex); return err; } @@ -835,6 +713,8 @@ int proc_watchdog(struct ctl_table *table, int write, int proc_nmi_watchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + if (!nmi_watchdog_available && write) + return -ENOTSUPP; return proc_watchdog_common(NMI_WATCHDOG_ENABLED, table, write, buffer, lenp, ppos); } @@ -855,39 +735,17 @@ int proc_soft_watchdog(struct ctl_table *table, int write, int proc_watchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int err, old, new; - - get_online_cpus(); - mutex_lock(&watchdog_proc_mutex); + int err, old; - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } + mutex_lock(&watchdog_mutex); - old = ACCESS_ONCE(watchdog_thresh); + old = READ_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (err || !write) - goto out; - - /* - * Update the sample period. Restore on failure. - */ - new = ACCESS_ONCE(watchdog_thresh); - if (old == new) - goto out; + if (!err && write && old != READ_ONCE(watchdog_thresh)) + proc_watchdog_update(); - set_sample_period(); - err = proc_watchdog_update(); - if (err) { - watchdog_thresh = old; - set_sample_period(); - } -out: - mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + mutex_unlock(&watchdog_mutex); return err; } @@ -902,45 +760,19 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, { int err; - get_online_cpus(); - mutex_lock(&watchdog_proc_mutex); - - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } + mutex_lock(&watchdog_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); - if (!err && write) { - /* Remove impossible cpus to keep sysctl output cleaner. */ - cpumask_and(&watchdog_cpumask, &watchdog_cpumask, - cpu_possible_mask); - - if (watchdog_running) { - /* - * Failure would be due to being unable to allocate - * a temporary cpumask, so we are likely not in a - * position to do much else to make things better. - */ - if (watchdog_update_cpus() != 0) - pr_err("cpumask update failed\n"); - } + if (!err && write) + proc_watchdog_update(); - watchdog_nmi_reconfigure(); - } -out: - mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + mutex_unlock(&watchdog_mutex); return err; } - #endif /* CONFIG_SYSCTL */ void __init lockup_detector_init(void) { - set_sample_period(); - #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_enabled()) { pr_info("Disabling watchdog on nohz_full cores by default\n"); @@ -951,6 +783,7 @@ void __init lockup_detector_init(void) cpumask_copy(&watchdog_cpumask, cpu_possible_mask); #endif - if (watchdog_enabled) - watchdog_enable_all_cpus(); + if (!watchdog_nmi_probe()) + nmi_watchdog_available = true; + lockup_detector_setup(); } diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 3a09ea1..71a62ce 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -21,8 +21,10 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); +static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; +static unsigned int watchdog_cpus; void arch_touch_nmi_watchdog(void) { @@ -103,15 +105,12 @@ static struct perf_event_attr wd_hw_attr = { /* Callback function for perf event subsystem */ static void watchdog_overflow_callback(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) + struct perf_sample_data *data, + struct pt_regs *regs) { /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; - if (atomic_read(&watchdog_park_in_progress) != 0) - return; - if (__this_cpu_read(watchdog_nmi_touch) == true) { __this_cpu_write(watchdog_nmi_touch, false); return; @@ -160,104 +159,131 @@ static void watchdog_overflow_callback(struct perf_event *event, return; } -/* - * People like the simple clean cpu node info on boot. - * Reduce the watchdog noise by only printing messages - * that are different from what cpu0 displayed. - */ -static unsigned long firstcpu_err; -static atomic_t watchdog_cpus; - -int watchdog_nmi_enable(unsigned int cpu) +static int hardlockup_detector_event_create(void) { + unsigned int cpu = smp_processor_id(); struct perf_event_attr *wd_attr; - struct perf_event *event = per_cpu(watchdog_ev, cpu); - int firstcpu = 0; - - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - - /* is it already setup and enabled? */ - if (event && event->state > PERF_EVENT_STATE_OFF) - goto out; - - /* it is setup but not enabled */ - if (event != NULL) - goto out_enable; - - if (atomic_inc_return(&watchdog_cpus) == 1) - firstcpu = 1; + struct perf_event *evt; wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); /* Try to register using hardware perf events */ - event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); + evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL, + watchdog_overflow_callback, NULL); + if (IS_ERR(evt)) { + pr_info("Perf event create on CPU %d failed with %ld\n", cpu, + PTR_ERR(evt)); + return PTR_ERR(evt); + } + this_cpu_write(watchdog_ev, evt); + return 0; +} - /* save the first cpu's error for future comparision */ - if (firstcpu && IS_ERR(event)) - firstcpu_err = PTR_ERR(event); +/** + * hardlockup_detector_perf_enable - Enable the local event + */ +void hardlockup_detector_perf_enable(void) +{ + if (hardlockup_detector_event_create()) + return; - if (!IS_ERR(event)) { - /* only print for the first cpu initialized */ - if (firstcpu || firstcpu_err) - pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n"); - goto out_save; - } + if (!watchdog_cpus++) + pr_info("Enabled. Permanently consumes one hw-PMU counter.\n"); - /* - * Disable the hard lockup detector if _any_ CPU fails to set up - * set up the hardware perf event. The watchdog() function checks - * the NMI_WATCHDOG_ENABLED bit periodically. - * - * The barriers are for syncing up watchdog_enabled across all the - * cpus, as clear_bit() does not use barriers. - */ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled); - smp_mb__after_atomic(); - - /* skip displaying the same error again */ - if (!firstcpu && (PTR_ERR(event) == firstcpu_err)) - return PTR_ERR(event); - - /* vary the KERN level based on the returned errno */ - if (PTR_ERR(event) == -EOPNOTSUPP) - pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); - else if (PTR_ERR(event) == -ENOENT) - pr_warn("disabled (cpu%i): hardware events not enabled\n", - cpu); - else - pr_err("disabled (cpu%i): unable to create perf event: %ld\n", - cpu, PTR_ERR(event)); - - pr_info("Shutting down hard lockup detector on all cpus\n"); - - return PTR_ERR(event); - - /* success path */ -out_save: - per_cpu(watchdog_ev, cpu) = event; -out_enable: - perf_event_enable(per_cpu(watchdog_ev, cpu)); -out: - return 0; + perf_event_enable(this_cpu_read(watchdog_ev)); } -void watchdog_nmi_disable(unsigned int cpu) +/** + * hardlockup_detector_perf_disable - Disable the local event + */ +void hardlockup_detector_perf_disable(void) { - struct perf_event *event = per_cpu(watchdog_ev, cpu); + struct perf_event *event = this_cpu_read(watchdog_ev); if (event) { perf_event_disable(event); + cpumask_set_cpu(smp_processor_id(), &dead_events_mask); + watchdog_cpus--; + } +} + +/** + * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them + * + * Called from lockup_detector_cleanup(). Serialized by the caller. + */ +void hardlockup_detector_perf_cleanup(void) +{ + int cpu; + + for_each_cpu(cpu, &dead_events_mask) { + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + /* + * Required because for_each_cpu() reports unconditionally + * CPU0 as set on UP kernels. Sigh. + */ + if (event) + perf_event_release_kernel(event); per_cpu(watchdog_ev, cpu) = NULL; + } + cpumask_clear(&dead_events_mask); +} + +/** + * hardlockup_detector_perf_stop - Globally stop watchdog events + * + * Special interface for x86 to handle the perf HT bug. + */ +void __init hardlockup_detector_perf_stop(void) +{ + int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) { + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (event) + perf_event_disable(event); + } +} - /* should be in cleanup, but blocks oprofile */ - perf_event_release_kernel(event); +/** + * hardlockup_detector_perf_restart - Globally restart watchdog events + * + * Special interface for x86 to handle the perf HT bug. + */ +void __init hardlockup_detector_perf_restart(void) +{ + int cpu; + + lockdep_assert_cpus_held(); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return; + + for_each_online_cpu(cpu) { + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (event) + perf_event_enable(event); + } +} + +/** + * hardlockup_detector_perf_init - Probe whether NMI event is available at all + */ +int __init hardlockup_detector_perf_init(void) +{ + int ret = hardlockup_detector_event_create(); - /* watchdog_nmi_enable() expects this to be zero initially. */ - if (atomic_dec_and_test(&watchdog_cpus)) - firstcpu_err = 0; + if (ret) { + pr_info("Perf NMI watchdog permanently disabled\n"); + } else { + perf_event_release_kernel(this_cpu_read(watchdog_ev)); + this_cpu_write(watchdog_ev, NULL); } + return ret; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2689b7c..dfdad67 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1092,8 +1092,8 @@ config PROVE_LOCKING select DEBUG_MUTEXES select DEBUG_RT_MUTEXES if RT_MUTEXES select DEBUG_LOCK_ALLOC - select LOCKDEP_CROSSRELEASE - select LOCKDEP_COMPLETIONS + select LOCKDEP_CROSSRELEASE if BROKEN + select LOCKDEP_COMPLETIONS if BROKEN select TRACE_IRQFLAGS default n help @@ -1590,6 +1590,54 @@ config LATENCYTOP source kernel/trace/Kconfig +config PROVIDE_OHCI1394_DMA_INIT + bool "Remote debugging over FireWire early on boot" + depends on PCI && X86 + help + If you want to debug problems which hang or crash the kernel early + on boot and the crashing machine has a FireWire port, you can use + this feature to remotely access the memory of the crashed machine + over FireWire. This employs remote DMA as part of the OHCI1394 + specification which is now the standard for FireWire controllers. + + With remote DMA, you can monitor the printk buffer remotely using + firescope and access all memory below 4GB using fireproxy from gdb. + Even controlling a kernel debugger is possible using remote DMA. + + Usage: + + If ohci1394_dma=early is used as boot parameter, it will initialize + all OHCI1394 controllers which are found in the PCI config space. + + As all changes to the FireWire bus such as enabling and disabling + devices cause a bus reset and thereby disable remote DMA for all + devices, be sure to have the cable plugged and FireWire enabled on + the debugging host before booting the debug target for debugging. + + This code (~1k) is freed after boot. By then, the firewire stack + in charge of the OHCI-1394 controllers should be used instead. + + See Documentation/debugging-via-ohci1394.txt for more information. + +config DMA_API_DEBUG + bool "Enable debugging of DMA-API usage" + depends on HAVE_DMA_API_DEBUG + help + Enable this option to debug the use of the DMA API by device drivers. + With this option you will be able to detect common bugs in device + drivers like double-freeing of DMA mappings or freeing mappings that + were never allocated. + + This also attempts to catch cases where a page owned by DMA is + accessed by the cpu in a way that could cause data corruption. For + example, this enables cow_user_page() to check that the source page is + not undergoing DMA. + + This option causes a performance degradation. Use only if you want to + debug device drivers and dma interactions. + + If unsure, say N. + menu "Runtime Testing" config LKDTM @@ -1749,56 +1797,6 @@ config TEST_PARMAN If unsure, say N. -endmenu # runtime tests - -config PROVIDE_OHCI1394_DMA_INIT - bool "Remote debugging over FireWire early on boot" - depends on PCI && X86 - help - If you want to debug problems which hang or crash the kernel early - on boot and the crashing machine has a FireWire port, you can use - this feature to remotely access the memory of the crashed machine - over FireWire. This employs remote DMA as part of the OHCI1394 - specification which is now the standard for FireWire controllers. - - With remote DMA, you can monitor the printk buffer remotely using - firescope and access all memory below 4GB using fireproxy from gdb. - Even controlling a kernel debugger is possible using remote DMA. - - Usage: - - If ohci1394_dma=early is used as boot parameter, it will initialize - all OHCI1394 controllers which are found in the PCI config space. - - As all changes to the FireWire bus such as enabling and disabling - devices cause a bus reset and thereby disable remote DMA for all - devices, be sure to have the cable plugged and FireWire enabled on - the debugging host before booting the debug target for debugging. - - This code (~1k) is freed after boot. By then, the firewire stack - in charge of the OHCI-1394 controllers should be used instead. - - See Documentation/debugging-via-ohci1394.txt for more information. - -config DMA_API_DEBUG - bool "Enable debugging of DMA-API usage" - depends on HAVE_DMA_API_DEBUG - help - Enable this option to debug the use of the DMA API by device drivers. - With this option you will be able to detect common bugs in device - drivers like double-freeing of DMA mappings or freeing mappings that - were never allocated. - - This also attempts to catch cases where a page owned by DMA is - accessed by the cpu in a way that could cause data corruption. For - example, this enables cow_user_page() to check that the source page is - not undergoing DMA. - - This option causes a performance degradation. Use only if you want to - debug device drivers and dma interactions. - - If unsure, say N. - config TEST_LKM tristate "Test module loading with 'hello world' module" default n @@ -1873,18 +1871,6 @@ config TEST_UDELAY If unsure, say N. -config MEMTEST - bool "Memtest" - depends on HAVE_MEMBLOCK - ---help--- - This option adds a kernel parameter 'memtest', which allows memtest - to be set. - memtest=0, mean disabled; -- default - memtest=1, mean do 1 test pattern; - ... - memtest=17, mean do 17 test patterns. - If you are unsure how to answer this question, answer N. - config TEST_STATIC_KEYS tristate "Test static keys" default n @@ -1894,16 +1880,6 @@ config TEST_STATIC_KEYS If unsure, say N. -config BUG_ON_DATA_CORRUPTION - bool "Trigger a BUG when data corruption is detected" - select DEBUG_LIST - help - Select this option if the kernel should BUG when it encounters - data corruption in kernel memory structures when they get checked - for validity. - - If unsure, say N. - config TEST_KMOD tristate "kmod stress tester" default n @@ -1941,6 +1917,29 @@ config TEST_DEBUG_VIRTUAL If unsure, say N. +endmenu # runtime tests + +config MEMTEST + bool "Memtest" + depends on HAVE_MEMBLOCK + ---help--- + This option adds a kernel parameter 'memtest', which allows memtest + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=17, mean do 17 test patterns. + If you are unsure how to answer this question, answer N. + +config BUG_ON_DATA_CORRUPTION + bool "Trigger a BUG when data corruption is detected" + select DEBUG_LIST + help + Select this option if the kernel should BUG when it encounters + data corruption in kernel memory structures when they get checked + for validity. + + If unsure, say N. source "samples/Kconfig" diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index cd0b5c9..2b827b8 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -2031,11 +2031,13 @@ void locking_selftest(void) print_testname("mixed read-lock/lock-write ABBA"); pr_cont(" |"); dotest(rlock_ABBA1, FAILURE, LOCKTYPE_RWLOCK); +#ifdef CONFIG_PROVE_LOCKING /* * Lockdep does indeed fail here, but there's nothing we can do about * that now. Don't kill lockdep for it. */ unexpected_testcase_failures--; +#endif pr_cont(" |"); dotest(rwsem_ABBA1, FAILURE, LOCKTYPE_RWSEM); @@ -460,7 +460,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, trace_cma_alloc(pfn, page, count, align); - if (ret) { + if (ret && !(gfp_mask & __GFP_NOWARN)) { pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n", __func__, count, ret); cma_debug_show_areas(cma); diff --git a/mm/madvise.c b/mm/madvise.c index 25bade3..fd70d6a 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -757,6 +757,9 @@ madvise_behavior_valid(int behavior) * MADV_DONTFORK - omit this area from child's address space when forking: * typically, to avoid COWing pages pinned by get_user_pages(). * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. + * MADV_WIPEONFORK - present the child process with zero-filled memory in this + * range after a fork. + * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK * MADV_HWPOISON - trigger memory error handler as if the given memory range * were corrupted by unrecoverable hardware memory failure. * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory. @@ -777,7 +780,9 @@ madvise_behavior_valid(int behavior) * zero - success * -EINVAL - start + len < 0, start is not page-aligned, * "behavior" is not a valid value, or application - * is attempting to release locked or shared pages. + * is attempting to release locked or shared pages, + * or the specified address range includes file, Huge TLB, + * MAP_SHARED or VMPFNMAP range. * -ENOMEM - addresses in the specified range are not currently * mapped, or are outside the AS of the process. * -EIO - an I/O error occurred while paging in data. diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 006ba62..a2af6d5 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1920,8 +1920,11 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, struct page *page; page = __alloc_pages(gfp, order, nid); - if (page && page_to_nid(page) == nid) - inc_zone_page_state(page, NUMA_INTERLEAVE_HIT); + if (page && page_to_nid(page) == nid) { + preempt_disable(); + __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT); + preempt_enable(); + } return page; } diff --git a/mm/migrate.c b/mm/migrate.c index 6954c14..e00814c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2146,8 +2146,9 @@ static int migrate_vma_collect_hole(unsigned long start, unsigned long addr; for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE; + migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE; migrate->dst[migrate->npages] = 0; + migrate->npages++; migrate->cpages++; } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 6a03946..53afbb9 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -6,17 +6,6 @@ #include "internal.h" -static inline bool check_pmd(struct page_vma_mapped_walk *pvmw) -{ - pmd_t pmde; - /* - * Make sure we don't re-load pmd between present and !trans_huge check. - * We need a consistent view. - */ - pmde = READ_ONCE(*pvmw->pmd); - return pmd_present(pmde) && !pmd_trans_huge(pmde); -} - static inline bool not_found(struct page_vma_mapped_walk *pvmw) { page_vma_mapped_walk_done(pvmw); @@ -116,6 +105,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) pgd_t *pgd; p4d_t *p4d; pud_t *pud; + pmd_t pmde; /* The only possible pmd mapping has been handled on last iteration */ if (pvmw->pmd && !pvmw->pte) @@ -148,7 +138,13 @@ restart: if (!pud_present(*pud)) return false; pvmw->pmd = pmd_offset(pud, pvmw->address); - if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) { + /* + * Make sure the pmd value isn't cached in a register by the + * compiler and used as a stale value after we've observed a + * subsequent update. + */ + pmde = READ_ONCE(*pvmw->pmd); + if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); if (likely(pmd_trans_huge(*pvmw->pmd))) { if (pvmw->flags & PVMW_MIGRATION) @@ -167,17 +163,15 @@ restart: return not_found(pvmw); return true; } - } else - WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!"); + } return not_found(pvmw); } else { /* THP pmd was split under us: handle on pte level */ spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } - } else { - if (!check_pmd(pvmw)) - return false; + } else if (!pmd_present(pmde)) { + return false; } if (!map_pte(pvmw)) goto next_pte; diff --git a/mm/swap_state.c b/mm/swap_state.c index ed91091..05b6803 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES]; static unsigned int nr_swapper_spaces[MAX_SWAPFILES]; bool swap_vma_readahead = true; -#define SWAP_RA_MAX_ORDER_DEFAULT 3 - -static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT; - #define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) #define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) #define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK @@ -664,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, pte_t *tpte; #endif + max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), + SWAP_RA_ORDER_CEILING); + if (max_win == 1) { + swap_ra->win = 1; + return NULL; + } + faddr = vmf->address; entry = pte_to_swp_entry(vmf->orig_pte); if ((unlikely(non_swap_entry(entry)))) @@ -672,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, if (page) return page; - max_win = 1 << READ_ONCE(swap_ra_max_order); - if (max_win == 1) { - swap_ra->win = 1; - return NULL; - } - fpfn = PFN_DOWN(faddr); swap_ra_info = GET_SWAP_RA_VAL(vma); pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info)); @@ -786,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr = __ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show, vma_ra_enabled_store); -static ssize_t vma_ra_max_order_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", swap_ra_max_order); -} -static ssize_t vma_ra_max_order_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - int err, v; - - err = kstrtoint(buf, 10, &v); - if (err || v > SWAP_RA_ORDER_CEILING || v <= 0) - return -EINVAL; - - swap_ra_max_order = v; - - return count; -} -static struct kobj_attribute vma_ra_max_order_attr = - __ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show, - vma_ra_max_order_store); - static struct attribute *swap_attrs[] = { &vma_ra_enabled_attr.attr, - &vma_ra_max_order_attr.attr, NULL, }; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8a43db6..6739420 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1695,11 +1695,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, for (i = 0; i < area->nr_pages; i++) { struct page *page; - if (fatal_signal_pending(current)) { - area->nr_pages = i; - goto fail_no_warn; - } - if (node == NUMA_NO_NODE) page = alloc_page(alloc_mask|highmem_mask); else @@ -1723,7 +1718,6 @@ fail: warn_alloc(gfp_mask, NULL, "vmalloc: allocation failure, allocated %ld of %ld bytes", (area->nr_pages*PAGE_SIZE), area->size); -fail_no_warn: vfree(area->addr); return NULL; } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 2585b10..276b602 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -65,8 +65,8 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { - net->xt.broute_table = ebt_register_table(net, &broute_table, NULL); - return PTR_ERR_OR_ZERO(net->xt.broute_table); + return ebt_register_table(net, &broute_table, NULL, + &net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 45a00db..c41da5f 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_filter[] = { static int __net_init frame_filter_net_init(struct net *net) { - net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter); - return PTR_ERR_OR_ZERO(net->xt.frame_filter); + return ebt_register_table(net, &frame_filter, ebt_ops_filter, + &net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 57cd5bb..08df740 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_nat[] = { static int __net_init frame_nat_net_init(struct net *net) { - net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat); - return PTR_ERR_OR_ZERO(net->xt.frame_nat); + return ebt_register_table(net, &frame_nat, ebt_ops_nat, + &net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 83951f9..3b3dcf7 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1169,9 +1169,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table) kfree(table); } -struct ebt_table * -ebt_register_table(struct net *net, const struct ebt_table *input_table, - const struct nf_hook_ops *ops) +int ebt_register_table(struct net *net, const struct ebt_table *input_table, + const struct nf_hook_ops *ops, struct ebt_table **res) { struct ebt_table_info *newinfo; struct ebt_table *t, *table; @@ -1183,7 +1182,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table, repl->entries == NULL || repl->entries_size == 0 || repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } /* Don't add one table to multiple lists. */ @@ -1252,16 +1251,18 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table, list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); + WRITE_ONCE(*res, table); + if (!ops) - return table; + return 0; ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret) { __ebt_unregister_table(net, table); - return ERR_PTR(ret); + *res = NULL; } - return table; + return ret; free_unlock: mutex_unlock(&ebt_mutex); free_chainstack: @@ -1276,7 +1277,7 @@ free_newinfo: free_table: kfree(table); out: - return ERR_PTR(ret); + return ret; } void ebt_unregister_table(struct net *net, struct ebt_table *table, diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 416bb30..1859c47 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -86,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 811689e..f75fc6b 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv, if (synproxy == NULL) return NF_ACCEPT; - if (nf_is_loopback_packet(skb)) + if (nf_is_loopback_packet(skb) || + ip_hdr(skb)->protocol != IPPROTO_TCP) return NF_ACCEPT; thoff = ip_hdrlen(skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ac6fde5..3d9f1c2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2513,7 +2513,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5676237..e45177c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2240,20 +2240,16 @@ int udp_v4_early_demux(struct sk_buff *skb) iph = ip_hdr(skb); uh = udp_hdr(skb); - if (skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) { + if (skb->pkt_type == PACKET_MULTICAST) { in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) return 0; - /* we are supposed to accept bcast packets */ - if (skb->pkt_type == PACKET_MULTICAST) { - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return 0; - } + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return 0; sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 97658bf..e360d55 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -120,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 96861c7..4a96ebb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3820,8 +3820,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) goto out; if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - dev_net(dev)->ipv6.devconf_all->accept_dad < 1 || - idev->cnf.accept_dad < 1 || + (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 && + idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bump_id = ifp->flags & IFA_F_TENTATIVE; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index cdb3728..4a87f94 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (gso_partial) + if (gso_partial && skb_is_gso(skb)) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index a5cd43d..437af8c 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -353,7 +353,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, nexthdr = ipv6_hdr(skb)->nexthdr; thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if (thoff < 0) + if (thoff < 0 || nexthdr != IPPROTO_TCP) return NF_ACCEPT; th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 26cc9f4..a96d5b3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1325,7 +1325,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct dst_entry *new = NULL; rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, - DST_OBSOLETE_NONE, 0); + DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e495b5e..cf84f7b 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1191,14 +1191,17 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; - if (from->ref_netlink || to->ref_netlink) + write_lock_bh(&ip_set_ref_lock); + + if (from->ref_netlink || to->ref_netlink) { + write_unlock_bh(&ip_set_ref_lock); return -EBUSY; + } strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); ip_set(inst, from_id) = to; ip_set(inst, to_id) = from; @@ -2072,25 +2075,28 @@ static struct pernet_operations ip_set_net_ops = { static int __init ip_set_init(void) { - int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + int ret = register_pernet_subsys(&ip_set_net_ops); + + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + return ret; + } + ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); + unregister_pernet_subsys(&ip_set_net_ops); return ret; } + ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + unregister_pernet_subsys(&ip_set_net_ops); return ret; } - ret = register_pernet_subsys(&ip_set_net_ops); - if (ret) { - pr_err("ip_set: cannot register pernet_subsys.\n"); - nf_unregister_sockopt(&so_set); - nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - return ret; - } + pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); return 0; } @@ -2098,9 +2104,10 @@ ip_set_init(void) static void __exit ip_set_fini(void) { - unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + + unregister_pernet_subsys(&ip_set_net_ops); pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 20bfbd3..613eb21 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -123,13 +123,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ip &= ip_set_hostmask(h->netmask); + e.ip = htonl(ip); + if (e.ip == 0) + return -IPSET_ERR_HASH_ELEM; - if (adt == IPSET_TEST) { - e.ip = htonl(ip); - if (e.ip == 0) - return -IPSET_ERR_HASH_ELEM; + if (adt == IPSET_TEST) return adtfn(set, &e, &ext, &ext, flags); - } ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { @@ -148,17 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); - if (retried) + if (retried) { ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip += hosts) { e.ip = htonl(ip); - if (e.ip == 0) - return -IPSET_ERR_HASH_ELEM; + } + for (; ip <= ip_to;) { ret = adtfn(set, &e, &ext, &ext, flags); - if (ret && !ip_set_eexist(ret, flags)) return ret; + ip += hosts; + e.ip = htonl(ip); + if (e.ip == 0) + return 0; + ret = 0; } return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index b64cf14..f3ba834 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -149,7 +149,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index f438740..ddb8039 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -178,7 +178,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6215fb8..a7f4d7a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -185,7 +185,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5ab1b99..a2f19b9 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -271,7 +271,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { e.ip = htonl(ip); p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; @@ -281,7 +281,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip == ntohl(h->next.ip) && p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip2 = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &cidr); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 5d9e8954..1c67a17 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -193,7 +193,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 44cf119..d417074 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -255,7 +255,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index db614e1..7f9ae2e 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -250,13 +250,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip[0]); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip[0] = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); ip2 = (retried && ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip[1] = htonl(ip2); last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 54b64b6..e6ef382 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -241,7 +241,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index aff8469..8602f25 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -291,7 +291,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip[0]); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip[0] = htonl(ip); ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) @@ -301,7 +301,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip2 = (retried && ip == ntohl(h->next.ip[0]) && p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip[1] = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 90d3968..4527921 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -921,6 +921,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, { struct sk_buff *new_skb = NULL; struct iphdr *old_iph = NULL; + __u8 old_dsfield; #ifdef CONFIG_IP_VS_IPV6 struct ipv6hdr *old_ipv6h = NULL; #endif @@ -945,7 +946,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, *payload_len = ntohs(old_ipv6h->payload_len) + sizeof(*old_ipv6h); - *dsfield = ipv6_get_dsfield(old_ipv6h); + old_dsfield = ipv6_get_dsfield(old_ipv6h); *ttl = old_ipv6h->hop_limit; if (df) *df = 0; @@ -960,12 +961,15 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, /* fix old IP header checksum */ ip_send_check(old_iph); - *dsfield = ipv4_get_dsfield(old_iph); + old_dsfield = ipv4_get_dsfield(old_iph); *ttl = old_iph->ttl; if (payload_len) *payload_len = ntohs(old_iph->tot_len); } + /* Implement full-functionality option for ECN encapsulation */ + *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield); + return skb; error: kfree_skb(skb); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9299271..64e1ee0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1048,7 +1048,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; - if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) + if (basechain->stats && nft_dump_stats(skb, basechain->stats)) goto nla_put_failure; } @@ -1487,8 +1487,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); - if (IS_ERR(chain2)) - return PTR_ERR(chain2); + if (!IS_ERR(chain2)) + return -EEXIST; } if (nla[NFTA_CHAIN_COUNTERS]) { @@ -2741,8 +2741,10 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { if (!nft_is_active_next(ctx->net, i)) continue; - if (!strcmp(set->name, i->name)) + if (!strcmp(set->name, i->name)) { + kfree(set->name); return -ENFILE; + } } return 0; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c83a3b5..d8571f4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) return ERR_PTR(-EFAULT); - strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1); info->num_counters = compat_tmp.num_counters; user += sizeof(compat_tmp); } else @@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (copy_from_user(info, user, sizeof(*info)) != 0) return ERR_PTR(-EFAULT); - info->name[sizeof(info->name) - 1] = '\0'; user += sizeof(*info); } + info->name[sizeof(info->name) - 1] = '\0'; size = sizeof(struct xt_counters); size *= info->num_counters; diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 38986a9..2912393 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -8,6 +8,7 @@ */ #include <linux/module.h> +#include <linux/syscalls.h> #include <linux/skbuff.h> #include <linux/filter.h> #include <linux/bpf.h> @@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) return 0; } +static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) +{ + mm_segment_t oldfs = get_fs(); + int retval, fd; + + set_fs(KERNEL_DS); + fd = bpf_obj_get_user(path); + set_fs(oldfs); + if (fd < 0) + return fd; + + retval = __bpf_mt_check_fd(fd, ret); + sys_close(fd); + return retval; +} + static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; @@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par) return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); - else if (info->mode == XT_BPF_MODE_FD_PINNED || - info->mode == XT_BPF_MODE_FD_ELF) + else if (info->mode == XT_BPF_MODE_FD_ELF) return __bpf_mt_check_fd(info->fd, &info->filter); + else if (info->mode == XT_BPF_MODE_PATH_PINNED) + return __bpf_mt_check_path(info->path, &info->filter); else return -EINVAL; } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index e75ef39..575d215 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -76,7 +76,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && - transparent) + transparent && sk_fullsock(sk)) pskb->mark = sk->sk_mark; if (sk != skb->sk) @@ -133,7 +133,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && - transparent) + transparent && sk_fullsock(sk)) pskb->mark = sk->sk_mark; if (sk != skb->sk) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 94c11cf..f347506 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2266,16 +2266,17 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->min_dump_alloc = control->min_dump_alloc; cb->skb = skb; + if (cb->start) { + ret = cb->start(cb); + if (ret) + goto error_unlock; + } + nlk->cb_running = true; mutex_unlock(nlk->cb_mutex); - ret = 0; - if (cb->start) - ret = cb->start(cb); - - if (!ret) - ret = netlink_dump(sk); + ret = netlink_dump(sk); sock_put(sk); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9b5de31..c1841f2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2203,7 +2203,7 @@ static void xs_udp_setup_socket(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; + struct socket *sock; int status = -EIO; sock = xs_create_sock(xprt, transport, diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 7d99029..a140dd4 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -233,7 +233,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, struct sk_buff_head xmitq; int rc = 0; - __skb_queue_head_init(&xmitq); + skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); @@ -263,7 +263,7 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, u32 dst, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); - __skb_queue_head_init(&_pkts); + skb_queue_head_init(&_pkts); list_for_each_entry_safe(n, tmp, &dests->list, list) { dst = n->value; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 121e59a..17146c1 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -568,6 +568,14 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg_set_destnode(msg, dnode); msg_set_destport(msg, dport); *err = TIPC_OK; + + if (!skb_cloned(skb)) + return true; + + /* Unclone buffer in case it was bundled */ + if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) + return false; + return true; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6908742..d396cb6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -549,6 +549,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, }; +/* policy for packet pattern attributes */ +static const struct nla_policy +nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = { + [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -10532,7 +10540,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) u8 *mask_pat; nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - NULL, info->extack); + nl80211_packet_pattern_policy, + info->extack); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10781,7 +10790,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + nl80211_packet_pattern_policy, NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index acf0010..30e5746 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -91,6 +91,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { + xso->dev = NULL; dev_put(dev); return 0; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 2515cd2..8ac9d32 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -429,7 +429,8 @@ resume: nf_reset(skb); if (decaps) { - skb->sp->olen = 0; + if (skb->sp) + skb->sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return 0; @@ -440,7 +441,8 @@ resume: err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); if (xfrm_gro) { - skb->sp->olen = 0; + if (skb->sp) + skb->sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return err; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0dab1cd..1221347 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -732,12 +732,12 @@ restart: } } } +out: + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (cnt) { err = 0; xfrm_policy_cache_flush(); } -out: - spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_flush); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2bfbd91..b997f13 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -657,6 +657,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) { x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); goto out; } diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index bc7fcf0..446beb7 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -78,29 +78,37 @@ static int simple_thread_fn(void *arg) } static DEFINE_MUTEX(thread_mutex); +static bool simple_thread_cnt; int foo_bar_reg(void) { + mutex_lock(&thread_mutex); + if (simple_thread_cnt++) + goto out; + pr_info("Starting thread for foo_bar_fn\n"); /* * We shouldn't be able to start a trace when the module is * unloading (there's other locks to prevent that). But * for consistency sake, we still take the thread_mutex. */ - mutex_lock(&thread_mutex); simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn"); + out: mutex_unlock(&thread_mutex); return 0; } void foo_bar_unreg(void) { - pr_info("Killing thread for foo_bar_fn\n"); - /* protect against module unloading */ mutex_lock(&thread_mutex); + if (--simple_thread_cnt) + goto out; + + pr_info("Killing thread for foo_bar_fn\n"); if (simple_tsk_fn) kthread_stop(simple_tsk_fn); simple_tsk_fn = NULL; + out: mutex_unlock(&thread_mutex); } diff --git a/scripts/faddr2line b/scripts/faddr2line index 29df825..2f6ce80 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -103,11 +103,12 @@ __faddr2line() { # Go through each of the object's symbols which match the func name. # In rare cases there might be duplicates. + file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}') while read symbol; do local fields=($symbol) local sym_base=0x${fields[0]} local sym_type=${fields[1]} - local sym_end=0x${fields[3]} + local sym_end=${fields[3]} # calculate the size local sym_size=$(($sym_end - $sym_base)) @@ -157,7 +158,7 @@ __faddr2line() { addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 - done < <(nm -n $objfile | awk -v fn=$func '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, $1 }') + done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') } [[ $# -lt 2 ]] && usage diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 5d55441..9ee9bf7 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -158,7 +158,7 @@ static int read_symbol(FILE *in, struct sym_entry *s) else if (str[0] == '$') return -1; /* exclude debugging symbols */ - else if (stype == 'N') + else if (stype == 'N' || stype == 'n') return -1; /* include the type field in the symbol name, so that it gets diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index fec1dfd..4490a69 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -948,14 +948,13 @@ static const struct file_operations snd_compr_file_ops = { static int snd_compress_dev_register(struct snd_device *device) { int ret = -EINVAL; - char str[16]; struct snd_compr *compr; if (snd_BUG_ON(!device || !device->device_data)) return -EBADFD; compr = device->device_data; - pr_debug("reg %s for device %s, direction %d\n", str, compr->name, + pr_debug("reg device %s, direction %d\n", compr->name, compr->direction); /* register compressed device */ ret = snd_register_device(SNDRV_DEVICE_TYPE_COMPRESS, diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 3a1cc7b..b719d0b 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -547,6 +547,7 @@ struct snd_pcm_mmap_status_x32 { u32 pad2; /* alignment */ struct timespec tstamp; s32 suspended_state; + s32 pad3; struct timespec audio_tstamp; } __packed; diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index ea2d0ae..6c9cba2 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1259,6 +1259,7 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) struct snd_seq_port_info *info = arg; struct snd_seq_client_port *port; struct snd_seq_port_callback *callback; + int port_idx; /* it is not allowed to create the port for an another client */ if (info->addr.client != client->number) @@ -1269,7 +1270,9 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) return -ENOMEM; if (client->type == USER_CLIENT && info->kernel) { - snd_seq_delete_port(client, port->addr.port); + port_idx = port->addr.port; + snd_seq_port_unlock(port); + snd_seq_delete_port(client, port_idx); return -EINVAL; } if (client->type == KERNEL_CLIENT) { @@ -1290,6 +1293,7 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) snd_seq_set_port_info(port, info); snd_seq_system_client_ev_port_start(port->addr.client, port->addr.port); + snd_seq_port_unlock(port); return 0; } diff --git a/sound/core/seq/seq_lock.c b/sound/core/seq/seq_lock.c index 0ff7926..cda64b4 100644 --- a/sound/core/seq/seq_lock.c +++ b/sound/core/seq/seq_lock.c @@ -23,8 +23,6 @@ #include <sound/core.h> #include "seq_lock.h" -#if defined(CONFIG_SMP) || defined(CONFIG_SND_DEBUG) - /* wait until all locks are released */ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) { @@ -41,5 +39,3 @@ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) } } EXPORT_SYMBOL(snd_use_lock_sync_helper); - -#endif diff --git a/sound/core/seq/seq_lock.h b/sound/core/seq/seq_lock.h index 54044bc..ac38031 100644 --- a/sound/core/seq/seq_lock.h +++ b/sound/core/seq/seq_lock.h @@ -3,8 +3,6 @@ #include <linux/sched.h> -#if defined(CONFIG_SMP) || defined(CONFIG_SND_DEBUG) - typedef atomic_t snd_use_lock_t; /* initialize lock */ @@ -20,14 +18,4 @@ typedef atomic_t snd_use_lock_t; void snd_use_lock_sync_helper(snd_use_lock_t *lock, const char *file, int line); #define snd_use_lock_sync(lockp) snd_use_lock_sync_helper(lockp, __BASE_FILE__, __LINE__) -#else /* SMP || CONFIG_SND_DEBUG */ - -typedef spinlock_t snd_use_lock_t; /* dummy */ -#define snd_use_lock_init(lockp) /**/ -#define snd_use_lock_use(lockp) /**/ -#define snd_use_lock_free(lockp) /**/ -#define snd_use_lock_sync(lockp) /**/ - -#endif /* SMP || CONFIG_SND_DEBUG */ - #endif /* __SND_SEQ_LOCK_H */ diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index 0a7020c..d21ece9 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -122,7 +122,9 @@ static void port_subs_info_init(struct snd_seq_port_subs_info *grp) } -/* create a port, port number is returned (-1 on failure) */ +/* create a port, port number is returned (-1 on failure); + * the caller needs to unref the port via snd_seq_port_unlock() appropriately + */ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, int port) { @@ -151,6 +153,7 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, snd_use_lock_init(&new_port->use_lock); port_subs_info_init(&new_port->c_src); port_subs_info_init(&new_port->c_dest); + snd_use_lock_use(&new_port->use_lock); num = port >= 0 ? port : 0; mutex_lock(&client->ports_mutex); @@ -165,9 +168,9 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, list_add_tail(&new_port->list, &p->list); client->num_ports++; new_port->addr.port = num; /* store the port number in the port */ + sprintf(new_port->name, "port-%d", num); write_unlock_irqrestore(&client->ports_lock, flags); mutex_unlock(&client->ports_mutex); - sprintf(new_port->name, "port-%d", num); return new_port; } diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 8d93a40..f48a4cd 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -77,13 +77,17 @@ static void snd_virmidi_init_event(struct snd_virmidi *vmidi, * decode input event and put to read buffer of each opened file */ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, - struct snd_seq_event *ev) + struct snd_seq_event *ev, + bool atomic) { struct snd_virmidi *vmidi; unsigned char msg[4]; int len; - read_lock(&rdev->filelist_lock); + if (atomic) + read_lock(&rdev->filelist_lock); + else + down_read(&rdev->filelist_sem); list_for_each_entry(vmidi, &rdev->filelist, list) { if (!vmidi->trigger) continue; @@ -97,7 +101,10 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, snd_rawmidi_receive(vmidi->substream, msg, len); } } - read_unlock(&rdev->filelist_lock); + if (atomic) + read_unlock(&rdev->filelist_lock); + else + up_read(&rdev->filelist_sem); return 0; } @@ -115,7 +122,7 @@ int snd_virmidi_receive(struct snd_rawmidi *rmidi, struct snd_seq_event *ev) struct snd_virmidi_dev *rdev; rdev = rmidi->private_data; - return snd_virmidi_dev_receive_event(rdev, ev); + return snd_virmidi_dev_receive_event(rdev, ev, true); } #endif /* 0 */ @@ -130,7 +137,7 @@ static int snd_virmidi_event_input(struct snd_seq_event *ev, int direct, rdev = private_data; if (!(rdev->flags & SNDRV_VIRMIDI_USE)) return 0; /* ignored */ - return snd_virmidi_dev_receive_event(rdev, ev); + return snd_virmidi_dev_receive_event(rdev, ev, atomic); } /* @@ -209,7 +216,6 @@ static int snd_virmidi_input_open(struct snd_rawmidi_substream *substream) struct snd_virmidi_dev *rdev = substream->rmidi->private_data; struct snd_rawmidi_runtime *runtime = substream->runtime; struct snd_virmidi *vmidi; - unsigned long flags; vmidi = kzalloc(sizeof(*vmidi), GFP_KERNEL); if (vmidi == NULL) @@ -223,9 +229,11 @@ static int snd_virmidi_input_open(struct snd_rawmidi_substream *substream) vmidi->client = rdev->client; vmidi->port = rdev->port; runtime->private_data = vmidi; - write_lock_irqsave(&rdev->filelist_lock, flags); + down_write(&rdev->filelist_sem); + write_lock_irq(&rdev->filelist_lock); list_add_tail(&vmidi->list, &rdev->filelist); - write_unlock_irqrestore(&rdev->filelist_lock, flags); + write_unlock_irq(&rdev->filelist_lock); + up_write(&rdev->filelist_sem); vmidi->rdev = rdev; return 0; } @@ -264,9 +272,11 @@ static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream) struct snd_virmidi_dev *rdev = substream->rmidi->private_data; struct snd_virmidi *vmidi = substream->runtime->private_data; + down_write(&rdev->filelist_sem); write_lock_irq(&rdev->filelist_lock); list_del(&vmidi->list); write_unlock_irq(&rdev->filelist_lock); + up_write(&rdev->filelist_sem); snd_midi_event_free(vmidi->parser); substream->runtime->private_data = NULL; kfree(vmidi); @@ -520,6 +530,7 @@ int snd_virmidi_new(struct snd_card *card, int device, struct snd_rawmidi **rrmi rdev->rmidi = rmidi; rdev->device = device; rdev->client = -1; + init_rwsem(&rdev->filelist_sem); rwlock_init(&rdev->filelist_lock); INIT_LIST_HEAD(&rdev->filelist); rdev->seq_mode = SNDRV_VIRMIDI_SEQ_DISPATCH; diff --git a/sound/core/vmaster.c b/sound/core/vmaster.c index 6c58e6f..e43af18 100644 --- a/sound/core/vmaster.c +++ b/sound/core/vmaster.c @@ -484,3 +484,34 @@ void snd_ctl_sync_vmaster(struct snd_kcontrol *kcontrol, bool hook_only) master->hook(master->hook_private_data, master->val); } EXPORT_SYMBOL_GPL(snd_ctl_sync_vmaster); + +/** + * snd_ctl_apply_vmaster_slaves - Apply function to each vmaster slave + * @kctl: vmaster kctl element + * @func: function to apply + * @arg: optional function argument + * + * Apply the function @func to each slave kctl of the given vmaster kctl. + * Returns 0 if successful, or a negative error code. + */ +int snd_ctl_apply_vmaster_slaves(struct snd_kcontrol *kctl, + int (*func)(struct snd_kcontrol *, void *), + void *arg) +{ + struct link_master *master; + struct link_slave *slave; + int err; + + master = snd_kcontrol_chip(kctl); + err = master_init(master); + if (err < 0) + return err; + list_for_each_entry(slave, &master->slaves, list) { + err = func(&slave->slave, arg); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(snd_ctl_apply_vmaster_slaves); diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 978dc18..f6d2985 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -284,6 +284,11 @@ int snd_hdac_bus_parse_capabilities(struct hdac_bus *bus) dev_dbg(bus->dev, "HDA capability ID: 0x%x\n", (cur_cap & AZX_CAP_HDR_ID_MASK) >> AZX_CAP_HDR_ID_OFF); + if (cur_cap == -1) { + dev_dbg(bus->dev, "Invalid capability reg read\n"); + break; + } + switch ((cur_cap & AZX_CAP_HDR_ID_MASK) >> AZX_CAP_HDR_ID_OFF) { case AZX_ML_CAP_ID: dev_dbg(bus->dev, "Found ML capability\n"); diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 7e3aa50..5badd08 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -103,6 +103,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) void __user *puhr; union hpi_message_buffer_v1 *hm; union hpi_response_buffer_v1 *hr; + u16 msg_size; u16 res_max_size; u32 uncopied_bytes; int err = 0; @@ -127,22 +128,25 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } /* Now read the message size and data from user space. */ - if (get_user(hm->h.size, (u16 __user *)puhm)) { + if (get_user(msg_size, (u16 __user *)puhm)) { err = -EFAULT; goto out; } - if (hm->h.size > sizeof(*hm)) - hm->h.size = sizeof(*hm); + if (msg_size > sizeof(*hm)) + msg_size = sizeof(*hm); /* printk(KERN_INFO "message size %d\n", hm->h.wSize); */ - uncopied_bytes = copy_from_user(hm, puhm, hm->h.size); + uncopied_bytes = copy_from_user(hm, puhm, msg_size); if (uncopied_bytes) { HPI_DEBUG_LOG(ERROR, "uncopied bytes %d\n", uncopied_bytes); err = -EFAULT; goto out; } + /* Override h.size in case it is changed between two userspace fetches */ + hm->h.size = msg_size; + if (get_user(res_max_size, (u16 __user *)puhr)) { err = -EFAULT; goto out; diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 7326695..d68f99e 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1272,11 +1272,11 @@ static int snd_echo_mixer_info(struct snd_kcontrol *kcontrol, chip = snd_kcontrol_chip(kcontrol); uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = ECHOGAIN_MAXOUT; uinfo->dimen.d[0] = num_busses_out(chip); uinfo->dimen.d[1] = num_busses_in(chip); - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1]; return 0; } @@ -1344,11 +1344,11 @@ static int snd_echo_vmixer_info(struct snd_kcontrol *kcontrol, chip = snd_kcontrol_chip(kcontrol); uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = ECHOGAIN_MAXOUT; uinfo->dimen.d[0] = num_busses_out(chip); uinfo->dimen.d[1] = num_pipes_out(chip); - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1]; return 0; } @@ -1728,6 +1728,7 @@ static int snd_echo_vumeters_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 96; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = 0; #ifdef ECHOCARD_HAS_VMIXER @@ -1737,7 +1738,6 @@ static int snd_echo_vumeters_info(struct snd_kcontrol *kcontrol, #endif uinfo->dimen.d[1] = 16; /* 16 channels */ uinfo->dimen.d[2] = 2; /* 0=level, 1=peak */ - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1] * uinfo->dimen.d[2]; return 0; } diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 3db26c4..a0989d2 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1803,36 +1803,6 @@ static int check_slave_present(struct hda_codec *codec, return 1; } -/* guess the value corresponding to 0dB */ -static int get_kctl_0dB_offset(struct hda_codec *codec, - struct snd_kcontrol *kctl, int *step_to_check) -{ - int _tlv[4]; - const int *tlv = NULL; - int val = -1; - - if ((kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) && - kctl->tlv.c == snd_hda_mixer_amp_tlv) { - get_ctl_amp_tlv(kctl, _tlv); - tlv = _tlv; - } else if (kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_READ) - tlv = kctl->tlv.p; - if (tlv && tlv[0] == SNDRV_CTL_TLVT_DB_SCALE) { - int step = tlv[3]; - step &= ~TLV_DB_SCALE_MUTE; - if (!step) - return -1; - if (*step_to_check && *step_to_check != step) { - codec_err(codec, "Mismatching dB step for vmaster slave (%d!=%d)\n", -- *step_to_check, step); - return -1; - } - *step_to_check = step; - val = -tlv[2] / step; - } - return val; -} - /* call kctl->put with the given value(s) */ static int put_kctl_with_value(struct snd_kcontrol *kctl, int val) { @@ -1847,19 +1817,58 @@ static int put_kctl_with_value(struct snd_kcontrol *kctl, int val) return 0; } -/* initialize the slave volume with 0dB */ -static int init_slave_0dB(struct hda_codec *codec, - void *data, struct snd_kcontrol *slave) +struct slave_init_arg { + struct hda_codec *codec; + int step; +}; + +/* initialize the slave volume with 0dB via snd_ctl_apply_vmaster_slaves() */ +static int init_slave_0dB(struct snd_kcontrol *kctl, void *_arg) { - int offset = get_kctl_0dB_offset(codec, slave, data); - if (offset > 0) - put_kctl_with_value(slave, offset); + struct slave_init_arg *arg = _arg; + int _tlv[4]; + const int *tlv = NULL; + int step; + int val; + + if (kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { + if (kctl->tlv.c != snd_hda_mixer_amp_tlv) { + codec_err(arg->codec, + "Unexpected TLV callback for slave %s:%d\n", + kctl->id.name, kctl->id.index); + return 0; /* ignore */ + } + get_ctl_amp_tlv(kctl, _tlv); + tlv = _tlv; + } else if (kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_READ) + tlv = kctl->tlv.p; + + if (!tlv || tlv[0] != SNDRV_CTL_TLVT_DB_SCALE) + return 0; + + step = tlv[3]; + step &= ~TLV_DB_SCALE_MUTE; + if (!step) + return 0; + if (arg->step && arg->step != step) { + codec_err(arg->codec, + "Mismatching dB step for vmaster slave (%d!=%d)\n", + arg->step, step); + return 0; + } + + arg->step = step; + val = -tlv[2] / step; + if (val > 0) { + put_kctl_with_value(kctl, val); + return val; + } + return 0; } -/* unmute the slave */ -static int init_slave_unmute(struct hda_codec *codec, - void *data, struct snd_kcontrol *slave) +/* unmute the slave via snd_ctl_apply_vmaster_slaves() */ +static int init_slave_unmute(struct snd_kcontrol *slave, void *_arg) { return put_kctl_with_value(slave, 1); } @@ -1919,9 +1928,13 @@ int __snd_hda_add_vmaster(struct hda_codec *codec, char *name, /* init with master mute & zero volume */ put_kctl_with_value(kctl, 0); if (init_slave_vol) { - int step = 0; - map_slaves(codec, slaves, suffix, - tlv ? init_slave_0dB : init_slave_unmute, &step); + struct slave_init_arg arg = { + .codec = codec, + .step = 0, + }; + snd_ctl_apply_vmaster_slaves(kctl, + tlv ? init_slave_0dB : init_slave_unmute, + &arg); } if (ctl_ret) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 2b64fab..c19c81d 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -906,6 +906,7 @@ static int hdmi_setup_stream(struct hda_codec *codec, hda_nid_t cvt_nid, hda_nid_t pin_nid, u32 stream_tag, int format) { struct hdmi_spec *spec = codec->spec; + unsigned int param; int err; err = spec->ops.pin_hbr_setup(codec, pin_nid, is_hbr_format(format)); @@ -915,6 +916,26 @@ static int hdmi_setup_stream(struct hda_codec *codec, hda_nid_t cvt_nid, return err; } + if (is_haswell_plus(codec)) { + + /* + * on recent platforms IEC Coding Type is required for HBR + * support, read current Digital Converter settings and set + * ICT bitfield if needed. + */ + param = snd_hda_codec_read(codec, cvt_nid, 0, + AC_VERB_GET_DIGI_CONVERT_1, 0); + + param = (param >> 16) & ~(AC_DIG3_ICT); + + /* on recent platforms ICT mode is required for HBR support */ + if (is_hbr_format(format)) + param |= 0x1; + + snd_hda_codec_write(codec, cvt_nid, 0, + AC_VERB_SET_DIGI_CONVERT_3, param); + } + snd_hda_codec_setup_stream(codec, cvt_nid, stream_tag, 0, format); return 0; } diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 0fb6b1b7..d8409d9 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -469,10 +469,12 @@ static int init_card(struct snd_usb_caiaqdev *cdev) err = snd_usb_caiaq_send_command(cdev, EP1_CMD_GET_DEVICE_INFO, NULL, 0); if (err) - return err; + goto err_kill_urb; - if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->spec_received, HZ)) - return -ENODEV; + if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->spec_received, HZ)) { + err = -ENODEV; + goto err_kill_urb; + } usb_string(usb_dev, usb_dev->descriptor.iManufacturer, cdev->vendor_name, CAIAQ_USB_STR_LEN); @@ -507,6 +509,10 @@ static int init_card(struct snd_usb_caiaqdev *cdev) setup_card(cdev); return 0; + + err_kill_urb: + usb_kill_urb(&cdev->ep1_in_urb); + return err; } static int snd_probe(struct usb_interface *intf, diff --git a/sound/usb/card.c b/sound/usb/card.c index 3dc36d9..23d1d23 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -221,6 +221,7 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) struct usb_interface_descriptor *altsd; void *control_header; int i, protocol; + int rest_bytes; /* find audiocontrol interface */ host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0]; @@ -235,6 +236,15 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) return -EINVAL; } + rest_bytes = (void *)(host_iface->extra + host_iface->extralen) - + control_header; + + /* just to be sure -- this shouldn't hit at all */ + if (rest_bytes <= 0) { + dev_err(&dev->dev, "invalid control header\n"); + return -EINVAL; + } + switch (protocol) { default: dev_warn(&dev->dev, @@ -245,11 +255,21 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) case UAC_VERSION_1: { struct uac1_ac_header_descriptor *h1 = control_header; + if (rest_bytes < sizeof(*h1)) { + dev_err(&dev->dev, "too short v1 buffer descriptor\n"); + return -EINVAL; + } + if (!h1->bInCollection) { dev_info(&dev->dev, "skipping empty audio interface (v1)\n"); return -EINVAL; } + if (rest_bytes < h1->bLength) { + dev_err(&dev->dev, "invalid buffer length (v1)\n"); + return -EINVAL; + } + if (h1->bLength < sizeof(*h1) + h1->bInCollection) { dev_err(&dev->dev, "invalid UAC_HEADER (v1)\n"); return -EINVAL; diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index 0ff5a7d..c8f723c 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -779,9 +779,10 @@ int line6_probe(struct usb_interface *interface, return 0; error: - if (line6->disconnect) - line6->disconnect(line6); - snd_card_free(card); + /* we can call disconnect callback here because no close-sync is + * needed yet at this point + */ + line6_disconnect(interface); return ret; } EXPORT_SYMBOL_GPL(line6_probe); diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 956f847..451007c 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -301,7 +301,8 @@ static void podhd_disconnect(struct usb_line6 *line6) intf = usb_ifnum_to_if(line6->usbdev, pod->line6.properties->ctrl_if); - usb_driver_release_interface(&podhd_driver, intf); + if (intf) + usb_driver_release_interface(&podhd_driver, intf); } } @@ -317,6 +318,9 @@ static int podhd_init(struct usb_line6 *line6, line6->disconnect = podhd_disconnect; + init_timer(&pod->startup_timer); + INIT_WORK(&pod->startup_work, podhd_startup_workqueue); + if (pod->line6.properties->capabilities & LINE6_CAP_CONTROL) { /* claim the data interface */ intf = usb_ifnum_to_if(line6->usbdev, @@ -358,8 +362,6 @@ static int podhd_init(struct usb_line6 *line6, } /* init device and delay registering */ - init_timer(&pod->startup_timer); - INIT_WORK(&pod->startup_work, podhd_startup_workqueue); podhd_startup(pod); return 0; } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 9732edf..91bc8f1 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2234,6 +2234,9 @@ static int parse_audio_unit(struct mixer_build *state, int unitid) static void snd_usb_mixer_free(struct usb_mixer_interface *mixer) { + /* kill pending URBs */ + snd_usb_mixer_disconnect(mixer); + kfree(mixer->id_elems); if (mixer->urb) { kfree(mixer->urb->transfer_buffer); @@ -2584,8 +2587,13 @@ _error: void snd_usb_mixer_disconnect(struct usb_mixer_interface *mixer) { - usb_kill_urb(mixer->urb); - usb_kill_urb(mixer->rc_urb); + if (mixer->disconnected) + return; + if (mixer->urb) + usb_kill_urb(mixer->urb); + if (mixer->rc_urb) + usb_kill_urb(mixer->rc_urb); + mixer->disconnected = true; } #ifdef CONFIG_PM diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index 2b4b067..545d99b 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -22,6 +22,8 @@ struct usb_mixer_interface { struct urb *rc_urb; struct usb_ctrlrequest *rc_setup_packet; u8 rc_buffer[6]; + + bool disconnected; }; #define MAX_CHANNELS 16 /* max logical channels */ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 9135520..4f5f18f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1137,6 +1137,9 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x047F, 0x02F7): /* Plantronics BT-600 */ case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ + case USB_ID(0x047F, 0xC022): /* Plantronics C310 */ + case USB_ID(0x047F, 0xC02F): /* Plantronics P610 */ + case USB_ID(0x047F, 0xC036): /* Plantronics C520-M */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */ @@ -1351,6 +1354,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */ case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */ case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */ + case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */ if (fp->altsetting == 2) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c index 4dab490..e229abd 100644 --- a/sound/usb/usx2y/usb_stream.c +++ b/sound/usb/usx2y/usb_stream.c @@ -191,7 +191,8 @@ struct usb_stream *usb_stream_new(struct usb_stream_kernel *sk, } pg = get_order(read_size); - sk->s = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO, pg); + sk->s = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO| + __GFP_NOWARN, pg); if (!sk->s) { snd_printk(KERN_WARNING "couldn't __get_free_pages()\n"); goto out; @@ -211,7 +212,8 @@ struct usb_stream *usb_stream_new(struct usb_stream_kernel *sk, pg = get_order(write_size); sk->write_page = - (void *)__get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO, pg); + (void *)__get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO| + __GFP_NOWARN, pg); if (!sk->write_page) { snd_printk(KERN_WARNING "couldn't __get_free_pages()\n"); usb_stream_free(sk); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dafba2..bd9c6b3 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -92,7 +92,6 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; -int do_migrate; double discover_bclk(unsigned int family, unsigned int model); unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ @@ -303,9 +302,6 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int cpu_migrate(int cpu) { - if (!do_migrate) - return 0; - CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) @@ -5007,7 +5003,6 @@ void cmdline(int argc, char **argv) {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, {"list", no_argument, 0, 'l'}, - {"migrate", no_argument, 0, 'm'}, {"out", required_argument, 0, 'o'}, {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, @@ -5019,7 +5014,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5062,9 +5057,6 @@ void cmdline(int argc, char **argv) list_header_only++; quiet++; break; - case 'm': - do_migrate = 1; - break; case 'o': outf = fopen_or_die(optarg, "w"); break; diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 0f5e347..152823b 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests include ../lib.mk override define RUN_TESTS - $(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" - $(OUTPUT)//mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" + @$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" + @$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" endef override define EMIT_TESTS diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/networking/timestamping/rxtimestamp.c index 00f2866..dd4162f 100644 --- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c @@ -341,7 +341,7 @@ int main(int argc, char **argv) return 0; case 'n': t = atoi(optarg); - if (t > ARRAY_SIZE(test_cases)) + if (t >= ARRAY_SIZE(test_cases)) error(1, 0, "Invalid test case: %d", t); all_tests = false; test_cases[t].enabled = true; diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index a2c53a3..de2f9ec 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -397,7 +397,7 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, } } -static int copy_page(int ufd, unsigned long offset) +static int __copy_page(int ufd, unsigned long offset, bool retry) { struct uffdio_copy uffdio_copy; @@ -418,7 +418,7 @@ static int copy_page(int ufd, unsigned long offset) fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", uffdio_copy.copy), exit(1); } else { - if (test_uffdio_copy_eexist) { + if (test_uffdio_copy_eexist && retry) { test_uffdio_copy_eexist = false; retry_copy_page(ufd, &uffdio_copy, offset); } @@ -427,6 +427,16 @@ static int copy_page(int ufd, unsigned long offset) return 0; } +static int copy_page_retry(int ufd, unsigned long offset) +{ + return __copy_page(ufd, offset, true); +} + +static int copy_page(int ufd, unsigned long offset) +{ + return __copy_page(ufd, offset, false); +} + static void *uffd_poll_thread(void *arg) { unsigned long cpu = (unsigned long) arg; @@ -544,7 +554,7 @@ static void *background_thread(void *arg) for (page_nr = cpu * nr_pages_per_cpu; page_nr < (cpu+1) * nr_pages_per_cpu; page_nr++) - copy_page(uffd, page_nr * page_size); + copy_page_retry(uffd, page_nr * page_size); return NULL; } @@ -779,7 +789,7 @@ static void retry_uffdio_zeropage(int ufd, } } -static int uffdio_zeropage(int ufd, unsigned long offset) +static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) { struct uffdio_zeropage uffdio_zeropage; int ret; @@ -814,7 +824,7 @@ static int uffdio_zeropage(int ufd, unsigned long offset) fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", uffdio_zeropage.zeropage), exit(1); } else { - if (test_uffdio_zeropage_eexist) { + if (test_uffdio_zeropage_eexist && retry) { test_uffdio_zeropage_eexist = false; retry_uffdio_zeropage(ufd, &uffdio_zeropage, offset); @@ -830,6 +840,11 @@ static int uffdio_zeropage(int ufd, unsigned long offset) return 0; } +static int uffdio_zeropage(int ufd, unsigned long offset) +{ + return __uffdio_zeropage(ufd, offset, false); +} + /* exercise UFFDIO_ZEROPAGE */ static int userfaultfd_zeropage_test(void) { diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 97f187e..0a74a20 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -20,7 +20,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie UNAME_M := $(shell uname -m) CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) |