summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/alpha/include/asm/io.h14
-rw-r--r--arch/alpha/include/uapi/asm/mman.h1
-rw-r--r--arch/alpha/kernel/Makefile2
-rw-r--r--arch/alpha/kernel/bugs.c45
-rw-r--r--arch/alpha/kernel/entry.S1
-rw-r--r--arch/alpha/kernel/pci-noop.c15
-rw-r--r--arch/alpha/kernel/pci.c5
-rw-r--r--arch/alpha/kernel/rtc.c101
-rw-r--r--arch/arc/kernel/troubleshoot.c1
-rw-r--r--arch/arc/mm/cache.c2
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/boot/compressed/decompress.c5
-rw-r--r--arch/arm/boot/compressed/misc.c16
-rw-r--r--arch/arm/boot/compressed/misc.h10
-rw-r--r--arch/arm/boot/dts/ls1021a.dtsi3
-rw-r--r--arch/arm/include/asm/cacheflush.h6
-rw-r--r--arch/arm/include/asm/kvm_asm.h5
-rw-r--r--arch/arm/include/asm/kvm_emulate.h21
-rw-r--r--arch/arm/include/asm/kvm_host.h6
-rw-r--r--arch/arm/include/asm/kvm_hyp.h4
-rw-r--r--arch/arm/include/asm/kvm_mmu.h16
-rw-r--r--arch/arm/include/asm/memory.h6
-rw-r--r--arch/arm/include/uapi/asm/kvm.h9
-rw-r--r--arch/arm/kernel/vmlinux-xip.lds.S166
-rw-r--r--arch/arm/kernel/vmlinux.lds.S172
-rw-r--r--arch/arm/kernel/vmlinux.lds.h135
-rw-r--r--arch/arm/kvm/coproc.c61
-rw-r--r--arch/arm/kvm/emulate.c4
-rw-r--r--arch/arm/kvm/hyp/Makefile1
-rw-r--r--arch/arm/kvm/hyp/switch.c16
-rw-r--r--arch/arm/mach-sa1100/Kconfig4
-rw-r--r--arch/arm/mach-sa1100/assabet.c39
-rw-r--r--arch/arm/mach-sa1100/cerf.c18
-rw-r--r--arch/arm/mach-sa1100/clock.c2
-rw-r--r--arch/arm/mach-sa1100/generic.c44
-rw-r--r--arch/arm/mach-sa1100/generic.h8
-rw-r--r--arch/arm/mach-sa1100/h3xxx.c17
-rw-r--r--arch/arm/mach-sa1100/include/mach/assabet.h6
-rw-r--r--arch/arm/mach-sa1100/nanoengine.c23
-rw-r--r--arch/arm/mach-sa1100/shannon.c38
-rw-r--r--arch/arm/mach-sa1100/simpad.c11
-rw-r--r--arch/arm/mm/copypage-v4mc.c2
-rw-r--r--arch/arm/mm/copypage-v6.c2
-rw-r--r--arch/arm/mm/copypage-xscale.c2
-rw-r--r--arch/arm/mm/dma-mapping.c16
-rw-r--r--arch/arm/mm/fault-armv.c2
-rw-r--r--arch/arm/mm/flush.c6
-rw-r--r--arch/arm/mm/init.c11
-rw-r--r--arch/arm/mm/mmap.c14
-rw-r--r--arch/arm/mm/proc-v7.S11
-rw-r--r--arch/arm64/Kconfig16
-rw-r--r--arch/arm64/include/asm/alternative.h41
-rw-r--r--arch/arm64/include/asm/cacheflush.h6
-rw-r--r--arch/arm64/include/asm/cpucaps.h2
-rw-r--r--arch/arm64/include/asm/insn.h16
-rw-r--r--arch/arm64/include/asm/kvm_arm.h6
-rw-r--r--arch/arm64/include/asm/kvm_asm.h19
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h78
-rw-r--r--arch/arm64/include/asm/kvm_host.h53
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h29
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h165
-rw-r--r--arch/arm64/include/asm/memory.h6
-rw-r--r--arch/arm64/include/asm/mmu.h8
-rw-r--r--arch/arm64/include/asm/sysreg.h6
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/alternative.c43
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/bpi.S67
-rw-r--r--arch/arm64/kernel/cpu_errata.c25
-rw-r--r--arch/arm64/kernel/cpufeature.c19
-rw-r--r--arch/arm64/kernel/head.S7
-rw-r--r--arch/arm64/kernel/insn.c190
-rw-r--r--arch/arm64/kvm/Kconfig3
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/debug.c29
-rw-r--r--arch/arm64/kvm/hyp-init.S1
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c88
-rw-r--r--arch/arm64/kvm/hyp/entry.S6
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S86
-rw-r--r--arch/arm64/kvm/hyp/switch.c382
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c172
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c78
-rw-r--r--arch/arm64/kvm/inject_fault.c24
-rw-r--r--arch/arm64/kvm/regmap.c67
-rw-r--r--arch/arm64/kvm/sys_regs.c199
-rw-r--r--arch/arm64/kvm/sys_regs.h4
-rw-r--r--arch/arm64/kvm/sys_regs_generic_v8.c4
-rw-r--r--arch/arm64/kvm/va_layout.c227
-rw-r--r--arch/arm64/mm/mmap.c14
-rw-r--r--arch/c6x/Makefile1
-rw-r--r--arch/c6x/kernel/asm-offsets.c1
-rw-r--r--arch/c6x/platforms/plldata.c1
-rw-r--r--arch/ia64/pci/pci.c4
-rw-r--r--arch/m68k/coldfire/device.c12
-rw-r--r--arch/mips/Kconfig7
-rw-r--r--arch/mips/Makefile64
-rw-r--r--arch/mips/alchemy/board-gpr.c2
-rw-r--r--arch/mips/alchemy/board-mtx1.c2
-rw-r--r--arch/mips/ar7/platform.c14
-rw-r--r--arch/mips/bcm47xx/buttons.c2
-rw-r--r--arch/mips/bcm47xx/leds.c21
-rw-r--r--arch/mips/boot/compressed/decompress.c9
-rw-r--r--arch/mips/boot/dts/Makefile1
-rw-r--r--arch/mips/boot/dts/brcm/bcm7125.dtsi7
-rw-r--r--arch/mips/boot/dts/brcm/bcm7346.dtsi62
-rw-r--r--arch/mips/boot/dts/brcm/bcm7358.dtsi17
-rw-r--r--arch/mips/boot/dts/brcm/bcm7360.dtsi62
-rw-r--r--arch/mips/boot/dts/brcm/bcm7362.dtsi62
-rw-r--r--arch/mips/boot/dts/brcm/bcm7420.dtsi7
-rw-r--r--arch/mips/boot/dts/brcm/bcm7425.dtsi89
-rw-r--r--arch/mips/boot/dts/brcm/bcm7435.dtsi89
-rw-r--r--arch/mips/boot/dts/brcm/bcm97125cbmb.dts4
-rw-r--r--arch/mips/boot/dts/brcm/bcm97346dbsmb.dts8
-rw-r--r--arch/mips/boot/dts/brcm/bcm97358svmb.dts8
-rw-r--r--arch/mips/boot/dts/brcm/bcm97360svmb.dts8
-rw-r--r--arch/mips/boot/dts/brcm/bcm97362svmb.dts8
-rw-r--r--arch/mips/boot/dts/brcm/bcm97420c.dts4
-rw-r--r--arch/mips/boot/dts/brcm/bcm97425svmb.dts8
-rw-r--r--arch/mips/boot/dts/brcm/bcm97435svmb.dts8
-rw-r--r--arch/mips/boot/dts/img/boston.dts2
-rw-r--r--arch/mips/boot/dts/ingenic/ci20.dts8
-rw-r--r--arch/mips/boot/dts/mscc/Makefile3
-rw-r--r--arch/mips/boot/dts/mscc/ocelot.dtsi117
-rw-r--r--arch/mips/boot/dts/mscc/ocelot_pcb123.dts27
-rw-r--r--arch/mips/cavium-octeon/octeon-irq.c10
-rw-r--r--arch/mips/configs/bmips_stb_defconfig1
-rw-r--r--arch/mips/configs/generic/32r6.config2
-rw-r--r--arch/mips/configs/generic/64r6.config2
-rw-r--r--arch/mips/configs/generic/board-ocelot.config35
-rw-r--r--arch/mips/crypto/Makefile6
-rw-r--r--arch/mips/crypto/crc32-mips.c348
-rw-r--r--arch/mips/generic/Kconfig16
-rw-r--r--arch/mips/generic/Makefile1
-rw-r--r--arch/mips/generic/board-ocelot.c78
-rw-r--r--arch/mips/include/asm/cpu-features.h5
-rw-r--r--arch/mips/include/asm/isa-rev.h24
-rw-r--r--arch/mips/include/asm/kvm_para.h5
-rw-r--r--arch/mips/include/asm/mach-ath79/ar71xx_regs.h2
-rw-r--r--arch/mips/include/asm/mipsregs.h1
-rw-r--r--arch/mips/include/uapi/asm/hwcap.h1
-rw-r--r--arch/mips/include/uapi/asm/mman.h1
-rw-r--r--arch/mips/kernel/cpu-probe.c3
-rw-r--r--arch/mips/kernel/pm-cps.c31
-rw-r--r--arch/mips/kernel/reset.c68
-rw-r--r--arch/mips/kernel/setup.c5
-rw-r--r--arch/mips/mm/cache.c2
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/mips/mm/mmap.c14
-rw-r--r--arch/mips/net/bpf_jit_asm.S9
-rw-r--r--arch/mips/pci/pci-mt7620.c1
-rw-r--r--arch/mips/txx9/rbtx4927/setup.c2
-rw-r--r--arch/mips/vdso/elf.S10
-rw-r--r--arch/nds32/include/asm/cacheflush.h4
-rw-r--r--arch/nios2/include/asm/cacheflush.h6
-rw-r--r--arch/nios2/kernel/time.c4
-rw-r--r--arch/nios2/mm/cacheflush.c4
-rw-r--r--arch/parisc/include/asm/cacheflush.h6
-rw-r--r--arch/parisc/include/uapi/asm/mman.h1
-rw-r--r--arch/parisc/kernel/cache.c5
-rw-r--r--arch/parisc/kernel/sys_parisc.c16
-rw-r--r--arch/parisc/kernel/time.c2
-rw-r--r--arch/powerpc/Makefile14
-rw-r--r--arch/powerpc/boot/dts/acadia.dts2
-rw-r--r--arch/powerpc/boot/dts/adder875-redboot.dts2
-rw-r--r--arch/powerpc/boot/dts/adder875-uboot.dts2
-rw-r--r--arch/powerpc/boot/dts/akebono.dts2
-rw-r--r--arch/powerpc/boot/dts/amigaone.dts2
-rw-r--r--arch/powerpc/boot/dts/asp834x-redboot.dts2
-rw-r--r--arch/powerpc/boot/dts/bamboo.dts2
-rw-r--r--arch/powerpc/boot/dts/c2k.dts2
-rw-r--r--arch/powerpc/boot/dts/currituck.dts2
-rw-r--r--arch/powerpc/boot/dts/digsy_mtc.dts2
-rw-r--r--arch/powerpc/boot/dts/ebony.dts2
-rw-r--r--arch/powerpc/boot/dts/eiger.dts2
-rw-r--r--arch/powerpc/boot/dts/ep405.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/mvme7100.dts2
-rw-r--r--arch/powerpc/boot/dts/fsp2.dts2
-rw-r--r--arch/powerpc/boot/dts/holly.dts2
-rw-r--r--arch/powerpc/boot/dts/hotfoot.dts2
-rw-r--r--arch/powerpc/boot/dts/icon.dts2
-rw-r--r--arch/powerpc/boot/dts/iss4xx-mpic.dts2
-rw-r--r--arch/powerpc/boot/dts/iss4xx.dts2
-rw-r--r--arch/powerpc/boot/dts/katmai.dts2
-rw-r--r--arch/powerpc/boot/dts/klondike.dts2
-rw-r--r--arch/powerpc/boot/dts/ksi8560.dts2
-rw-r--r--arch/powerpc/boot/dts/media5200.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc8272ads.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc866ads.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc885ads.dts2
-rw-r--r--arch/powerpc/boot/dts/mvme5100.dts2
-rw-r--r--arch/powerpc/boot/dts/obs600.dts2
-rw-r--r--arch/powerpc/boot/dts/pq2fads.dts2
-rw-r--r--arch/powerpc/boot/dts/rainier.dts2
-rw-r--r--arch/powerpc/boot/dts/redwood.dts2
-rw-r--r--arch/powerpc/boot/dts/sam440ep.dts2
-rw-r--r--arch/powerpc/boot/dts/sequoia.dts2
-rw-r--r--arch/powerpc/boot/dts/storcenter.dts2
-rw-r--r--arch/powerpc/boot/dts/taishan.dts2
-rw-r--r--arch/powerpc/boot/dts/virtex440-ml507.dts2
-rw-r--r--arch/powerpc/boot/dts/virtex440-ml510.dts2
-rw-r--r--arch/powerpc/boot/dts/walnut.dts2
-rw-r--r--arch/powerpc/boot/dts/warp.dts2
-rw-r--r--arch/powerpc/boot/dts/wii.dts21
-rw-r--r--arch/powerpc/boot/dts/xpedite5200_xmon.dts2
-rw-r--r--arch/powerpc/boot/dts/yosemite.dts2
-rw-r--r--arch/powerpc/boot/libfdt_env.h2
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h15
-rw-r--r--arch/powerpc/include/asm/barrier.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h14
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h25
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h54
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h12
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h19
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-4k.h5
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-64k.h6
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/slice.h27
-rw-r--r--arch/powerpc/include/asm/cacheflush.h1
-rw-r--r--arch/powerpc/include/asm/cputable.h263
-rw-r--r--arch/powerpc/include/asm/debug.h1
-rw-r--r--arch/powerpc/include/asm/eeh.h6
-rw-r--r--arch/powerpc/include/asm/eeh_event.h3
-rw-r--r--arch/powerpc/include/asm/epapr_hcalls.h22
-rw-r--r--arch/powerpc/include/asm/hugetlb.h14
-rw-r--r--arch/powerpc/include/asm/hvcall.h4
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h5
-rw-r--r--arch/powerpc/include/asm/io.h4
-rw-r--r--arch/powerpc/include/asm/irq.h1
-rw-r--r--arch/powerpc/include/asm/irq_work.h1
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h43
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/include/asm/kvm_para.h5
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h9
-rw-r--r--arch/powerpc/include/asm/lppaca.h29
-rw-r--r--arch/powerpc/include/asm/mmu-8xx.h21
-rw-r--r--arch/powerpc/include/asm/mmu.h6
-rw-r--r--arch/powerpc/include/asm/mmu_context.h39
-rw-r--r--arch/powerpc/include/asm/nohash/32/slice.h18
-rw-r--r--arch/powerpc/include/asm/nohash/64/slice.h12
-rw-r--r--arch/powerpc/include/asm/opal-api.h4
-rw-r--r--arch/powerpc/include/asm/opal.h4
-rw-r--r--arch/powerpc/include/asm/paca.h27
-rw-r--r--arch/powerpc/include/asm/page.h11
-rw-r--r--arch/powerpc/include/asm/page_64.h59
-rw-r--r--arch/powerpc/include/asm/pci.h9
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h2
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h24
-rw-r--r--arch/powerpc/include/asm/pmc.h13
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h6
-rw-r--r--arch/powerpc/include/asm/powernv.h1
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h10
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h11
-rw-r--r--arch/powerpc/include/asm/processor.h16
-rw-r--r--arch/powerpc/include/asm/reg.h7
-rw-r--r--arch/powerpc/include/asm/security_features.h74
-rw-r--r--arch/powerpc/include/asm/setup.h3
-rw-r--r--arch/powerpc/include/asm/slice.h40
-rw-r--r--arch/powerpc/include/asm/smp.h5
-rw-r--r--arch/powerpc/include/asm/sparsemem.h2
-rw-r--r--arch/powerpc/include/asm/spinlock.h2
-rw-r--r--arch/powerpc/include/asm/switch_to.h1
-rw-r--r--arch/powerpc/include/asm/synch.h4
-rw-r--r--arch/powerpc/include/asm/thread_info.h1
-rw-r--r--arch/powerpc/include/asm/time.h4
-rw-r--r--arch/powerpc/include/asm/uaccess.h10
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c8
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S2
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S2
-rw-r--r--arch/powerpc/kernel/cputable.c59
-rw-r--r--arch/powerpc/kernel/crash.c2
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c36
-rw-r--r--arch/powerpc/kernel/eeh.c19
-rw-r--r--arch/powerpc/kernel/eeh_cache.c3
-rw-r--r--arch/powerpc/kernel/eeh_driver.c205
-rw-r--r--arch/powerpc/kernel/eeh_event.c6
-rw-r--r--arch/powerpc/kernel/entry_64.S2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S86
-rw-r--r--arch/powerpc/kernel/head_64.S19
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c3
-rw-r--r--arch/powerpc/kernel/idle_book3s.S50
-rw-r--r--arch/powerpc/kernel/iomap.c40
-rw-r--r--arch/powerpc/kernel/kprobes.c30
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c37
-rw-r--r--arch/powerpc/kernel/machine_kexec_file_64.c2
-rw-r--r--arch/powerpc/kernel/misc_64.S38
-rw-r--r--arch/powerpc/kernel/nvram_64.c9
-rw-r--r--arch/powerpc/kernel/paca.c242
-rw-r--r--arch/powerpc/kernel/pci-common.c106
-rw-r--r--arch/powerpc/kernel/process.c26
-rw-r--r--arch/powerpc/kernel/prom.c19
-rw-r--r--arch/powerpc/kernel/prom_init.c29
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh2
-rw-r--r--arch/powerpc/kernel/ptrace.c16
-rw-r--r--arch/powerpc/kernel/security.c88
-rw-r--r--arch/powerpc/kernel/setup-common.c37
-rw-r--r--arch/powerpc/kernel/setup.h9
-rw-r--r--arch/powerpc/kernel/setup_32.c8
-rw-r--r--arch/powerpc/kernel/setup_64.c113
-rw-r--r--arch/powerpc/kernel/signal.h5
-rw-r--r--arch/powerpc/kernel/signal_32.c4
-rw-r--r--arch/powerpc/kernel/smp.c23
-rw-r--r--arch/powerpc/kernel/sysfs.c20
-rw-r--r--arch/powerpc/kernel/time.c5
-rw-r--r--arch/powerpc/kernel/traps.c31
-rw-r--r--arch/powerpc/kernel/vdso.c12
-rw-r--r--arch/powerpc/kvm/Makefile7
-rw-r--r--arch/powerpc/kvm/book3s.c6
-rw-r--r--arch/powerpc/kvm/book3s.h1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c9
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c333
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c56
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S187
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm.c216
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm_builtin.c109
-rw-r--r--arch/powerpc/kvm/book3s_pr.c10
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c2
-rw-r--r--arch/powerpc/kvm/emulate.c6
-rw-r--r--arch/powerpc/kvm/powerpc.c5
-rw-r--r--arch/powerpc/kvm/trace_pr.h15
-rw-r--r--arch/powerpc/lib/Makefile6
-rw-r--r--arch/powerpc/lib/copypage_64.S2
-rw-r--r--arch/powerpc/lib/copypage_power7.S3
-rw-r--r--arch/powerpc/lib/copyuser_64.S2
-rw-r--r--arch/powerpc/lib/copyuser_power7.S3
-rw-r--r--arch/powerpc/lib/feature-fixups.c9
-rw-r--r--arch/powerpc/lib/memcpy_64.S2
-rw-r--r--arch/powerpc/lib/memcpy_power7.S3
-rw-r--r--arch/powerpc/lib/sstep.c4
-rw-r--r--arch/powerpc/mm/8xx_mmu.c2
-rw-r--r--arch/powerpc/mm/copro_fault.c2
-rw-r--r--arch/powerpc/mm/fault.c28
-rw-r--r--arch/powerpc/mm/hash_native_64.c15
-rw-r--r--arch/powerpc/mm/hash_utils_64.c34
-rw-r--r--arch/powerpc/mm/hugetlbpage.c31
-rw-r--r--arch/powerpc/mm/init_32.c7
-rw-r--r--arch/powerpc/mm/init_64.c8
-rw-r--r--arch/powerpc/mm/mem.c25
-rw-r--r--arch/powerpc/mm/mmap.c28
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c24
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c5
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c15
-rw-r--r--arch/powerpc/mm/mmu_decl.h1
-rw-r--r--arch/powerpc/mm/numa.c36
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c8
-rw-r--r--arch/powerpc/mm/pgtable-hash64.c6
-rw-r--r--arch/powerpc/mm/pgtable-radix.c218
-rw-r--r--arch/powerpc/mm/pgtable_32.c2
-rw-r--r--arch/powerpc/mm/pgtable_64.c5
-rw-r--r--arch/powerpc/mm/pkeys.c17
-rw-r--r--arch/powerpc/mm/slb.c108
-rw-r--r--arch/powerpc/mm/slb_low.S19
-rw-r--r--arch/powerpc/mm/slice.c485
-rw-r--r--arch/powerpc/mm/tlb-radix.c14
-rw-r--r--arch/powerpc/mm/tlb_hash64.c2
-rw-r--r--arch/powerpc/oprofile/cell/spu_task_sync.c2
-rw-r--r--arch/powerpc/oprofile/cell/vma_map.c4
-rw-r--r--arch/powerpc/perf/Makefile2
-rw-r--r--arch/powerpc/perf/core-book3s.c50
-rw-r--r--arch/powerpc/perf/power4-pmu.c622
-rw-r--r--arch/powerpc/perf/power9-events-list.h28
-rw-r--r--arch/powerpc/perf/power9-pmu.c48
-rw-r--r--arch/powerpc/platforms/4xx/msi.c5
-rw-r--r--arch/powerpc/platforms/4xx/ocm.c2
-rw-r--r--arch/powerpc/platforms/85xx/smp.c8
-rw-r--r--arch/powerpc/platforms/8xx/m8xx_setup.c8
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype20
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c2
-rw-r--r--arch/powerpc/platforms/cell/smp.c4
-rw-r--r--arch/powerpc/platforms/cell/spider-pci.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/lscsa_alloc.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/wii.c23
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c2
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_core.c4
-rw-r--r--arch/powerpc/platforms/powernv/Makefile1
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c9
-rw-r--r--arch/powerpc/platforms/powernv/idle.c88
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c261
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c32
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c10
-rw-r--r--arch/powerpc/platforms/powernv/opal-memory-errors.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-nvram.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-psr.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S2
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c8
-rw-r--r--arch/powerpc/platforms/powernv/pci-cxl.c8
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c29
-rw-r--r--arch/powerpc/platforms/powernv/pci.c135
-rw-r--r--arch/powerpc/platforms/powernv/setup.c114
-rw-r--r--arch/powerpc/platforms/powernv/smp.c2
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c2
-rw-r--r--arch/powerpc/platforms/powernv/vas-debug.c19
-rw-r--r--arch/powerpc/platforms/powernv/vas-trace.h113
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c9
-rw-r--r--arch/powerpc/platforms/powernv/vas.c6
-rw-r--r--arch/powerpc/platforms/ps3/mm.c6
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c2
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c7
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c18
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c3
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h10
-rw-r--r--arch/powerpc/platforms/pseries/setup.c85
-rw-r--r--arch/powerpc/platforms/pseries/smp.c6
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c1
-rw-r--r--arch/powerpc/sysdev/mpic.c2
-rw-r--r--arch/powerpc/sysdev/msi_bitmap.c1
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c2
-rw-r--r--arch/powerpc/sysdev/xive/common.c2
-rw-r--r--arch/powerpc/xmon/xmon.c56
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/Makefile8
-rw-r--r--arch/s390/boot/compressed/Makefile16
-rw-r--r--arch/s390/boot/compressed/head.S6
-rw-r--r--arch/s390/boot/compressed/misc.c10
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S1
-rw-r--r--arch/s390/crypto/aes_s390.c5
-rw-r--r--arch/s390/include/asm/alternative-asm.h108
-rw-r--r--arch/s390/include/asm/ccwdev.h2
-rw-r--r--arch/s390/include/asm/chpid.h2
-rw-r--r--arch/s390/include/asm/cio.h2
-rw-r--r--arch/s390/include/asm/cpu_mf.h4
-rw-r--r--arch/s390/include/asm/css_chars.h6
-rw-r--r--arch/s390/include/asm/kvm_host.h28
-rw-r--r--arch/s390/include/asm/kvm_para.h5
-rw-r--r--arch/s390/include/asm/mmu.h4
-rw-r--r--arch/s390/include/asm/mmu_context.h2
-rw-r--r--arch/s390/include/asm/nospec-branch.h6
-rw-r--r--arch/s390/include/asm/pgalloc.h3
-rw-r--r--arch/s390/include/asm/scsw.h4
-rw-r--r--arch/s390/include/asm/setup.h2
-rw-r--r--arch/s390/include/uapi/asm/dasd.h38
-rw-r--r--arch/s390/kernel/Makefile4
-rw-r--r--arch/s390/kernel/alternative.c24
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/early.c4
-rw-r--r--arch/s390/kernel/entry.S96
-rw-r--r--arch/s390/kernel/module.c11
-rw-r--r--arch/s390/kernel/nmi.c2
-rw-r--r--arch/s390/kernel/nospec-branch.c120
-rw-r--r--arch/s390/kernel/setup.c22
-rw-r--r--arch/s390/kernel/smp.c1
-rw-r--r--arch/s390/kernel/suspend.c4
-rw-r--r--arch/s390/kvm/gaccess.c9
-rw-r--r--arch/s390/kvm/intercept.c17
-rw-r--r--arch/s390/kvm/interrupt.c26
-rw-r--r--arch/s390/kvm/kvm-s390.c102
-rw-r--r--arch/s390/kvm/kvm-s390.h2
-rw-r--r--arch/s390/kvm/priv.c4
-rw-r--r--arch/s390/mm/dump_pagetables.c4
-rw-r--r--arch/s390/mm/mmap.c15
-rw-r--r--arch/s390/mm/pgalloc.c293
-rw-r--r--arch/s390/tools/gen_facilities.c20
-rw-r--r--arch/sh/boards/board-sh7785lcr.c11
-rw-r--r--arch/sh/boot/compressed/misc.c9
-rw-r--r--arch/sh/mm/cache-sh4.c2
-rw-r--r--arch/sh/mm/cache-sh7705.c2
-rw-r--r--arch/sparc/kernel/entry.S15
-rw-r--r--arch/sparc/kernel/irq_64.c1
-rw-r--r--arch/sparc/kernel/pci.c6
-rw-r--r--arch/sparc/kernel/pci_common.c54
-rw-r--r--arch/sparc/kernel/pci_impl.h4
-rw-r--r--arch/sparc/kernel/smp_64.c8
-rw-r--r--arch/sparc/kernel/sys32.S38
-rw-r--r--arch/sparc/kernel/sys_sparc32.c89
-rw-r--r--arch/sparc/kernel/sys_sparc_32.c31
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c22
-rw-r--r--arch/sparc/kernel/syscalls.S9
-rw-r--r--arch/sparc/kernel/systbls.h53
-rw-r--r--arch/sparc/kernel/systbls_32.S2
-rw-r--r--arch/sparc/kernel/systbls_64.S24
-rw-r--r--arch/sparc/mm/init_64.c6
-rw-r--r--arch/sparc/mm/tlb.c2
-rw-r--r--arch/unicore32/include/asm/cacheflush.h6
-rw-r--r--arch/unicore32/include/asm/memory.h6
-rw-r--r--arch/unicore32/mm/flush.c2
-rw-r--r--arch/unicore32/mm/mmu.c2
-rw-r--r--arch/x86/hyperv/hv_init.c45
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h (renamed from arch/x86/include/uapi/asm/hyperv.h)299
-rw-r--r--arch/x86/include/asm/kvm_host.h54
-rw-r--r--arch/x86/include/asm/kvm_para.h6
-rw-r--r--arch/x86/include/asm/mshyperv.h94
-rw-r--r--arch/x86/include/asm/msr-index.h14
-rw-r--r--arch/x86/include/asm/processor.h10
-rw-r--r--arch/x86/include/asm/svm.h3
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/include/uapi/asm/kvm.h19
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h9
-rw-r--r--arch/x86/kernel/cpu/common.c3
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c22
-rw-r--r--arch/x86/kernel/kvm.c18
-rw-r--r--arch/x86/kernel/pci-dma.c1
-rw-r--r--arch/x86/kernel/process_64.c14
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/kvm/cpuid.c7
-rw-r--r--arch/x86/kvm/emulate.c27
-rw-r--r--arch/x86/kvm/hyperv.c192
-rw-r--r--arch/x86/kvm/hyperv.h4
-rw-r--r--arch/x86/kvm/irq.c26
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h7
-rw-r--r--arch/x86/kvm/lapic.c10
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/paging_tmpl.h11
-rw-r--r--arch/x86/kvm/pmu.c37
-rw-r--r--arch/x86/kvm/pmu.h6
-rw-r--r--arch/x86/kvm/pmu_amd.c142
-rw-r--r--arch/x86/kvm/svm.c312
-rw-r--r--arch/x86/kvm/vmx.c1091
-rw-r--r--arch/x86/kvm/vmx_evmcs.h324
-rw-r--r--arch/x86/kvm/x86.c374
-rw-r--r--arch/x86/kvm/x86.h86
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/init_64.c34
-rw-r--r--arch/x86/mm/mmap.c18
-rw-r--r--arch/x86/xen/mmu_pv.c38
-rw-r--r--arch/xtensa/include/asm/pci.h7
-rw-r--r--arch/xtensa/include/uapi/asm/mman.h1
-rw-r--r--arch/xtensa/kernel/pci.c94
-rw-r--r--arch/xtensa/mm/cache.c2
533 files changed, 11327 insertions, 5077 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index e96adcb..b202288 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -18,6 +18,7 @@ config ALPHA
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select AUDIT_ARCH
select GENERIC_CLOCKEVENTS
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_SMP_IDLE_THREAD
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index d123ff9..4c533fc 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -341,14 +341,14 @@ extern inline unsigned int ioread16(void __iomem *addr)
extern inline void iowrite8(u8 b, void __iomem *addr)
{
- IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr);
mb();
+ IO_CONCAT(__IO_PREFIX, iowrite8)(b, addr);
}
extern inline void iowrite16(u16 b, void __iomem *addr)
{
- IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr);
mb();
+ IO_CONCAT(__IO_PREFIX, iowrite16)(b, addr);
}
extern inline u8 inb(unsigned long port)
@@ -382,8 +382,8 @@ extern inline unsigned int ioread32(void __iomem *addr)
extern inline void iowrite32(u32 b, void __iomem *addr)
{
- IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr);
mb();
+ IO_CONCAT(__IO_PREFIX, iowrite32)(b, addr);
}
extern inline u32 inl(unsigned long port)
@@ -434,14 +434,14 @@ extern inline u16 readw(const volatile void __iomem *addr)
extern inline void writeb(u8 b, volatile void __iomem *addr)
{
- __raw_writeb(b, addr);
mb();
+ __raw_writeb(b, addr);
}
extern inline void writew(u16 b, volatile void __iomem *addr)
{
- __raw_writew(b, addr);
mb();
+ __raw_writew(b, addr);
}
#endif
@@ -482,14 +482,14 @@ extern inline u64 readq(const volatile void __iomem *addr)
extern inline void writel(u32 b, volatile void __iomem *addr)
{
- __raw_writel(b, addr);
mb();
+ __raw_writel(b, addr);
}
extern inline void writeq(u64 b, volatile void __iomem *addr)
{
- __raw_writeq(b, addr);
mb();
+ __raw_writeq(b, addr);
}
#endif
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 2dbdf59..f9d4e6b 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -32,6 +32,7 @@
#define MAP_NONBLOCK 0x40000 /* do not block on IO */
#define MAP_STACK 0x80000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x100000 /* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x200000/* MAP_FIXED which doesn't unmap underlying mapping */
#define MS_ASYNC 1 /* sync memory asynchronously */
#define MS_SYNC 2 /* synchronous memory sync */
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index bf7b41f..5a74581 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -9,7 +9,7 @@ ccflags-y := -Wno-sign-compare
obj-y := entry.o traps.o process.o osf_sys.o irq.o \
irq_alpha.o signal.o setup.o ptrace.o time.o \
- systbls.o err_common.o io.o
+ systbls.o err_common.o io.o bugs.o
obj-$(CONFIG_VGA_HOSE) += console.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/alpha/kernel/bugs.c b/arch/alpha/kernel/bugs.c
new file mode 100644
index 0000000..08cc10d
--- /dev/null
+++ b/arch/alpha/kernel/bugs.c
@@ -0,0 +1,45 @@
+
+#include <asm/hwrpb.h>
+#include <linux/device.h>
+
+
+#ifdef CONFIG_SYSFS
+
+static int cpu_is_ev6_or_later(void)
+{
+ struct percpu_struct *cpu;
+ unsigned long cputype;
+
+ cpu = (struct percpu_struct *)((char *)hwrpb + hwrpb->processor_offset);
+ cputype = cpu->type & 0xffffffff;
+ /* Include all of EV6, EV67, EV68, EV7, EV79 and EV69. */
+ return (cputype == EV6_CPU) || ((cputype >= EV67_CPU) && (cputype <= EV69_CPU));
+}
+
+ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (cpu_is_ev6_or_later())
+ return sprintf(buf, "Vulnerable\n");
+ else
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (cpu_is_ev6_or_later())
+ return sprintf(buf, "Vulnerable\n");
+ else
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (cpu_is_ev6_or_later())
+ return sprintf(buf, "Vulnerable\n");
+ else
+ return sprintf(buf, "Not affected\n");
+}
+#endif
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index d92abb0..c64806a 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -785,7 +785,6 @@ ret_from_kernel_thread:
mov $9, $27
mov $10, $16
jsr $26, ($9)
- mov $31, $19 /* to disable syscall restarts */
br $31, ret_to_user
.end ret_from_kernel_thread
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index b995987..b6ebb65 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
+#include <linux/syscalls.h>
#include "proto.h"
@@ -46,8 +47,8 @@ alloc_resource(void)
return alloc_bootmem(sizeof(struct resource));
}
-asmlinkage long
-sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
+SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus,
+ unsigned long, dfn)
{
struct pci_controller *hose;
@@ -84,9 +85,8 @@ sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
return -EOPNOTSUPP;
}
-asmlinkage long
-sys_pciconfig_read(unsigned long bus, unsigned long dfn,
- unsigned long off, unsigned long len, void *buf)
+SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn,
+ unsigned long, off, unsigned long, len, void __user *, buf)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -94,9 +94,8 @@ sys_pciconfig_read(unsigned long bus, unsigned long dfn,
return -ENODEV;
}
-asmlinkage long
-sys_pciconfig_write(unsigned long bus, unsigned long dfn,
- unsigned long off, unsigned long len, void *buf)
+SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
+ unsigned long, off, unsigned long, len, void __user *, buf)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 2e86ebb..c668c3b 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/slab.h>
+#include <linux/syscalls.h>
#include <asm/machvec.h>
#include "proto.h"
@@ -409,8 +410,8 @@ alloc_resource(void)
/* Provide information on locations of various I/O regions in physical
memory. Do this on a per-card basis so that we choose the right hose. */
-asmlinkage long
-sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
+SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus,
+ unsigned long, dfn)
{
struct pci_controller *hose;
struct pci_dev *dev;
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index b3da0dc..1376a28 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -97,7 +97,7 @@ alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
tm->tm_year = year;
}
- return rtc_valid_tm(tm);
+ return 0;
}
static int
@@ -115,83 +115,6 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
}
static int
-alpha_rtc_set_mmss(struct device *dev, time64_t nowtime)
-{
- int retval = 0;
- int real_seconds, real_minutes, cmos_minutes;
- unsigned char save_control, save_freq_select;
-
- /* Note: This code only updates minutes and seconds. Comments
- indicate this was to avoid messing with unknown time zones,
- and with the epoch nonsense described above. In order for
- this to work, the existing clock cannot be off by more than
- 15 minutes.
-
- ??? This choice is may be out of date. The x86 port does
- not have problems with timezones, and the epoch processing has
- now been fixed in alpha_set_rtc_time.
-
- In either case, one can always force a full rtc update with
- the userland hwclock program, so surely 15 minute accuracy
- is no real burden. */
-
- /* In order to set the CMOS clock precisely, we have to be called
- 500 ms after the second nowtime has started, because when
- nowtime is written into the registers of the CMOS clock, it will
- jump to the next second precisely 500 ms later. Check the Motorola
- MC146818A or Dallas DS12887 data sheet for details. */
-
- /* irq are locally disabled here */
- spin_lock(&rtc_lock);
- /* Tell the clock it's being set */
- save_control = CMOS_READ(RTC_CONTROL);
- CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
- /* Stop and reset prescaler */
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
- cmos_minutes = CMOS_READ(RTC_MINUTES);
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- cmos_minutes = bcd2bin(cmos_minutes);
-
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
- if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) {
- /* correct for half hour time zone */
- real_minutes += 30;
- }
- real_minutes %= 60;
-
- if (abs(real_minutes - cmos_minutes) < 30) {
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- real_seconds = bin2bcd(real_seconds);
- real_minutes = bin2bcd(real_minutes);
- }
- CMOS_WRITE(real_seconds,RTC_SECONDS);
- CMOS_WRITE(real_minutes,RTC_MINUTES);
- } else {
- printk_once(KERN_NOTICE
- "set_rtc_mmss: can't update from %d to %d\n",
- cmos_minutes, real_minutes);
- retval = -1;
- }
-
- /* The following flags have to be released exactly in this order,
- * otherwise the DS12887 (popular MC146818A clone with integrated
- * battery and quartz) will not reset the oscillator and will not
- * update precisely 500 ms later. You won't find this mentioned in
- * the Dallas Semiconductor data sheets, but who believes data
- * sheets anyway ... -- Markus Kuhn
- */
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- spin_unlock(&rtc_lock);
-
- return retval;
-}
-
-static int
alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -210,7 +133,6 @@ alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
static const struct rtc_class_ops alpha_rtc_ops = {
.read_time = alpha_rtc_read_time,
.set_time = alpha_rtc_set_time,
- .set_mmss64 = alpha_rtc_set_mmss,
.ioctl = alpha_rtc_ioctl,
};
@@ -225,7 +147,6 @@ static const struct rtc_class_ops alpha_rtc_ops = {
union remote_data {
struct rtc_time *tm;
- unsigned long now;
long retval;
};
@@ -267,29 +188,9 @@ remote_set_time(struct device *dev, struct rtc_time *tm)
return alpha_rtc_set_time(NULL, tm);
}
-static void
-do_remote_mmss(void *data)
-{
- union remote_data *x = data;
- x->retval = alpha_rtc_set_mmss(NULL, x->now);
-}
-
-static int
-remote_set_mmss(struct device *dev, time64_t now)
-{
- union remote_data x;
- if (smp_processor_id() != boot_cpuid) {
- x.now = now;
- smp_call_function_single(boot_cpuid, do_remote_mmss, &x, 1);
- return x.retval;
- }
- return alpha_rtc_set_mmss(NULL, now);
-}
-
static const struct rtc_class_ops remote_rtc_ops = {
.read_time = remote_read_time,
.set_time = remote_set_time,
- .set_mmss64 = remote_set_mmss,
.ioctl = alpha_rtc_ioctl,
};
#endif
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 6e9a0a9..783b2035 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -10,7 +10,6 @@
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
-#include <linux/fs_struct.h>
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <linux/sched/mm.h>
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 2072f34..9dbe645 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -833,7 +833,7 @@ void flush_dcache_page(struct page *page)
}
/* don't handle anon pages here */
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (!mapping)
return;
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1878083..a7f8e7f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,6 +7,7 @@ config ARM
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index a2ac3fe..c16c182 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -6,10 +6,7 @@
#include <linux/stddef.h> /* for NULL */
#include <linux/linkage.h>
#include <asm/string.h>
-
-extern unsigned long free_mem_ptr;
-extern unsigned long free_mem_end_ptr;
-extern void error(char *);
+#include "misc.h"
#define STATIC static
#define STATIC_RW_DATA /* non-static please */
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 16a8a80..e1e9a5d 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -22,9 +22,9 @@ unsigned int __machine_arch_type;
#include <linux/compiler.h> /* for inline */
#include <linux/types.h>
#include <linux/linkage.h>
+#include "misc.h"
static void putstr(const char *ptr);
-extern void error(char *x);
#include CONFIG_UNCOMPRESS_INCLUDE
@@ -128,12 +128,7 @@ asmlinkage void __div0(void)
error("Attempting division by 0!");
}
-unsigned long __stack_chk_guard;
-
-void __stack_chk_guard_setup(void)
-{
- __stack_chk_guard = 0x000a0dff;
-}
+const unsigned long __stack_chk_guard = 0x000a0dff;
void __stack_chk_fail(void)
{
@@ -150,8 +145,6 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
{
int ret;
- __stack_chk_guard_setup();
-
output_data = (unsigned char *)output_start;
free_mem_ptr = free_mem_ptr_p;
free_mem_end_ptr = free_mem_ptr_end_p;
@@ -167,3 +160,8 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
else
putstr(" done, booting the kernel.\n");
}
+
+void fortify_panic(const char *name)
+{
+ error("detected buffer overflow");
+}
diff --git a/arch/arm/boot/compressed/misc.h b/arch/arm/boot/compressed/misc.h
new file mode 100644
index 0000000..c958dcc
--- /dev/null
+++ b/arch/arm/boot/compressed/misc.h
@@ -0,0 +1,10 @@
+#ifndef MISC_H
+#define MISC_H
+
+#include <linux/compiler.h>
+
+void error(char *x) __noreturn;
+extern unsigned long free_mem_ptr;
+extern unsigned long free_mem_end_ptr;
+
+#endif
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index fbd2897..c55d479 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -587,7 +587,8 @@
device_type = "mdio";
#address-cells = <1>;
#size-cells = <0>;
- reg = <0x0 0x2d24000 0x0 0x4000>;
+ reg = <0x0 0x2d24000 0x0 0x4000>,
+ <0x0 0x2d10030 0x0 0x4>;
};
ptp_clock@2d10e00 {
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 74504b1..869080b 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -318,10 +318,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
extern void flush_kernel_dcache_page(struct page *);
-#define flush_dcache_mmap_lock(mapping) \
- spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
- spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#define flush_icache_user_range(vma,page,addr,len) \
flush_dcache_page(page)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 36dd296..5a953ec 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -70,7 +70,10 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+/* no VHE on 32-bit :( */
+static inline int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { BUG(); return 0; }
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
extern void __init_stage2_translation(void);
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 9003bd1..6493bd4 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -41,7 +41,17 @@ static inline unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num)
return vcpu_reg(vcpu, reg_num);
}
-unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
+unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu);
+
+static inline unsigned long vpcu_read_spsr(struct kvm_vcpu *vcpu)
+{
+ return *__vcpu_spsr(vcpu);
+}
+
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ *__vcpu_spsr(vcpu) = v;
+}
static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
u8 reg_num)
@@ -92,14 +102,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr = HCR_GUEST_MASK;
}
-static inline unsigned long vcpu_get_hcr(const struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.hcr;
-}
-
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr = hcr;
+ return (unsigned long *)&vcpu->arch.hcr;
}
static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 248b930..c6a7495 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -155,9 +155,6 @@ struct kvm_vcpu_arch {
/* HYP trapping configuration */
u32 hcr;
- /* Interrupt related fields */
- u32 irq_lines; /* IRQ and FIQ levels */
-
/* Exception Information */
struct kvm_vcpu_fault_info fault;
@@ -315,4 +312,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return false;
}
+static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 1ab8329..e93a0ca 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -110,6 +110,10 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index de1b919..707a1f0 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -28,6 +28,13 @@
*/
#define kern_hyp_va(kva) (kva)
+/* Contrary to arm64, there is no need to generate a PC-relative address */
+#define hyp_symbol_addr(s) \
+ ({ \
+ typeof(s) *addr = &(s); \
+ addr; \
+ })
+
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
*/
@@ -42,8 +49,15 @@
#include <asm/pgalloc.h>
#include <asm/stage2_pgtable.h>
+/* Ensure compatibility with arm64 */
+#define VA_BITS 32
+
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+ void __iomem **kaddr,
+ void __iomem **haddr);
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+ void **haddr);
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4966677..ed8fd0d 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -22,12 +22,6 @@
#include <mach/memory.h>
#endif
-/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
/* PAGE_OFFSET - the virtual address of the start of the kernel image */
#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET)
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 6edd177..2ba95d6 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -135,6 +135,15 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_CRM_SHIFT 7
#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
#define KVM_REG_ARM_32_CRN_SHIFT 11
+/*
+ * For KVM currently all guest registers are nonsecure, but we reserve a bit
+ * in the encoding to distinguish secure from nonsecure for AArch32 system
+ * registers that are banked by security. This is 1 for the secure banked
+ * register, and 0 for the nonsecure banked register or if the register is
+ * not banked by security.
+ */
+#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000
+#define KVM_REG_ARM_SECURE_SHIFT 28
#define ARM_CP15_REG_SHIFT_MASK(x,n) \
(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 12b8759..d32f5d3 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -15,38 +15,7 @@
#include <asm/memory.h>
#include <asm/page.h>
-#define PROC_INFO \
- . = ALIGN(4); \
- VMLINUX_SYMBOL(__proc_info_begin) = .; \
- *(.proc.info.init) \
- VMLINUX_SYMBOL(__proc_info_end) = .;
-
-#define IDMAP_TEXT \
- ALIGN_FUNCTION(); \
- VMLINUX_SYMBOL(__idmap_text_start) = .; \
- *(.idmap.text) \
- VMLINUX_SYMBOL(__idmap_text_end) = .; \
- . = ALIGN(PAGE_SIZE); \
- VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
- *(.hyp.idmap.text) \
- VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
-
-#ifdef CONFIG_HOTPLUG_CPU
-#define ARM_CPU_DISCARD(x)
-#define ARM_CPU_KEEP(x) x
-#else
-#define ARM_CPU_DISCARD(x) x
-#define ARM_CPU_KEEP(x)
-#endif
-
-#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
- defined(CONFIG_GENERIC_BUG)
-#define ARM_EXIT_KEEP(x) x
-#define ARM_EXIT_DISCARD(x)
-#else
-#define ARM_EXIT_KEEP(x)
-#define ARM_EXIT_DISCARD(x) x
-#endif
+#include "vmlinux.lds.h"
OUTPUT_ARCH(arm)
ENTRY(stext)
@@ -69,20 +38,9 @@ SECTIONS
* unwind sections get included.
*/
/DISCARD/ : {
- *(.ARM.exidx.exit.text)
- *(.ARM.extab.exit.text)
- ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))
- ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))
- ARM_EXIT_DISCARD(EXIT_TEXT)
- ARM_EXIT_DISCARD(EXIT_DATA)
- EXIT_CALL
-#ifndef CONFIG_MMU
- *(.text.fixup)
- *(__ex_table)
-#endif
+ ARM_DISCARD
*(.alt.smp.init)
- *(.discard)
- *(.discard.*)
+ *(.pv_table)
}
. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
@@ -95,22 +53,7 @@ SECTIONS
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
- IDMAP_TEXT
- __entry_text_start = .;
- *(.entry.text)
- __entry_text_end = .;
- IRQENTRY_TEXT
- TEXT_TEXT
- SCHED_TEXT
- CPUIDLE_TEXT
- LOCK_TEXT
- KPROBES_TEXT
- *(.gnu.warning)
- *(.glue_7)
- *(.glue_7t)
- . = ALIGN(4);
- *(.got) /* Global offset table */
- ARM_CPU_KEEP(PROC_INFO)
+ ARM_TEXT
}
RO_DATA(PAGE_SIZE)
@@ -118,53 +61,19 @@ SECTIONS
. = ALIGN(4);
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
-#ifdef CONFIG_MMU
- *(__ex_table)
-#endif
+ ARM_MMU_KEEP(*(__ex_table))
__stop___ex_table = .;
}
#ifdef CONFIG_ARM_UNWIND
- /*
- * Stack unwinding tables
- */
- . = ALIGN(8);
- .ARM.unwind_idx : {
- __start_unwind_idx = .;
- *(.ARM.exidx*)
- __stop_unwind_idx = .;
- }
- .ARM.unwind_tab : {
- __start_unwind_tab = .;
- *(.ARM.extab*)
- __stop_unwind_tab = .;
- }
+ ARM_UNWIND_SECTIONS
#endif
NOTES
_etext = .; /* End of text and rodata section */
- /*
- * The vectors and stubs are relocatable code, and the
- * only thing that matters is their relative offsets
- */
- __vectors_start = .;
- .vectors 0xffff0000 : AT(__vectors_start) {
- *(.vectors)
- }
- . = __vectors_start + SIZEOF(.vectors);
- __vectors_end = .;
-
- __stubs_start = .;
- .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) {
- *(.stubs)
- }
- . = __stubs_start + SIZEOF(.stubs);
- __stubs_end = .;
-
- PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
-
+ ARM_VECTORS
INIT_TEXT_SECTION(8)
.exit.text : {
ARM_EXIT_KEEP(EXIT_TEXT)
@@ -223,6 +132,10 @@ SECTIONS
PERCPU_SECTION(L1_CACHE_BYTES)
#endif
+#ifdef CONFIG_HAVE_TCM
+ ARM_TCM
+#endif
+
/*
* End of copied data. We need a dummy section to get its LMA.
* Also located before final ALIGN() as trailing padding is not stored
@@ -234,63 +147,6 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__init_end = .;
-#ifdef CONFIG_HAVE_TCM
- /*
- * We align everything to a page boundary so we can
- * free it after init has commenced and TCM contents have
- * been copied to its destination.
- */
- .tcm_start : {
- . = ALIGN(PAGE_SIZE);
- __tcm_start = .;
- __itcm_start = .;
- }
-
- /*
- * Link these to the ITCM RAM
- * Put VMA to the TCM address and LMA to the common RAM
- * and we'll upload the contents from RAM to TCM and free
- * the used RAM after that.
- */
- .text_itcm ITCM_OFFSET : AT(__itcm_start)
- {
- __sitcm_text = .;
- *(.tcm.text)
- *(.tcm.rodata)
- . = ALIGN(4);
- __eitcm_text = .;
- }
-
- /*
- * Reset the dot pointer, this is needed to create the
- * relative __dtcm_start below (to be used as extern in code).
- */
- . = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
-
- .dtcm_start : {
- __dtcm_start = .;
- }
-
- /* TODO: add remainder of ITCM as well, that can be used for data! */
- .data_dtcm DTCM_OFFSET : AT(__dtcm_start)
- {
- . = ALIGN(4);
- __sdtcm_data = .;
- *(.tcm.data)
- . = ALIGN(4);
- __edtcm_data = .;
- }
-
- /* Reset the dot pointer or the linker gets confused */
- . = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
-
- /* End marker for freeing TCM copy in linked object */
- .tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
- . = ALIGN(PAGE_SIZE);
- __tcm_end = .;
- }
-#endif
-
BSS_SECTION(0, 0, 8)
_end = .;
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 84a1ae3..b77dc67 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -15,43 +15,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
-#define PROC_INFO \
- . = ALIGN(4); \
- VMLINUX_SYMBOL(__proc_info_begin) = .; \
- *(.proc.info.init) \
- VMLINUX_SYMBOL(__proc_info_end) = .;
-
-#define HYPERVISOR_TEXT \
- VMLINUX_SYMBOL(__hyp_text_start) = .; \
- *(.hyp.text) \
- VMLINUX_SYMBOL(__hyp_text_end) = .;
-
-#define IDMAP_TEXT \
- ALIGN_FUNCTION(); \
- VMLINUX_SYMBOL(__idmap_text_start) = .; \
- *(.idmap.text) \
- VMLINUX_SYMBOL(__idmap_text_end) = .; \
- . = ALIGN(PAGE_SIZE); \
- VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
- *(.hyp.idmap.text) \
- VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
-
-#ifdef CONFIG_HOTPLUG_CPU
-#define ARM_CPU_DISCARD(x)
-#define ARM_CPU_KEEP(x) x
-#else
-#define ARM_CPU_DISCARD(x) x
-#define ARM_CPU_KEEP(x)
-#endif
-
-#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
- defined(CONFIG_GENERIC_BUG) || defined(CONFIG_JUMP_LABEL)
-#define ARM_EXIT_KEEP(x) x
-#define ARM_EXIT_DISCARD(x)
-#else
-#define ARM_EXIT_KEEP(x)
-#define ARM_EXIT_DISCARD(x) x
-#endif
+#include "vmlinux.lds.h"
OUTPUT_ARCH(arm)
ENTRY(stext)
@@ -74,22 +38,10 @@ SECTIONS
* unwind sections get included.
*/
/DISCARD/ : {
- *(.ARM.exidx.exit.text)
- *(.ARM.extab.exit.text)
- ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))
- ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))
- ARM_EXIT_DISCARD(EXIT_TEXT)
- ARM_EXIT_DISCARD(EXIT_DATA)
- EXIT_CALL
-#ifndef CONFIG_MMU
- *(.text.fixup)
- *(__ex_table)
-#endif
+ ARM_DISCARD
#ifndef CONFIG_SMP_ON_UP
*(.alt.smp.init)
#endif
- *(.discard)
- *(.discard.*)
}
. = PAGE_OFFSET + TEXT_OFFSET;
@@ -104,24 +56,7 @@ SECTIONS
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
- IDMAP_TEXT
- __entry_text_start = .;
- *(.entry.text)
- __entry_text_end = .;
- IRQENTRY_TEXT
- SOFTIRQENTRY_TEXT
- TEXT_TEXT
- SCHED_TEXT
- CPUIDLE_TEXT
- LOCK_TEXT
- HYPERVISOR_TEXT
- KPROBES_TEXT
- *(.gnu.warning)
- *(.glue_7)
- *(.glue_7t)
- . = ALIGN(4);
- *(.got) /* Global offset table */
- ARM_CPU_KEEP(PROC_INFO)
+ ARM_TEXT
}
#ifdef CONFIG_DEBUG_ALIGN_RODATA
@@ -134,27 +69,12 @@ SECTIONS
. = ALIGN(4);
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
-#ifdef CONFIG_MMU
- *(__ex_table)
-#endif
+ ARM_MMU_KEEP(*(__ex_table))
__stop___ex_table = .;
}
#ifdef CONFIG_ARM_UNWIND
- /*
- * Stack unwinding tables
- */
- . = ALIGN(8);
- .ARM.unwind_idx : {
- __start_unwind_idx = .;
- *(.ARM.exidx*)
- __stop_unwind_idx = .;
- }
- .ARM.unwind_tab : {
- __start_unwind_tab = .;
- *(.ARM.extab*)
- __stop_unwind_tab = .;
- }
+ ARM_UNWIND_SECTIONS
#endif
NOTES
@@ -166,26 +86,7 @@ SECTIONS
#endif
__init_begin = .;
- /*
- * The vectors and stubs are relocatable code, and the
- * only thing that matters is their relative offsets
- */
- __vectors_start = .;
- .vectors 0xffff0000 : AT(__vectors_start) {
- *(.vectors)
- }
- . = __vectors_start + SIZEOF(.vectors);
- __vectors_end = .;
-
- __stubs_start = .;
- .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) {
- *(.stubs)
- }
- . = __stubs_start + SIZEOF(.stubs);
- __stubs_end = .;
-
- PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
-
+ ARM_VECTORS
INIT_TEXT_SECTION(8)
.exit.text : {
ARM_EXIT_KEEP(EXIT_TEXT)
@@ -226,6 +127,10 @@ SECTIONS
PERCPU_SECTION(L1_CACHE_BYTES)
#endif
+#ifdef CONFIG_HAVE_TCM
+ ARM_TCM
+#endif
+
#ifdef CONFIG_STRICT_KERNEL_RWX
. = ALIGN(1<<SECTION_SHIFT);
#else
@@ -237,63 +142,6 @@ SECTIONS
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
-#ifdef CONFIG_HAVE_TCM
- /*
- * We align everything to a page boundary so we can
- * free it after init has commenced and TCM contents have
- * been copied to its destination.
- */
- .tcm_start : {
- . = ALIGN(PAGE_SIZE);
- __tcm_start = .;
- __itcm_start = .;
- }
-
- /*
- * Link these to the ITCM RAM
- * Put VMA to the TCM address and LMA to the common RAM
- * and we'll upload the contents from RAM to TCM and free
- * the used RAM after that.
- */
- .text_itcm ITCM_OFFSET : AT(__itcm_start)
- {
- __sitcm_text = .;
- *(.tcm.text)
- *(.tcm.rodata)
- . = ALIGN(4);
- __eitcm_text = .;
- }
-
- /*
- * Reset the dot pointer, this is needed to create the
- * relative __dtcm_start below (to be used as extern in code).
- */
- . = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
-
- .dtcm_start : {
- __dtcm_start = .;
- }
-
- /* TODO: add remainder of ITCM as well, that can be used for data! */
- .data_dtcm DTCM_OFFSET : AT(__dtcm_start)
- {
- . = ALIGN(4);
- __sdtcm_data = .;
- *(.tcm.data)
- . = ALIGN(4);
- __edtcm_data = .;
- }
-
- /* Reset the dot pointer or the linker gets confused */
- . = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
-
- /* End marker for freeing TCM copy in linked object */
- .tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
- . = ALIGN(PAGE_SIZE);
- __tcm_end = .;
- }
-#endif
-
BSS_SECTION(0, 0, 0)
_end = .;
diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h
new file mode 100644
index 0000000..71281e0
--- /dev/null
+++ b/arch/arm/kernel/vmlinux.lds.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define ARM_CPU_DISCARD(x)
+#define ARM_CPU_KEEP(x) x
+#else
+#define ARM_CPU_DISCARD(x) x
+#define ARM_CPU_KEEP(x)
+#endif
+
+#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
+ defined(CONFIG_GENERIC_BUG) || defined(CONFIG_JUMP_LABEL)
+#define ARM_EXIT_KEEP(x) x
+#define ARM_EXIT_DISCARD(x)
+#else
+#define ARM_EXIT_KEEP(x)
+#define ARM_EXIT_DISCARD(x) x
+#endif
+
+#ifdef CONFIG_MMU
+#define ARM_MMU_KEEP(x) x
+#define ARM_MMU_DISCARD(x)
+#else
+#define ARM_MMU_KEEP(x)
+#define ARM_MMU_DISCARD(x) x
+#endif
+
+#define PROC_INFO \
+ . = ALIGN(4); \
+ VMLINUX_SYMBOL(__proc_info_begin) = .; \
+ *(.proc.info.init) \
+ VMLINUX_SYMBOL(__proc_info_end) = .;
+
+#define HYPERVISOR_TEXT \
+ VMLINUX_SYMBOL(__hyp_text_start) = .; \
+ *(.hyp.text) \
+ VMLINUX_SYMBOL(__hyp_text_end) = .;
+
+#define IDMAP_TEXT \
+ ALIGN_FUNCTION(); \
+ VMLINUX_SYMBOL(__idmap_text_start) = .; \
+ *(.idmap.text) \
+ VMLINUX_SYMBOL(__idmap_text_end) = .; \
+ . = ALIGN(PAGE_SIZE); \
+ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
+ *(.hyp.idmap.text) \
+ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
+
+#define ARM_DISCARD \
+ *(.ARM.exidx.exit.text) \
+ *(.ARM.extab.exit.text) \
+ ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) \
+ ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) \
+ ARM_EXIT_DISCARD(EXIT_TEXT) \
+ ARM_EXIT_DISCARD(EXIT_DATA) \
+ EXIT_CALL \
+ ARM_MMU_DISCARD(*(.text.fixup)) \
+ ARM_MMU_DISCARD(*(__ex_table)) \
+ *(.discard) \
+ *(.discard.*)
+
+#define ARM_TEXT \
+ IDMAP_TEXT \
+ __entry_text_start = .; \
+ *(.entry.text) \
+ __entry_text_end = .; \
+ IRQENTRY_TEXT \
+ SOFTIRQENTRY_TEXT \
+ TEXT_TEXT \
+ SCHED_TEXT \
+ CPUIDLE_TEXT \
+ LOCK_TEXT \
+ HYPERVISOR_TEXT \
+ KPROBES_TEXT \
+ *(.gnu.warning) \
+ *(.glue_7) \
+ *(.glue_7t) \
+ . = ALIGN(4); \
+ *(.got) /* Global offset table */ \
+ ARM_CPU_KEEP(PROC_INFO)
+
+/* Stack unwinding tables */
+#define ARM_UNWIND_SECTIONS \
+ . = ALIGN(8); \
+ .ARM.unwind_idx : { \
+ __start_unwind_idx = .; \
+ *(.ARM.exidx*) \
+ __stop_unwind_idx = .; \
+ } \
+ .ARM.unwind_tab : { \
+ __start_unwind_tab = .; \
+ *(.ARM.extab*) \
+ __stop_unwind_tab = .; \
+ }
+
+/*
+ * The vectors and stubs are relocatable code, and the
+ * only thing that matters is their relative offsets
+ */
+#define ARM_VECTORS \
+ __vectors_start = .; \
+ .vectors 0xffff0000 : AT(__vectors_start) { \
+ *(.vectors) \
+ } \
+ . = __vectors_start + SIZEOF(.vectors); \
+ __vectors_end = .; \
+ \
+ __stubs_start = .; \
+ .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \
+ *(.stubs) \
+ } \
+ . = __stubs_start + SIZEOF(.stubs); \
+ __stubs_end = .; \
+ \
+ PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
+
+#define ARM_TCM \
+ __itcm_start = ALIGN(4); \
+ .text_itcm ITCM_OFFSET : AT(__itcm_start - LOAD_OFFSET) { \
+ __sitcm_text = .; \
+ *(.tcm.text) \
+ *(.tcm.rodata) \
+ . = ALIGN(4); \
+ __eitcm_text = .; \
+ } \
+ . = __itcm_start + SIZEOF(.text_itcm); \
+ \
+ __dtcm_start = .; \
+ .data_dtcm DTCM_OFFSET : AT(__dtcm_start - LOAD_OFFSET) { \
+ __sdtcm_data = .; \
+ *(.tcm.data) \
+ . = ALIGN(4); \
+ __edtcm_data = .; \
+ } \
+ . = __dtcm_start + SIZEOF(.data_dtcm);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 6d1d2e2..3a02e76 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -270,6 +270,60 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_cntp_tval(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ u64 now = kvm_phys_timer_read();
+ u64 val;
+
+ if (p->is_write) {
+ val = *vcpu_reg(vcpu, p->Rt1);
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now);
+ } else {
+ val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+ *vcpu_reg(vcpu, p->Rt1) = val - now;
+ }
+
+ return true;
+}
+
+static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ u32 val;
+
+ if (p->is_write) {
+ val = *vcpu_reg(vcpu, p->Rt1);
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val);
+ } else {
+ val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
+ *vcpu_reg(vcpu, p->Rt1) = val;
+ }
+
+ return true;
+}
+
+static bool access_cntp_cval(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ u64 val;
+
+ if (p->is_write) {
+ val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
+ val |= *vcpu_reg(vcpu, p->Rt1);
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val);
+ } else {
+ val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+ *vcpu_reg(vcpu, p->Rt1) = val;
+ *vcpu_reg(vcpu, p->Rt2) = val >> 32;
+ }
+
+ return true;
+}
+
/*
* We could trap ID_DFR0 and tell the guest we don't support performance
* monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
@@ -423,10 +477,17 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn(13), CRm( 0), Op1( 0), Op2( 4), is32,
NULL, reset_unknown, c13_TID_PRIV },
+ /* CNTP */
+ { CRm64(14), Op1( 2), is64, access_cntp_cval},
+
/* CNTKCTL: swapped by interrupt.S. */
{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+ /* CNTP */
+ { CRn(14), CRm( 2), Op1( 0), Op2( 0), is32, access_cntp_tval },
+ { CRn(14), CRm( 2), Op1( 0), Op2( 1), is32, access_cntp_ctl },
+
/* The Configuration Base Address Register. */
{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index cdff963..9046b53 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -142,7 +142,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
/*
* Return the SPSR for the current mode of the virtual CPU.
*/
-unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
+unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu)
{
unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
@@ -174,5 +174,5 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
*/
void kvm_inject_vabt(struct kvm_vcpu *vcpu)
{
- vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VA);
+ *vcpu_hcr(vcpu) |= HCR_VA;
}
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 63d6b40..7fc0638 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -9,7 +9,6 @@ KVM=../../../../virt/kvm
CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve)
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index ae45ae9..acf1c37 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -44,7 +44,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
isb();
}
- write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR);
+ write_sysreg(vcpu->arch.hcr, HCR);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(HSTR_T(15), HSTR);
write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
@@ -90,18 +90,18 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(vcpu);
- else
- __vgic_v2_save_state(vcpu);
+ __vgic_v3_deactivate_traps(vcpu);
+ }
}
static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(vcpu);
__vgic_v3_restore_state(vcpu);
- else
- __vgic_v2_restore_state(vcpu);
+ }
}
static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
@@ -154,7 +154,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
return true;
}
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 07df3a5..fde7ef1 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -6,6 +6,8 @@ config SA1100_ASSABET
bool "Assabet"
select ARM_SA1110_CPUFREQ
select GPIO_REG
+ select REGULATOR
+ select REGULATOR_FIXED_VOLTAGE
help
Say Y here if you are using the Intel(R) StrongARM(R) SA-1110
Microprocessor Development Board (also known as the Assabet).
@@ -137,6 +139,8 @@ config SA1100_PLEB
config SA1100_SHANNON
bool "Shannon"
select ARM_SA1100_CPUFREQ
+ select REGULATOR
+ select REGULATOR_FIXED_VOLTAGE
help
The Shannon (also known as a Tuxscreen, and also as a IS2630) was a
limited edition webphone produced by Philips. The Shannon is a SA1100
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index f68241d..575ec08 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -14,8 +14,11 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/gpio/gpio-reg.h>
+#include <linux/gpio/machine.h>
#include <linux/ioport.h>
#include <linux/platform_data/sa11x0-serial.h>
+#include <linux/regulator/fixed.h>
+#include <linux/regulator/machine.h>
#include <linux/serial_core.h>
#include <linux/platform_device.h>
#include <linux/mfd/ucb1x00.h>
@@ -445,6 +448,29 @@ static struct resource neponset_resources[] = {
};
#endif
+static struct gpiod_lookup_table assabet_cf_gpio_table = {
+ .dev_id = "sa11x0-pcmcia.1",
+ .table = {
+ GPIO_LOOKUP("gpio", 21, "ready", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", 22, "detect", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio", 24, "bvd2", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", 25, "bvd1", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("assabet", 1, "reset", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("assabet", 7, "bus-enable", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
+static struct regulator_consumer_supply assabet_cf_vcc_consumers[] = {
+ REGULATOR_SUPPLY("vcc", "sa11x0-pcmcia.1"),
+};
+
+static struct fixed_voltage_config assabet_cf_vcc_pdata __initdata = {
+ .supply_name = "cf-power",
+ .microvolts = 3300000,
+ .enable_high = 1,
+};
+
static void __init assabet_init(void)
{
/*
@@ -490,6 +516,11 @@ static void __init assabet_init(void)
platform_device_register_simple("neponset", 0,
neponset_resources, ARRAY_SIZE(neponset_resources));
#endif
+ } else {
+ sa11x0_register_fixed_regulator(0, &assabet_cf_vcc_pdata,
+ assabet_cf_vcc_consumers,
+ ARRAY_SIZE(assabet_cf_vcc_consumers));
+
}
#ifndef ASSABET_PAL_VIDEO
@@ -501,6 +532,9 @@ static void __init assabet_init(void)
ARRAY_SIZE(assabet_flash_resources));
sa11x0_register_irda(&assabet_irda_data);
sa11x0_register_mcp(&assabet_mcp_data);
+
+ if (!machine_has_neponset())
+ sa11x0_register_pcmcia(1, &assabet_cf_gpio_table);
}
/*
@@ -768,6 +802,7 @@ fs_initcall(assabet_leds_init);
void __init assabet_init_irq(void)
{
+ unsigned int assabet_gpio_base;
u32 def_val;
sa1100_init_irq();
@@ -782,7 +817,9 @@ void __init assabet_init_irq(void)
*
* This must precede any driver calls to BCR_set() or BCR_clear().
*/
- assabet_init_gpio((void *)&ASSABET_BCR, def_val);
+ assabet_gpio_base = assabet_init_gpio((void *)&ASSABET_BCR, def_val);
+
+ assabet_cf_vcc_pdata.gpio = assabet_gpio_base + 0;
}
MACHINE_START(ASSABET, "Intel-Assabet")
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index 2d25ece..b2a4b41 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -11,6 +11,7 @@
*/
#include <linux/init.h>
+#include <linux/gpio/machine.h>
#include <linux/kernel.h>
#include <linux/tty.h>
#include <linux/platform_data/sa11x0-serial.h>
@@ -45,6 +46,19 @@ static struct platform_device cerfuart2_device = {
.resource = cerfuart2_resources,
};
+/* Compact Flash */
+static struct gpiod_lookup_table cerf_cf_gpio_table = {
+ .dev_id = "sa11x0-pcmcia.1",
+ .table = {
+ GPIO_LOOKUP("gpio", 19, "bvd2", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", 20, "bvd1", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", 21, "reset", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", 22, "ready", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", 23, "detect", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
/* LEDs */
struct gpio_led cerf_gpio_leds[] = {
{
@@ -151,9 +165,6 @@ static void __init cerf_map_io(void)
sa1100_register_uart(0, 3);
sa1100_register_uart(1, 2); /* disable this and the uart2 device for sa1100_fir */
sa1100_register_uart(2, 1);
-
- /* set some GPDR bits here while it's safe */
- GPDR |= CERF_GPIO_CF_RESET;
}
static struct mcp_plat_data cerf_mcp_data = {
@@ -167,6 +178,7 @@ static void __init cerf_init(void)
platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices));
sa11x0_register_mtd(&cerf_flash_data, &cerf_flash_resource, 1);
sa11x0_register_mcp(&cerf_mcp_data);
+ sa11x0_register_pcmcia(1, &cerf_cf_gpio_table);
}
MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube")
diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c
index b2eb3d2..6199e87 100644
--- a/arch/arm/mach-sa1100/clock.c
+++ b/arch/arm/mach-sa1100/clock.c
@@ -163,6 +163,8 @@ static struct clk_lookup sa11xx_clkregs[] = {
CLKDEV_INIT("sa1100-rtc", NULL, NULL),
CLKDEV_INIT("sa11x0-fb", NULL, &clk_cpu),
CLKDEV_INIT("sa11x0-pcmcia", NULL, &clk_cpu),
+ CLKDEV_INIT("sa11x0-pcmcia.0", NULL, &clk_cpu),
+ CLKDEV_INIT("sa11x0-pcmcia.1", NULL, &clk_cpu),
/* sa1111 names devices using internal offsets, PCMCIA is at 0x1800 */
CLKDEV_INIT("1800", NULL, &clk_cpu),
CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864),
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 2eb0069..7167ddf 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -10,6 +10,7 @@
* published by the Free Software Foundation.
*/
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -20,6 +21,8 @@
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
+#include <linux/regulator/fixed.h>
+#include <linux/regulator/machine.h>
#include <linux/irqchip/irq-sa11x0.h>
#include <video/sa1100fb.h>
@@ -232,11 +235,20 @@ void sa11x0_register_lcd(struct sa1100fb_mach_info *inf)
sa11x0_register_device(&sa11x0fb_device, inf);
}
+static bool sa11x0pcmcia_legacy = true;
static struct platform_device sa11x0pcmcia_device = {
.name = "sa11x0-pcmcia",
.id = -1,
};
+void sa11x0_register_pcmcia(int socket, struct gpiod_lookup_table *table)
+{
+ if (table)
+ gpiod_add_lookup_table(table);
+ platform_device_register_simple("sa11x0-pcmcia", socket, NULL, 0);
+ sa11x0pcmcia_legacy = false;
+}
+
static struct platform_device sa11x0mtd_device = {
.name = "sa1100-mtd",
.id = -1,
@@ -311,7 +323,6 @@ static struct platform_device *sa11x0_devices[] __initdata = {
&sa11x0uart1_device,
&sa11x0uart3_device,
&sa11x0ssp_device,
- &sa11x0pcmcia_device,
&sa11x0rtc_device,
&sa11x0dma_device,
};
@@ -319,6 +330,12 @@ static struct platform_device *sa11x0_devices[] __initdata = {
static int __init sa1100_init(void)
{
pm_power_off = sa1100_power_off;
+
+ if (sa11x0pcmcia_legacy)
+ platform_device_register(&sa11x0pcmcia_device);
+
+ regulator_has_full_constraints();
+
return platform_add_devices(sa11x0_devices, ARRAY_SIZE(sa11x0_devices));
}
@@ -329,6 +346,31 @@ void __init sa11x0_init_late(void)
sa11x0_pm_init();
}
+int __init sa11x0_register_fixed_regulator(int n,
+ struct fixed_voltage_config *cfg,
+ struct regulator_consumer_supply *supplies, unsigned num_supplies)
+{
+ struct regulator_init_data *id;
+
+ cfg->init_data = id = kzalloc(sizeof(*cfg->init_data), GFP_KERNEL);
+ if (!cfg->init_data)
+ return -ENOMEM;
+
+ if (cfg->gpio < 0)
+ id->constraints.always_on = 1;
+ id->constraints.name = cfg->supply_name;
+ id->constraints.min_uV = cfg->microvolts;
+ id->constraints.max_uV = cfg->microvolts;
+ id->constraints.valid_modes_mask = REGULATOR_MODE_NORMAL;
+ id->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS;
+ id->consumer_supplies = supplies;
+ id->num_consumer_supplies = num_supplies;
+
+ platform_device_register_resndata(NULL, "reg-fixed-voltage", n,
+ NULL, 0, cfg, sizeof(*cfg));
+ return 0;
+}
+
/*
* Common I/O mapping:
*
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index 9750292..5f3cb52 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -47,3 +47,11 @@ static inline int sa11x0_pm_init(void) { return 0; }
#endif
int sa11xx_clk_init(void);
+
+struct gpiod_lookup_table;
+void sa11x0_register_pcmcia(int socket, struct gpiod_lookup_table *);
+
+struct fixed_voltage_config;
+struct regulator_consumer_supply;
+int sa11x0_register_fixed_regulator(int n, struct fixed_voltage_config *cfg,
+ struct regulator_consumer_supply *supplies, unsigned num_supplies);
diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c
index b69e766..36a78b0 100644
--- a/arch/arm/mach-sa1100/h3xxx.c
+++ b/arch/arm/mach-sa1100/h3xxx.c
@@ -11,6 +11,7 @@
*/
#include <linux/kernel.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio.h>
#include <linux/gpio_keys.h>
#include <linux/input.h>
@@ -264,8 +265,24 @@ static struct platform_device *h3xxx_devices[] = {
&h3xxx_micro_asic,
};
+static struct gpiod_lookup_table h3xxx_pcmcia_gpio_table = {
+ .dev_id = "sa11x0-pcmcia",
+ .table = {
+ GPIO_LOOKUP("gpio", H3XXX_GPIO_PCMCIA_CD0,
+ "pcmcia0-detect", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio", H3XXX_GPIO_PCMCIA_IRQ0,
+ "pcmcia0-ready", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", H3XXX_GPIO_PCMCIA_CD1,
+ "pcmcia1-detect", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio", H3XXX_GPIO_PCMCIA_IRQ1,
+ "pcmcia1-ready", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
void __init h3xxx_mach_init(void)
{
+ gpiod_add_lookup_table(&h3xxx_pcmcia_gpio_table);
sa1100_register_uart_fns(&h3xxx_port_fns);
sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1);
platform_add_devices(h3xxx_devices, ARRAY_SIZE(h3xxx_devices));
diff --git a/arch/arm/mach-sa1100/include/mach/assabet.h b/arch/arm/mach-sa1100/include/mach/assabet.h
index 558b453..641a961 100644
--- a/arch/arm/mach-sa1100/include/mach/assabet.h
+++ b/arch/arm/mach-sa1100/include/mach/assabet.h
@@ -96,10 +96,4 @@ extern void assabet_uda1341_reset(int set);
#define ASSABET_GPIO_BATT_LOW GPIO_GPIO (26) /* Low battery */
#define ASSABET_GPIO_RCLK GPIO_GPIO (26) /* CCLK/2 */
-/* These are gpiolib GPIO numbers, not bitmasks */
-#define ASSABET_GPIO_CF_IRQ 21 /* CF IRQ */
-#define ASSABET_GPIO_CF_CD 22 /* CF CD */
-#define ASSABET_GPIO_CF_BVD2 24 /* CF BVD / IOSPKR */
-#define ASSABET_GPIO_CF_BVD1 25 /* CF BVD / IOSTSCHG */
-
#endif
diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c
index f1cb378..4d35258 100644
--- a/arch/arm/mach-sa1100/nanoengine.c
+++ b/arch/arm/mach-sa1100/nanoengine.c
@@ -12,6 +12,7 @@
*/
#include <linux/init.h>
+#include <linux/gpio/machine.h>
#include <linux/kernel.h>
#include <linux/platform_data/sa11x0-serial.h>
#include <linux/mtd/mtd.h>
@@ -99,8 +100,30 @@ static void __init nanoengine_map_io(void)
Ser2HSCR0 = 0;
}
+static struct gpiod_lookup_table nanoengine_pcmcia0_gpio_table = {
+ .dev_id = "sa11x0-pcmcia.0",
+ .table = {
+ GPIO_LOOKUP("gpio", 11, "ready", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", 13, "detect", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio", 15, "reset", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
+static struct gpiod_lookup_table nanoengine_pcmcia1_gpio_table = {
+ .dev_id = "sa11x0-pcmcia.1",
+ .table = {
+ GPIO_LOOKUP("gpio", 12, "ready", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", 14, "detect", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio", 16, "reset", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init nanoengine_init(void)
{
+ sa11x0_register_pcmcia(0, &nanoengine_pcmcia0_gpio_table);
+ sa11x0_register_pcmcia(1, &nanoengine_pcmcia1_gpio_table);
sa11x0_register_mtd(&nanoengine_flash_data, nanoengine_flash_resources,
ARRAY_SIZE(nanoengine_flash_resources));
}
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 856664c..22f7fe0 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -5,11 +5,14 @@
#include <linux/init.h>
#include <linux/device.h>
+#include <linux/gpio/machine.h>
#include <linux/kernel.h>
#include <linux/platform_data/sa11x0-serial.h>
#include <linux/tty.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
+#include <linux/regulator/fixed.h>
+#include <linux/regulator/machine.h>
#include <video/sa1100fb.h>
@@ -72,8 +75,43 @@ static struct sa1100fb_mach_info shannon_lcd_info = {
.lccr3 = LCCR3_ACBsDiv(512),
};
+static struct gpiod_lookup_table shannon_pcmcia0_gpio_table = {
+ .dev_id = "sa11x0-pcmcia.0",
+ .table = {
+ GPIO_LOOKUP("gpio", 24, "detect", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio", 26, "ready", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
+static struct gpiod_lookup_table shannon_pcmcia1_gpio_table = {
+ .dev_id = "sa11x0-pcmcia.1",
+ .table = {
+ GPIO_LOOKUP("gpio", 25, "detect", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio", 27, "ready", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
+static struct regulator_consumer_supply shannon_cf_vcc_consumers[] = {
+ REGULATOR_SUPPLY("vcc", "sa11x0-pcmcia.0"),
+ REGULATOR_SUPPLY("vcc", "sa11x0-pcmcia.1"),
+};
+
+static struct fixed_voltage_config shannon_cf_vcc_pdata __initdata = {
+ .supply_name = "cf-power",
+ .microvolts = 3300000,
+ .enabled_at_boot = 1,
+ .gpio = -EINVAL,
+};
+
static void __init shannon_init(void)
{
+ sa11x0_register_fixed_regulator(0, &shannon_cf_vcc_pdata,
+ shannon_cf_vcc_consumers,
+ ARRAY_SIZE(shannon_cf_vcc_consumers));
+ sa11x0_register_pcmcia(0, &shannon_pcmcia0_gpio_table);
+ sa11x0_register_pcmcia(1, &shannon_pcmcia1_gpio_table);
sa11x0_ppc_configure_mcp();
sa11x0_register_lcd(&shannon_lcd_info);
sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1);
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 7d4feb8..ace0104 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -4,6 +4,7 @@
*/
#include <linux/module.h>
+#include <linux/gpio/machine.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/tty.h>
@@ -364,6 +365,15 @@ static struct platform_device *devices[] __initdata = {
&simpad_i2c,
};
+/* Compact Flash */
+static struct gpiod_lookup_table simpad_cf_gpio_table = {
+ .dev_id = "sa11x0-pcmcia",
+ .table = {
+ GPIO_LOOKUP("gpio", GPIO_CF_IRQ, "cf-ready", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio", GPIO_CF_CD, "cf-detect", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
static int __init simpad_init(void)
@@ -385,6 +395,7 @@ static int __init simpad_init(void)
pm_power_off = simpad_power_off;
+ sa11x0_register_pcmcia(-1, &simpad_cf_gpio_table);
sa11x0_ppc_configure_mcp();
sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources,
ARRAY_SIZE(simpad_flash_resources));
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index 1267e64..0224416 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -70,7 +70,7 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from,
void *kto = kmap_atomic(to);
if (!test_and_set_bit(PG_dcache_clean, &from->flags))
- __flush_dcache_page(page_mapping(from), from);
+ __flush_dcache_page(page_mapping_file(from), from);
raw_spin_lock(&minicache_lock);
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index 7042334..a698e575e3 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -76,7 +76,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to,
unsigned long kfrom, kto;
if (!test_and_set_bit(PG_dcache_clean, &from->flags))
- __flush_dcache_page(page_mapping(from), from);
+ __flush_dcache_page(page_mapping_file(from), from);
/* FIXME: not highmem safe */
discard_old_kernel_data(page_address(to));
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
index 0fb8502..9797237 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -90,7 +90,7 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from,
void *kto = kmap_atomic(to);
if (!test_and_set_bit(PG_dcache_clean, &from->flags))
- __flush_dcache_page(page_mapping(from), from);
+ __flush_dcache_page(page_mapping_file(from), from);
raw_spin_lock(&minicache_lock);
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ada8eb2..8c398fe 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -466,6 +466,12 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
void __init dma_contiguous_remap(void)
{
int i;
+
+ if (!dma_mmu_remap_num)
+ return;
+
+ /* call flush_cache_all() since CMA area would be large enough */
+ flush_cache_all();
for (i = 0; i < dma_mmu_remap_num; i++) {
phys_addr_t start = dma_mmu_remap[i].base;
phys_addr_t end = start + dma_mmu_remap[i].size;
@@ -498,7 +504,15 @@ void __init dma_contiguous_remap(void)
flush_tlb_kernel_range(__phys_to_virt(start),
__phys_to_virt(end));
- iotable_init(&map, 1);
+ /*
+ * All the memory in CMA region will be on ZONE_MOVABLE.
+ * If that zone is considered as highmem, the memory in CMA
+ * region is also considered as highmem even if it's
+ * physical address belong to lowmem. In this case,
+ * re-mapping isn't required.
+ */
+ if (!is_highmem_idx(ZONE_MOVABLE))
+ iotable_init(&map, 1);
}
}
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index d9e0d00..4d75dae 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -195,7 +195,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
if (page == ZERO_PAGE(0))
return;
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
__flush_dcache_page(mapping, page);
if (mapping) {
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index f1e6190..5846962 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -285,7 +285,7 @@ void __sync_icache_dcache(pte_t pteval)
page = pfn_to_page(pfn);
if (cache_is_vipt_aliasing())
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
else
mapping = NULL;
@@ -333,7 +333,7 @@ void flush_dcache_page(struct page *page)
return;
}
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (!cache_ops_need_broadcast() &&
mapping && !page_mapcount(page))
@@ -363,7 +363,7 @@ void flush_kernel_dcache_page(struct page *page)
if (cache_is_vivt() || cache_is_vipt_aliasing()) {
struct address_space *mapping;
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (!mapping || mapping_mapped(mapping)) {
void *addr;
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index bd6f451..c186474 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -758,20 +758,9 @@ void set_kernel_text_ro(void)
static inline void fix_kernmem_perms(void) { }
#endif /* CONFIG_STRICT_KERNEL_RWX */
-void free_tcmmem(void)
-{
-#ifdef CONFIG_HAVE_TCM
- extern char __tcm_start, __tcm_end;
-
- poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
- free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
-#endif
-}
-
void free_initmem(void)
{
fix_kernmem_perms();
- free_tcmmem();
poison_init_mem(__init_begin, __init_end - __init_begin);
if (!machine_is_integrator() && !machine_is_cintegrator())
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index eb1de66..f866870 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -21,20 +21,20 @@
#define MIN_GAP (128*1024*1024UL)
#define MAX_GAP ((TASK_SIZE)/6*5)
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
@@ -180,18 +180,18 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
- if (mmap_is_legacy()) {
+ if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base(random_factor);
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index d55d493..b528a15 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -272,6 +272,7 @@ ENDPROC(cpu_pj4b_do_resume)
__v7_ca5mp_setup:
__v7_ca9mp_setup:
__v7_cr7mp_setup:
+__v7_cr8mp_setup:
mov r10, #(1 << 0) @ Cache/TLB ops broadcasting
b 1f
__v7_ca7mp_setup:
@@ -642,6 +643,16 @@ __v7_cr7mp_proc_info:
.size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info
/*
+ * ARM Ltd. Cortex R8 processor.
+ */
+ .type __v7_cr8mp_proc_info, #object
+__v7_cr8mp_proc_info:
+ .long 0x410fc180
+ .long 0xff0ffff0
+ __v7_proc __v7_cr8mp_proc_info, __v7_cr8mp_setup
+ .size __v7_cr8mp_proc_info, . - __v7_cr8mp_proc_info
+
+ /*
* ARM Ltd. Cortex A7 processor.
*/
.type __v7_ca7mp_proc_info, #object
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 177be0d..eb2cf49 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -922,6 +922,22 @@ config HARDEN_BRANCH_PREDICTOR
If unsure, say Y.
+config HARDEN_EL2_VECTORS
+ bool "Harden EL2 vector mapping against system register leak" if EXPERT
+ default y
+ help
+ Speculation attacks against some high-performance processors can
+ be used to leak privileged information such as the vector base
+ register, resulting in a potential defeat of the EL2 layout
+ randomization.
+
+ This config option will map the vectors to a fixed location,
+ independent of the EL2 code mapping, so that revealing VBAR_EL2
+ to an attacker does not give away any extra information. This
+ only gets enabled on affected CPUs.
+
+ If unsure, say Y.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 6690281..a91933b 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -5,6 +5,8 @@
#include <asm/cpucaps.h>
#include <asm/insn.h>
+#define ARM64_CB_PATCH ARM64_NCAPS
+
#ifndef __ASSEMBLY__
#include <linux/init.h>
@@ -22,12 +24,19 @@ struct alt_instr {
u8 alt_len; /* size of new instruction(s), <= orig_len */
};
+typedef void (*alternative_cb_t)(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
void __init apply_alternatives_all(void);
void apply_alternatives(void *start, size_t length);
-#define ALTINSTR_ENTRY(feature) \
+#define ALTINSTR_ENTRY(feature,cb) \
" .word 661b - .\n" /* label */ \
+ " .if " __stringify(cb) " == 0\n" \
" .word 663f - .\n" /* new instruction */ \
+ " .else\n" \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .endif\n" \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
@@ -45,15 +54,18 @@ void apply_alternatives(void *start, size_t length);
* but most assemblers die if insn1 or insn2 have a .inst. This should
* be fixed in a binutils release posterior to 2.25.51.0.2 (anything
* containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
*/
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature) \
+ ALTINSTR_ENTRY(feature,cb) \
".popsection\n" \
+ " .if " __stringify(cb) " == 0\n" \
".pushsection .altinstr_replacement, \"a\"\n" \
"663:\n\t" \
newinstr "\n" \
@@ -61,11 +73,17 @@ void apply_alternatives(void *start, size_t length);
".popsection\n\t" \
".org . - (664b-663b) + (662b-661b)\n\t" \
".org . - (662b-661b) + (664b-663b)\n" \
+ ".else\n\t" \
+ "663:\n\t" \
+ "664:\n\t" \
+ ".endif\n" \
".endif\n"
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
+#define ALTERNATIVE_CB(oldinstr, cb) \
+ __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
#else
#include <asm/assembler.h>
@@ -132,6 +150,14 @@ void apply_alternatives(void *start, size_t length);
661:
.endm
+.macro alternative_cb cb
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+ .popsection
+661:
+.endm
+
/*
* Provide the other half of the alternative code sequence.
*/
@@ -158,6 +184,13 @@ void apply_alternatives(void *start, size_t length);
.endm
/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
* Provides a trivial alternative or default sequence consisting solely
* of NOPs. The number of NOPs is chosen automatically to match the
* previous case.
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7dfcec4..0094c66 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -140,10 +140,8 @@ static inline void __flush_icache_all(void)
dsb(ish);
}
-#define flush_dcache_mmap_lock(mapping) \
- spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
- spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping) do { } while (0)
+#define flush_dcache_mmap_unlock(mapping) do { } while (0)
/*
* We don't appear to need to do anything here. In fact, if we did, we'd
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 21bb624..a311880 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -32,7 +32,7 @@
#define ARM64_HAS_VIRT_HOST_EXTN 11
#define ARM64_WORKAROUND_CAVIUM_27456 12
#define ARM64_HAS_32BIT_EL0 13
-#define ARM64_HYP_OFFSET_LOW 14
+#define ARM64_HARDEN_EL2_VECTORS 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
#define ARM64_HAS_NO_FPSIMD 16
#define ARM64_WORKAROUND_REPEAT_TLBI 17
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4214c38..f62c56b 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -70,6 +70,7 @@ enum aarch64_insn_imm_type {
AARCH64_INSN_IMM_6,
AARCH64_INSN_IMM_S,
AARCH64_INSN_IMM_R,
+ AARCH64_INSN_IMM_N,
AARCH64_INSN_IMM_MAX
};
@@ -314,6 +315,11 @@ __AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000)
__AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000)
__AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000)
__AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000)
+__AARCH64_INSN_FUNCS(and_imm, 0x7F800000, 0x12000000)
+__AARCH64_INSN_FUNCS(orr_imm, 0x7F800000, 0x32000000)
+__AARCH64_INSN_FUNCS(eor_imm, 0x7F800000, 0x52000000)
+__AARCH64_INSN_FUNCS(ands_imm, 0x7F800000, 0x72000000)
+__AARCH64_INSN_FUNCS(extr, 0x7FA00000, 0x13800000)
__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000)
__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000)
__AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000)
@@ -423,6 +429,16 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
int shift,
enum aarch64_insn_variant variant,
enum aarch64_insn_logic_type type);
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u64 imm);
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rm,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u8 lsb);
u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index b0c8417..6dd285e 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -25,6 +25,7 @@
/* Hyp Configuration Register (HCR) bits */
#define HCR_TEA (UL(1) << 37)
#define HCR_TERR (UL(1) << 36)
+#define HCR_TLOR (UL(1) << 35)
#define HCR_E2H (UL(1) << 34)
#define HCR_ID (UL(1) << 33)
#define HCR_CD (UL(1) << 32)
@@ -64,6 +65,7 @@
/*
* The bits we set in HCR:
+ * TLOR: Trap LORegion register accesses
* RW: 64bit by default, can be overridden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
@@ -81,9 +83,9 @@
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
- HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
+ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
+ HCR_FMO | HCR_IMO)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24961b7..d53d407 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,7 @@
#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+/* Translate a kernel address of @sym into its equivalent linear mapping */
#define kvm_ksym_ref(sym) \
({ \
void *val = &sym; \
@@ -57,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_ich_vtr_el2(void);
extern u64 __vgic_v3_read_vmcr(void);
@@ -70,6 +73,20 @@ extern u32 __init_stage2_translation(void);
extern void __qcom_hyp_sanitize_btac_predictors(void);
+#else /* __ASSEMBLY__ */
+
+.macro get_host_ctxt reg, tmp
+ adr_l \reg, kvm_host_cpu_state
+ mrs \tmp, tpidr_el2
+ add \reg, \reg, \tmp
+.endm
+
+.macro get_vcpu_ptr vcpu, ctxt
+ get_host_ctxt \ctxt, \vcpu
+ ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+ kern_hyp_va \vcpu
+.endm
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 413dc82..23b33e8 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -26,13 +26,15 @@
#include <asm/esr.h>
#include <asm/kvm_arm.h>
+#include <asm/kvm_hyp.h>
#include <asm/kvm_mmio.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu);
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v);
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
@@ -45,6 +47,11 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu);
void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
+static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.hcr_el2 & HCR_RW);
+}
+
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
@@ -59,16 +66,19 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
-}
-static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.hcr_el2;
+ /*
+ * TID3: trap feature register accesses that we virtualise.
+ * For now this is conditional, since no AArch32 feature regs
+ * are currently virtualised.
+ */
+ if (!vcpu_el1_is_32bit(vcpu))
+ vcpu->arch.hcr_el2 |= HCR_TID3;
}
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr_el2 = hcr;
+ return (unsigned long *)&vcpu->arch.hcr_el2;
}
static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
@@ -81,11 +91,27 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
}
-static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
}
+static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ return read_sysreg_el1(elr);
+ else
+ return *__vcpu_elr_el1(vcpu);
+}
+
+static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
+{
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ write_sysreg_el1(v, elr);
+ else
+ *__vcpu_elr_el1(vcpu) = v;
+}
+
static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
@@ -135,13 +161,28 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
}
-/* Get vcpu SPSR for current mode */
-static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
- return vcpu_spsr32(vcpu);
+ return vcpu_read_spsr32(vcpu);
- return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ return read_sysreg_el1(spsr);
+ else
+ return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ if (vcpu_mode_is_32bit(vcpu)) {
+ vcpu_write_spsr32(vcpu, v);
+ return;
+ }
+
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ write_sysreg_el1(v, spsr);
+ else
+ vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
}
static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
@@ -282,15 +323,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+ return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
{
- if (vcpu_mode_is_32bit(vcpu))
+ if (vcpu_mode_is_32bit(vcpu)) {
*vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
- else
- vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+ } else {
+ u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ sctlr |= (1 << 25);
+ vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr);
+ }
}
static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
@@ -298,7 +342,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu))
return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
- return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+ return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
}
static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 596f8e4..ab46bc7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -272,9 +272,6 @@ struct kvm_vcpu_arch {
/* IO related fields */
struct kvm_decode mmio_decode;
- /* Interrupt related fields */
- u64 irq_lines; /* IRQ and FIQ levels */
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -287,10 +284,25 @@ struct kvm_vcpu_arch {
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
+
+ /* True when deferrable sysregs are loaded on the physical CPU,
+ * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
+ bool sysregs_loaded_on_cpu;
};
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
-#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+
+/*
+ * Only use __vcpu_sys_reg if you know you want the memory backed version of a
+ * register, and not the one most recently accessed by a running VCPU. For
+ * example, for userspace access or for system registers that are never context
+ * switched, but only emulated.
+ */
+#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
+
/*
* CP14 and CP15 live in the same array, as they are backed by the
* same system registers.
@@ -298,14 +310,6 @@ struct kvm_vcpu_arch {
#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)])
#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)])
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r))
-#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1)
-#else
-#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1)
-#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r))
-#endif
-
struct kvm_vm_stat {
ulong remote_tlb_flush;
};
@@ -358,10 +362,15 @@ int kvm_perf_teardown(void);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+void __kvm_set_tpidr_el2(u64 tpidr_el2);
+DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
+ u64 tpidr_el2;
+
/*
* Call initialization code, and switch to the full blown HYP code.
* If the cpucaps haven't been finalized yet, something has gone very
@@ -370,6 +379,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
*/
BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+ /*
+ * Calculate the raw per-cpu offset without a translation from the
+ * kernel's mapping to the linear mapping, and store it in tpidr_el2
+ * so that we can use adr_l to access per-cpu variables in EL2.
+ */
+ tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
+ - (u64)kvm_ksym_ref(kvm_host_cpu_state);
+
+ kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
}
static inline void kvm_arch_hardware_unsetup(void) {}
@@ -416,6 +435,13 @@ static inline void kvm_arm_vhe_guest_enter(void)
static inline void kvm_arm_vhe_guest_exit(void)
{
local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+ /*
+ * When we exit from the guest we change a number of CPU configuration
+ * parameters, such as traps. Make sure these changes take effect
+ * before running the host or additional guests.
+ */
+ isb();
}
static inline bool kvm_arm_harden_branch_predictor(void)
@@ -423,4 +449,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
}
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index f26f9cd..384c343 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -120,37 +120,38 @@ typeof(orig) * __hyp_text fname(void) \
return val; \
}
-void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __timer_enable_traps(struct kvm_vcpu *vcpu);
void __timer_disable_traps(struct kvm_vcpu *vcpu);
-void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
void __sysreg32_save_state(struct kvm_vcpu *vcpu);
void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
-void __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
-void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
+void __debug_switch_to_host(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
bool __fpsimd_enabled(void);
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
+void deactivate_traps_vhe_put(void);
+
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7faed6e..0821109 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -69,9 +69,6 @@
* mappings, and none of this applies in that case.
*/
-#define HYP_PAGE_OFFSET_HIGH_MASK ((UL(1) << VA_BITS) - 1)
-#define HYP_PAGE_OFFSET_LOW_MASK ((UL(1) << (VA_BITS - 1)) - 1)
-
#ifdef __ASSEMBLY__
#include <asm/alternative.h>
@@ -81,28 +78,19 @@
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
*
- * This generates the following sequences:
- * - High mask:
- * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
- * nop
- * - Low mask:
- * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
- * and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK
- * - VHE:
- * nop
- * nop
- *
- * The "low mask" version works because the mask is a strict subset of
- * the "high mask", hence performing the first mask for nothing.
- * Should be completely invisible on any viable CPU.
+ * The actual code generation takes place in kvm_update_va_mask, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask uses the
+ * specific registers encoded in the instructions).
*/
.macro kern_hyp_va reg
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK
-alternative_else_nop_endif
-alternative_if ARM64_HYP_OFFSET_LOW
- and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK
-alternative_else_nop_endif
+alternative_cb kvm_update_va_mask
+ and \reg, \reg, #1 /* mask with va_mask */
+ ror \reg, \reg, #1 /* rotate to the first tag bit */
+ add \reg, \reg, #0 /* insert the low 12 bits of the tag */
+ add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
+ ror \reg, \reg, #63 /* rotate back */
+alternative_cb_end
.endm
#else
@@ -113,24 +101,44 @@ alternative_else_nop_endif
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
+void kvm_update_va_mask(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
static inline unsigned long __kern_hyp_va(unsigned long v)
{
- asm volatile(ALTERNATIVE("and %0, %0, %1",
- "nop",
- ARM64_HAS_VIRT_HOST_EXTN)
- : "+r" (v)
- : "i" (HYP_PAGE_OFFSET_HIGH_MASK));
- asm volatile(ALTERNATIVE("nop",
- "and %0, %0, %1",
- ARM64_HYP_OFFSET_LOW)
- : "+r" (v)
- : "i" (HYP_PAGE_OFFSET_LOW_MASK));
+ asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
+ "ror %0, %0, #1\n"
+ "add %0, %0, #0\n"
+ "add %0, %0, #0, lsl 12\n"
+ "ror %0, %0, #63\n",
+ kvm_update_va_mask)
+ : "+r" (v));
return v;
}
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
/*
+ * Obtain the PC-relative address of a kernel symbol
+ * s: symbol
+ *
+ * The goal of this macro is to return a symbol's address based on a
+ * PC-relative computation, as opposed to a loading the VA from a
+ * constant pool or something similar. This works well for HYP, as an
+ * absolute VA is guaranteed to be wrong. Only use this if trying to
+ * obtain the address of a symbol (i.e. not something you obtained by
+ * following a pointer).
+ */
+#define hyp_symbol_addr(s) \
+ ({ \
+ typeof(s) *addr; \
+ asm("adrp %0, %1\n" \
+ "add %0, %0, :lo12:%1\n" \
+ : "=r" (addr) : "S" (&s)); \
+ addr; \
+ })
+
+/*
* We currently only support a 40bit IPA.
*/
#define KVM_PHYS_SHIFT (40)
@@ -140,7 +148,11 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
#include <asm/stage2_pgtable.h>
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+ void __iomem **kaddr,
+ void __iomem **haddr);
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+ void **haddr);
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
@@ -249,7 +261,7 @@ struct kvm;
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
- return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+ return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
@@ -348,36 +360,95 @@ static inline unsigned int kvm_get_vmid_bits(void)
return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+/*
+ * EL2 vectors can be mapped and rerouted in a number of ways,
+ * depending on the kernel configuration and CPU present:
+ *
+ * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the
+ * hardening sequence is placed in one of the vector slots, which is
+ * executed before jumping to the real vectors.
+ *
+ * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the
+ * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the
+ * hardening sequence is mapped next to the idmap page, and executed
+ * before jumping to the real vectors.
+ *
+ * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
+ * empty slot is selected, mapped next to the idmap page, and
+ * executed before jumping to the real vectors.
+ *
+ * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with
+ * VHE, as we don't have hypervisor-specific mappings. If the system
+ * is VHE and yet selects this capability, it will be ignored.
+ */
#include <asm/mmu.h>
+extern void *__kvm_bp_vect_base;
+extern int __kvm_harden_el2_vector_slot;
+
static inline void *kvm_get_hyp_vector(void)
{
struct bp_hardening_data *data = arm64_get_bp_hardening_data();
- void *vect = kvm_ksym_ref(__kvm_hyp_vector);
+ void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
+ int slot = -1;
- if (data->fn) {
- vect = __bp_harden_hyp_vecs_start +
- data->hyp_vectors_slot * SZ_2K;
+ if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) {
+ vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start));
+ slot = data->hyp_vectors_slot;
+ }
- if (!has_vhe())
- vect = lm_alias(vect);
+ if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) {
+ vect = __kvm_bp_vect_base;
+ if (slot == -1)
+ slot = __kvm_harden_el2_vector_slot;
}
+ if (slot != -1)
+ vect += slot * SZ_2K;
+
return vect;
}
+/* This is only called on a !VHE system */
static inline int kvm_map_vectors(void)
{
- return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start),
- kvm_ksym_ref(__bp_harden_hyp_vecs_end),
- PAGE_HYP_EXEC);
-}
+ /*
+ * HBP = ARM64_HARDEN_BRANCH_PREDICTOR
+ * HEL2 = ARM64_HARDEN_EL2_VECTORS
+ *
+ * !HBP + !HEL2 -> use direct vectors
+ * HBP + !HEL2 -> use hardened vectors in place
+ * !HBP + HEL2 -> allocate one vector slot and use exec mapping
+ * HBP + HEL2 -> use hardened vertors and use exec mapping
+ */
+ if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) {
+ __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start);
+ __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
+ }
+
+ if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+ phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs_start);
+ unsigned long size = (__bp_harden_hyp_vecs_end -
+ __bp_harden_hyp_vecs_start);
+
+ /*
+ * Always allocate a spare vector slot, as we don't
+ * know yet which CPUs have a BP hardening slot that
+ * we can reuse.
+ */
+ __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+ BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
+ return create_hyp_exec_mappings(vect_pa, size,
+ &__kvm_bp_vect_base);
+ }
+ return 0;
+}
#else
static inline void *kvm_get_hyp_vector(void)
{
- return kvm_ksym_ref(__kvm_hyp_vector);
+ return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
}
static inline int kvm_map_vectors(void)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 50fa96a..49d9921 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -29,12 +29,6 @@
#include <asm/sizes.h>
/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
-/*
* Size of the PCI I/O space. This must remain a power of two so that
* IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.
*/
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index a050d4f..dd320df 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -21,6 +21,8 @@
#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
#define TTBR_ASID_MASK (UL(0xffff) << 48)
+#define BP_HARDEN_EL2_SLOTS 4
+
#ifndef __ASSEMBLY__
typedef struct {
@@ -49,9 +51,13 @@ struct bp_hardening_data {
bp_hardening_cb_t fn;
};
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \
+ defined(CONFIG_HARDEN_EL2_VECTORS))
extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
+extern atomic_t arm64_el2_vector_last_slot;
+#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e7b9f15..6171178 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -288,6 +288,12 @@
#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0)
#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0)
+#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0)
+#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1)
+#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2)
+#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3)
+#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7)
+
#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6a4bd80..9b55a3f 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,9 +55,7 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
-ifeq ($(CONFIG_KVM),y)
-arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
-endif
+arm64-obj-$(CONFIG_KVM_INDIRECT_VECTORS)+= bpi.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 414288a..5c4bce4 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -107,32 +107,53 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
return insn;
}
+static void patch_alternative(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ __le32 *replptr;
+ int i;
+
+ replptr = ALT_REPL_PTR(alt);
+ for (i = 0; i < nr_inst; i++) {
+ u32 insn;
+
+ insn = get_alt_insn(alt, origptr + i, replptr + i);
+ updptr[i] = cpu_to_le32(insn);
+ }
+}
+
static void __apply_alternatives(void *alt_region, bool use_linear_alias)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;
- __le32 *origptr, *replptr, *updptr;
+ __le32 *origptr, *updptr;
+ alternative_cb_t alt_cb;
for (alt = region->begin; alt < region->end; alt++) {
- u32 insn;
- int i, nr_inst;
+ int nr_inst;
- if (!cpus_have_cap(alt->cpufeature))
+ /* Use ARM64_CB_PATCH as an unconditional patch */
+ if (alt->cpufeature < ARM64_CB_PATCH &&
+ !cpus_have_cap(alt->cpufeature))
continue;
- BUG_ON(alt->alt_len != alt->orig_len);
+ if (alt->cpufeature == ARM64_CB_PATCH)
+ BUG_ON(alt->alt_len != 0);
+ else
+ BUG_ON(alt->alt_len != alt->orig_len);
pr_info_once("patching kernel code\n");
origptr = ALT_ORIG_PTR(alt);
- replptr = ALT_REPL_PTR(alt);
updptr = use_linear_alias ? lm_alias(origptr) : origptr;
- nr_inst = alt->alt_len / sizeof(insn);
+ nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
- for (i = 0; i < nr_inst; i++) {
- insn = get_alt_insn(alt, origptr + i, replptr + i);
- updptr[i] = cpu_to_le32(insn);
- }
+ if (alt->cpufeature < ARM64_CB_PATCH)
+ alt_cb = patch_alternative;
+ else
+ alt_cb = ALT_REPL_PTR(alt);
+
+ alt_cb(alt, origptr, updptr, nr_inst);
flush_icache_range((uintptr_t)origptr,
(uintptr_t)(origptr + nr_inst));
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 1303e04..78e1b0a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -138,6 +138,7 @@ int main(void)
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
+ DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
index e5de335..bb0b677 100644
--- a/arch/arm64/kernel/bpi.S
+++ b/arch/arm64/kernel/bpi.S
@@ -19,42 +19,61 @@
#include <linux/linkage.h>
#include <linux/arm-smccc.h>
-.macro ventry target
- .rept 31
+#include <asm/alternative.h>
+#include <asm/mmu.h>
+
+.macro hyp_ventry
+ .align 7
+1: .rept 27
nop
.endr
- b \target
+/*
+ * The default sequence is to directly branch to the KVM vectors,
+ * using the computed offset. This applies for VHE as well as
+ * !ARM64_HARDEN_EL2_VECTORS.
+ *
+ * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
+ * with:
+ *
+ * stp x0, x1, [sp, #-16]!
+ * movz x0, #(addr & 0xffff)
+ * movk x0, #((addr >> 16) & 0xffff), lsl #16
+ * movk x0, #((addr >> 32) & 0xffff), lsl #32
+ * br x0
+ *
+ * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
+ * See kvm_patch_vector_branch for details.
+ */
+alternative_cb kvm_patch_vector_branch
+ b __kvm_hyp_vector + (1b - 0b)
+ nop
+ nop
+ nop
+ nop
+alternative_cb_end
.endm
-.macro vectors target
- ventry \target + 0x000
- ventry \target + 0x080
- ventry \target + 0x100
- ventry \target + 0x180
-
- ventry \target + 0x200
- ventry \target + 0x280
- ventry \target + 0x300
- ventry \target + 0x380
+.macro generate_vectors
+0:
+ .rept 16
+ hyp_ventry
+ .endr
+ .org 0b + SZ_2K // Safety measure
+.endm
- ventry \target + 0x400
- ventry \target + 0x480
- ventry \target + 0x500
- ventry \target + 0x580
- ventry \target + 0x600
- ventry \target + 0x680
- ventry \target + 0x700
- ventry \target + 0x780
-.endm
+ .text
+ .pushsection .hyp.text, "ax"
.align 11
ENTRY(__bp_harden_hyp_vecs_start)
- .rept 4
- vectors __kvm_hyp_vector
+ .rept BP_HARDEN_EL2_SLOTS
+ generate_vectors
.endr
ENTRY(__bp_harden_hyp_vecs_end)
+ .popsection
+
ENTRY(__qcom_hyp_sanitize_link_stack_start)
stp x29, x30, [sp, #-16]!
.rept 16
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 2df7927..9262ec5 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -78,6 +78,8 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
config_sctlr_el1(SCTLR_EL1_UCT, 0);
}
+atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
+
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
@@ -108,7 +110,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
const char *hyp_vecs_start,
const char *hyp_vecs_end)
{
- static int last_slot = -1;
static DEFINE_SPINLOCK(bp_lock);
int cpu, slot = -1;
@@ -121,10 +122,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
}
if (slot == -1) {
- last_slot++;
- BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
- / SZ_2K) <= last_slot);
- slot = last_slot;
+ slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+ BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
}
@@ -348,6 +347,10 @@ static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = {
#endif
+#ifndef ERRATA_MIDR_ALL_VERSIONS
+#define ERRATA_MIDR_ALL_VERSIONS(x) MIDR_ALL_VERSIONS(x)
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#if defined(CONFIG_ARM64_ERRATUM_826319) || \
defined(CONFIG_ARM64_ERRATUM_827319) || \
@@ -501,6 +504,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
},
#endif
+#ifdef CONFIG_HARDEN_EL2_VECTORS
+ {
+ .desc = "Cortex-A57 EL2 vector hardening",
+ .capability = ARM64_HARDEN_EL2_VECTORS,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ },
+ {
+ .desc = "Cortex-A72 EL2 vector hardening",
+ .capability = ARM64_HARDEN_EL2_VECTORS,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 96b15d7..536d572 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -838,19 +838,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _
MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
}
-static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
- int __unused)
-{
- phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
-
- /*
- * Activate the lower HYP offset only if:
- * - the idmap doesn't clash with it,
- * - the kernel is not running at EL2.
- */
- return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode();
-}
-
static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
{
u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
@@ -1121,12 +1108,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64PFR0_EL0_SHIFT,
.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
},
- {
- .desc = "Reduced HYP mapping offset",
- .capability = ARM64_HYP_OFFSET_LOW,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = hyp_offset_low,
- },
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
{
.desc = "Kernel page table isolation (KPTI)",
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2b6b8b2..b085306 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -577,6 +577,13 @@ set_hcr:
7:
msr mdcr_el2, x3 // Configure debug traps
+ /* LORegions */
+ mrs x1, id_aa64mmfr1_el1
+ ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+ cbz x0, 1f
+ msr_s SYS_LORC_EL1, xzr
+1:
+
/* Stage-2 translation */
msr vttbr_el2, xzr
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 2718a77..816d03c 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -35,6 +35,7 @@
#define AARCH64_INSN_SF_BIT BIT(31)
#define AARCH64_INSN_N_BIT BIT(22)
+#define AARCH64_INSN_LSL_12 BIT(22)
static int aarch64_insn_encoding_class[] = {
AARCH64_INSN_CLS_UNKNOWN,
@@ -343,6 +344,10 @@ static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
mask = BIT(6) - 1;
shift = 16;
break;
+ case AARCH64_INSN_IMM_N:
+ mask = 1;
+ shift = 22;
+ break;
default:
return -EINVAL;
}
@@ -899,9 +904,18 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
return AARCH64_BREAK_FAULT;
}
+ /* We can't encode more than a 24bit value (12bit + 12bit shift) */
+ if (imm & ~(BIT(24) - 1))
+ goto out;
+
+ /* If we have something in the top 12 bits... */
if (imm & ~(SZ_4K - 1)) {
- pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
- return AARCH64_BREAK_FAULT;
+ /* ... and in the low 12 bits -> error */
+ if (imm & (SZ_4K - 1))
+ goto out;
+
+ imm >>= 12;
+ insn |= AARCH64_INSN_LSL_12;
}
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
@@ -909,6 +923,10 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
+
+out:
+ pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
+ return AARCH64_BREAK_FAULT;
}
u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
@@ -1481,3 +1499,171 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
__check_hi, __check_ls, __check_ge, __check_lt,
__check_gt, __check_le, __check_al, __check_al
};
+
+static bool range_of_ones(u64 val)
+{
+ /* Doesn't handle full ones or full zeroes */
+ u64 sval = val >> __ffs64(val);
+
+ /* One of Sean Eron Anderson's bithack tricks */
+ return ((sval + 1) & (sval)) == 0;
+}
+
+static u32 aarch64_encode_immediate(u64 imm,
+ enum aarch64_insn_variant variant,
+ u32 insn)
+{
+ unsigned int immr, imms, n, ones, ror, esz, tmp;
+ u64 mask = ~0UL;
+
+ /* Can't encode full zeroes or full ones */
+ if (!imm || !~imm)
+ return AARCH64_BREAK_FAULT;
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ if (upper_32_bits(imm))
+ return AARCH64_BREAK_FAULT;
+ esz = 32;
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ esz = 64;
+ break;
+ default:
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ /*
+ * Inverse of Replicate(). Try to spot a repeating pattern
+ * with a pow2 stride.
+ */
+ for (tmp = esz / 2; tmp >= 2; tmp /= 2) {
+ u64 emask = BIT(tmp) - 1;
+
+ if ((imm & emask) != ((imm >> tmp) & emask))
+ break;
+
+ esz = tmp;
+ mask = emask;
+ }
+
+ /* N is only set if we're encoding a 64bit value */
+ n = esz == 64;
+
+ /* Trim imm to the element size */
+ imm &= mask;
+
+ /* That's how many ones we need to encode */
+ ones = hweight64(imm);
+
+ /*
+ * imms is set to (ones - 1), prefixed with a string of ones
+ * and a zero if they fit. Cap it to 6 bits.
+ */
+ imms = ones - 1;
+ imms |= 0xf << ffs(esz);
+ imms &= BIT(6) - 1;
+
+ /* Compute the rotation */
+ if (range_of_ones(imm)) {
+ /*
+ * Pattern: 0..01..10..0
+ *
+ * Compute how many rotate we need to align it right
+ */
+ ror = __ffs64(imm);
+ } else {
+ /*
+ * Pattern: 0..01..10..01..1
+ *
+ * Fill the unused top bits with ones, and check if
+ * the result is a valid immediate (all ones with a
+ * contiguous ranges of zeroes).
+ */
+ imm |= ~mask;
+ if (!range_of_ones(~imm))
+ return AARCH64_BREAK_FAULT;
+
+ /*
+ * Compute the rotation to get a continuous set of
+ * ones, with the first bit set at position 0
+ */
+ ror = fls(~imm);
+ }
+
+ /*
+ * immr is the number of bits we need to rotate back to the
+ * original set of ones. Note that this is relative to the
+ * element size...
+ */
+ immr = (esz - ror) % esz;
+
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n);
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
+}
+
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u64 imm)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_LOGIC_AND:
+ insn = aarch64_insn_get_and_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_ORR:
+ insn = aarch64_insn_get_orr_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_EOR:
+ insn = aarch64_insn_get_eor_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_AND_SETFLAGS:
+ insn = aarch64_insn_get_ands_imm_value();
+ break;
+ default:
+ pr_err("%s: unknown logical encoding %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
+ return aarch64_encode_immediate(imm, variant, insn);
+}
+
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rm,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u8 lsb)
+{
+ u32 insn;
+
+ insn = aarch64_insn_get_extr_value();
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ if (lsb > 31)
+ return AARCH64_BREAK_FAULT;
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ if (lsb > 63)
+ return AARCH64_BREAK_FAULT;
+ insn |= AARCH64_INSN_SF_BIT;
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1);
+ break;
+ default:
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
+}
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 2257dfc..a2e3a5a 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -57,6 +57,9 @@ config KVM_ARM_PMU
Adds support for a virtual Performance Monitoring Unit (PMU) in
virtual machines.
+config KVM_INDIRECT_VECTORS
+ def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
+
source drivers/vhost/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 87c4f7a..93afff9 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index fa63b28..a1f4ebd 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -46,7 +46,9 @@ static DEFINE_PER_CPU(u32, mdcr_el2);
*/
static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
{
- vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
+ u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+
+ vcpu->arch.guest_debug_preserved.mdscr_el1 = val;
trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
vcpu->arch.guest_debug_preserved.mdscr_el1);
@@ -54,10 +56,12 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
{
- vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1;
+ u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1;
+
+ vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);
trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
- vcpu_sys_reg(vcpu, MDSCR_EL1));
+ vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
/**
@@ -108,6 +112,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
{
bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
+ unsigned long mdscr;
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
@@ -152,9 +157,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
*vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
- vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr |= DBG_MDSCR_SS;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
} else {
- vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr &= ~DBG_MDSCR_SS;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
}
trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
@@ -170,7 +179,9 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
/* Enable breakpoints/watchpoints */
- vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr |= DBG_MDSCR_MDE;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
@@ -193,8 +204,12 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
if (trap_debug)
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
+ /* If KDE or MDE are set, perform a full save/restore cycle. */
+ if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
- trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
+ trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 5aa9ccf..6fd91b3 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -117,7 +117,6 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
/* Set the stack and new vectors */
kern_hyp_va x1
mov sp, x1
- kern_hyp_va x2
msr vbar_el2, x2
/* copy tpidr_el1 into tpidr_el2 for use by HYP */
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index f04400d..4313f74 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
KVM=../../../../virt/kvm
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index dabb5cc..3e717f6 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -66,11 +66,6 @@
default: write_debug(ptr[0], reg, 0); \
}
-static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
-{
- /* The vcpu can run. but it can't hide. */
-}
-
static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
{
u64 reg;
@@ -103,11 +98,7 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
dsb(nsh);
}
-static hyp_alternate_select(__debug_save_spe,
- __debug_save_spe_nvhe, __debug_save_spe_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
+static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
{
if (!pmscr_el1)
return;
@@ -119,16 +110,13 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
}
-void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
- if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
- return;
-
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
wrps = (aa64dfr0 >> 20) & 0xf;
@@ -141,16 +129,13 @@ void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
}
-void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
- if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
- return;
-
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
@@ -164,27 +149,54 @@ void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
}
-void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
{
- /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform
- * a full save/restore cycle. */
- if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) ||
- (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
-
- __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
- kern_hyp_va(vcpu->arch.host_cpu_context));
- __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ struct kvm_guest_debug_arch *host_dbg;
+ struct kvm_guest_debug_arch *guest_dbg;
+
+ /*
+ * Non-VHE: Disable and flush SPE data generation
+ * VHE: The vcpu can run, but it can't hide.
+ */
+ if (!has_vhe())
+ __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
+
+ if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+ return;
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+ host_dbg = &vcpu->arch.host_debug_state.regs;
+ guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+ __debug_save_state(vcpu, host_dbg, host_ctxt);
+ __debug_restore_state(vcpu, guest_dbg, guest_ctxt);
}
-void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
- __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
- __debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs,
- kern_hyp_va(vcpu->arch.host_cpu_context));
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ struct kvm_guest_debug_arch *host_dbg;
+ struct kvm_guest_debug_arch *guest_dbg;
+
+ if (!has_vhe())
+ __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
+
+ if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+ return;
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+ host_dbg = &vcpu->arch.host_debug_state.regs;
+ guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+ __debug_save_state(vcpu, guest_dbg, guest_ctxt);
+ __debug_restore_state(vcpu, host_dbg, host_ctxt);
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
- vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
}
u32 __hyp_text __kvm_get_mdcr_el2(void)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fdd1068..1f458f7 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,9 +62,6 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
- // Store host_ctxt and vcpu for use at exit time
- stp x1, x0, [sp, #-16]!
-
add x18, x0, #VCPU_CONTEXT
// Restore guest regs x0-x17
@@ -118,8 +115,7 @@ ENTRY(__guest_exit)
// Store the guest regs x19-x29, lr
save_callee_saved_regs x1
- // Restore the host_ctxt from the stack
- ldr x2, [sp], #16
+ get_host_ctxt x2, x3
// Now restore the host regs
restore_callee_saved_regs x2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index f36464b..87dfecc 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -55,15 +55,9 @@ ENTRY(__vhe_hyp_call)
ENDPROC(__vhe_hyp_call)
el1_sync: // Guest trapped into EL2
- stp x0, x1, [sp, #-16]!
-
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- mrs x1, esr_el2
-alternative_else
- mrs x1, esr_el1
-alternative_endif
- lsr x0, x1, #ESR_ELx_EC_SHIFT
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
@@ -117,10 +111,14 @@ el1_hvc_guest:
eret
el1_trap:
+ get_vcpu_ptr x1, x0
+
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
/*
* x0: ESR_EC
+ * x1: vcpu pointer
*/
- ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter
/*
* We trap the first access to the FP/SIMD to save the host context
@@ -137,18 +135,18 @@ alternative_else_nop_endif
b __guest_exit
el1_irq:
- stp x0, x1, [sp, #-16]!
- ldr x1, [sp, #16 + 8]
+ get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
el1_error:
- stp x0, x1, [sp, #-16]!
- ldr x1, [sp, #16 + 8]
+ get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
el2_error:
+ ldp x0, x1, [sp], #16
+
/*
* Only two possibilities:
* 1) Either we come from the exit path, having just unmasked
@@ -180,14 +178,7 @@ ENTRY(__hyp_do_panic)
ENDPROC(__hyp_do_panic)
ENTRY(__hyp_panic)
- /*
- * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
- * not be accessible by this address from EL2, hyp_panic() converts
- * it with kern_hyp_va() before use.
- */
- ldr x0, =kvm_host_cpu_state
- mrs x1, tpidr_el2
- add x0, x0, x1
+ get_host_ctxt x0, x1
b hyp_panic
ENDPROC(__hyp_panic)
@@ -206,32 +197,43 @@ ENDPROC(\label)
invalid_vector el2h_sync_invalid
invalid_vector el2h_irq_invalid
invalid_vector el2h_fiq_invalid
- invalid_vector el1_sync_invalid
- invalid_vector el1_irq_invalid
invalid_vector el1_fiq_invalid
.ltorg
.align 11
+.macro valid_vect target
+ .align 7
+ stp x0, x1, [sp, #-16]!
+ b \target
+.endm
+
+.macro invalid_vect target
+ .align 7
+ b \target
+ ldp x0, x1, [sp], #16
+ b \target
+.endm
+
ENTRY(__kvm_hyp_vector)
- ventry el2t_sync_invalid // Synchronous EL2t
- ventry el2t_irq_invalid // IRQ EL2t
- ventry el2t_fiq_invalid // FIQ EL2t
- ventry el2t_error_invalid // Error EL2t
-
- ventry el2h_sync_invalid // Synchronous EL2h
- ventry el2h_irq_invalid // IRQ EL2h
- ventry el2h_fiq_invalid // FIQ EL2h
- ventry el2_error // Error EL2h
-
- ventry el1_sync // Synchronous 64-bit EL1
- ventry el1_irq // IRQ 64-bit EL1
- ventry el1_fiq_invalid // FIQ 64-bit EL1
- ventry el1_error // Error 64-bit EL1
-
- ventry el1_sync // Synchronous 32-bit EL1
- ventry el1_irq // IRQ 32-bit EL1
- ventry el1_fiq_invalid // FIQ 32-bit EL1
- ventry el1_error // Error 32-bit EL1
+ invalid_vect el2t_sync_invalid // Synchronous EL2t
+ invalid_vect el2t_irq_invalid // IRQ EL2t
+ invalid_vect el2t_fiq_invalid // FIQ EL2t
+ invalid_vect el2t_error_invalid // Error EL2t
+
+ invalid_vect el2h_sync_invalid // Synchronous EL2h
+ invalid_vect el2h_irq_invalid // IRQ EL2h
+ invalid_vect el2h_fiq_invalid // FIQ EL2h
+ valid_vect el2_error // Error EL2h
+
+ valid_vect el1_sync // Synchronous 64-bit EL1
+ valid_vect el1_irq // IRQ 64-bit EL1
+ invalid_vect el1_fiq_invalid // FIQ 64-bit EL1
+ valid_vect el1_error // Error 64-bit EL1
+
+ valid_vect el1_sync // Synchronous 32-bit EL1
+ valid_vect el1_irq // IRQ 32-bit EL1
+ invalid_vect el1_fiq_invalid // FIQ 32-bit EL1
+ valid_vect el1_error // Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 870f4b1..07b5721 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -33,49 +33,22 @@ static bool __hyp_text __fpsimd_enabled_nvhe(void)
return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
}
-static bool __hyp_text __fpsimd_enabled_vhe(void)
+static bool fpsimd_enabled_vhe(void)
{
return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
}
-static hyp_alternate_select(__fpsimd_is_enabled,
- __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-bool __hyp_text __fpsimd_enabled(void)
-{
- return __fpsimd_is_enabled()();
-}
-
-static void __hyp_text __activate_traps_vhe(void)
-{
- u64 val;
-
- val = read_sysreg(cpacr_el1);
- val |= CPACR_EL1_TTA;
- val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
- write_sysreg(val, cpacr_el1);
-
- write_sysreg(kvm_get_hyp_vector(), vbar_el1);
-}
-
-static void __hyp_text __activate_traps_nvhe(void)
+/* Save the 32-bit only FPSIMD system register state */
+static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
- u64 val;
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
- val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
- write_sysreg(val, cptr_el2);
+ vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
}
-static hyp_alternate_select(__activate_traps_arch,
- __activate_traps_nvhe, __activate_traps_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
- u64 val;
-
/*
* We are about to set CPTR_EL2.TFP to trap all floating point
* register accesses to EL2, however, the ARM ARM clearly states that
@@ -85,23 +58,17 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
* it will cause an exception.
*/
- val = vcpu->arch.hcr_el2;
-
- if (!(val & HCR_RW) && system_supports_fpsimd()) {
+ if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
write_sysreg(1 << 30, fpexc32_el2);
isb();
}
+}
- if (val & HCR_RW) /* for AArch64 only: */
- val |= HCR_TID3; /* TID3: trap feature register accesses */
-
- write_sysreg(val, hcr_el2);
-
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
- /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
+static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+ /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
+
/*
* Make sure we trap PMU access from EL0 to EL2. Also sanitize
* PMSELR_EL0 to make sure it never contains the cycle
@@ -111,19 +78,56 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(0, pmselr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
- __activate_traps_arch()();
}
-static void __hyp_text __deactivate_traps_vhe(void)
+static void __hyp_text __deactivate_traps_common(void)
{
- extern char vectors[]; /* kernel exception vectors */
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ write_sysreg(0, hstr_el2);
+ write_sysreg(0, pmuserenr_el0);
+}
- mdcr_el2 &= MDCR_EL2_HPMN_MASK |
- MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
- MDCR_EL2_TPMS;
+static void activate_traps_vhe(struct kvm_vcpu *vcpu)
+{
+ u64 val;
- write_sysreg(mdcr_el2, mdcr_el2);
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+
+static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ __activate_traps_common(vcpu);
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+ write_sysreg(val, cptr_el2);
+}
+
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ write_sysreg(hcr, hcr_el2);
+
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+ write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
+ __activate_traps_fpsimd32(vcpu);
+ if (has_vhe())
+ activate_traps_vhe(vcpu);
+ else
+ __activate_traps_nvhe(vcpu);
+}
+
+static void deactivate_traps_vhe(void)
+{
+ extern char vectors[]; /* kernel exception vectors */
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
write_sysreg(vectors, vbar_el1);
@@ -133,6 +137,8 @@ static void __hyp_text __deactivate_traps_nvhe(void)
{
u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ __deactivate_traps_common();
+
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
@@ -141,10 +147,6 @@ static void __hyp_text __deactivate_traps_nvhe(void)
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
}
-static hyp_alternate_select(__deactivate_traps_arch,
- __deactivate_traps_nvhe, __deactivate_traps_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
{
/*
@@ -156,14 +158,32 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
if (vcpu->arch.hcr_el2 & HCR_VSE)
vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
- __deactivate_traps_arch()();
- write_sysreg(0, hstr_el2);
- write_sysreg(0, pmuserenr_el0);
+ if (has_vhe())
+ deactivate_traps_vhe();
+ else
+ __deactivate_traps_nvhe();
+}
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+ __activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+ MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+ MDCR_EL2_TPMS;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+
+ __deactivate_traps_common();
}
-static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_vm(struct kvm *kvm)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
}
@@ -172,29 +192,22 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
write_sysreg(0, vttbr_el2);
}
-static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(vcpu);
- else
- __vgic_v2_save_state(vcpu);
-
- write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
+ __vgic_v3_deactivate_traps(vcpu);
+ }
}
-static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
+/* Restore VGICv3 state on non_VEH systems */
+static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
{
- u64 val;
-
- val = read_sysreg(hcr_el2);
- val |= HCR_INT_OVERRIDE;
- val |= vcpu->arch.irq_lines;
- write_sysreg(val, hcr_el2);
-
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(vcpu);
__vgic_v3_restore_state(vcpu);
- else
- __vgic_v2_restore_state(vcpu);
+ }
}
static bool __hyp_text __true_value(void)
@@ -305,54 +318,27 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
}
}
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- bool fp_enabled;
- u64 exit_code;
-
- vcpu = kern_hyp_va(vcpu);
-
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- __sysreg_save_host_state(host_ctxt);
- __debug_cond_save_host_state(vcpu);
-
- __activate_traps(vcpu);
- __activate_vm(vcpu);
-
- __vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
- /*
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_guest_state(guest_ctxt);
- __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
-
- /* Jump in the fire! */
-again:
- exit_code = __guest_enter(vcpu, host_ctxt);
- /* And we're baaack! */
-
- if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ)
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+
/*
* We're using the raw exception code in order to only process
* the trap if no SError is pending. We will come back to the
* same PC once the SError has been injected, and replay the
* trapping instruction.
*/
- if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
- goto again;
+ if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+ return true;
if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
- exit_code == ARM_EXCEPTION_TRAP) {
+ *exit_code == ARM_EXCEPTION_TRAP) {
bool valid;
valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
@@ -366,9 +352,9 @@ again:
if (ret == 1) {
if (__skip_instr(vcpu))
- goto again;
+ return true;
else
- exit_code = ARM_EXCEPTION_TRAP;
+ *exit_code = ARM_EXCEPTION_TRAP;
}
if (ret == -1) {
@@ -380,29 +366,112 @@ again:
*/
if (!__skip_instr(vcpu))
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
- exit_code = ARM_EXCEPTION_EL1_SERROR;
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
}
-
- /* 0 falls through to be handler out of EL2 */
}
}
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- exit_code == ARM_EXCEPTION_TRAP &&
+ *exit_code == ARM_EXCEPTION_TRAP &&
(kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
int ret = __vgic_v3_perform_cpuif_access(vcpu);
if (ret == 1) {
if (__skip_instr(vcpu))
- goto again;
+ return true;
else
- exit_code = ARM_EXCEPTION_TRAP;
+ *exit_code = ARM_EXCEPTION_TRAP;
}
+ }
- /* 0 falls through to be handled out of EL2 */
+ /* Return to the host kernel and handle the exit */
+ return false;
+}
+
+/* Switch to the guest for VHE systems running in EL2 */
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool fp_enabled;
+ u64 exit_code;
+
+ host_ctxt = vcpu->arch.host_cpu_context;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ sysreg_save_host_state_vhe(host_ctxt);
+
+ __activate_traps(vcpu);
+ __activate_vm(vcpu->kvm);
+
+ sysreg_restore_guest_state_vhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ fp_enabled = fpsimd_enabled_vhe();
+
+ sysreg_save_guest_state_vhe(guest_ctxt);
+
+ __deactivate_traps(vcpu);
+
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ if (fp_enabled) {
+ __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
+ __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ __fpsimd_save_fpexc32(vcpu);
}
+ __debug_switch_to_host(vcpu);
+
+ return exit_code;
+}
+
+/* Switch to the guest for legacy non-VHE systems */
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool fp_enabled;
+ u64 exit_code;
+
+ vcpu = kern_hyp_va(vcpu);
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ __sysreg_save_state_nvhe(host_ctxt);
+
+ __activate_traps(vcpu);
+ __activate_vm(kern_hyp_va(vcpu->kvm));
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
+ /*
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_state_nvhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) {
u32 midr = read_cpuid_id();
@@ -413,29 +482,29 @@ again:
}
}
- fp_enabled = __fpsimd_enabled();
+ fp_enabled = __fpsimd_enabled_nvhe();
- __sysreg_save_guest_state(guest_ctxt);
+ __sysreg_save_state_nvhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_disable_traps(vcpu);
- __vgic_save_state(vcpu);
+ __hyp_vgic_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
- __sysreg_restore_host_state(host_ctxt);
+ __sysreg_restore_state_nvhe(host_ctxt);
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ __fpsimd_save_fpexc32(vcpu);
}
- __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
/*
* This must come after restoring the host sysregs, since a non-VHE
* system may enable SPE here and make use of the TTBRs.
*/
- __debug_cond_restore_host_state(vcpu);
+ __debug_switch_to_host(vcpu);
return exit_code;
}
@@ -443,10 +512,20 @@ again:
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
- struct kvm_vcpu *vcpu)
+ struct kvm_cpu_context *__host_ctxt)
{
+ struct kvm_vcpu *vcpu;
unsigned long str_va;
+ vcpu = __host_ctxt->__hyp_running_vcpu;
+
+ if (read_sysreg(vttbr_el2)) {
+ __timer_disable_traps(vcpu);
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+ __sysreg_restore_state_nvhe(__host_ctxt);
+ }
+
/*
* Force the panic string to be loaded from the literal pool,
* making sure it is a kernel address and not a PC-relative
@@ -460,40 +539,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
read_sysreg(hpfar_el2), par, vcpu);
}
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
- struct kvm_vcpu *vcpu)
+static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *host_ctxt)
{
+ struct kvm_vcpu *vcpu;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ __deactivate_traps(vcpu);
+ sysreg_restore_host_state_vhe(host_ctxt);
+
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
read_sysreg(hpfar_el2), par, vcpu);
}
-static hyp_alternate_select(__hyp_call_panic,
- __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
{
- struct kvm_vcpu *vcpu = NULL;
-
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
- if (read_sysreg(vttbr_el2)) {
- struct kvm_cpu_context *host_ctxt;
-
- host_ctxt = kern_hyp_va(__host_ctxt);
- vcpu = host_ctxt->__hyp_running_vcpu;
- __timer_disable_traps(vcpu);
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
- __sysreg_restore_host_state(host_ctxt);
- }
-
- /* Call panic for real */
- __hyp_call_panic()(spsr, elr, par, vcpu);
+ if (!has_vhe())
+ __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
+ else
+ __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
unreachable();
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 2c17afd..b3894df 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -19,32 +19,43 @@
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
-/* Yes, this does nothing, on purpose */
-static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
-
/*
* Non-VHE: Both host and guest must save everything.
*
- * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
- * and guest must save everything.
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
+ * which are handled as part of the el2 return state) on every switch.
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU. EL1 registers only need to be
+ * switched when potentially going to run a different VCPU. The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
*/
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
- ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
- ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
- ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
+
+ /*
+ * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+ * therefore be saved/restored on every entry/exit to/from the guest.
+ */
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
}
-static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
+ ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
+ ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
@@ -64,6 +75,10 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+}
+
+static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+{
ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
@@ -71,36 +86,48 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
}
-static hyp_alternate_select(__sysreg_call_save_host_state,
- __sysreg_save_state, __sysreg_do_nothing,
- ARM64_HAS_VIRT_HOST_EXTN);
+void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_el1_state(ctxt);
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_user_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
-void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_call_save_host_state()(ctxt);
__sysreg_save_common_state(ctxt);
}
-void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_save_state(ctxt);
__sysreg_save_common_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
}
static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
- write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
- write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
+
+ /*
+ * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+ * therefore be saved/restored on every entry/exit to/from the guest.
+ */
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
}
-static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
+ write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
+ write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
@@ -120,6 +147,11 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+}
+
+static void __hyp_text
+__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
+{
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
@@ -127,27 +159,30 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
}
-static hyp_alternate_select(__sysreg_call_restore_host_state,
- __sysreg_restore_state, __sysreg_do_nothing,
- ARM64_HAS_VIRT_HOST_EXTN);
+void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_user_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
-void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_call_restore_host_state()(ctxt);
__sysreg_restore_common_state(ctxt);
}
-void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt)
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_restore_state(ctxt);
__sysreg_restore_common_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
}
void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
{
u64 *spsr, *sysreg;
- if (read_sysreg(hcr_el2) & HCR_RW)
+ if (!vcpu_el1_is_32bit(vcpu))
return;
spsr = vcpu->arch.ctxt.gp_regs.spsr;
@@ -161,10 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
- if (__fpsimd_enabled())
- sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
}
@@ -172,7 +204,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
{
u64 *spsr, *sysreg;
- if (read_sysreg(hcr_el2) & HCR_RW)
+ if (!vcpu_el1_is_32bit(vcpu))
return;
spsr = vcpu->arch.ctxt.gp_regs.spsr;
@@ -186,6 +218,78 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
}
+
+/**
+ * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+ if (!has_vhe())
+ return;
+
+ __sysreg_save_user_state(host_ctxt);
+
+ /*
+ * Load guest EL1 and user state
+ *
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_user_state(guest_ctxt);
+ __sysreg_restore_el1_state(guest_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = true;
+
+ activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+ if (!has_vhe())
+ return;
+
+ deactivate_traps_vhe_put();
+
+ __sysreg_save_el1_state(guest_ctxt);
+ __sysreg_save_user_state(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+
+ /* Restore host user state */
+ __sysreg_restore_user_state(host_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = false;
+}
+
+void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
+{
+ asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
+}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
new file mode 100644
index 0000000..86801b6
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
+ * guest.
+ *
+ * @vcpu: the offending vcpu
+ *
+ * Returns:
+ * 1: GICV access successfully performed
+ * 0: Not a GICV access
+ * -1: Illegal GICV access
+ */
+int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+ struct vgic_dist *vgic = &kvm->arch.vgic;
+ phys_addr_t fault_ipa;
+ void __iomem *addr;
+ int rd;
+
+ /* Build the full address */
+ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+ /* If not for GICV, move on */
+ if (fault_ipa < vgic->vgic_cpu_base ||
+ fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
+ return 0;
+
+ /* Reject anything but a 32bit access */
+ if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
+ return -1;
+
+ /* Not aligned? Don't bother */
+ if (fault_ipa & 3)
+ return -1;
+
+ rd = kvm_vcpu_dabt_get_rd(vcpu);
+ addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
+ addr += fault_ipa - vgic->vgic_cpu_base;
+
+ if (kvm_vcpu_dabt_iswrite(vcpu)) {
+ u32 data = vcpu_data_guest_to_host(vcpu,
+ vcpu_get_reg(vcpu, rd),
+ sizeof(u32));
+ writel_relaxed(data, addr);
+ } else {
+ u32 data = readl_relaxed(addr);
+ vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
+ sizeof(u32)));
+ }
+
+ return 1;
+}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 60666a0..d8e7165 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -58,7 +58,7 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
exc_offset = LOWER_EL_AArch32_VECTOR;
}
- return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+ return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -67,13 +67,13 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u32 esr = 0;
- *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
- *vcpu_spsr(vcpu) = cpsr;
+ vcpu_write_spsr(vcpu, cpsr);
- vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+ vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
/*
* Build an {i,d}abort, depending on the level and the
@@ -94,7 +94,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
if (!is_iabt)
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
- vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
+ vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1);
}
static void inject_undef64(struct kvm_vcpu *vcpu)
@@ -102,11 +102,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
- *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
- *vcpu_spsr(vcpu) = cpsr;
+ vcpu_write_spsr(vcpu, cpsr);
/*
* Build an unknown exception, depending on the instruction
@@ -115,7 +115,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
if (kvm_vcpu_trap_il_is32bit(vcpu))
esr |= ESR_ELx_IL;
- vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
}
/**
@@ -128,7 +128,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
*/
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_dabt32(vcpu, addr);
else
inject_abt64(vcpu, false, addr);
@@ -144,7 +144,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
*/
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_pabt32(vcpu, addr);
else
inject_abt64(vcpu, true, addr);
@@ -158,7 +158,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
*/
void kvm_inject_undefined(struct kvm_vcpu *vcpu)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_undef32(vcpu);
else
inject_undef64(vcpu);
@@ -167,7 +167,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
vcpu_set_vsesr(vcpu, esr);
- vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
+ *vcpu_hcr(vcpu) |= HCR_VSE;
}
/**
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
index bbc6ae3..eefe403 100644
--- a/arch/arm64/kvm/regmap.c
+++ b/arch/arm64/kvm/regmap.c
@@ -141,28 +141,61 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
/*
* Return the SPSR for the current mode of the virtual CPU.
*/
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu)
{
unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
switch (mode) {
- case COMPAT_PSR_MODE_SVC:
- mode = KVM_SPSR_SVC;
- break;
- case COMPAT_PSR_MODE_ABT:
- mode = KVM_SPSR_ABT;
- break;
- case COMPAT_PSR_MODE_UND:
- mode = KVM_SPSR_UND;
- break;
- case COMPAT_PSR_MODE_IRQ:
- mode = KVM_SPSR_IRQ;
- break;
- case COMPAT_PSR_MODE_FIQ:
- mode = KVM_SPSR_FIQ;
- break;
+ case COMPAT_PSR_MODE_SVC: return KVM_SPSR_SVC;
+ case COMPAT_PSR_MODE_ABT: return KVM_SPSR_ABT;
+ case COMPAT_PSR_MODE_UND: return KVM_SPSR_UND;
+ case COMPAT_PSR_MODE_IRQ: return KVM_SPSR_IRQ;
+ case COMPAT_PSR_MODE_FIQ: return KVM_SPSR_FIQ;
+ default: BUG();
+ }
+}
+
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
+{
+ int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ return vcpu_gp_regs(vcpu)->spsr[spsr_idx];
+
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ return read_sysreg_el1(spsr);
+ case KVM_SPSR_ABT:
+ return read_sysreg(spsr_abt);
+ case KVM_SPSR_UND:
+ return read_sysreg(spsr_und);
+ case KVM_SPSR_IRQ:
+ return read_sysreg(spsr_irq);
+ case KVM_SPSR_FIQ:
+ return read_sysreg(spsr_fiq);
default:
BUG();
}
+}
+
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+ if (!vcpu->arch.sysregs_loaded_on_cpu) {
+ vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v;
+ return;
+ }
- return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ write_sysreg_el1(v, spsr);
+ case KVM_SPSR_ABT:
+ write_sysreg(v, spsr_abt);
+ case KVM_SPSR_UND:
+ write_sysreg(v, spsr_und);
+ case KVM_SPSR_IRQ:
+ write_sysreg(v, spsr_irq);
+ case KVM_SPSR_FIQ:
+ write_sysreg(v, spsr_fiq);
+ }
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 50a43c7..806b0b12 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -35,6 +35,7 @@
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
+#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/perf_event.h>
#include <asm/sysreg.h>
@@ -76,6 +77,93 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false;
}
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
+{
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ goto immediate_read;
+
+ /*
+ * System registers listed in the switch are not saved on every
+ * exit from the guest but are only saved on vcpu_put.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the guest cannot modify its
+ * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+ * thread when emulating cross-VCPU communication.
+ */
+ switch (reg) {
+ case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
+ case SCTLR_EL1: return read_sysreg_s(sctlr_EL12);
+ case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
+ case CPACR_EL1: return read_sysreg_s(cpacr_EL12);
+ case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12);
+ case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12);
+ case TCR_EL1: return read_sysreg_s(tcr_EL12);
+ case ESR_EL1: return read_sysreg_s(esr_EL12);
+ case AFSR0_EL1: return read_sysreg_s(afsr0_EL12);
+ case AFSR1_EL1: return read_sysreg_s(afsr1_EL12);
+ case FAR_EL1: return read_sysreg_s(far_EL12);
+ case MAIR_EL1: return read_sysreg_s(mair_EL12);
+ case VBAR_EL1: return read_sysreg_s(vbar_EL12);
+ case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12);
+ case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
+ case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
+ case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
+ case AMAIR_EL1: return read_sysreg_s(amair_EL12);
+ case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12);
+ case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
+ case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
+ case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
+ case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2);
+ }
+
+immediate_read:
+ return __vcpu_sys_reg(vcpu, reg);
+}
+
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ goto immediate_write;
+
+ /*
+ * System registers listed in the switch are not restored on every
+ * entry to the guest but are only restored on vcpu_load.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the the MPIDR should only be
+ * set once, before running the VCPU, and never changed later.
+ */
+ switch (reg) {
+ case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
+ case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return;
+ case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
+ case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return;
+ case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return;
+ case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return;
+ case TCR_EL1: write_sysreg_s(val, tcr_EL12); return;
+ case ESR_EL1: write_sysreg_s(val, esr_EL12); return;
+ case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return;
+ case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return;
+ case FAR_EL1: write_sysreg_s(val, far_EL12); return;
+ case MAIR_EL1: write_sysreg_s(val, mair_EL12); return;
+ case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return;
+ case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
+ case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
+ case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
+ case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return;
+ case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return;
+ case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
+ case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
+ case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
+ case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return;
+ }
+
+immediate_write:
+ __vcpu_sys_reg(vcpu, reg) = val;
+}
+
/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
static u32 cache_levels;
@@ -121,16 +209,26 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
bool was_enabled = vcpu_has_cache_enabled(vcpu);
+ u64 val;
+ int reg = r->reg;
BUG_ON(!p->is_write);
- if (!p->is_aarch32) {
- vcpu_sys_reg(vcpu, r->reg) = p->regval;
+ /* See the 32bit mapping in kvm_host.h */
+ if (p->is_aarch32)
+ reg = r->reg / 2;
+
+ if (!p->is_aarch32 || !p->is_32bit) {
+ val = p->regval;
} else {
- if (!p->is_32bit)
- vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval);
- vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval);
+ val = vcpu_read_sys_reg(vcpu, reg);
+ if (r->reg % 2)
+ val = (p->regval << 32) | (u64)lower_32_bits(val);
+ else
+ val = ((u64)upper_32_bits(val) << 32) |
+ lower_32_bits(p->regval);
}
+ vcpu_write_sys_reg(vcpu, val, reg);
kvm_toggle_cache(vcpu, was_enabled);
return true;
@@ -175,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
+static bool trap_undef(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -231,10 +337,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write) {
- vcpu_sys_reg(vcpu, r->reg) = p->regval;
+ vcpu_write_sys_reg(vcpu, p->regval, r->reg);
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
- p->regval = vcpu_sys_reg(vcpu, r->reg);
+ p->regval = vcpu_read_sys_reg(vcpu, r->reg);
}
trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
@@ -447,7 +553,8 @@ static void reset_wcr(struct kvm_vcpu *vcpu,
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1);
+ u64 amair = read_sysreg(amair_el1);
+ vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1);
}
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -464,7 +571,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
- vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
+ vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1);
}
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -478,12 +585,12 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
- vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
}
static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
{
- u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+ u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
if (!enabled)
@@ -525,14 +632,14 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
/* Only update writeable bits of PMCR */
- val = vcpu_sys_reg(vcpu, PMCR_EL0);
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
- vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
} else {
/* PMCR.P & PMCR.C are RAZ */
- val = vcpu_sys_reg(vcpu, PMCR_EL0)
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0)
& ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
p->regval = val;
}
@@ -550,10 +657,10 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
if (p->is_write)
- vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+ __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
else
/* return PMSELR.SEL field */
- p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
& ARMV8_PMU_COUNTER_MASK;
return true;
@@ -586,7 +693,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
{
u64 pmcr, val;
- pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
+ pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
kvm_inject_undefined(vcpu);
@@ -611,7 +718,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
- idx = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ idx = __vcpu_sys_reg(vcpu, PMSELR_EL0)
& ARMV8_PMU_COUNTER_MASK;
} else if (r->Op2 == 0) {
/* PMCCNTR_EL0 */
@@ -666,7 +773,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
/* PMXEVTYPER_EL0 */
- idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+ idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
reg = PMEVTYPER0_EL0 + idx;
} else if (r->CRn == 14 && (r->CRm & 12) == 12) {
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
@@ -684,9 +791,9 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
- vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
+ __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
} else {
- p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+ p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
}
return true;
@@ -708,15 +815,15 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = p->regval & mask;
if (r->Op2 & 0x1) {
/* accessing PMCNTENSET_EL0 */
- vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
kvm_pmu_enable_counter(vcpu, val);
} else {
/* accessing PMCNTENCLR_EL0 */
- vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
kvm_pmu_disable_counter(vcpu, val);
}
} else {
- p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
}
return true;
@@ -740,12 +847,12 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->Op2 & 0x1)
/* accessing PMINTENSET_EL1 */
- vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
else
/* accessing PMINTENCLR_EL1 */
- vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
} else {
- p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
}
return true;
@@ -765,12 +872,12 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
if (r->CRm & 0x2)
/* accessing PMOVSSET_EL0 */
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
else
/* accessing PMOVSCLR_EL0 */
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
} else {
- p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
}
return true;
@@ -807,10 +914,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
}
- vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
- & ARMV8_PMU_USERENR_MASK;
+ __vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
+ p->regval & ARMV8_PMU_USERENR_MASK;
} else {
- p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
+ p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
& ARMV8_PMU_USERENR_MASK;
}
@@ -893,6 +1000,12 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
task_pid_nr(current));
val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
+ } else if (id == SYS_ID_AA64MMFR1_EL1) {
+ if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))
+ pr_err_once("kvm [%i]: LORegions unsupported for guests, suppressing\n",
+ task_pid_nr(current));
+
+ val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT);
}
return val;
@@ -1178,6 +1291,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
+ { SYS_DESC(SYS_LORSA_EL1), trap_undef },
+ { SYS_DESC(SYS_LOREA_EL1), trap_undef },
+ { SYS_DESC(SYS_LORN_EL1), trap_undef },
+ { SYS_DESC(SYS_LORC_EL1), trap_undef },
+ { SYS_DESC(SYS_LORID_EL1), trap_undef },
+
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
@@ -1545,6 +1664,11 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+ /* CNTP_TVAL */
+ { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval },
+ /* CNTP_CTL */
+ { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl },
+
/* PMEVCNTRn */
PMU_PMEVCNTR(0),
PMU_PMEVCNTR(1),
@@ -1618,6 +1742,7 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
};
/* Target specific emulation tables */
@@ -2194,7 +2319,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (r->get_user)
return (r->get_user)(vcpu, r, reg, uaddr);
- return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+ return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
}
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
@@ -2215,7 +2340,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (r->set_user)
return (r->set_user)(vcpu, r, reg, uaddr);
- return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+ return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
}
static unsigned int num_demux_regs(void)
@@ -2421,6 +2546,6 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
reset_sys_reg_descs(vcpu, table, num);
for (num = 1; num < NR_SYS_REGS; num++)
- if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
- panic("Didn't reset vcpu_sys_reg(%zi)", num);
+ if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+ panic("Didn't reset __vcpu_sys_reg(%zi)", num);
}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 060f534..cd710f8 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -89,14 +89,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+ __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
}
static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- vcpu_sys_reg(vcpu, r->reg) = r->val;
+ __vcpu_sys_reg(vcpu, r->reg) = r->val;
}
static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 969ade1..ddb8497 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -38,13 +38,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
if (p->is_write)
return ignore_write(vcpu, p);
- p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1);
+ p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
return true;
}
static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
+ __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
}
/*
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
new file mode 100644
index 0000000..c712a73
--- /dev/null
+++ b/arch/arm64/kvm/va_layout.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/random.h>
+#include <linux/memblock.h>
+#include <asm/alternative.h>
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * The LSB of the random hyp VA tag or 0 if no randomization is used.
+ */
+static u8 tag_lsb;
+/*
+ * The random hyp VA tag value with the region bit if hyp randomization is used
+ */
+static u64 tag_val;
+static u64 va_mask;
+
+static void compute_layout(void)
+{
+ phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
+ u64 hyp_va_msb;
+ int kva_msb;
+
+ /* Where is my RAM region? */
+ hyp_va_msb = idmap_addr & BIT(VA_BITS - 1);
+ hyp_va_msb ^= BIT(VA_BITS - 1);
+
+ kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
+ (u64)(high_memory - 1));
+
+ if (kva_msb == (VA_BITS - 1)) {
+ /*
+ * No space in the address, let's compute the mask so
+ * that it covers (VA_BITS - 1) bits, and the region
+ * bit. The tag stays set to zero.
+ */
+ va_mask = BIT(VA_BITS - 1) - 1;
+ va_mask |= hyp_va_msb;
+ } else {
+ /*
+ * We do have some free bits to insert a random tag.
+ * Hyp VAs are now created from kernel linear map VAs
+ * using the following formula (with V == VA_BITS):
+ *
+ * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0
+ * ---------------------------------------------------------
+ * | 0000000 | hyp_va_msb | random tag | kern linear VA |
+ */
+ tag_lsb = kva_msb;
+ va_mask = GENMASK_ULL(tag_lsb - 1, 0);
+ tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb);
+ tag_val |= hyp_va_msb;
+ tag_val >>= tag_lsb;
+ }
+}
+
+static u32 compute_instruction(int n, u32 rd, u32 rn)
+{
+ u32 insn = AARCH64_BREAK_FAULT;
+
+ switch (n) {
+ case 0:
+ insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_AND,
+ AARCH64_INSN_VARIANT_64BIT,
+ rn, rd, va_mask);
+ break;
+
+ case 1:
+ /* ROR is a variant of EXTR with Rm = Rn */
+ insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+ rn, rn, rd,
+ tag_lsb);
+ break;
+
+ case 2:
+ insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+ tag_val & GENMASK(11, 0),
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+ break;
+
+ case 3:
+ insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+ tag_val & GENMASK(23, 12),
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+ break;
+
+ case 4:
+ /* ROR is a variant of EXTR with Rm = Rn */
+ insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+ rn, rn, rd, 64 - tag_lsb);
+ break;
+ }
+
+ return insn;
+}
+
+void __init kvm_update_va_mask(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ int i;
+
+ BUG_ON(nr_inst != 5);
+
+ if (!has_vhe() && !va_mask)
+ compute_layout();
+
+ for (i = 0; i < nr_inst; i++) {
+ u32 rd, rn, insn, oinsn;
+
+ /*
+ * VHE doesn't need any address translation, let's NOP
+ * everything.
+ *
+ * Alternatively, if we don't have any spare bits in
+ * the address, NOP everything after masking that
+ * kernel VA.
+ */
+ if (has_vhe() || (!tag_lsb && i > 0)) {
+ updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
+ continue;
+ }
+
+ oinsn = le32_to_cpu(origptr[i]);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
+ rn = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, oinsn);
+
+ insn = compute_instruction(i, rd, rn);
+ BUG_ON(insn == AARCH64_BREAK_FAULT);
+
+ updptr[i] = cpu_to_le32(insn);
+ }
+}
+
+void *__kvm_bp_vect_base;
+int __kvm_harden_el2_vector_slot;
+
+void kvm_patch_vector_branch(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u64 addr;
+ u32 insn;
+
+ BUG_ON(nr_inst != 5);
+
+ if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+ WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS));
+ return;
+ }
+
+ if (!va_mask)
+ compute_layout();
+
+ /*
+ * Compute HYP VA by using the same computation as kern_hyp_va()
+ */
+ addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector);
+ addr &= va_mask;
+ addr |= tag_val << tag_lsb;
+
+ /* Use PC[10:7] to branch to the same vector in KVM */
+ addr |= ((u64)origptr & GENMASK_ULL(10, 7));
+
+ /*
+ * Branch to the second instruction in the vectors in order to
+ * avoid the initial store on the stack (which we already
+ * perform in the hardening vectors).
+ */
+ addr += AARCH64_INSN_SIZE;
+
+ /* stp x0, x1, [sp, #-16]! */
+ insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,
+ AARCH64_INSN_REG_1,
+ AARCH64_INSN_REG_SP,
+ -16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movz x0, #(addr & 0xffff) */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)addr,
+ 0,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_ZERO);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk x0, #((addr >> 16) & 0xffff), lsl #16 */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)(addr >> 16),
+ 16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk x0, #((addr >> 32) & 0xffff), lsl #32 */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)(addr >> 32),
+ 32,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* br x0 */
+ insn = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_0,
+ AARCH64_INSN_BRANCH_NOLINK);
+ *updptr++ = cpu_to_le32(insn);
+}
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index decccff..842c8a5 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -38,12 +38,12 @@
#define MIN_GAP (SZ_128M)
#define MAX_GAP (STACK_TOP/6*5)
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
@@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
/* Values close to RLIM_INFINITY can overflow. */
@@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd)
* This function, called very early during the creation of a new process VM
* image, sets up which VM layout function to use:
*/
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality bit is set, or
* if the expected stack growth is unlimited:
*/
- if (mmap_is_legacy()) {
+ if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base(random_factor);
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile
index 6f6096f..6ab942e 100644
--- a/arch/c6x/Makefile
+++ b/arch/c6x/Makefile
@@ -25,6 +25,7 @@ KBUILD_AFLAGS += -mbig-endian
LINKFLAGS += -mbig-endian
KBUILD_LDFLAGS += -mbig-endian
LDFLAGS += -EB
+CHECKFLAGS += -D_BIG_ENDIAN
endif
head-y := arch/c6x/kernel/head.o
diff --git a/arch/c6x/kernel/asm-offsets.c b/arch/c6x/kernel/asm-offsets.c
index cff5776..0f8fde4 100644
--- a/arch/c6x/kernel/asm-offsets.c
+++ b/arch/c6x/kernel/asm-offsets.c
@@ -107,7 +107,6 @@ void foo(void)
/* These would be unneccessary if we ran asm files
* through the preprocessor.
*/
- DEFINE(KTHREAD_SIZE, THREAD_SIZE);
DEFINE(KTHREAD_SHIFT, THREAD_SHIFT);
DEFINE(KTHREAD_START_SP, THREAD_START_SP);
DEFINE(ENOSYS_, ENOSYS);
diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c
index e8b6cc6..1ef04b5 100644
--- a/arch/c6x/platforms/plldata.c
+++ b/arch/c6x/platforms/plldata.c
@@ -19,6 +19,7 @@
#include <asm/clock.h>
#include <asm/setup.h>
+#include <asm/special_insns.h>
#include <asm/irq.h>
/*
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index f5ec736..7ccc64d 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -398,7 +398,7 @@ pcibios_enable_device (struct pci_dev *dev, int mask)
if (ret < 0)
return ret;
- if (!dev->msi_enabled)
+ if (!pci_dev_msi_enabled(dev))
return acpi_pci_irq_enable(dev);
return 0;
}
@@ -407,7 +407,7 @@ void
pcibios_disable_device (struct pci_dev *dev)
{
BUG_ON(atomic_read(&dev->enable_cnt));
- if (!dev->msi_enabled)
+ if (!pci_dev_msi_enabled(dev))
acpi_pci_irq_disable(dev);
}
diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
index 84938fd..908d583 100644
--- a/arch/m68k/coldfire/device.c
+++ b/arch/m68k/coldfire/device.c
@@ -135,7 +135,11 @@ static struct platform_device mcf_fec0 = {
.id = 0,
.num_resources = ARRAY_SIZE(mcf_fec0_resources),
.resource = mcf_fec0_resources,
- .dev.platform_data = FEC_PDATA,
+ .dev = {
+ .dma_mask = &mcf_fec0.dev.coherent_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
+ .platform_data = FEC_PDATA,
+ }
};
#ifdef MCFFEC_BASE1
@@ -167,7 +171,11 @@ static struct platform_device mcf_fec1 = {
.id = 1,
.num_resources = ARRAY_SIZE(mcf_fec1_resources),
.resource = mcf_fec1_resources,
- .dev.platform_data = FEC_PDATA,
+ .dev = {
+ .dma_mask = &mcf_fec1.dev.coherent_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
+ .platform_data = FEC_PDATA,
+ }
};
#endif /* MCFFEC_BASE1 */
#endif /* CONFIG_FEC */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 61e9a24..225c95d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2029,6 +2029,7 @@ config CPU_MIPSR6
select CPU_HAS_RIXI
select HAVE_ARCH_BITREVERSE
select MIPS_ASID_BITS_VARIABLE
+ select MIPS_CRC_SUPPORT
select MIPS_SPRAM
config EVA
@@ -2502,6 +2503,9 @@ config MIPS_ASID_BITS
config MIPS_ASID_BITS_VARIABLE
bool
+config MIPS_CRC_SUPPORT
+ bool
+
#
# - Highmem only makes sense for the 32-bit kernel.
# - The current highmem code will only work properly on physically indexed
@@ -2850,8 +2854,7 @@ config CRASH_DUMP
config PHYSICAL_START
hex "Physical address where the kernel is loaded"
- default "0xffffffff84000000" if 64BIT
- default "0x84000000" if 32BIT
+ default "0xffffffff84000000"
depends on CRASH_DUMP
help
This gives the CKSEG0 or KSEG0 address where the kernel is loaded.
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index d1ca839..5e9fce0 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -222,6 +222,8 @@ xpa-cflags-y := $(mips-cflags)
xpa-cflags-$(micromips-ase) += -mmicromips -Wa$(comma)-fatal-warnings
toolchain-xpa := $(call cc-option-yn,$(xpa-cflags-y) -mxpa)
cflags-$(toolchain-xpa) += -DTOOLCHAIN_SUPPORTS_XPA
+toolchain-crc := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
+cflags-$(toolchain-crc) += -DTOOLCHAIN_SUPPORTS_CRC
#
# Firmware support
@@ -249,20 +251,12 @@ ifdef CONFIG_PHYSICAL_START
load-y = $(CONFIG_PHYSICAL_START)
endif
-entry-noisa-y = 0x$(shell $(NM) vmlinux 2>/dev/null \
- | grep "\bkernel_entry\b" | cut -f1 -d \ )
-ifdef CONFIG_CPU_MICROMIPS
- #
- # Set the ISA bit, since the kernel_entry symbol in the ELF will have it
- # clear which would lead to images containing addresses which bootloaders may
- # jump to as MIPS32 code.
- #
- entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \
- $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \
- $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y)))))))))
-else
- entry-y = $(entry-noisa-y)
-endif
+# Sign-extend the entry point to 64 bits if retrieved as a 32-bit number.
+entry-y = $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \
+ | sed -n '/^start address / { \
+ s/^.* //; \
+ s/0x\([0-7].......\)$$/0x00000000\1/; \
+ s/0x\(........\)$$/0xffffffff\1/; p }')
cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic
drivers-$(CONFIG_PCI) += arch/mips/pci/
@@ -330,6 +324,7 @@ libs-y += arch/mips/math-emu/
# See arch/mips/Kbuild for content of core part of the kernel
core-y += arch/mips/
+drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/
# suspend and hibernation support
@@ -473,6 +468,21 @@ define archhelp
echo
echo ' {micro32,32,64}{r1,r2,r6}{el,}_defconfig <BOARDS="list of boards">'
echo
+ echo ' Where BOARDS is some subset of the following:'
+ for board in $(sort $(BOARDS)); do echo " $${board}"; done
+ echo
+ echo ' Specifically the following generic default configurations are'
+ echo ' supported:'
+ echo
+ $(foreach cfg,$(generic_defconfigs),
+ printf " %-24s - Build generic kernel for $(call describe_generic_defconfig,$(cfg))\n" $(cfg);)
+ echo
+ echo ' The following legacy default configurations have been converted to'
+ echo ' generic and can still be used:'
+ echo
+ $(foreach cfg,$(sort $(legacy_defconfigs)),
+ printf " %-24s - Build $($(cfg)-y)\n" $(cfg);)
+ echo
echo ' Otherwise, the following default configurations are available:'
endef
@@ -507,6 +517,10 @@ endef
$(eval $(call gen_generic_defconfigs,32 64,r1 r2 r6,eb el))
$(eval $(call gen_generic_defconfigs,micro32,r2,eb el))
+define describe_generic_defconfig
+$(subst 32r,MIPS32 r,$(subst 64r,MIPS64 r,$(subst el, little endian,$(patsubst %_defconfig,%,$(1)))))
+endef
+
.PHONY: $(generic_defconfigs)
$(generic_defconfigs):
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
@@ -543,14 +557,18 @@ generic_defconfig:
# now that the boards have been converted to use the generic kernel they are
# wrappers around the generic rules above.
#
-.PHONY: sead3_defconfig
-sead3_defconfig:
- $(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=sead-3
+legacy_defconfigs += ocelot_defconfig
+ocelot_defconfig-y := 32r2el_defconfig BOARDS=ocelot
+
+legacy_defconfigs += sead3_defconfig
+sead3_defconfig-y := 32r2el_defconfig BOARDS=sead-3
+
+legacy_defconfigs += sead3micro_defconfig
+sead3micro_defconfig-y := micro32r2el_defconfig BOARDS=sead-3
-.PHONY: sead3micro_defconfig
-sead3micro_defconfig:
- $(Q)$(MAKE) -f $(srctree)/Makefile micro32r2el_defconfig BOARDS=sead-3
+legacy_defconfigs += xilfpga_defconfig
+xilfpga_defconfig-y := 32r2el_defconfig BOARDS=xilfpga
-.PHONY: xilfpga_defconfig
-xilfpga_defconfig:
- $(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=xilfpga
+.PHONY: $(legacy_defconfigs)
+$(legacy_defconfigs):
+ $(Q)$(MAKE) -f $(srctree)/Makefile $($@-y)
diff --git a/arch/mips/alchemy/board-gpr.c b/arch/mips/alchemy/board-gpr.c
index 328d697..4e79dbd 100644
--- a/arch/mips/alchemy/board-gpr.c
+++ b/arch/mips/alchemy/board-gpr.c
@@ -190,7 +190,7 @@ static struct platform_device gpr_mtd_device = {
/*
* LEDs
*/
-static struct gpio_led gpr_gpio_leds[] = {
+static const struct gpio_led gpr_gpio_leds[] = {
{ /* green */
.name = "gpr:green",
.gpio = 4,
diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c
index 85bb756..aab55aa 100644
--- a/arch/mips/alchemy/board-mtx1.c
+++ b/arch/mips/alchemy/board-mtx1.c
@@ -145,7 +145,7 @@ static struct platform_device mtx1_wdt = {
.resource = mtx1_wdt_res,
};
-static struct gpio_led default_leds[] = {
+static const struct gpio_led default_leds[] = {
{
.name = "mtx1:green",
.gpio = 211,
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index e1675c2..f09262e 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -346,7 +346,7 @@ static struct platform_device ar7_udc = {
/*****************************************************************************
* LEDs
****************************************************************************/
-static struct gpio_led default_leds[] = {
+static const struct gpio_led default_leds[] = {
{
.name = "status",
.gpio = 8,
@@ -354,12 +354,12 @@ static struct gpio_led default_leds[] = {
},
};
-static struct gpio_led titan_leds[] = {
+static const struct gpio_led titan_leds[] = {
{ .name = "status", .gpio = 8, .active_low = 1, },
{ .name = "wifi", .gpio = 13, .active_low = 1, },
};
-static struct gpio_led dsl502t_leds[] = {
+static const struct gpio_led dsl502t_leds[] = {
{
.name = "status",
.gpio = 9,
@@ -377,7 +377,7 @@ static struct gpio_led dsl502t_leds[] = {
},
};
-static struct gpio_led dg834g_leds[] = {
+static const struct gpio_led dg834g_leds[] = {
{
.name = "ppp",
.gpio = 6,
@@ -406,7 +406,7 @@ static struct gpio_led dg834g_leds[] = {
},
};
-static struct gpio_led fb_sl_leds[] = {
+static const struct gpio_led fb_sl_leds[] = {
{
.name = "1",
.gpio = 7,
@@ -433,7 +433,7 @@ static struct gpio_led fb_sl_leds[] = {
},
};
-static struct gpio_led fb_fon_leds[] = {
+static const struct gpio_led fb_fon_leds[] = {
{
.name = "1",
.gpio = 8,
@@ -459,7 +459,7 @@ static struct gpio_led fb_fon_leds[] = {
},
};
-static struct gpio_led gt701_leds[] = {
+static const struct gpio_led gt701_leds[] = {
{
.name = "inet:green",
.gpio = 13,
diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c
index 88a8fb2..88d400d 100644
--- a/arch/mips/bcm47xx/buttons.c
+++ b/arch/mips/bcm47xx/buttons.c
@@ -355,7 +355,7 @@ bcm47xx_buttons_luxul_xwr_600_v1[] = {
static const struct gpio_keys_button
bcm47xx_buttons_luxul_xwr_1750_v1[] = {
- BCM47XX_GPIO_KEY(14, BTN_TASK),
+ BCM47XX_GPIO_KEY(14, KEY_RESTART),
};
/* Microsoft */
diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c
index 8307a8a..34a7b3f 100644
--- a/arch/mips/bcm47xx/leds.c
+++ b/arch/mips/bcm47xx/leds.c
@@ -409,6 +409,12 @@ bcm47xx_leds_luxul_xap_1500_v1[] __initconst = {
};
static const struct gpio_led
+bcm47xx_leds_luxul_xap1500_v1_extra[] __initconst = {
+ BCM47XX_GPIO_LED(44, "green", "5ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+ BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
bcm47xx_leds_luxul_xbr_4400_v1[] __initconst = {
BCM47XX_GPIO_LED(12, "green", "usb", 0, LEDS_GPIO_DEFSTATE_OFF),
BCM47XX_GPIO_LED_TRIGGER(15, "green", "status", 0, "timer"),
@@ -435,6 +441,11 @@ bcm47xx_leds_luxul_xwr_1750_v1[] __initconst = {
BCM47XX_GPIO_LED(15, "green", "wps", 0, LEDS_GPIO_DEFSTATE_OFF),
};
+static const struct gpio_led
+bcm47xx_leds_luxul_xwr1750_v1_extra[] __initconst = {
+ BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
/* Microsoft */
static const struct gpio_led
@@ -528,6 +539,12 @@ static struct gpio_led_platform_data bcm47xx_leds_pdata;
bcm47xx_leds_pdata.num_leds = ARRAY_SIZE(dev_leds); \
} while (0)
+static struct gpio_led_platform_data bcm47xx_leds_pdata_extra __initdata = {};
+#define bcm47xx_set_pdata_extra(dev_leds) do { \
+ bcm47xx_leds_pdata_extra.leds = dev_leds; \
+ bcm47xx_leds_pdata_extra.num_leds = ARRAY_SIZE(dev_leds); \
+} while (0)
+
void __init bcm47xx_leds_register(void)
{
enum bcm47xx_board board = bcm47xx_board_get();
@@ -705,6 +722,7 @@ void __init bcm47xx_leds_register(void)
break;
case BCM47XX_BOARD_LUXUL_XAP_1500_V1:
bcm47xx_set_pdata(bcm47xx_leds_luxul_xap_1500_v1);
+ bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xap1500_v1_extra);
break;
case BCM47XX_BOARD_LUXUL_XBR_4400_V1:
bcm47xx_set_pdata(bcm47xx_leds_luxul_xbr_4400_v1);
@@ -717,6 +735,7 @@ void __init bcm47xx_leds_register(void)
break;
case BCM47XX_BOARD_LUXUL_XWR_1750_V1:
bcm47xx_set_pdata(bcm47xx_leds_luxul_xwr_1750_v1);
+ bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xwr1750_v1_extra);
break;
case BCM47XX_BOARD_MICROSOFT_MN700:
@@ -760,4 +779,6 @@ void __init bcm47xx_leds_register(void)
}
gpio_led_register_device(-1, &bcm47xx_leds_pdata);
+ if (bcm47xx_leds_pdata_extra.num_leds)
+ gpio_led_register_device(0, &bcm47xx_leds_pdata_extra);
}
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index fdf99e9..81df904 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -76,12 +76,7 @@ void error(char *x)
#include "../../../../lib/decompress_unxz.c"
#endif
-unsigned long __stack_chk_guard;
-
-void __stack_chk_guard_setup(void)
-{
- __stack_chk_guard = 0x000a0dff;
-}
+const unsigned long __stack_chk_guard = 0x000a0dff;
void __stack_chk_fail(void)
{
@@ -92,8 +87,6 @@ void decompress_kernel(unsigned long boot_heap_start)
{
unsigned long zimage_start, zimage_size;
- __stack_chk_guard_setup();
-
zimage_start = (unsigned long)(&__image_begin);
zimage_size = (unsigned long)(&__image_end) -
(unsigned long)(&__image_begin);
diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index e2c6f13..1e79cab 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -4,6 +4,7 @@ subdir-y += cavium-octeon
subdir-y += img
subdir-y += ingenic
subdir-y += lantiq
+subdir-y += mscc
subdir-y += mti
subdir-y += netlogic
subdir-y += ni
diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi
index 2f9ef56..5bf77b6 100644
--- a/arch/mips/boot/dts/brcm/bcm7125.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi
@@ -198,6 +198,13 @@
status = "disabled";
};
+ watchdog: watchdog@4067e8 {
+ clocks = <&upg_clk>;
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0x4067e8 0x14>;
+ status = "disabled";
+ };
+
upg_gio: gpio@406700 {
compatible = "brcm,brcmstb-gpio";
reg = <0x406700 0x80>;
diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi
index 02e426f..2afa0da 100644
--- a/arch/mips/boot/dts/brcm/bcm7346.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi
@@ -233,6 +233,13 @@
status = "disabled";
};
+ watchdog: watchdog@4067e8 {
+ clocks = <&upg_clk>;
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0x4067e8 0x14>;
+ status = "disabled";
+ };
+
aon_pm_l2_intc: interrupt-controller@408440 {
compatible = "brcm,l2-intc";
reg = <0x408440 0x30>;
@@ -243,6 +250,17 @@
brcm,irq-can-wake;
};
+ aon_ctrl: syscon@408000 {
+ compatible = "brcm,brcmstb-aon-ctrl";
+ reg = <0x408000 0x100>, <0x408200 0x200>;
+ reg-names = "aon-ctrl", "aon-sram";
+ };
+
+ timers: timer@4067c0 {
+ compatible = "brcm,brcmstb-timers";
+ reg = <0x4067c0 0x40>;
+ };
+
upg_gio: gpio@406700 {
compatible = "brcm,brcmstb-gpio";
reg = <0x406700 0x60>;
@@ -483,5 +501,49 @@
interrupt-names = "mspi_done";
status = "disabled";
};
+
+ waketimer: waketimer@408e80 {
+ compatible = "brcm,brcmstb-waketimer";
+ reg = <0x408e80 0x14>;
+ interrupts = <0x3>;
+ interrupt-parent = <&aon_pm_l2_intc>;
+ interrupt-names = "timer";
+ clocks = <&upg_clk>;
+ status = "disabled";
+ };
+ };
+
+ memory_controllers {
+ compatible = "simple-bus";
+ ranges = <0x0 0x103b0000 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memory-controller@0 {
+ compatible = "brcm,brcmstb-memc", "simple-bus";
+ ranges = <0x0 0x0 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memc-arb@1000 {
+ compatible = "brcm,brcmstb-memc-arb";
+ reg = <0x1000 0x248>;
+ };
+
+ memc-ddr@2000 {
+ compatible = "brcm,brcmstb-memc-ddr";
+ reg = <0x2000 0x300>;
+ };
+
+ ddr-phy@6000 {
+ compatible = "brcm,brcmstb-ddr-phy";
+ reg = <0x6000 0xc8>;
+ };
+
+ shimphy@8000 {
+ compatible = "brcm,brcmstb-ddr-shimphy";
+ reg = <0x8000 0x13c>;
+ };
+ };
};
};
diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi
index 1089d6e..6375fc7 100644
--- a/arch/mips/boot/dts/brcm/bcm7358.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi
@@ -217,6 +217,13 @@
status = "disabled";
};
+ watchdog: watchdog@4066a8 {
+ clocks = <&upg_clk>;
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0x4066a8 0x14>;
+ status = "disabled";
+ };
+
aon_pm_l2_intc: interrupt-controller@408240 {
compatible = "brcm,l2-intc";
reg = <0x408240 0x30>;
@@ -362,5 +369,15 @@
interrupt-names = "mspi_done";
status = "disabled";
};
+
+ waketimer: waketimer@408e80 {
+ compatible = "brcm,brcmstb-waketimer";
+ reg = <0x408e80 0x14>;
+ interrupts = <0x3>;
+ interrupt-parent = <&aon_pm_l2_intc>;
+ interrupt-names = "timer";
+ clocks = <&upg_clk>;
+ status = "disabled";
+ };
};
};
diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi
index 4b87ebe..a57cace 100644
--- a/arch/mips/boot/dts/brcm/bcm7360.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi
@@ -209,6 +209,13 @@
status = "disabled";
};
+ watchdog: watchdog@4066a8 {
+ clocks = <&upg_clk>;
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0x4066a8 0x14>;
+ status = "disabled";
+ };
+
aon_pm_l2_intc: interrupt-controller@408440 {
compatible = "brcm,l2-intc";
reg = <0x408440 0x30>;
@@ -219,6 +226,17 @@
brcm,irq-can-wake;
};
+ aon_ctrl: syscon@408000 {
+ compatible = "brcm,brcmstb-aon-ctrl";
+ reg = <0x408000 0x100>, <0x408200 0x200>;
+ reg-names = "aon-ctrl", "aon-sram";
+ };
+
+ timers: timer@406680 {
+ compatible = "brcm,brcmstb-timers";
+ reg = <0x406680 0x40>;
+ };
+
upg_gio: gpio@406500 {
compatible = "brcm,brcmstb-gpio";
reg = <0x406500 0xa0>;
@@ -402,5 +420,49 @@
interrupt-names = "mspi_done";
status = "disabled";
};
+
+ waketimer: waketimer@408e80 {
+ compatible = "brcm,brcmstb-waketimer";
+ reg = <0x408e80 0x14>;
+ interrupts = <0x3>;
+ interrupt-parent = <&aon_pm_l2_intc>;
+ interrupt-names = "timer";
+ clocks = <&upg_clk>;
+ status = "disabled";
+ };
+ };
+
+ memory_controllers {
+ compatible = "simple-bus";
+ ranges = <0x0 0x103b0000 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memory-controller@0 {
+ compatible = "brcm,brcmstb-memc", "simple-bus";
+ ranges = <0x0 0x0 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memc-arb@1000 {
+ compatible = "brcm,brcmstb-memc-arb";
+ reg = <0x1000 0x248>;
+ };
+
+ memc-ddr@2000 {
+ compatible = "brcm,brcmstb-memc-ddr";
+ reg = <0x2000 0x300>;
+ };
+
+ ddr-phy@6000 {
+ compatible = "brcm,brcmstb-ddr-phy";
+ reg = <0x6000 0xc8>;
+ };
+
+ shimphy@8000 {
+ compatible = "brcm,brcmstb-ddr-shimphy";
+ reg = <0x8000 0x13c>;
+ };
+ };
};
};
diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi
index ca657df..728b9e9 100644
--- a/arch/mips/boot/dts/brcm/bcm7362.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi
@@ -205,6 +205,13 @@
status = "disabled";
};
+ watchdog: watchdog@4066a8 {
+ clocks = <&upg_clk>;
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0x4066a8 0x14>;
+ status = "disabled";
+ };
+
aon_pm_l2_intc: interrupt-controller@408440 {
compatible = "brcm,l2-intc";
reg = <0x408440 0x30>;
@@ -215,6 +222,17 @@
brcm,irq-can-wake;
};
+ aon_ctrl: syscon@408000 {
+ compatible = "brcm,brcmstb-aon-ctrl";
+ reg = <0x408000 0x100>, <0x408200 0x200>;
+ reg-names = "aon-ctrl", "aon-sram";
+ };
+
+ timers: timer@406680 {
+ compatible = "brcm,brcmstb-timers";
+ reg = <0x406680 0x40>;
+ };
+
upg_gio: gpio@406500 {
compatible = "brcm,brcmstb-gpio";
reg = <0x406500 0xa0>;
@@ -398,5 +416,49 @@
interrupt-names = "mspi_done";
status = "disabled";
};
+
+ waketimer: waketimer@408e80 {
+ compatible = "brcm,brcmstb-waketimer";
+ reg = <0x408e80 0x14>;
+ interrupts = <0x3>;
+ interrupt-parent = <&aon_pm_l2_intc>;
+ interrupt-names = "timer";
+ clocks = <&upg_clk>;
+ status = "disabled";
+ };
+ };
+
+ memory_controllers {
+ compatible = "simple-bus";
+ ranges = <0x0 0x103b0000 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memory-controller@0 {
+ compatible = "brcm,brcmstb-memc", "simple-bus";
+ ranges = <0x0 0x0 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memc-arb@1000 {
+ compatible = "brcm,brcmstb-memc-arb";
+ reg = <0x1000 0x248>;
+ };
+
+ memc-ddr@2000 {
+ compatible = "brcm,brcmstb-memc-ddr";
+ reg = <0x2000 0x300>;
+ };
+
+ ddr-phy@6000 {
+ compatible = "brcm,brcmstb-ddr-phy";
+ reg = <0x6000 0xc8>;
+ };
+
+ shimphy@8000 {
+ compatible = "brcm,brcmstb-ddr-shimphy";
+ reg = <0x8000 0x13c>;
+ };
+ };
};
};
diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi
index d262e11..9540c27 100644
--- a/arch/mips/boot/dts/brcm/bcm7420.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi
@@ -214,6 +214,13 @@
status = "disabled";
};
+ watchdog: watchdog@4067e8 {
+ clocks = <&upg_clk>;
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0x4067e8 0x14>;
+ status = "disabled";
+ };
+
upg_gio: gpio@406700 {
compatible = "brcm,brcmstb-gpio";
reg = <0x406700 0x80>;
diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi
index e4fb9b6..410e61e 100644
--- a/arch/mips/boot/dts/brcm/bcm7425.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi
@@ -232,6 +232,13 @@
status = "disabled";
};
+ watchdog: watchdog@4067e8 {
+ clocks = <&upg_clk>;
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0x4067e8 0x14>;
+ status = "disabled";
+ };
+
aon_pm_l2_intc: interrupt-controller@408440 {
compatible = "brcm,l2-intc";
reg = <0x408440 0x30>;
@@ -242,6 +249,17 @@
brcm,irq-can-wake;
};
+ aon_ctrl: syscon@408000 {
+ compatible = "brcm,brcmstb-aon-ctrl";
+ reg = <0x408000 0x100>, <0x408200 0x200>;
+ reg-names = "aon-ctrl", "aon-sram";
+ };
+
+ timers: timer@4067c0 {
+ compatible = "brcm,brcmstb-timers";
+ reg = <0x4067c0 0x40>;
+ };
+
upg_gio: gpio@406700 {
compatible = "brcm,brcmstb-gpio";
reg = <0x406700 0x80>;
@@ -494,5 +512,76 @@
interrupt-names = "mspi_done";
status = "disabled";
};
+
+ waketimer: waketimer@409580 {
+ compatible = "brcm,brcmstb-waketimer";
+ reg = <0x409580 0x14>;
+ interrupts = <0x3>;
+ interrupt-parent = <&aon_pm_l2_intc>;
+ interrupt-names = "timer";
+ clocks = <&upg_clk>;
+ status = "disabled";
+ };
+ };
+
+ memory_controllers {
+ compatible = "simple-bus";
+ ranges = <0x0 0x103b0000 0x1a000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memory-controller@0 {
+ compatible = "brcm,brcmstb-memc", "simple-bus";
+ ranges = <0x0 0x0 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memc-arb@1000 {
+ compatible = "brcm,brcmstb-memc-arb";
+ reg = <0x1000 0x248>;
+ };
+
+ memc-ddr@2000 {
+ compatible = "brcm,brcmstb-memc-ddr";
+ reg = <0x2000 0x300>;
+ };
+
+ ddr-phy@6000 {
+ compatible = "brcm,brcmstb-ddr-phy";
+ reg = <0x6000 0xc8>;
+ };
+
+ shimphy@8000 {
+ compatible = "brcm,brcmstb-ddr-shimphy";
+ reg = <0x8000 0x13c>;
+ };
+ };
+
+ memory-controller@1 {
+ compatible = "brcm,brcmstb-memc", "simple-bus";
+ ranges = <0x0 0x10000 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memc-arb@1000 {
+ compatible = "brcm,brcmstb-memc-arb";
+ reg = <0x1000 0x248>;
+ };
+
+ memc-ddr@2000 {
+ compatible = "brcm,brcmstb-memc-ddr";
+ reg = <0x2000 0x300>;
+ };
+
+ ddr-phy@6000 {
+ compatible = "brcm,brcmstb-ddr-phy";
+ reg = <0x6000 0xc8>;
+ };
+
+ shimphy@8000 {
+ compatible = "brcm,brcmstb-ddr-shimphy";
+ reg = <0x8000 0x13c>;
+ };
+ };
};
};
diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi
index 1484e89..8398b7f 100644
--- a/arch/mips/boot/dts/brcm/bcm7435.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi
@@ -247,6 +247,13 @@
status = "disabled";
};
+ watchdog: watchdog@4067e8 {
+ clocks = <&upg_clk>;
+ compatible = "brcm,bcm7038-wdt";
+ reg = <0x4067e8 0x14>;
+ status = "disabled";
+ };
+
aon_pm_l2_intc: interrupt-controller@408440 {
compatible = "brcm,l2-intc";
reg = <0x408440 0x30>;
@@ -257,6 +264,17 @@
brcm,irq-can-wake;
};
+ aon_ctrl: syscon@408000 {
+ compatible = "brcm,brcmstb-aon-ctrl";
+ reg = <0x408000 0x100>, <0x408200 0x200>;
+ reg-names = "aon-ctrl", "aon-sram";
+ };
+
+ timers: timer@4067c0 {
+ compatible = "brcm,brcmstb-timers";
+ reg = <0x4067c0 0x40>;
+ };
+
upg_gio: gpio@406700 {
compatible = "brcm,brcmstb-gpio";
reg = <0x406700 0x80>;
@@ -509,5 +527,76 @@
interrupt-names = "mspi_done";
status = "disabled";
};
+
+ waketimer: waketimer@409580 {
+ compatible = "brcm,brcmstb-waketimer";
+ reg = <0x409580 0x14>;
+ interrupts = <0x3>;
+ interrupt-parent = <&aon_pm_l2_intc>;
+ interrupt-names = "timer";
+ clocks = <&upg_clk>;
+ status = "disabled";
+ };
+ };
+
+ memory_controllers {
+ compatible = "simple-bus";
+ ranges = <0x0 0x103b0000 0x1a000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memory-controller@0 {
+ compatible = "brcm,brcmstb-memc", "simple-bus";
+ ranges = <0x0 0x0 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memc-arb@1000 {
+ compatible = "brcm,brcmstb-memc-arb";
+ reg = <0x1000 0x248>;
+ };
+
+ memc-ddr@2000 {
+ compatible = "brcm,brcmstb-memc-ddr";
+ reg = <0x2000 0x300>;
+ };
+
+ ddr-phy@6000 {
+ compatible = "brcm,brcmstb-ddr-phy";
+ reg = <0x6000 0xc8>;
+ };
+
+ shimphy@8000 {
+ compatible = "brcm,brcmstb-ddr-shimphy";
+ reg = <0x8000 0x13c>;
+ };
+ };
+
+ memory-controller@1 {
+ compatible = "brcm,brcmstb-memc", "simple-bus";
+ ranges = <0x0 0x10000 0xa000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memc-arb@1000 {
+ compatible = "brcm,brcmstb-memc-arb";
+ reg = <0x1000 0x248>;
+ };
+
+ memc-ddr@2000 {
+ compatible = "brcm,brcmstb-memc-ddr";
+ reg = <0x2000 0x300>;
+ };
+
+ ddr-phy@6000 {
+ compatible = "brcm,brcmstb-ddr-phy";
+ reg = <0x6000 0xc8>;
+ };
+
+ shimphy@8000 {
+ compatible = "brcm,brcmstb-ddr-shimphy";
+ reg = <0x8000 0x13c>;
+ };
+ };
};
};
diff --git a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
index 7f59ea2..79e9769 100644
--- a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
@@ -50,6 +50,10 @@
status = "okay";
};
+&watchdog {
+ status = "okay";
+};
+
/* FIXME: USB is wonky; disable it for now */
&ehci0 {
status = "disabled";
diff --git a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
index 9e7d522..28370ff 100644
--- a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
@@ -59,6 +59,10 @@
status = "okay";
};
+&watchdog {
+ status = "okay";
+};
+
&enet0 {
status = "okay";
};
@@ -114,3 +118,7 @@
&mspi {
status = "okay";
};
+
+&waketimer {
+ status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97358svmb.dts b/arch/mips/boot/dts/brcm/bcm97358svmb.dts
index 708207a..41c1b51 100644
--- a/arch/mips/boot/dts/brcm/bcm97358svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97358svmb.dts
@@ -55,6 +55,10 @@
status = "okay";
};
+&watchdog {
+ status = "okay";
+};
+
&enet0 {
status = "okay";
};
@@ -106,3 +110,7 @@
&mspi {
status = "okay";
};
+
+&waketimer {
+ status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97360svmb.dts b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
index 73c6dc9..9f6c6c9 100644
--- a/arch/mips/boot/dts/brcm/bcm97360svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
@@ -50,6 +50,10 @@
status = "okay";
};
+&watchdog {
+ status = "okay";
+};
+
&enet0 {
status = "okay";
};
@@ -109,3 +113,7 @@
&mspi {
status = "okay";
};
+
+&waketimer {
+ status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97362svmb.dts b/arch/mips/boot/dts/brcm/bcm97362svmb.dts
index 37bacfd..df8b755 100644
--- a/arch/mips/boot/dts/brcm/bcm97362svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97362svmb.dts
@@ -47,6 +47,10 @@
status = "okay";
};
+&watchdog {
+ status = "okay";
+};
+
&enet0 {
status = "okay";
};
@@ -78,3 +82,7 @@
&mspi {
status = "okay";
};
+
+&waketimer {
+ status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97420c.dts b/arch/mips/boot/dts/brcm/bcm97420c.dts
index f96241e..086faea 100644
--- a/arch/mips/boot/dts/brcm/bcm97420c.dts
+++ b/arch/mips/boot/dts/brcm/bcm97420c.dts
@@ -60,6 +60,10 @@
status = "okay";
};
+&watchdog {
+ status = "okay";
+};
+
/* FIXME: MAC driver comes up but cannot attach to PHY */
&enet0 {
status = "disabled";
diff --git a/arch/mips/boot/dts/brcm/bcm97425svmb.dts b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
index ce762c7..0ed2221 100644
--- a/arch/mips/boot/dts/brcm/bcm97425svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
@@ -61,6 +61,10 @@
status = "okay";
};
+&watchdog {
+ status = "okay";
+};
+
&enet0 {
status = "okay";
};
@@ -144,3 +148,7 @@
&mspi {
status = "okay";
};
+
+&waketimer {
+ status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97435svmb.dts b/arch/mips/boot/dts/brcm/bcm97435svmb.dts
index d4dd31a..2c145a8 100644
--- a/arch/mips/boot/dts/brcm/bcm97435svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97435svmb.dts
@@ -61,6 +61,10 @@
status = "okay";
};
+&watchdog {
+ status = "okay";
+};
+
&enet0 {
status = "okay";
};
@@ -120,3 +124,7 @@
&mspi {
status = "okay";
};
+
+&waketimer {
+ status = "okay";
+};
diff --git a/arch/mips/boot/dts/img/boston.dts b/arch/mips/boot/dts/img/boston.dts
index 2cd49b6..1bd1054 100644
--- a/arch/mips/boot/dts/img/boston.dts
+++ b/arch/mips/boot/dts/img/boston.dts
@@ -157,7 +157,7 @@
#address-cells = <1>;
#size-cells = <0>;
- rtc@0x68 {
+ rtc@68 {
compatible = "st,m41t81s";
reg = <0x68>;
};
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index a4cc522..3807859 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -110,22 +110,22 @@
reg = <0x0 0x0 0x0 0x800000>;
};
- partition@0x800000 {
+ partition@800000 {
label = "u-boot";
reg = <0x0 0x800000 0x0 0x200000>;
};
- partition@0xa00000 {
+ partition@a00000 {
label = "u-boot-env";
reg = <0x0 0xa00000 0x0 0x200000>;
};
- partition@0xc00000 {
+ partition@c00000 {
label = "boot";
reg = <0x0 0xc00000 0x0 0x4000000>;
};
- partition@0x8c00000 {
+ partition@4c00000 {
label = "system";
reg = <0x0 0x4c00000 0x1 0xfb400000>;
};
diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile
new file mode 100644
index 0000000..c511645
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/Makefile
@@ -0,0 +1,3 @@
+dtb-$(CONFIG_LEGACY_BOARD_OCELOT) += ocelot_pcb123.dtb
+
+obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
new file mode 100644
index 0000000..dd239ca
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/ {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "mscc,ocelot";
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "mips,mips24KEc";
+ device_type = "cpu";
+ clocks = <&cpu_clk>;
+ reg = <0>;
+ };
+ };
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ cpuintc: interrupt-controller {
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "mti,cpu-interrupt-controller";
+ };
+
+ cpu_clk: cpu-clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <500000000>;
+ };
+
+ ahb_clk: ahb-clk {
+ compatible = "fixed-factor-clock";
+ #clock-cells = <0>;
+ clocks = <&cpu_clk>;
+ clock-div = <2>;
+ clock-mult = <1>;
+ };
+
+ ahb@70000000 {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x70000000 0x2000000>;
+
+ interrupt-parent = <&intc>;
+
+ cpu_ctrl: syscon@0 {
+ compatible = "mscc,ocelot-cpu-syscon", "syscon";
+ reg = <0x0 0x2c>;
+ };
+
+ intc: interrupt-controller@70 {
+ compatible = "mscc,ocelot-icpu-intr";
+ reg = <0x70 0x70>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <2>;
+ };
+
+ uart0: serial@100000 {
+ pinctrl-0 = <&uart_pins>;
+ pinctrl-names = "default";
+ compatible = "ns16550a";
+ reg = <0x100000 0x20>;
+ interrupts = <6>;
+ clocks = <&ahb_clk>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+
+ status = "disabled";
+ };
+
+ uart2: serial@100800 {
+ pinctrl-0 = <&uart2_pins>;
+ pinctrl-names = "default";
+ compatible = "ns16550a";
+ reg = <0x100800 0x20>;
+ interrupts = <7>;
+ clocks = <&ahb_clk>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+
+ status = "disabled";
+ };
+
+ reset@1070008 {
+ compatible = "mscc,ocelot-chip-reset";
+ reg = <0x1070008 0x4>;
+ };
+
+ gpio: pinctrl@1070034 {
+ compatible = "mscc,ocelot-pinctrl";
+ reg = <0x1070034 0x68>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&gpio 0 0 22>;
+
+ uart_pins: uart-pins {
+ pins = "GPIO_6", "GPIO_7";
+ function = "uart";
+ };
+
+ uart2_pins: uart2-pins {
+ pins = "GPIO_12", "GPIO_13";
+ function = "uart2";
+ };
+ };
+ };
+};
diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
new file mode 100644
index 0000000..29d6414
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/dts-v1/;
+
+#include "ocelot.dtsi"
+
+/ {
+ compatible = "mscc,ocelot-pcb123", "mscc,ocelot";
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x0 0x0e000000>;
+ };
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&uart2 {
+ status = "okay";
+};
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index d99f524..b3aec10 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2271,7 +2271,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
parent_irq = irq_of_parse_and_map(ciu_node, 0);
if (!parent_irq) {
- pr_err("ERROR: Couldn't acquire parent_irq for %s\n.",
+ pr_err("ERROR: Couldn't acquire parent_irq for %s\n",
ciu_node->name);
return -EINVAL;
}
@@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
addr = of_get_address(ciu_node, 0, NULL, NULL);
if (!addr) {
- pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name);
+ pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name);
return -EINVAL;
}
host_data->raw_reg = (u64)phys_to_virt(
@@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
addr = of_get_address(ciu_node, 1, NULL, NULL);
if (!addr) {
- pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name);
+ pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name);
return -EINVAL;
}
host_data->en_reg = (u64)phys_to_virt(
@@ -2299,7 +2299,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
if (r) {
- pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.",
+ pr_err("ERROR: Couldn't read cavium,max-bits from %s\n",
ciu_node->name);
return r;
}
@@ -2309,7 +2309,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
&octeon_irq_domain_cib_ops,
host_data);
if (!cib_domain) {
- pr_err("ERROR: Couldn't irq_domain_add_linear()\n.");
+ pr_err("ERROR: Couldn't irq_domain_add_linear()\n");
return -ENOMEM;
}
diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig
index 3cefa6b..47aecb8 100644
--- a/arch/mips/configs/bmips_stb_defconfig
+++ b/arch/mips/configs/bmips_stb_defconfig
@@ -72,6 +72,7 @@ CONFIG_USB_EHCI_HCD_PLATFORM=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
+CONFIG_SOC_BRCMSTB=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/generic/32r6.config
index ca606e7..1a5d5ea 100644
--- a/arch/mips/configs/generic/32r6.config
+++ b/arch/mips/configs/generic/32r6.config
@@ -1,2 +1,4 @@
CONFIG_CPU_MIPS32_R6=y
CONFIG_HIGHMEM=y
+
+CONFIG_CRYPTO_CRC32_MIPS=y
diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/generic/64r6.config
index 7cac033..5dd8e85 100644
--- a/arch/mips/configs/generic/64r6.config
+++ b/arch/mips/configs/generic/64r6.config
@@ -2,3 +2,5 @@ CONFIG_CPU_MIPS64_R6=y
CONFIG_64BIT=y
CONFIG_MIPS32_O32=y
CONFIG_MIPS32_N32=y
+
+CONFIG_CRYPTO_CRC32_MIPS=y
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config
new file mode 100644
index 0000000..aa81576
--- /dev/null
+++ b/arch/mips/configs/generic/board-ocelot.config
@@ -0,0 +1,35 @@
+# require CONFIG_CPU_MIPS32_R2=y
+
+CONFIG_LEGACY_BOARD_OCELOT=y
+
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_PLATFORM=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_MTD_UBI=y
+
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+
+CONFIG_GPIO_SYSFS=y
+
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MUX=y
+
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_SPIDEV=y
+
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_OCELOT_RESET=y
+
+CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
new file mode 100644
index 0000000..e07aca5
--- /dev/null
+++ b/arch/mips/crypto/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for MIPS crypto files..
+#
+
+obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
new file mode 100644
index 0000000..7d1d242
--- /dev/null
+++ b/arch/mips/crypto/crc32-mips.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
+ *
+ * Module based on arm64/crypto/crc32-arm.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2018 MIPS Tech, LLC
+ */
+
+#include <linux/unaligned/access_ok.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <asm/mipsregs.h>
+
+#include <crypto/internal/hash.h>
+
+enum crc_op_size {
+ b, h, w, d,
+};
+
+enum crc_type {
+ crc32,
+ crc32c,
+};
+
+#ifndef TOOLCHAIN_SUPPORTS_CRC
+#define _ASM_MACRO_CRC32(OP, SZ, TYPE) \
+_ASM_MACRO_3R(OP, rt, rs, rt2, \
+ ".ifnc \\rt, \\rt2\n\t" \
+ ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \
+ ".endif\n\t" \
+ _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \
+ ((SZ) << 6) | ((TYPE) << 8)) \
+ _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \
+ ((SZ) << 14) | ((TYPE) << 3)))
+_ASM_MACRO_CRC32(crc32b, 0, 0);
+_ASM_MACRO_CRC32(crc32h, 1, 0);
+_ASM_MACRO_CRC32(crc32w, 2, 0);
+_ASM_MACRO_CRC32(crc32d, 3, 0);
+_ASM_MACRO_CRC32(crc32cb, 0, 1);
+_ASM_MACRO_CRC32(crc32ch, 1, 1);
+_ASM_MACRO_CRC32(crc32cw, 2, 1);
+_ASM_MACRO_CRC32(crc32cd, 3, 1);
+#define _ASM_SET_CRC ""
+#else /* !TOOLCHAIN_SUPPORTS_CRC */
+#define _ASM_SET_CRC ".set\tcrc\n\t"
+#endif
+
+#define _CRC32(crc, value, size, type) \
+do { \
+ __asm__ __volatile__( \
+ ".set push\n\t" \
+ _ASM_SET_CRC \
+ #type #size " %0, %1, %0\n\t" \
+ ".set pop" \
+ : "+r" (crc) \
+ : "r" (value)); \
+} while (0)
+
+#define CRC32(crc, value, size) \
+ _CRC32(crc, value, size, crc32)
+
+#define CRC32C(crc, value, size) \
+ _CRC32(crc, value, size, crc32c)
+
+static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+ u32 crc = crc_;
+
+#ifdef CONFIG_64BIT
+ while (len >= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32(crc, value, d);
+ p += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+ while (len >= sizeof(u32)) {
+#endif
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32(crc, value, b);
+ }
+
+ return crc;
+}
+
+static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+ u32 crc = crc_;
+
+#ifdef CONFIG_64BIT
+ while (len >= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32C(crc, value, d);
+ p += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+ while (len >= sizeof(u32)) {
+#endif
+ u32 value = get_unaligned_le32(p);
+
+ CRC32C(crc, value, w);
+ p += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32C(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32C(crc, value, b);
+ }
+ return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE 1
+#define CHKSUM_DIGEST_SIZE 4
+
+struct chksum_ctx {
+ u32 key;
+};
+
+struct chksum_desc_ctx {
+ u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = mctx->key;
+
+ return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+ if (keylen != sizeof(mctx->key)) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+ mctx->key = get_unaligned_le32(key);
+ return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+ unsigned int length)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = crc32_mips_le_hw(ctx->crc, data, length);
+ return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data,
+ unsigned int length)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length);
+ return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ put_unaligned_le32(ctx->crc, out);
+ return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ put_unaligned_le32(~ctx->crc, out);
+ return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+ put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out);
+ return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+ put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out);
+ return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int length, u8 *out)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+ return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int length, u8 *out)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+ return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int chksum_cra_init(struct crypto_tfm *tfm)
+{
+ struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+ mctx->key = ~0;
+ return 0;
+}
+
+static struct shash_alg crc32_alg = {
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .setkey = chksum_setkey,
+ .init = chksum_init,
+ .update = chksum_update,
+ .final = chksum_final,
+ .finup = chksum_finup,
+ .digest = chksum_digest,
+ .descsize = sizeof(struct chksum_desc_ctx),
+ .base = {
+ .cra_name = "crc32",
+ .cra_driver_name = "crc32-mips-hw",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+ .cra_alignmask = 0,
+ .cra_ctxsize = sizeof(struct chksum_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = chksum_cra_init,
+ }
+};
+
+static struct shash_alg crc32c_alg = {
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .setkey = chksum_setkey,
+ .init = chksum_init,
+ .update = chksumc_update,
+ .final = chksumc_final,
+ .finup = chksumc_finup,
+ .digest = chksumc_digest,
+ .descsize = sizeof(struct chksum_desc_ctx),
+ .base = {
+ .cra_name = "crc32c",
+ .cra_driver_name = "crc32c-mips-hw",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+ .cra_alignmask = 0,
+ .cra_ctxsize = sizeof(struct chksum_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = chksum_cra_init,
+ }
+};
+
+static int __init crc32_mod_init(void)
+{
+ int err;
+
+ err = crypto_register_shash(&crc32_alg);
+
+ if (err)
+ return err;
+
+ err = crypto_register_shash(&crc32c_alg);
+
+ if (err) {
+ crypto_unregister_shash(&crc32_alg);
+ return err;
+ }
+
+ return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+ crypto_unregister_shash(&crc32_alg);
+ crypto_unregister_shash(&crc32c_alg);
+}
+
+MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com");
+MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
+MODULE_LICENSE("GPL v2");
+
+module_cpu_feature_match(MIPS_CRC32, crc32_mod_init);
+module_exit(crc32_mod_exit);
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
index 2ff3b17..ba9b2c8c 100644
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -27,6 +27,22 @@ config LEGACY_BOARD_SEAD3
Enable this to include support for booting on MIPS SEAD-3 FPGA-based
development boards, which boot using a legacy boot protocol.
+comment "MSCC Ocelot doesn't work with SEAD3 enabled"
+ depends on LEGACY_BOARD_SEAD3
+
+config LEGACY_BOARD_OCELOT
+ bool "Support MSCC Ocelot boards"
+ depends on LEGACY_BOARD_SEAD3=n
+ select LEGACY_BOARDS
+ select MSCC_OCELOT
+
+config MSCC_OCELOT
+ bool
+ select GPIOLIB
+ select MSCC_OCELOT_IRQ
+ select SYS_HAS_EARLY_PRINTK
+ select USE_GENERIC_EARLY_PRINTK_8250
+
comment "FIT/UHI Boards"
config FIT_IMAGE_FDT_BOSTON
diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile
index 5c31e0c..d03a36f 100644
--- a/arch/mips/generic/Makefile
+++ b/arch/mips/generic/Makefile
@@ -14,5 +14,6 @@ obj-y += proc.o
obj-$(CONFIG_YAMON_DT_SHIM) += yamon-dt.o
obj-$(CONFIG_LEGACY_BOARD_SEAD3) += board-sead3.o
+obj-$(CONFIG_LEGACY_BOARD_OCELOT) += board-ocelot.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_VIRT_BOARD_RANCHU) += board-ranchu.o
diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c
new file mode 100644
index 0000000..06d92fb
--- /dev/null
+++ b/arch/mips/generic/board-ocelot.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi MIPS SoC support
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include <asm/machine.h>
+#include <asm/prom.h>
+
+#define DEVCPU_GCB_CHIP_REGS_CHIP_ID 0x71070000
+#define CHIP_ID_PART_ID GENMASK(27, 12)
+
+#define OCELOT_PART_ID (0x7514 << 12)
+
+#define UART_UART 0x70100000
+
+static __init bool ocelot_detect(void)
+{
+ u32 rev;
+ int idx;
+
+ /* Look for the TLB entry set up by redboot before trying to use it */
+ write_c0_entryhi(DEVCPU_GCB_CHIP_REGS_CHIP_ID);
+ mtc0_tlbw_hazard();
+ tlb_probe();
+ tlb_probe_hazard();
+ idx = read_c0_index();
+ if (idx < 0)
+ return 0;
+
+ /* A TLB entry exists, lets assume its usable and check the CHIP ID */
+ rev = __raw_readl((void __iomem *)DEVCPU_GCB_CHIP_REGS_CHIP_ID);
+
+ if ((rev & CHIP_ID_PART_ID) != OCELOT_PART_ID)
+ return 0;
+
+ /* Copy command line from bootloader early for Initrd detection */
+ if (fw_arg0 < 10 && (fw_arg1 & 0xFFF00000) == 0x80000000) {
+ unsigned int prom_argc = fw_arg0;
+ const char **prom_argv = (const char **)fw_arg1;
+
+ if (prom_argc > 1 && strlen(prom_argv[1]) > 0)
+ /* ignore all built-in args if any f/w args given */
+ strcpy(arcs_cmdline, prom_argv[1]);
+ }
+
+ return 1;
+}
+
+static void __init ocelot_earlyprintk_init(void)
+{
+ void __iomem *uart_base;
+
+ uart_base = ioremap_nocache(UART_UART, 0x20);
+ setup_8250_early_printk_port((unsigned long)uart_base, 2, 50000);
+}
+
+static void __init ocelot_late_init(void)
+{
+ ocelot_earlyprintk_init();
+}
+
+static __init const void *ocelot_fixup_fdt(const void *fdt,
+ const void *match_data)
+{
+ /* This has to be done so late because ioremap needs to work */
+ late_time_init = ocelot_late_init;
+
+ return fdt;
+}
+
+extern char __dtb_ocelot_pcb123_begin[];
+
+MIPS_MACHINE(ocelot) = {
+ .fdt = __dtb_ocelot_pcb123_begin,
+ .fixup_fdt = ocelot_fixup_fdt,
+ .detect = ocelot_detect,
+};
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 721b698..5f74590 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -11,6 +11,7 @@
#include <asm/cpu.h>
#include <asm/cpu-info.h>
+#include <asm/isa-rev.h>
#include <cpu-feature-overrides.h>
/*
@@ -493,7 +494,7 @@
# define cpu_has_perf (cpu_data[0].options & MIPS_CPU_PERF)
#endif
-#if defined(CONFIG_SMP) && defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#if defined(CONFIG_SMP) && (MIPS_ISA_REV >= 6)
/*
* Some systems share FTLB RAMs between threads within a core (siblings in
* kernel parlance). This means that FTLB entries may become invalid at almost
@@ -525,7 +526,7 @@
# define cpu_has_shared_ftlb_entries \
(current_cpu_data.options & MIPS_CPU_SHARED_FTLB_ENTRIES)
# endif
-#endif /* SMP && __mips_isa_rev >= 6 */
+#endif /* SMP && MIPS_ISA_REV >= 6 */
#ifndef cpu_has_shared_ftlb_ram
# define cpu_has_shared_ftlb_ram 0
diff --git a/arch/mips/include/asm/isa-rev.h b/arch/mips/include/asm/isa-rev.h
new file mode 100644
index 0000000..683ea34
--- /dev/null
+++ b/arch/mips/include/asm/isa-rev.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 MIPS Tech, LLC
+ * Author: Matt Redfearn <matt.redfearn@mips.com>
+ */
+
+#ifndef __MIPS_ASM_ISA_REV_H__
+#define __MIPS_ASM_ISA_REV_H__
+
+/*
+ * The ISA revision level. This is 0 for MIPS I to V and N for
+ * MIPS{32,64}rN.
+ */
+
+/* If the compiler has defined __mips_isa_rev, believe it. */
+#ifdef __mips_isa_rev
+#define MIPS_ISA_REV __mips_isa_rev
+#else
+/* The compiler hasn't defined the isa rev so assume it's MIPS I - V (0) */
+#define MIPS_ISA_REV 0
+#endif
+
+
+#endif /* __MIPS_ASM_ISA_REV_H__ */
diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h
index 60b1aa0..b57e978 100644
--- a/arch/mips/include/asm/kvm_para.h
+++ b/arch/mips/include/asm/kvm_para.h
@@ -94,6 +94,11 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
}
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
#ifdef CONFIG_MIPS_PARAVIRT
static inline bool kvm_para_available(void)
{
diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
index aa3800c..d99ca86 100644
--- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
+++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
@@ -167,7 +167,7 @@
#define AR71XX_AHB_DIV_MASK 0x7
#define AR724X_PLL_REG_CPU_CONFIG 0x00
-#define AR724X_PLL_REG_PCIE_CONFIG 0x18
+#define AR724X_PLL_REG_PCIE_CONFIG 0x10
#define AR724X_PLL_FB_SHIFT 0
#define AR724X_PLL_FB_MASK 0x3ff
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 858752d..f658597 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -664,6 +664,7 @@
#define MIPS_CONF5_FRE (_ULCAST_(1) << 8)
#define MIPS_CONF5_UFE (_ULCAST_(1) << 9)
#define MIPS_CONF5_CA2 (_ULCAST_(1) << 14)
+#define MIPS_CONF5_CRCP (_ULCAST_(1) << 18)
#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27)
#define MIPS_CONF5_EVA (_ULCAST_(1) << 28)
#define MIPS_CONF5_CV (_ULCAST_(1) << 29)
diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h
index 600ad8f..a2aba4b 100644
--- a/arch/mips/include/uapi/asm/hwcap.h
+++ b/arch/mips/include/uapi/asm/hwcap.h
@@ -5,5 +5,6 @@
/* HWCAP flags */
#define HWCAP_MIPS_R6 (1 << 0)
#define HWCAP_MIPS_MSA (1 << 1)
+#define HWCAP_MIPS_CRC32 (1 << 2)
#endif /* _UAPI_ASM_HWCAP_H */
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 606e02c..3035ca4 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -50,6 +50,7 @@
#define MAP_NONBLOCK 0x20000 /* do not block on IO */
#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x80000 /* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
/*
* Flags for msync
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index cf3fd54..6b07b73 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -848,6 +848,9 @@ static inline unsigned int decode_config5(struct cpuinfo_mips *c)
if (config5 & MIPS_CONF5_CA2)
c->ases |= MIPS_ASE_MIPS16E2;
+ if (config5 & MIPS_CONF5_CRCP)
+ elf_hwcap |= HWCAP_MIPS_CRC32;
+
return config5 & MIPS_CONF_M;
}
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index 421e06d..55c3fbe 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/slab.h>
+#include <linux/suspend.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
@@ -670,6 +671,34 @@ static int cps_pm_online_cpu(unsigned int cpu)
return 0;
}
+static int cps_pm_power_notifier(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ unsigned int stat;
+
+ switch (event) {
+ case PM_SUSPEND_PREPARE:
+ stat = read_cpc_cl_stat_conf();
+ /*
+ * If we're attempting to suspend the system and power down all
+ * of the cores, the JTAG detect bit indicates that the CPC will
+ * instead put the cores into clock-off state. In this state
+ * a connected debugger can cause the CPU to attempt
+ * interactions with the powered down system. At best this will
+ * fail. At worst, it can hang the NoC, requiring a hard reset.
+ * To avoid this, just block system suspend if a JTAG probe
+ * is detected.
+ */
+ if (stat & CPC_Cx_STAT_CONF_EJTAG_PROBE) {
+ pr_warn("JTAG probe is connected - abort suspend\n");
+ return NOTIFY_BAD;
+ }
+ return NOTIFY_DONE;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int __init cps_pm_init(void)
{
/* A CM is required for all non-coherent states */
@@ -705,6 +734,8 @@ static int __init cps_pm_init(void)
pr_warn("pm-cps: no CPC, clock & power gating unavailable\n");
}
+ pm_notifier(cps_pm_power_notifier, 0);
+
return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mips/cps_pm:online",
cps_pm_online_cpu, NULL);
}
diff --git a/arch/mips/kernel/reset.c b/arch/mips/kernel/reset.c
index 7c746d3..6288780 100644
--- a/arch/mips/kernel/reset.c
+++ b/arch/mips/kernel/reset.c
@@ -13,6 +13,9 @@
#include <linux/reboot.h>
#include <linux/delay.h>
+#include <asm/compiler.h>
+#include <asm/idle.h>
+#include <asm/mipsregs.h>
#include <asm/reboot.h>
/*
@@ -26,6 +29,62 @@ void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
+static void machine_hang(void)
+{
+ /*
+ * We're hanging the system so we don't want to be interrupted anymore.
+ * Any interrupt handlers that ran would at best be useless & at worst
+ * go awry because the system isn't in a functional state.
+ */
+ local_irq_disable();
+
+ /*
+ * Mask all interrupts, giving us a better chance of remaining in the
+ * low power wait state.
+ */
+ clear_c0_status(ST0_IM);
+
+ while (true) {
+ if (cpu_has_mips_r) {
+ /*
+ * We know that the wait instruction is supported so
+ * make use of it directly, leaving interrupts
+ * disabled.
+ */
+ asm volatile(
+ ".set push\n\t"
+ ".set " MIPS_ISA_ARCH_LEVEL "\n\t"
+ "wait\n\t"
+ ".set pop");
+ } else if (cpu_wait) {
+ /*
+ * Try the cpu_wait() callback. This isn't ideal since
+ * it'll re-enable interrupts, but that ought to be
+ * harmless given that they're all masked.
+ */
+ cpu_wait();
+ local_irq_disable();
+ } else {
+ /*
+ * We're going to burn some power running round the
+ * loop, but we don't really have a choice. This isn't
+ * a path we should expect to run for long during
+ * typical use anyway.
+ */
+ }
+
+ /*
+ * In most modern MIPS CPUs interrupts will cause the wait
+ * instruction to graduate even when disabled, and in some
+ * cases even when masked. In order to prevent a timer
+ * interrupt from continuously taking us out of the low power
+ * wait state, we clear any pending timer interrupt here.
+ */
+ if (cpu_has_counter)
+ write_c0_compare(0);
+ }
+}
+
void machine_restart(char *command)
{
if (_machine_restart)
@@ -38,8 +97,7 @@ void machine_restart(char *command)
do_kernel_restart(command);
mdelay(1000);
pr_emerg("Reboot failed -- System halted\n");
- local_irq_disable();
- while (1);
+ machine_hang();
}
void machine_halt(void)
@@ -51,8 +109,7 @@ void machine_halt(void)
preempt_disable();
smp_send_stop();
#endif
- local_irq_disable();
- while (1);
+ machine_hang();
}
void machine_power_off(void)
@@ -64,6 +121,5 @@ void machine_power_off(void)
preempt_disable();
smp_send_stop();
#endif
- local_irq_disable();
- while (1);
+ machine_hang();
}
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 5f8b0a9..563188a 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -155,7 +155,8 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add
add_memory_region(start, size, BOOT_MEM_RAM);
}
-bool __init memory_region_available(phys_addr_t start, phys_addr_t size)
+static bool __init __maybe_unused memory_region_available(phys_addr_t start,
+ phys_addr_t size)
{
int i;
bool in_ram = false, free = true;
@@ -453,7 +454,7 @@ static void __init bootmem_init(void)
pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
(min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),
min_low_pfn - ARCH_PFN_OFFSET);
- } else if (min_low_pfn < ARCH_PFN_OFFSET) {
+ } else if (ARCH_PFN_OFFSET - min_low_pfn > 0UL) {
pr_info("%lu free pages won't be used\n",
ARCH_PFN_OFFSET - min_low_pfn);
}
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 44ac64d..0d3c656 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -86,7 +86,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
void __flush_dcache_page(struct page *page)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = page_mapping_file(page);
unsigned long addr;
if (mapping && !mapping_mapped(mapping)) {
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 84b7b59..400676c 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -30,7 +30,6 @@
#include <linux/hardirq.h>
#include <linux/gfp.h>
#include <linux/kcore.h>
-#include <linux/export.h>
#include <linux/initrd.h>
#include <asm/asm-offsets.h>
@@ -46,7 +45,6 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
-#include <asm/maar.h>
/*
* We have up to 8 empty zeroed pages so we can map one of the right colour
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 33d3251..2f616eb 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -24,20 +24,20 @@ EXPORT_SYMBOL(shm_align_mask);
#define MIN_GAP (128*1024*1024UL)
#define MAX_GAP ((TASK_SIZE)/6*5)
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
@@ -158,18 +158,18 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
- if (mmap_is_legacy()) {
+ if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base(random_factor);
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
index 88a2075..57154c5 100644
--- a/arch/mips/net/bpf_jit_asm.S
+++ b/arch/mips/net/bpf_jit_asm.S
@@ -11,6 +11,7 @@
*/
#include <asm/asm.h>
+#include <asm/isa-rev.h>
#include <asm/regdef.h>
#include "bpf_jit.h"
@@ -65,7 +66,7 @@ FEXPORT(sk_load_word_positive)
lw $r_A, 0(t1)
.set noreorder
#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
wsbh t0, $r_A
rotr $r_A, t0, 16
# else
@@ -92,7 +93,7 @@ FEXPORT(sk_load_half_positive)
PTR_ADDU t1, $r_skb_data, offset
lhu $r_A, 0(t1)
#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
wsbh $r_A, $r_A
# else
sll t0, $r_A, 8
@@ -170,7 +171,7 @@ FEXPORT(sk_load_byte_positive)
NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
bpf_slow_path_common(4)
#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
wsbh t0, $r_s0
jr $r_ra
rotr $r_A, t0, 16
@@ -196,7 +197,7 @@ NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
bpf_slow_path_common(2)
#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
jr $r_ra
wsbh $r_A, $r_s0
# else
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index 407f155..f6b7778 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -315,6 +315,7 @@ static int mt7620_pci_probe(struct platform_device *pdev)
break;
case MT762X_SOC_MT7628AN:
+ case MT762X_SOC_MT7688:
if (mt7628_pci_hw_init(pdev))
return -1;
break;
diff --git a/arch/mips/txx9/rbtx4927/setup.c b/arch/mips/txx9/rbtx4927/setup.c
index f5b367e..31955c1 100644
--- a/arch/mips/txx9/rbtx4927/setup.c
+++ b/arch/mips/txx9/rbtx4927/setup.c
@@ -319,7 +319,7 @@ static void __init rbtx4927_mtd_init(void)
static void __init rbtx4927_gpioled_init(void)
{
- static struct gpio_led leds[] = {
+ static const struct gpio_led leds[] = {
{ .name = "gpioled:green:0", .gpio = 0, .active_low = 1, },
{ .name = "gpioled:green:1", .gpio = 1, .active_low = 1, },
};
diff --git a/arch/mips/vdso/elf.S b/arch/mips/vdso/elf.S
index be37bbb..428a191 100644
--- a/arch/mips/vdso/elf.S
+++ b/arch/mips/vdso/elf.S
@@ -10,6 +10,8 @@
#include "vdso.h"
+#include <asm/isa-rev.h>
+
#include <linux/elfnote.h>
#include <linux/version.h>
@@ -40,11 +42,7 @@ __mips_abiflags:
.byte __mips /* isa_level */
/* isa_rev */
-#ifdef __mips_isa_rev
- .byte __mips_isa_rev
-#else
- .byte 0
-#endif
+ .byte MIPS_ISA_REV
/* gpr_size */
#ifdef __mips64
@@ -54,7 +52,7 @@ __mips_abiflags:
#endif
/* cpr1_size */
-#if (defined(__mips_isa_rev) && __mips_isa_rev >= 6) || defined(__mips64)
+#if (MIPS_ISA_REV >= 6) || defined(__mips64)
.byte 2 /* AFL_REG_64 */
#else
.byte 1 /* AFL_REG_32 */
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 7b9b20a..1240f14 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -34,8 +34,8 @@ void flush_anon_page(struct vm_area_struct *vma,
void flush_kernel_dcache_page(struct page *page);
void flush_icache_range(unsigned long start, unsigned long end);
void flush_icache_page(struct vm_area_struct *vma, struct page *page);
-#define flush_dcache_mmap_lock(mapping) spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages)
+#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
#else
#include <asm-generic/cacheflush.h>
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 55e383c..18eb9f6 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -46,9 +46,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
extern void flush_dcache_range(unsigned long start, unsigned long end);
extern void invalidate_dcache_range(unsigned long start, unsigned long end);
-#define flush_dcache_mmap_lock(mapping) \
- spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
- spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#endif /* _ASM_NIOS2_CACHEFLUSH_H */
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index 20e8620..ab88b6d 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -336,9 +336,9 @@ static int __init nios2_time_init(struct device_node *timer)
return ret;
}
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
- ts->tv_sec = mktime(2007, 1, 1, 0, 0, 0);
+ ts->tv_sec = mktime64(2007, 1, 1, 0, 0, 0);
ts->tv_nsec = 0;
}
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 87bf88e..506f6e1c 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -180,7 +180,7 @@ void flush_dcache_page(struct page *page)
if (page == ZERO_PAGE(0))
return;
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
/* Flush this page if there are aliases. */
if (mapping && !mapping_mapped(mapping)) {
@@ -215,7 +215,7 @@ void update_mmu_cache(struct vm_area_struct *vma,
if (page == ZERO_PAGE(0))
return;
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
__flush_dcache_page(mapping, page);
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index bd5ce31..0c83644 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -55,10 +55,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *page);
-#define flush_dcache_mmap_lock(mapping) \
- spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
- spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#define flush_icache_page(vma,page) do { \
flush_kernel_dcache_page(page); \
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index a056a64..870fbf8 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -26,6 +26,7 @@
#define MAP_NONBLOCK 0x20000 /* do not block on IO */
#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x80000 /* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
#define MS_SYNC 1 /* synchronous memory sync */
#define MS_ASYNC 2 /* sync memory asynchronously */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index e3b4554..a99da95 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -88,7 +88,8 @@ update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
return;
page = pfn_to_page(pfn);
- if (page_mapping(page) && test_bit(PG_dcache_dirty, &page->flags)) {
+ if (page_mapping_file(page) &&
+ test_bit(PG_dcache_dirty, &page->flags)) {
flush_kernel_dcache_page_addr(pfn_va(pfn));
clear_bit(PG_dcache_dirty, &page->flags);
} else if (parisc_requires_coherency())
@@ -304,7 +305,7 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
void flush_dcache_page(struct page *page)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = page_mapping_file(page);
struct vm_area_struct *mpnt;
unsigned long offset;
unsigned long addr, old_addr = 0;
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 8c99ebb..43b308c 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -70,12 +70,18 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr,
* Top of mmap area (just below the process stack).
*/
-static unsigned long mmap_upper_limit(void)
+/*
+ * When called from arch_get_unmapped_area(), rlim_stack will be NULL,
+ * indicating that "current" should be used instead of a passed-in
+ * value from the exec bprm as done with arch_pick_mmap_layout().
+ */
+static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
{
unsigned long stack_base;
/* Limit stack size - see setup_arg_pages() in fs/exec.c */
- stack_base = rlimit_max(RLIMIT_STACK);
+ stack_base = rlim_stack ? rlim_stack->rlim_max
+ : rlimit_max(RLIMIT_STACK);
if (stack_base > STACK_SIZE_MAX)
stack_base = STACK_SIZE_MAX;
@@ -127,7 +133,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_legacy_base;
- info.high_limit = mmap_upper_limit();
+ info.high_limit = mmap_upper_limit(NULL);
info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
info.align_offset = shared_align_offset(last_mmap, pgoff);
addr = vm_unmapped_area(&info);
@@ -250,10 +256,10 @@ static unsigned long mmap_legacy_base(void)
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
mm->mmap_legacy_base = mmap_legacy_base();
- mm->mmap_base = mmap_upper_limit();
+ mm->mmap_base = mmap_upper_limit(rlim_stack);
if (mmap_is_legacy()) {
mm->mmap_base = mm->mmap_legacy_base;
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index f7e6845..c383040 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -174,7 +174,7 @@ static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
/* we treat tod_sec as unsigned, so this can work until year 2106 */
rtc_time64_to_tm(tod_data.tod_sec, tm);
- return rtc_valid_tm(tm);
+ return 0;
}
static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ccd2556..95813df 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -141,11 +141,18 @@ AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
endif
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
+
CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
+CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)
ifeq ($(CONFIG_PPC_BOOK3S_64),y)
-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4)
-CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8)
+else
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5))
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4)
+endif
else
CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
endif
@@ -166,11 +173,11 @@ ifdef CONFIG_MPROFILE_KERNEL
endif
CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
-CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)
CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8)
+CFLAGS-$(CONFIG_POWER9_CPU) += $(call cc-option,-mcpu=power9)
# Altivec option not allowed with e500mc64 in GCC.
ifeq ($(CONFIG_ALTIVEC),y)
@@ -243,6 +250,7 @@ endif
cpu-as-$(CONFIG_4xx) += -Wa,-m405
cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec)
cpu-as-$(CONFIG_E200) += -Wa,-me200
+cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4
KBUILD_AFLAGS += $(cpu-as-y)
KBUILD_CFLAGS += $(cpu-as-y)
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
index 8626615..deb52e4 100644
--- a/arch/powerpc/boot/dts/acadia.dts
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -219,6 +219,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/adder875-redboot.dts b/arch/powerpc/boot/dts/adder875-redboot.dts
index 0839847..7f5ff41 100644
--- a/arch/powerpc/boot/dts/adder875-redboot.dts
+++ b/arch/powerpc/boot/dts/adder875-redboot.dts
@@ -178,6 +178,6 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
};
diff --git a/arch/powerpc/boot/dts/adder875-uboot.dts b/arch/powerpc/boot/dts/adder875-uboot.dts
index e4554ca..bd9f33c 100644
--- a/arch/powerpc/boot/dts/adder875-uboot.dts
+++ b/arch/powerpc/boot/dts/adder875-uboot.dts
@@ -177,6 +177,6 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
};
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index 7467792..8a7a101 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -410,6 +410,6 @@
};
chosen {
- linux,stdout-path = &UART0;
+ stdout-path = &UART0;
};
};
diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts
index 49ac36b..7124301 100644
--- a/arch/powerpc/boot/dts/amigaone.dts
+++ b/arch/powerpc/boot/dts/amigaone.dts
@@ -168,6 +168,6 @@
};
chosen {
- linux,stdout-path = "/pci@80000000/isa@7/serial@3f8";
+ stdout-path = "/pci@80000000/isa@7/serial@3f8";
};
};
diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts
index 9198745..e987b5a 100644
--- a/arch/powerpc/boot/dts/asp834x-redboot.dts
+++ b/arch/powerpc/boot/dts/asp834x-redboot.dts
@@ -304,7 +304,7 @@
chosen {
bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2";
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts
index aa68911..538e42b 100644
--- a/arch/powerpc/boot/dts/bamboo.dts
+++ b/arch/powerpc/boot/dts/bamboo.dts
@@ -295,6 +295,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts
index 27f169e..c5beb72 100644
--- a/arch/powerpc/boot/dts/c2k.dts
+++ b/arch/powerpc/boot/dts/c2k.dts
@@ -361,6 +361,6 @@
};
};
chosen {
- linux,stdout-path = &MPSC0;
+ stdout-path = &MPSC0;
};
};
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index f2ad581..a04a4fc 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -237,6 +237,6 @@
};
chosen {
- linux,stdout-path = &UART0;
+ stdout-path = &UART0;
};
};
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
index c280e75..c3922fc 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -78,7 +78,7 @@
};
rtc@56 {
- compatible = "mc,rv3029c2";
+ compatible = "microcrystal,rv3029";
reg = <0x56>;
};
diff --git a/arch/powerpc/boot/dts/ebony.dts b/arch/powerpc/boot/dts/ebony.dts
index ec2d142..5d11e6e 100644
--- a/arch/powerpc/boot/dts/ebony.dts
+++ b/arch/powerpc/boot/dts/ebony.dts
@@ -332,6 +332,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/eiger.dts b/arch/powerpc/boot/dts/eiger.dts
index 48bcf71..7a1231d 100644
--- a/arch/powerpc/boot/dts/eiger.dts
+++ b/arch/powerpc/boot/dts/eiger.dts
@@ -421,7 +421,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
+ stdout-path = "/plb/opb/serial@ef600200";
};
};
diff --git a/arch/powerpc/boot/dts/ep405.dts b/arch/powerpc/boot/dts/ep405.dts
index 53ef06c..4ac9c5a 100644
--- a/arch/powerpc/boot/dts/ep405.dts
+++ b/arch/powerpc/boot/dts/ep405.dts
@@ -225,6 +225,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts b/arch/powerpc/boot/dts/fsl/mvme7100.dts
index e2d306a..721cb53 100644
--- a/arch/powerpc/boot/dts/fsl/mvme7100.dts
+++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts
@@ -146,7 +146,7 @@
};
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
index 6560283..9311b86 100644
--- a/arch/powerpc/boot/dts/fsp2.dts
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -607,7 +607,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@b0020000";
+ stdout-path = "/plb/opb/serial@b0020000";
bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
};
};
diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts
index 43e6f0c..02bd304 100644
--- a/arch/powerpc/boot/dts/holly.dts
+++ b/arch/powerpc/boot/dts/holly.dts
@@ -191,6 +191,6 @@
};
chosen {
- linux,stdout-path = "/tsi109@c0000000/serial@7808";
+ stdout-path = "/tsi109@c0000000/serial@7808";
};
};
diff --git a/arch/powerpc/boot/dts/hotfoot.dts b/arch/powerpc/boot/dts/hotfoot.dts
index 71d3bb4..b93bf2d 100644
--- a/arch/powerpc/boot/dts/hotfoot.dts
+++ b/arch/powerpc/boot/dts/hotfoot.dts
@@ -291,6 +291,6 @@
};
chosen {
- linux,stdout-path = &UART0;
+ stdout-path = &UART0;
};
};
diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts
index 9c94fd7..2e6e3a7 100644
--- a/arch/powerpc/boot/dts/icon.dts
+++ b/arch/powerpc/boot/dts/icon.dts
@@ -442,6 +442,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@f0000200";
+ stdout-path = "/plb/opb/serial@f0000200";
};
};
diff --git a/arch/powerpc/boot/dts/iss4xx-mpic.dts b/arch/powerpc/boot/dts/iss4xx-mpic.dts
index 23e9d9b..f706319 100644
--- a/arch/powerpc/boot/dts/iss4xx-mpic.dts
+++ b/arch/powerpc/boot/dts/iss4xx-mpic.dts
@@ -150,6 +150,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/iss4xx.dts b/arch/powerpc/boot/dts/iss4xx.dts
index 4ff6555..5533aff 100644
--- a/arch/powerpc/boot/dts/iss4xx.dts
+++ b/arch/powerpc/boot/dts/iss4xx.dts
@@ -111,6 +111,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts
index f913dbe..02629e1 100644
--- a/arch/powerpc/boot/dts/katmai.dts
+++ b/arch/powerpc/boot/dts/katmai.dts
@@ -505,6 +505,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@f0000200";
+ stdout-path = "/plb/opb/serial@f0000200";
};
};
diff --git a/arch/powerpc/boot/dts/klondike.dts b/arch/powerpc/boot/dts/klondike.dts
index 8c94290..d9613b7 100644
--- a/arch/powerpc/boot/dts/klondike.dts
+++ b/arch/powerpc/boot/dts/klondike.dts
@@ -222,6 +222,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@50001000";
+ stdout-path = "/plb/opb/serial@50001000";
};
};
diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts
index 5d68236..fe6c17c 100644
--- a/arch/powerpc/boot/dts/ksi8560.dts
+++ b/arch/powerpc/boot/dts/ksi8560.dts
@@ -339,6 +339,6 @@
chosen {
- linux,stdout-path = "/soc/cpm/serial@91a00";
+ stdout-path = "/soc/cpm/serial@91a00";
};
};
diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts
index b5413cb..843f156 100644
--- a/arch/powerpc/boot/dts/media5200.dts
+++ b/arch/powerpc/boot/dts/media5200.dts
@@ -25,7 +25,7 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
cpus {
diff --git a/arch/powerpc/boot/dts/mpc8272ads.dts b/arch/powerpc/boot/dts/mpc8272ads.dts
index 6d2cddf..98282c1 100644
--- a/arch/powerpc/boot/dts/mpc8272ads.dts
+++ b/arch/powerpc/boot/dts/mpc8272ads.dts
@@ -262,6 +262,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@11a00";
+ stdout-path = "/soc/cpm/serial@11a00";
};
};
diff --git a/arch/powerpc/boot/dts/mpc866ads.dts b/arch/powerpc/boot/dts/mpc866ads.dts
index 34c1f48..4443fac 100644
--- a/arch/powerpc/boot/dts/mpc866ads.dts
+++ b/arch/powerpc/boot/dts/mpc866ads.dts
@@ -185,6 +185,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@a80";
+ stdout-path = "/soc/cpm/serial@a80";
};
};
diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts
index 4e93bd9..5b037f5 100644
--- a/arch/powerpc/boot/dts/mpc885ads.dts
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -227,6 +227,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@a80";
+ stdout-path = "/soc/cpm/serial@a80";
};
};
diff --git a/arch/powerpc/boot/dts/mvme5100.dts b/arch/powerpc/boot/dts/mvme5100.dts
index 1ecb341..a7eb6d2 100644
--- a/arch/powerpc/boot/dts/mvme5100.dts
+++ b/arch/powerpc/boot/dts/mvme5100.dts
@@ -179,7 +179,7 @@
};
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/obs600.dts b/arch/powerpc/boot/dts/obs600.dts
index 18e7d79..d10b041 100644
--- a/arch/powerpc/boot/dts/obs600.dts
+++ b/arch/powerpc/boot/dts/obs600.dts
@@ -309,6 +309,6 @@
};
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
+ stdout-path = "/plb/opb/serial@ef600200";
};
};
diff --git a/arch/powerpc/boot/dts/pq2fads.dts b/arch/powerpc/boot/dts/pq2fads.dts
index 0c525ff..a477615 100644
--- a/arch/powerpc/boot/dts/pq2fads.dts
+++ b/arch/powerpc/boot/dts/pq2fads.dts
@@ -242,6 +242,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@11a00";
+ stdout-path = "/soc/cpm/serial@11a00";
};
};
diff --git a/arch/powerpc/boot/dts/rainier.dts b/arch/powerpc/boot/dts/rainier.dts
index 9684c80..e59829c 100644
--- a/arch/powerpc/boot/dts/rainier.dts
+++ b/arch/powerpc/boot/dts/rainier.dts
@@ -344,7 +344,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
bootargs = "console=ttyS0,115200";
};
};
diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts
index d86a3a4..f3e046f 100644
--- a/arch/powerpc/boot/dts/redwood.dts
+++ b/arch/powerpc/boot/dts/redwood.dts
@@ -381,7 +381,7 @@
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
+ stdout-path = "/plb/opb/serial@ef600200";
};
};
diff --git a/arch/powerpc/boot/dts/sam440ep.dts b/arch/powerpc/boot/dts/sam440ep.dts
index 088361c..7d15f18 100644
--- a/arch/powerpc/boot/dts/sam440ep.dts
+++ b/arch/powerpc/boot/dts/sam440ep.dts
@@ -288,6 +288,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index e41b88a..60d211d 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -406,7 +406,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
bootargs = "console=ttyS0,115200";
};
};
diff --git a/arch/powerpc/boot/dts/storcenter.dts b/arch/powerpc/boot/dts/storcenter.dts
index 2a55573..99f6f54 100644
--- a/arch/powerpc/boot/dts/storcenter.dts
+++ b/arch/powerpc/boot/dts/storcenter.dts
@@ -137,6 +137,6 @@
};
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/taishan.dts b/arch/powerpc/boot/dts/taishan.dts
index 1657ad0..803f1bf 100644
--- a/arch/powerpc/boot/dts/taishan.dts
+++ b/arch/powerpc/boot/dts/taishan.dts
@@ -422,6 +422,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000300";
+ stdout-path = "/plb/opb/serial@40000300";
};
};
diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts
index 391a4e2..66f1c63 100644
--- a/arch/powerpc/boot/dts/virtex440-ml507.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml507.dts
@@ -32,7 +32,7 @@
} ;
chosen {
bootargs = "console=ttyS0 root=/dev/ram";
- linux,stdout-path = &RS232_Uart_1;
+ stdout-path = &RS232_Uart_1;
} ;
cpus {
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/virtex440-ml510.dts b/arch/powerpc/boot/dts/virtex440-ml510.dts
index 81201d3..3b736ca 100644
--- a/arch/powerpc/boot/dts/virtex440-ml510.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml510.dts
@@ -26,7 +26,7 @@
} ;
chosen {
bootargs = "console=ttyS0 root=/dev/ram";
- linux,stdout-path = "/plb@0/serial@83e00000";
+ stdout-path = "/plb@0/serial@83e00000";
} ;
cpus {
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts
index 4a9f726..0872862 100644
--- a/arch/powerpc/boot/dts/walnut.dts
+++ b/arch/powerpc/boot/dts/walnut.dts
@@ -241,6 +241,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index ea9053e..b4f3274 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -304,6 +304,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index 17a5bab..104b1d6 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -13,6 +13,7 @@
*/
/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
/*
* This is commented-out for now.
@@ -176,6 +177,15 @@
compatible = "nintendo,hollywood-gpio";
reg = <0x0d8000c0 0x40>;
gpio-controller;
+ ngpios = <24>;
+
+ gpio-line-names =
+ "POWER", "SHUTDOWN", "FAN", "DC_DC",
+ "DI_SPIN", "SLOT_LED", "EJECT_BTN", "SLOT_IN",
+ "SENSOR_BAR", "DO_EJECT", "EEP_CS", "EEP_CLK",
+ "EEP_MOSI", "EEP_MISO", "AVE_SCL", "AVE_SDA",
+ "DEBUG0", "DEBUG1", "DEBUG2", "DEBUG3",
+ "DEBUG4", "DEBUG5", "DEBUG6", "DEBUG7";
/*
* This is commented out while a standard binding
@@ -214,5 +224,16 @@
interrupts = <2>;
};
};
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ /* This is the blue LED in the disk drive slot */
+ drive-slot {
+ label = "wii:blue:drive_slot";
+ gpios = <&GPIO 5 GPIO_ACTIVE_HIGH>;
+ panic-indicator;
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
index 646acfb..d5e1442 100644
--- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts
+++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
@@ -503,6 +503,6 @@
/* Needed for dtbImage boot wrapper compatibility */
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/yosemite.dts b/arch/powerpc/boot/dts/yosemite.dts
index 30bb475..5650878 100644
--- a/arch/powerpc/boot/dts/yosemite.dts
+++ b/arch/powerpc/boot/dts/yosemite.dts
@@ -327,6 +327,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index f52c31b..2a0c8b1 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -7,8 +7,6 @@
#include "of.h"
-typedef u32 uint32_t;
-typedef u64 uint64_t;
typedef unsigned long uintptr_t;
typedef __be16 fdt16_t;
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 7330150..d9713ad 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -62,6 +62,7 @@ void RunModeException(struct pt_regs *regs);
void single_step_exception(struct pt_regs *regs);
void program_check_exception(struct pt_regs *regs);
void alignment_exception(struct pt_regs *regs);
+void slb_miss_bad_addr(struct pt_regs *regs);
void StackOverflow(struct pt_regs *regs);
void nonrecoverable_exception(struct pt_regs *regs);
void kernel_fp_unavailable_exception(struct pt_regs *regs);
@@ -88,7 +89,18 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
long sys_swapcontext(struct ucontext __user *old_ctx,
struct ucontext __user *new_ctx,
int ctx_size, int r6, int r7, int r8, struct pt_regs *regs);
+int sys_debug_setcontext(struct ucontext __user *ctx,
+ int ndbg, struct sig_dbg_op __user *dbg,
+ int r6, int r7, int r8,
+ struct pt_regs *regs);
+int
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp);
+unsigned long __init early_init(unsigned long dt_ptr);
+void __init machine_init(u64 dt_ptr);
#endif
+
+long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
+ u32 len_high, u32 len_low);
long sys_switch_endian(void);
notrace unsigned int __check_irq_replay(void);
void notrace restore_interrupts(void);
@@ -126,4 +138,7 @@ extern int __ucmpdi2(u64, u64);
void _mcount(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+void pnv_power9_force_smt4_catch(void);
+void pnv_power9_force_smt4_release(void);
+
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 10daa1d..c7c6395 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -35,7 +35,8 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#ifdef __SUBARCH_HAS_LWSYNC
+/* The sub-arch has lwsync */
+#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
# define SMPWMB LWSYNC
#else
# define SMPWMB eieio
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 67c5475..4b54230 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -11,6 +11,12 @@
#define H_PUD_INDEX_SIZE 9
#define H_PGD_INDEX_SIZE 9
+/*
+ * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
+ * Hence also limit max EA bits to 64TB.
+ */
+#define MAX_EA_BITS_PER_CONTEXT 46
+
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
@@ -34,6 +40,14 @@
#define H_PAGE_COMBO 0x0
#define H_PTE_FRAG_NR 0
#define H_PTE_FRAG_SIZE_SHIFT 0
+
+/* memory key bits, only 8 keys supported */
+#define H_PTE_PKEY_BIT0 0
+#define H_PTE_PKEY_BIT1 0
+#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
+
/*
* On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
*/
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 3bcf269..cc82745 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -4,10 +4,16 @@
#define H_PTE_INDEX_SIZE 8
#define H_PMD_INDEX_SIZE 10
-#define H_PUD_INDEX_SIZE 7
+#define H_PUD_INDEX_SIZE 10
#define H_PGD_INDEX_SIZE 8
/*
+ * Each context is 512TB size. SLB miss for first context/default context
+ * is handled in the hotpath.
+ */
+#define MAX_EA_BITS_PER_CONTEXT 49
+
+/*
* 64k aligned address free up few of the lower bits of RPN for us
* We steal that here. For more deatils look at pte_pfn/pfn_pte()
*/
@@ -16,6 +22,13 @@
#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */
#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */
+/* memory key bits. */
+#define H_PTE_PKEY_BIT0 _RPAGE_RSV1
+#define H_PTE_PKEY_BIT1 _RPAGE_RSV2
+#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
+
/*
* We need to differentiate between explicit huge page and THP huge
* page, since THP huge page also need to track real subpage details
@@ -25,15 +38,13 @@
/* PTE flags to conserve for HPTE identification */
#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
/*
- * we support 16 fragments per PTE page of 64K size.
- */
-#define H_PTE_FRAG_NR 16
-/*
* We use a 2K PTE page fragment and another 2K for storing
* real_pte_t hash index
+ * 8 bytes per each pte entry and another 8 bytes for storing
+ * slot details.
*/
-#define H_PTE_FRAG_SIZE_SHIFT 12
-#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+#define H_PTE_FRAG_SIZE_SHIFT (H_PTE_INDEX_SIZE + 3 + 1)
+#define H_PTE_FRAG_NR (PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
#ifndef __ASSEMBLY__
#include <asm/errno.h>
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 935adcd..cc8cd65 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -212,7 +212,7 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
extern void hash__vmemmap_remove_mapping(unsigned long start,
unsigned long page_size);
-int hash__create_section_mapping(unsigned long start, unsigned long end);
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid);
int hash__remove_section_mapping(unsigned long start, unsigned long end);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 37671feb..5094696 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -80,8 +80,29 @@ struct spinlock;
/* Maximum possible number of NPUs in a system. */
#define NV_MAX_NPUS 8
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * upto 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
typedef struct {
- mm_context_id_t id;
+ union {
+ /*
+ * We use id as the PIDR content for radix. On hash we can use
+ * more than one id. The extended ids are used when we start
+ * having address above 512TB. We allocate one extended id
+ * for each 512TB. The new id is then used with the 49 bit
+ * EA to build a new VA. We always use ESID_BITS_1T_MASK bits
+ * from EA and new context ids to build the new VAs.
+ */
+ mm_context_id_t id;
+ mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+ };
u16 user_psize; /* page size index */
/* Number of bits in the mm_cpumask */
@@ -94,9 +115,18 @@ typedef struct {
struct npu_context *npu_context;
#ifdef CONFIG_PPC_MM_SLICES
- u64 low_slices_psize; /* SLB page size encodings */
+ /* SLB page size encodings*/
+ unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long slb_addr_limit;
+# ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+# endif
+ struct slice_mask mask_4k;
+# ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+# endif
#else
u16 sllp; /* SLB page size encoding */
#endif
@@ -177,5 +207,25 @@ extern void radix_init_pseries(void);
static inline void radix_init_pseries(void) { };
#endif
+static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
+{
+ int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+ if (likely(index < ARRAY_SIZE(ctx->extended_id)))
+ return ctx->extended_id[index];
+
+ /* should never happen */
+ WARN_ON(1);
+ return 0;
+}
+
+static inline unsigned long get_user_vsid(mm_context_t *ctx,
+ unsigned long ea, int ssize)
+{
+ unsigned long context = get_ea_context(ctx, ea);
+
+ return get_vsid(context, ea, ssize);
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 4746bc6..558a159 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -80,8 +80,18 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
+ /*
+ * With hugetlb, we don't clear the second half of the page table.
+ * If we share the same slab cache with the pmd or pud level table,
+ * we need to make sure we zero out the full table on alloc.
+ * With 4K we don't store slot in the second half. Hence we don't
+ * need to do this for 4k.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \
+ ((H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX) || \
+ (H_PGD_INDEX_SIZE == H_PMD_CACHE_INDEX))
memset(pgd, 0, PGD_TABLE_SIZE);
-
+#endif
return pgd;
}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a6b9f1d..47b5ffc 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -60,25 +60,6 @@
/* Max physical address bit as per radix table */
#define _RPAGE_PA_MAX 57
-#ifdef CONFIG_PPC_MEM_KEYS
-#ifdef CONFIG_PPC_64K_PAGES
-#define H_PTE_PKEY_BIT0 _RPAGE_RSV1
-#define H_PTE_PKEY_BIT1 _RPAGE_RSV2
-#else /* CONFIG_PPC_64K_PAGES */
-#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */
-#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */
-#endif /* CONFIG_PPC_64K_PAGES */
-#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
-#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
-#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
-#else /* CONFIG_PPC_MEM_KEYS */
-#define H_PTE_PKEY_BIT0 0
-#define H_PTE_PKEY_BIT1 0
-#define H_PTE_PKEY_BIT2 0
-#define H_PTE_PKEY_BIT3 0
-#define H_PTE_PKEY_BIT4 0
-#endif /* CONFIG_PPC_MEM_KEYS */
-
/*
* Max physical address bit we will use for now.
*
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index a61aa9c..ca366ec 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -9,5 +9,10 @@
#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
#define RADIX_PUD_INDEX_SIZE 9
#define RADIX_PGD_INDEX_SIZE 13
+/*
+ * One fragment per per page
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
#endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index c7e71ba..8300824 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -10,4 +10,10 @@
#define RADIX_PUD_INDEX_SIZE 9
#define RADIX_PGD_INDEX_SIZE 13
+/*
+ * We use a 256 byte PTE page fragment in radix
+ * 8 bytes per each PTE entry.
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
#endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 365010f..705193e 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -313,7 +313,7 @@ static inline unsigned long radix__get_tree_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int radix__create_section_mapping(unsigned long start, unsigned long end);
+int radix__create_section_mapping(unsigned long start, unsigned long end, int nid);
int radix__remove_section_mapping(unsigned long start, unsigned long end);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
new file mode 100644
index 0000000..db0deda
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
+#define _ASM_POWERPC_BOOK3S_64_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT 28
+#define SLICE_LOW_TOP (0x100000000ul)
+#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT 40
+#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
+
+#else /* CONFIG_PPC_MM_SLICES */
+
+#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
+#define slice_set_user_psize(mm, psize) \
+do { \
+ (mm)->context.user_psize = (psize); \
+ (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
+} while (0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index b77f036..11843e3 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -99,7 +99,6 @@ static inline void invalidate_dcache_range(unsigned long start,
#ifdef CONFIG_PPC64
extern void flush_dcache_range(unsigned long start, unsigned long stop);
extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
#endif
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 2e2bacb..931dda8 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -131,41 +131,48 @@ static inline void cpu_feature_keys_init(void) { }
/* CPU kernel features */
-/* Retain the 32b definitions all use bottom half of word */
+/* Definitions for features that we have on both 32-bit and 64-bit chips */
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x00000001)
-#define CPU_FTR_L2CR ASM_CONST(0x00000002)
-#define CPU_FTR_SPEC7450 ASM_CONST(0x00000004)
-#define CPU_FTR_ALTIVEC ASM_CONST(0x00000008)
-#define CPU_FTR_TAU ASM_CONST(0x00000010)
-#define CPU_FTR_CAN_DOZE ASM_CONST(0x00000020)
-#define CPU_FTR_USE_TB ASM_CONST(0x00000040)
-#define CPU_FTR_L2CSR ASM_CONST(0x00000080)
-#define CPU_FTR_601 ASM_CONST(0x00000100)
-#define CPU_FTR_DBELL ASM_CONST(0x00000200)
-#define CPU_FTR_CAN_NAP ASM_CONST(0x00000400)
-#define CPU_FTR_L3CR ASM_CONST(0x00000800)
-#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00001000)
-#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00002000)
-#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00004000)
-#define CPU_FTR_NO_DPM ASM_CONST(0x00008000)
-#define CPU_FTR_476_DD2 ASM_CONST(0x00010000)
-#define CPU_FTR_NEED_COHERENT ASM_CONST(0x00020000)
-#define CPU_FTR_NO_BTIC ASM_CONST(0x00040000)
-#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00080000)
-#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00100000)
-#define CPU_FTR_PPC_LE ASM_CONST(0x00200000)
-#define CPU_FTR_REAL_LE ASM_CONST(0x00400000)
-#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00800000)
-#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x01000000)
-#define CPU_FTR_SPE ASM_CONST(0x02000000)
-#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x04000000)
-#define CPU_FTR_LWSYNC ASM_CONST(0x08000000)
-#define CPU_FTR_NOEXECUTE ASM_CONST(0x10000000)
-#define CPU_FTR_INDEXED_DCR ASM_CONST(0x20000000)
-#define CPU_FTR_EMB_HV ASM_CONST(0x40000000)
+#define CPU_FTR_ALTIVEC ASM_CONST(0x00000002)
+#define CPU_FTR_DBELL ASM_CONST(0x00000004)
+#define CPU_FTR_CAN_NAP ASM_CONST(0x00000008)
+#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00000010)
+#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00000020)
+#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00000040)
+#define CPU_FTR_LWSYNC ASM_CONST(0x00000080)
+#define CPU_FTR_NOEXECUTE ASM_CONST(0x00000100)
+#define CPU_FTR_EMB_HV ASM_CONST(0x00000200)
+
+/* Definitions for features that only exist on 32-bit chips */
+#ifdef CONFIG_PPC32
+#define CPU_FTR_601 ASM_CONST(0x00001000)
+#define CPU_FTR_L2CR ASM_CONST(0x00002000)
+#define CPU_FTR_SPEC7450 ASM_CONST(0x00004000)
+#define CPU_FTR_TAU ASM_CONST(0x00008000)
+#define CPU_FTR_CAN_DOZE ASM_CONST(0x00010000)
+#define CPU_FTR_USE_RTC ASM_CONST(0x00020000)
+#define CPU_FTR_L3CR ASM_CONST(0x00040000)
+#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00080000)
+#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00100000)
+#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00200000)
+#define CPU_FTR_NO_DPM ASM_CONST(0x00400000)
+#define CPU_FTR_476_DD2 ASM_CONST(0x00800000)
+#define CPU_FTR_NEED_COHERENT ASM_CONST(0x01000000)
+#define CPU_FTR_NO_BTIC ASM_CONST(0x02000000)
+#define CPU_FTR_PPC_LE ASM_CONST(0x04000000)
+#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x08000000)
+#define CPU_FTR_SPE ASM_CONST(0x10000000)
+#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x20000000)
+#define CPU_FTR_INDEXED_DCR ASM_CONST(0x40000000)
+
+#else /* CONFIG_PPC32 */
+/* Define these to 0 for the sake of tests in common code */
+#define CPU_FTR_601 (0)
+#define CPU_FTR_PPC_LE (0)
+#endif
/*
- * Add the 64-bit processor unique features in the top half of the word;
+ * Definitions for the 64-bit processor unique features;
* on 32-bit, make the names available but defined to be 0.
*/
#ifdef __powerpc64__
@@ -174,38 +181,40 @@ static inline void cpu_feature_keys_init(void) { }
#define LONG_ASM_CONST(x) 0
#endif
-#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000)
-#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000)
-#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000001000000000)
-#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000)
-#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000)
-#define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000)
-#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_PURR LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000080000000000)
-#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000100000000000)
-#define CPU_FTR_VSX LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_SAO LONG_ASM_CONST(0x0000400000000000)
-#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000800000000000)
-#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0001000000000000)
-#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000)
-#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000)
-#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000)
-#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000)
-#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000)
-#define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0200000000000000)
-#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000)
-#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
-#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000)
-#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
-#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000)
+#define CPU_FTR_REAL_LE LONG_ASM_CONST(0x0000000000001000)
+#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000000002000)
+#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000000008000)
+#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000000010000)
+#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000000000020000)
+#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000000000040000)
+#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000000000080000)
+#define CPU_FTR_SMT LONG_ASM_CONST(0x0000000000100000)
+#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000000000200000)
+#define CPU_FTR_PURR LONG_ASM_CONST(0x0000000000400000)
+#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000000000800000)
+#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000)
+#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000)
+#define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000)
+#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000)
+#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000)
+#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000)
+#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000)
+#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0000000080000000)
+#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0000000100000000)
+#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0000000200000000)
+#define CPU_FTR_PKEY LONG_ASM_CONST(0x0000000400000000)
+#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_TM LONG_ASM_CONST(0x0000001000000000)
+#define CPU_FTR_CFAR LONG_ASM_CONST(0x0000002000000000)
+#define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0000004000000000)
+#define CPU_FTR_DAWR LONG_ASM_CONST(0x0000008000000000)
+#define CPU_FTR_DABRX LONG_ASM_CONST(0x0000010000000000)
+#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x0000020000000000)
+#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x0000040000000000)
+#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
+#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
+#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
+#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000)
#ifndef __ASSEMBLY__
@@ -286,21 +295,19 @@ static inline void cpu_feature_keys_init(void) { }
#endif
#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
-#define CPU_FTRS_603 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC)
+#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
-#define CPU_FTRS_604 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_PPC_LE)
+#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_740 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_PPC_LE)
#define CPU_FTRS_750 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_PPC_LE)
#define CPU_FTRS_750CL (CPU_FTRS_750)
@@ -309,125 +316,114 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTRS_750FX (CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX)
#define CPU_FTRS_750GX (CPU_FTRS_750FX)
#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_7400 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447A (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7448 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
-#define CPU_FTRS_82XX (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
+#define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE)
#define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP)
+ CPU_FTR_MAYBE_CAN_NAP)
#define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_COMMON)
#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
-#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB)
-#define CPU_FTRS_8XX (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON)
+#define CPU_FTRS_8XX (CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_40X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_44X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_440x6 (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
CPU_FTR_INDEXED_DCR)
#define CPU_FTRS_47X (CPU_FTRS_440x6)
-#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
+#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
CPU_FTR_DEBUG_LVL_EXC)
-#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E500MC (CPU_FTR_NODSISRALIGN | \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
/*
* e5500/e6500 erratum A-006958 is a timebase bug that can use the
* same workaround as CPU_FTR_CELL_TB_BUG.
*/
-#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E5500 (CPU_FTR_NODSISRALIGN | \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG)
-#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E6500 (CPU_FTR_NODSISRALIGN | \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT)
#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
/* 64-bit CPUs */
-#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
- CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
- CPU_FTR_STCX_CHECKS_ADDRESS)
-#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \
CPU_FTR_HVMODE | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER5 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -435,7 +431,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \
CPU_FTR_DABRX)
-#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -444,7 +440,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | \
CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
-#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -456,7 +452,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
-#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -464,33 +460,45 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_DSCR | CPU_FTR_SAO | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
- CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
- CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG)
+ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+ CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+ CPU_FTR_P9_TLBIE_BUG)
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
(~CPU_FTR_SAO))
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
-#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+ CPU_FTR_P9_TM_HV_ASSIST | \
+ CPU_FTR_P9_TM_XER_SO_BUG)
+#define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX)
-#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX)
-#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
+#define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2)
#ifdef __powerpc64__
#ifdef CONFIG_PPC_BOOK3E
#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500)
#else
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define CPU_FTRS_POSSIBLE \
- (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
+ (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
+ CPU_FTRS_POWER8_DD1 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | \
+ CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+ CPU_FTRS_POWER9_DD2_2)
+#else
+#define CPU_FTRS_POSSIBLE \
+ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
- CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1)
+ CPU_FTRS_PA6T | CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+ CPU_FTRS_POWER9_DD2_2)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
enum {
@@ -537,12 +545,19 @@ enum {
#ifdef CONFIG_PPC_BOOK3E
#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500)
#else
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define CPU_FTRS_ALWAYS \
+ (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
+ CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1)
+#else
#define CPU_FTRS_ALWAYS \
- (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
+ (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
- CPU_FTRS_POWER9)
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index fc97404..ce5da21 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -47,6 +47,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
void set_breakpoint(struct arch_hw_breakpoint *brk);
void __set_breakpoint(struct arch_hw_breakpoint *brk);
+bool ppc_breakpoint_available(void);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
extern void do_send_trap(struct pt_regs *regs, unsigned long address,
unsigned long error_code, int brkpt);
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index fd37cc1..c2266ca 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -256,6 +256,12 @@ static inline void eeh_serialize_unlock(unsigned long flags)
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}
+static inline bool eeh_state_active(int state)
+{
+ return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
+ == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+}
+
typedef void *(*eeh_traverse_func)(void *data, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 1e551a2..9884e87 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -34,7 +34,8 @@ struct eeh_event {
int eeh_event_init(void);
int eeh_send_failure_event(struct eeh_pe *pe);
void eeh_remove_event(struct eeh_pe *pe, bool force);
-void eeh_handle_event(struct eeh_pe *pe);
+void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_special_event(void);
#endif /* __KERNEL__ */
#endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 9086324..d3a7e36 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -466,17 +466,17 @@ static inline unsigned long epapr_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr)
{
- unsigned long register r0 asm("r0");
- unsigned long register r3 asm("r3") = in[0];
- unsigned long register r4 asm("r4") = in[1];
- unsigned long register r5 asm("r5") = in[2];
- unsigned long register r6 asm("r6") = in[3];
- unsigned long register r7 asm("r7") = in[4];
- unsigned long register r8 asm("r8") = in[5];
- unsigned long register r9 asm("r9") = in[6];
- unsigned long register r10 asm("r10") = in[7];
- unsigned long register r11 asm("r11") = nr;
- unsigned long register r12 asm("r12");
+ register unsigned long r0 asm("r0");
+ register unsigned long r3 asm("r3") = in[0];
+ register unsigned long r4 asm("r4") = in[1];
+ register unsigned long r5 asm("r5") = in[2];
+ register unsigned long r6 asm("r6") = in[3];
+ register unsigned long r7 asm("r7") = in[4];
+ register unsigned long r8 asm("r8") = in[5];
+ register unsigned long r9 asm("r9") = in[6];
+ register unsigned long r10 asm("r10") = in[7];
+ register unsigned long r11 asm("r11") = nr;
+ register unsigned long r12 asm("r12");
asm volatile("bl epapr_hypercall_start"
: "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 1a4847f..78540c0 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -89,17 +89,17 @@ pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
void flush_dcache_icache_hugepage(struct page *page);
-#if defined(CONFIG_PPC_MM_SLICES)
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len);
-#else
+
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
unsigned long len)
{
+ if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled())
+ return slice_is_hugepage_only_range(mm, addr, len);
return 0;
}
-#endif
void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
pte_t pte);
@@ -118,12 +118,6 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long ceiling);
/*
- * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
- * to override the version in mm/hugetlb.c
- */
-#define vma_mmu_pagesize vma_mmu_pagesize
-
-/*
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
*/
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index eca3f9c..2e2ddda 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -88,6 +88,7 @@
#define H_P8 -61
#define H_P9 -62
#define H_TOO_BIG -64
+#define H_UNSUPPORTED -67
#define H_OVERLAP -68
#define H_INTERRUPT -69
#define H_BAD_DATA -70
@@ -337,6 +338,9 @@
#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
+#define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5
+#define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6
+#define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7
#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ac6432d..8e7b097 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -66,6 +66,7 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
int arch_install_hw_breakpoint(struct perf_event *bp);
void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void arch_unregister_hw_breakpoint(struct perf_event *bp);
void hw_breakpoint_pmu_read(struct perf_event *bp);
extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
@@ -79,9 +80,11 @@ static inline void hw_breakpoint_disable(void)
brk.address = 0;
brk.type = 0;
brk.len = 0;
- __set_breakpoint(&brk);
+ if (ppc_breakpoint_available())
+ __set_breakpoint(&brk);
}
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
+int hw_breakpoint_handler(struct die_args *args);
#else /* CONFIG_HAVE_HW_BREAKPOINT */
static inline void hw_breakpoint_disable(void) { }
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 422f99c..af07492 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -33,8 +33,6 @@ extern struct pci_dev *isa_bridge_pcidev;
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
-#include <asm-generic/iomap.h>
-
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
@@ -663,6 +661,8 @@ static inline void name at \
#define writel_relaxed(v, addr) writel(v, addr)
#define writeq_relaxed(v, addr) writeq(v, addr)
+#include <asm-generic/iomap.h>
+
#ifdef CONFIG_PPC32
#define mmiowb()
#else
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index e8e3a0a..ee39ce5 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -66,6 +66,7 @@ extern void irq_ctx_init(void);
extern void call_do_softirq(struct thread_info *tp);
extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
extern void do_IRQ(struct pt_regs *regs);
+extern void __init init_IRQ(void);
extern void __do_irq(struct pt_regs *regs);
int irq_choose_cpu(const struct cpumask *mask);
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
index c6d3078..b8b0be8 100644
--- a/arch/powerpc/include/asm/irq_work.h
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -6,5 +6,6 @@ static inline bool arch_irq_work_has_interrupt(void)
{
return true;
}
+extern void arch_irq_work_raise(void);
#endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 09a802b..a790d5c 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -108,6 +108,8 @@
/* book3s_hv */
+#define BOOK3S_INTERRUPT_HV_SOFTPATCH 0x1500
+
/*
* Special trap used to indicate to host that this is a
* passthrough interrupt that could not be handled
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 376ae80..4c02a73 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -241,6 +241,10 @@ extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
unsigned long mask);
extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
+extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu);
+extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu);
+extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu);
+
extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);
extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 998f7b7..c424e44 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -472,6 +472,49 @@ static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
set_bit_le(i, map);
}
+static inline u64 sanitize_msr(u64 msr)
+{
+ msr &= ~MSR_HV;
+ msr |= MSR_ME;
+ return msr;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.cr = vcpu->arch.cr_tm;
+ vcpu->arch.xer = vcpu->arch.xer_tm;
+ vcpu->arch.lr = vcpu->arch.lr_tm;
+ vcpu->arch.ctr = vcpu->arch.ctr_tm;
+ vcpu->arch.amr = vcpu->arch.amr_tm;
+ vcpu->arch.ppr = vcpu->arch.ppr_tm;
+ vcpu->arch.dscr = vcpu->arch.dscr_tm;
+ vcpu->arch.tar = vcpu->arch.tar_tm;
+ memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm,
+ sizeof(vcpu->arch.gpr));
+ vcpu->arch.fp = vcpu->arch.fp_tm;
+ vcpu->arch.vr = vcpu->arch.vr_tm;
+ vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.cr_tm = vcpu->arch.cr;
+ vcpu->arch.xer_tm = vcpu->arch.xer;
+ vcpu->arch.lr_tm = vcpu->arch.lr;
+ vcpu->arch.ctr_tm = vcpu->arch.ctr;
+ vcpu->arch.amr_tm = vcpu->arch.amr;
+ vcpu->arch.ppr_tm = vcpu->arch.ppr;
+ vcpu->arch.dscr_tm = vcpu->arch.dscr;
+ vcpu->arch.tar_tm = vcpu->arch.tar;
+ memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr,
+ sizeof(vcpu->arch.gpr));
+ vcpu->arch.fp_tm = vcpu->arch.fp;
+ vcpu->arch.vr_tm = vcpu->arch.vr;
+ vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index ab386af..d978fdf 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -119,6 +119,7 @@ struct kvmppc_host_state {
u8 host_ipi;
u8 ptid; /* thread number within subcore when split */
u8 tid; /* thread number within whole core */
+ u8 fake_suspend;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
void __iomem *xics_phys;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1f53b56..17498e9 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -60,7 +60,6 @@
#define KVM_ARCH_WANT_MMU_NOTIFIER
-extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
@@ -610,6 +609,7 @@ struct kvm_vcpu_arch {
u64 tfhar;
u64 texasr;
u64 tfiar;
+ u64 orig_texasr;
u32 cr_tm;
u64 xer_tm;
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 336a91a..5ceb4ef 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -61,6 +61,11 @@ static inline unsigned int kvm_arch_para_features(void)
return r;
}
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
static inline bool kvm_check_and_clear_guest_paused(void)
{
return false;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 7765a80..abe7032 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -295,7 +295,6 @@ struct kvmppc_ops {
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
const struct kvm_memory_slot *new);
- int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
unsigned long end);
int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
@@ -436,15 +435,15 @@ struct openpic;
extern void kvm_cma_reserve(void) __init;
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{
- paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
+ paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr;
}
static inline void kvmppc_set_xive_tima(int cpu,
unsigned long phys_addr,
void __iomem *virt_addr)
{
- paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
- paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
+ paca_ptrs[cpu]->kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
+ paca_ptrs[cpu]->kvm_hstate.xive_tima_virt = virt_addr;
}
static inline u32 kvmppc_get_xics_latch(void)
@@ -458,7 +457,7 @@ static inline u32 kvmppc_get_xics_latch(void)
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
{
- paca[cpu].kvm_hstate.host_ipi = host_ipi;
+ paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index d0a2a2f..7c23ce8 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -34,16 +34,19 @@
#include <linux/threads.h>
#include <asm/types.h>
#include <asm/mmu.h>
+#include <asm/firmware.h>
/*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#define NR_LPPACAS 1
-
-/*
- * The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
- * alignment is sufficient to prevent this
+ * The lppaca is the "virtual processor area" registered with the hypervisor,
+ * H_REGISTER_VPA etc.
+ *
+ * According to PAPR, the structure is 640 bytes long, must be L1 cache line
+ * aligned, and must not cross a 4kB boundary. Its size field must be at
+ * least 640 bytes (but may be more).
+ *
+ * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than
+ * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep
+ * this structure as the canonical 640 byte size.
*/
struct lppaca {
/* cacheline 1 contains read-only data */
@@ -97,13 +100,11 @@ struct lppaca {
__be32 page_ins; /* CMO Hint - # page ins by OS */
u8 reserved11[148];
- volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
+ volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
u8 reserved12[96];
-} __attribute__((__aligned__(0x400)));
-
-extern struct lppaca lppaca[];
+} ____cacheline_aligned;
-#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr)
+#define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
/*
* We are using a non architected field to determine if a partition is
@@ -114,6 +115,8 @@ extern struct lppaca lppaca[];
static inline bool lppaca_shared_proc(struct lppaca *l)
{
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return false;
return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
}
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 2f806e3..4f54775 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -186,11 +186,32 @@
#define M_APG2 0x00000040
#define M_APG3 0x00000060
+#ifdef CONFIG_PPC_MM_SLICES
+#include <asm/nohash/32/slice.h>
+#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
+#endif
+
#ifndef __ASSEMBLY__
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, 0);
+};
+
typedef struct {
unsigned int id;
unsigned int active;
unsigned long vdso_base;
+#ifdef CONFIG_PPC_MM_SLICES
+ u16 user_psize; /* page size index */
+ unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned char high_slices_psize[0];
+ unsigned long slb_addr_limit;
+ struct slice_mask mask_base_psize; /* 4k or 16k */
+# ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_512k;
+ struct slice_mask mask_8m;
+# endif
+#endif
} mm_context_t;
#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index bb38312..61d15ce 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -111,9 +111,9 @@
/* MMU feature bit sets for various CPUs */
#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
-#define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2
-#define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
-#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970 MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA
+#define MMU_FTRS_POWER5 MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER6 MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
#define MMU_FTRS_POWER7 MMU_FTRS_POWER6
#define MMU_FTRS_POWER8 MMU_FTRS_POWER6
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 3a15b6d..1835ca1 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -60,12 +60,51 @@ extern int hash__alloc_context_id(void);
extern void hash__reserve_context_id(int id);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }
+
+static inline int alloc_extended_context(struct mm_struct *mm,
+ unsigned long ea)
+{
+ int context_id;
+
+ int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+ context_id = hash__alloc_context_id();
+ if (context_id < 0)
+ return context_id;
+
+ VM_WARN_ON(mm->context.extended_id[index]);
+ mm->context.extended_id[index] = context_id;
+ return context_id;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+ int context_id;
+
+ context_id = get_ea_context(&mm->context, ea);
+ if (!context_id)
+ return true;
+ return false;
+}
+
#else
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
extern unsigned long __init_new_context(void);
extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
+static inline int alloc_extended_context(struct mm_struct *mm,
+ unsigned long ea)
+{
+ /* non book3s_64 should never find this called */
+ WARN_ON(1);
+ return -ENOMEM;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+ return false;
+}
#endif
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
new file mode 100644
index 0000000..777d62e
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/slice.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H
+#define _ASM_POWERPC_NOHASH_32_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT 26 /* 64 slices */
+#define SLICE_LOW_TOP (0x100000000ull)
+#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT 0
+#define SLICE_NUM_HIGH 0ul
+#define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h
new file mode 100644
index 0000000..ad0d6e3
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/slice.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H
+#define _ASM_POWERPC_NOHASH_64_SLICE_H
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define get_slice_psize(mm, addr) MMU_PAGE_64K
+#else /* CONFIG_PPC_64K_PAGES */
+#define get_slice_psize(mm, addr) MMU_PAGE_4K
+#endif /* !CONFIG_PPC_64K_PAGES */
+#define slice_set_user_psize(mm, psize) do { BUG(); } while (0)
+
+#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 94bd1bf..d886a5b 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -204,7 +204,9 @@
#define OPAL_NPU_SPA_SETUP 159
#define OPAL_NPU_SPA_CLEAR_CACHE 160
#define OPAL_NPU_TL_SET 161
-#define OPAL_LAST 161
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
+#define OPAL_LAST 165
/* Device tree flags */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 12e70fb..7159e1a 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -204,6 +204,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
@@ -323,7 +325,7 @@ struct rtc_time;
extern unsigned long opal_get_boot_time(void);
extern void opal_nvram_init(void);
extern void opal_flash_update_init(void);
-extern void opal_flash_term_callback(void);
+extern void opal_flash_update_print_message(void);
extern int opal_elog_init(void);
extern void opal_platform_dump_init(void);
extern void opal_sys_param_init(void);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b62c310..4185f1c 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -32,6 +32,7 @@
#include <asm/accounting.h>
#include <asm/hmi.h>
#include <asm/cpuidle.h>
+#include <asm/atomic.h>
register struct paca_struct *local_paca asm("r13");
@@ -46,7 +47,10 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
#define get_paca() local_paca
#endif
+#ifdef CONFIG_PPC_PSERIES
#define get_lppaca() (get_paca()->lppaca_ptr)
+#endif
+
#define get_slb_shadow() (get_paca()->slb_shadow_ptr)
struct task_struct;
@@ -58,7 +62,7 @@ struct task_struct;
* processor.
*/
struct paca_struct {
-#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_PSERIES
/*
* Because hw_cpu_id, unlike other paca fields, is accessed
* routinely from other CPUs (from the IRQ code), we stick to
@@ -67,7 +71,8 @@ struct paca_struct {
*/
struct lppaca *lppaca_ptr; /* Pointer to LpPaca for PLIC */
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
+
/*
* MAGIC: the spinlock functions in arch/powerpc/lib/locks.c
* load lock_token and paca_index with a single lwz
@@ -141,7 +146,7 @@ struct paca_struct {
#ifdef CONFIG_PPC_BOOK3S
mm_context_id_t mm_ctx_id;
#ifdef CONFIG_PPC_MM_SLICES
- u64 mm_ctx_low_slices_psize;
+ unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long mm_ctx_slb_addr_limit;
#else
@@ -160,10 +165,14 @@ struct paca_struct {
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
u8 irq_soft_mask; /* mask for irq soft masking */
+ u8 soft_enabled; /* irq soft-enable flag */
u8 irq_happened; /* irq happened while soft-disabled */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
u8 nap_state_lost; /* NV GPR values lost in power7_idle */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ u8 pmcregs_in_use; /* pseries puts this in lppaca */
+#endif
u64 sprg_vdso; /* Saved user-visible sprg */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
u64 tm_scratch; /* TM scratch area for reclaim */
@@ -177,6 +186,8 @@ struct paca_struct {
u8 thread_mask;
/* Mask to denote subcore sibling threads */
u8 subcore_sibling_mask;
+ /* Flag to request this thread not to stop */
+ atomic_t dont_stop;
/*
* Pointer to an array which contains pointer
* to the sibling threads' paca.
@@ -241,18 +252,20 @@ struct paca_struct {
void *rfi_flush_fallback_area;
u64 l1d_flush_size;
#endif
-};
+} ____cacheline_aligned;
extern void copy_mm_to_paca(struct mm_struct *mm);
-extern struct paca_struct *paca;
+extern struct paca_struct **paca_ptrs;
extern void initialise_paca(struct paca_struct *new_paca, int cpu);
extern void setup_paca(struct paca_struct *new_paca);
-extern void allocate_pacas(void);
+extern void allocate_paca_ptrs(void);
+extern void allocate_paca(int cpu);
extern void free_unused_pacas(void);
#else /* CONFIG_PPC64 */
-static inline void allocate_pacas(void) { };
+static inline void allocate_paca_ptrs(void) { };
+static inline void allocate_paca(int cpu) { };
static inline void free_unused_pacas(void) { };
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 8da5d4c..dec9ce5 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -126,7 +126,15 @@ extern long long virt_phys_offset;
#ifdef CONFIG_FLATMEM
#define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT))
-#define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
+#ifndef __ASSEMBLY__
+extern unsigned long max_mapnr;
+static inline bool pfn_valid(unsigned long pfn)
+{
+ unsigned long min_pfn = ARCH_PFN_OFFSET;
+
+ return pfn >= min_pfn && pfn < max_mapnr;
+}
+#endif
#endif
#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
@@ -344,5 +352,6 @@ typedef struct page *pgtable_t;
#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */
+#include <asm/slice.h>
#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 56234c6..af04acd 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -86,65 +86,6 @@ extern u64 ppc64_pft_size;
#endif /* __ASSEMBLY__ */
-#ifdef CONFIG_PPC_MM_SLICES
-
-#define SLICE_LOW_SHIFT 28
-#define SLICE_HIGH_SHIFT 40
-
-#define SLICE_LOW_TOP (0x100000000ul)
-#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
-
-#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
-#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-
-#ifndef __ASSEMBLY__
-struct mm_struct;
-
-extern unsigned long slice_get_unmapped_area(unsigned long addr,
- unsigned long len,
- unsigned long flags,
- unsigned int psize,
- int topdown);
-
-extern unsigned int get_slice_psize(struct mm_struct *mm,
- unsigned long addr);
-
-extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
-extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long len, unsigned int psize);
-
-#endif /* __ASSEMBLY__ */
-#else
-#define slice_init()
-#ifdef CONFIG_PPC_BOOK3S_64
-#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize) \
-do { \
- (mm)->context.user_psize = (psize); \
- (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-#else /* !CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr) MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr) MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#define slice_set_range_psize(mm, start, len, psize) \
- slice_set_user_psize((mm), (psize))
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
-#endif /* !CONFIG_HUGETLB_PAGE */
-
#define VM_DATA_DEFAULT_FLAGS \
(is_32bit_task() ? \
VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index d82802f..401c62a 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -76,10 +76,11 @@ extern int pci_proc_domain(struct pci_bus *bus);
struct vm_area_struct;
-/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() and it does WC */
-#define HAVE_PCI_MMAP 1
-#define arch_can_pci_mmap_io() 1
-#define arch_can_pci_mmap_wc() 1
+/* Tell PCI code what kind of PCI resource mappings we support */
+#define HAVE_PCI_MMAP 1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+#define arch_can_pci_mmap_io() 1
+#define arch_can_pci_mmap_wc() 1
extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
size_t count);
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 723bf48..67a8a95 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -53,6 +53,8 @@ struct power_pmu {
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+ int n_blacklist_ev;
+ int *blacklist_ev;
/* BHRB entries in the PMU */
int bhrb_nr;
};
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 55eddf5..96c1a46 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H
#define _ASM_POWERPC_PLPAR_WRAPPERS_H
+#ifdef CONFIG_PPC_PSERIES
+
#include <linux/string.h>
#include <linux/irqflags.h>
@@ -9,14 +11,6 @@
#include <asm/paca.h>
#include <asm/page.h>
-/* Get state of physical CPU from query_cpu_stopped */
-int smp_query_cpu_stopped(unsigned int pcpu);
-#define QCSS_STOPPED 0
-#define QCSS_STOPPING 1
-#define QCSS_NOT_STOPPED 2
-#define QCSS_HARDWARE_ERROR -1
-#define QCSS_HARDWARE_BUSY -2
-
static inline long poll_pending(void)
{
return plpar_hcall_norets(H_POLL_PENDING);
@@ -311,17 +305,17 @@ static inline long enable_little_endian_exceptions(void)
return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0);
}
-static inline long plapr_set_ciabr(unsigned long ciabr)
+static inline long plpar_set_ciabr(unsigned long ciabr)
{
return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0);
}
-static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
+static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
{
return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
}
-static inline long plapr_signal_sys_reset(long cpu)
+static inline long plpar_signal_sys_reset(long cpu)
{
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
}
@@ -340,4 +334,12 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
return rc;
}
+#else /* !CONFIG_PPC_PSERIES */
+
+static inline long plpar_set_ciabr(unsigned long ciabr)
+{
+ return 0;
+}
+#endif /* CONFIG_PPC_PSERIES */
+
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index 5a9ede4..7ac3586 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -31,10 +31,21 @@ void ppc_enable_pmcs(void);
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/lppaca.h>
+#include <asm/firmware.h>
static inline void ppc_set_pmu_inuse(int inuse)
{
- get_lppaca()->pmcregs_in_use = inuse;
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+#ifdef CONFIG_PPC_PSERIES
+ get_lppaca()->pmcregs_in_use = inuse;
+#endif
+ } else {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ get_paca()->pmcregs_in_use = inuse;
+#endif
+ }
+#endif
}
extern void power4_enable_pmcs(void);
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf25..d2d8c28 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,12 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
u64 desc);
+extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind);
+extern int pnv_pci_disable_tunnel(struct pci_dev *dev);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
unsigned int virq);
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index dc5f6a5..d1c2d2e6 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -40,6 +40,7 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context,
}
static inline void pnv_tm_init(void) { }
+static inline void pnv_power9_force_smt4(void) { }
#endif
#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index f1083bc..18883b8 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -232,6 +232,7 @@
#define PPC_INST_MSGSYNC 0x7c0006ec
#define PPC_INST_MSGSNDP 0x7c00011c
#define PPC_INST_MSGCLRP 0x7c00015c
+#define PPC_INST_MTMSRD 0x7c000164
#define PPC_INST_MTTMR 0x7c0003dc
#define PPC_INST_NOP 0x60000000
#define PPC_INST_PASTE 0x7c20070d
@@ -239,8 +240,10 @@
#define PPC_INST_POPCNTB_MASK 0xfc0007fe
#define PPC_INST_POPCNTD 0x7c0003f4
#define PPC_INST_POPCNTW 0x7c0002f4
+#define PPC_INST_RFEBB 0x4c000124
#define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e
+#define PPC_INST_RFID 0x4c000024
#define PPC_INST_RFMCI 0x4c00004c
#define PPC_INST_MFSPR 0x7c0002a6
#define PPC_INST_MFSPR_DSCR 0x7c1102a6
@@ -271,12 +274,14 @@
#define PPC_INST_TLBSRX_DOT 0x7c0006a5
#define PPC_INST_VPMSUMW 0x10000488
#define PPC_INST_VPMSUMD 0x100004c8
+#define PPC_INST_VPERMXOR 0x1000002d
#define PPC_INST_XXLOR 0xf0000490
#define PPC_INST_XXSWAPD 0xf0000250
#define PPC_INST_XVCPSGNDP 0xf0000780
#define PPC_INST_TRECHKPT 0x7c0007dd
#define PPC_INST_TRECLAIM 0x7c00075d
#define PPC_INST_TABORT 0x7c00071d
+#define PPC_INST_TSR 0x7c0005dd
#define PPC_INST_NAP 0x4c000364
#define PPC_INST_SLEEP 0x4c0003a4
@@ -517,6 +522,11 @@
#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
VSX_XX3((t), (a), (b))))
+#define VPERMXOR(vrt, vra, vrb, vrc) \
+ stringify_in_c(.long (PPC_INST_VPERMXOR | \
+ ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+ ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))
+
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE)
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index ae94b36..13f7f4c 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -439,14 +439,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
/* The following stops all load and store data streams associated with stream
* ID (ie. streams created explicitly). The embedded and server mnemonics for
- * dcbt are different so we use machine "power4" here explicitly.
+ * dcbt are different so this must only be used for server.
*/
-#define DCBT_STOP_ALL_STREAM_IDS(scratch) \
-.machine push ; \
-.machine "power4" ; \
- lis scratch,0x60000000@h; \
- dcbt 0,scratch,0b01010; \
-.machine pop
+#define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch) \
+ lis scratch,0x60000000@h; \
+ dcbt 0,scratch,0b01010
/*
* toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 01299cdc..c4b36a4 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -109,6 +109,13 @@ void release_thread(struct task_struct *);
#define TASK_SIZE_64TB (0x0000400000000000UL)
#define TASK_SIZE_128TB (0x0000800000000000UL)
#define TASK_SIZE_512TB (0x0002000000000000UL)
+#define TASK_SIZE_1PB (0x0004000000000000UL)
+#define TASK_SIZE_2PB (0x0008000000000000UL)
+/*
+ * With 52 bits in the address we can support
+ * upto 4PB of range.
+ */
+#define TASK_SIZE_4PB (0x0010000000000000UL)
/*
* For now 512TB is only supported with book3s and 64K linux page size.
@@ -117,11 +124,17 @@ void release_thread(struct task_struct *);
/*
* Max value currently used:
*/
-#define TASK_SIZE_USER64 TASK_SIZE_512TB
+#define TASK_SIZE_USER64 TASK_SIZE_4PB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE TASK_SIZE_512TB
#else
#define TASK_SIZE_USER64 TASK_SIZE_64TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
+/*
+ * We don't need to allocate extended context ids for 4K page size, because
+ * we limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
#endif
/*
@@ -505,6 +518,7 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */
extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
extern void power7_idle_type(unsigned long type);
extern unsigned long power9_idle_stop(unsigned long psscr_val);
+extern unsigned long power9_offline_stop(unsigned long psscr_val);
extern void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e6c7ead..cb0f272 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -156,6 +156,8 @@
#define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
+#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
/* Floating Point Status and Control Register (FPSCR) Fields */
#define FPSCR_FX 0x80000000 /* FPU exception summary */
@@ -237,7 +239,12 @@
#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
+#define TEXASR_ABORT __MASK(63-31) /* terminated by tabort or treclaim */
+#define TEXASR_SUSP __MASK(63-32) /* tx failed in suspended state */
+#define TEXASR_HV __MASK(63-34) /* MSR[HV] when failure occurred */
+#define TEXASR_PR __MASK(63-35) /* MSR[PR] when failure occurred */
#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
+#define TEXASR_EXACT __MASK(63-37) /* TFIAR value is exact */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
#define SPRN_TIDR 144 /* Thread ID register */
#define SPRN_CTRLF 0x088
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
new file mode 100644
index 0000000..fa4d2e1
--- /dev/null
+++ b/arch/powerpc/include/asm/security_features.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Security related feature bit definitions.
+ *
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SECURITY_FEATURES_H
+#define _ASM_POWERPC_SECURITY_FEATURES_H
+
+
+extern unsigned long powerpc_security_features;
+extern bool rfi_flush;
+
+static inline void security_ftr_set(unsigned long feature)
+{
+ powerpc_security_features |= feature;
+}
+
+static inline void security_ftr_clear(unsigned long feature)
+{
+ powerpc_security_features &= ~feature;
+}
+
+static inline bool security_ftr_enabled(unsigned long feature)
+{
+ return !!(powerpc_security_features & feature);
+}
+
+
+// Features indicating support for Spectre/Meltdown mitigations
+
+// The L1-D cache can be flushed with ori r30,r30,0
+#define SEC_FTR_L1D_FLUSH_ORI30 0x0000000000000001ull
+
+// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2)
+#define SEC_FTR_L1D_FLUSH_TRIG2 0x0000000000000002ull
+
+// ori r31,r31,0 acts as a speculation barrier
+#define SEC_FTR_SPEC_BAR_ORI31 0x0000000000000004ull
+
+// Speculation past bctr is disabled
+#define SEC_FTR_BCCTRL_SERIALISED 0x0000000000000008ull
+
+// Entries in L1-D are private to a SMT thread
+#define SEC_FTR_L1D_THREAD_PRIV 0x0000000000000010ull
+
+// Indirect branch prediction cache disabled
+#define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull
+
+
+// Features indicating need for Spectre/Meltdown mitigations
+
+// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest)
+#define SEC_FTR_L1D_FLUSH_HV 0x0000000000000040ull
+
+// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace)
+#define SEC_FTR_L1D_FLUSH_PR 0x0000000000000080ull
+
+// A speculation barrier should be used for bounds checks (Spectre variant 1)
+#define SEC_FTR_BNDS_CHK_SPEC_BAR 0x0000000000000100ull
+
+// Firmware configuration indicates user favours security over performance
+#define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull
+
+
+// Features enabled by default
+#define SEC_FTR_DEFAULT \
+ (SEC_FTR_L1D_FLUSH_HV | \
+ SEC_FTR_L1D_FLUSH_PR | \
+ SEC_FTR_BNDS_CHK_SPEC_BAR | \
+ SEC_FTR_FAVOUR_SECURITY)
+
+#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 469b7fd..27fa52e 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -23,6 +23,7 @@ extern void reloc_got2(unsigned long);
#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
void check_for_initrd(void);
+void mem_topology_setup(void);
void initmem_init(void);
void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180
@@ -49,7 +50,7 @@ enum l1d_flush_type {
L1D_FLUSH_MTTRIG = 0x8,
};
-void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_rfi_flush(enum l1d_flush_type, bool enable);
void do_rfi_flush_fixups(enum l1d_flush_type types);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
new file mode 100644
index 0000000..e40406cf5
--- /dev/null
+++ b/arch/powerpc/include/asm/slice.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SLICE_H
+#define _ASM_POWERPC_SLICE_H
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/slice.h>
+#elif defined(CONFIG_PPC64)
+#include <asm/nohash/64/slice.h>
+#elif defined(CONFIG_PPC_MMU_NOHASH)
+#include <asm/nohash/32/slice.h>
+#endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_SLICE_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index fac963e..cfecfee 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -31,6 +31,7 @@
extern int boot_cpuid;
extern int spinning_secondaries;
+extern u32 *cpu_to_phys_id;
extern void cpu_die(void);
extern int cpu_to_chip_id(int cpu);
@@ -170,12 +171,12 @@ static inline const struct cpumask *cpu_sibling_mask(int cpu)
#ifdef CONFIG_PPC64
static inline int get_hard_smp_processor_id(int cpu)
{
- return paca[cpu].hw_cpu_id;
+ return paca_ptrs[cpu]->hw_cpu_id;
}
static inline void set_hard_smp_processor_id(int cpu, int phys)
{
- paca[cpu].hw_cpu_id = phys;
+ paca_ptrs[cpu]->hw_cpu_id = phys;
}
#else
/* 32-bit */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index a7916ee..bc66712 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,7 +17,7 @@
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end);
+extern int create_section_mapping(unsigned long start, unsigned long end, int nid);
extern int remove_section_mapping(unsigned long start, unsigned long end);
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index b9ebc30..72dc4dd 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -56,6 +56,8 @@
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return false;
return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
}
#endif
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index c3ca42c..be8c9fa 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -35,7 +35,6 @@ static inline void disable_kernel_fp(void)
msr_check_and_clear(MSR_FP);
}
#else
-static inline void __giveup_fpu(struct task_struct *t) { }
static inline void save_fpu(struct task_struct *t) { }
static inline void flush_fp_to_thread(struct task_struct *t) { }
#endif
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index 63e7f5a..6ec5460 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -6,10 +6,6 @@
#include <linux/stringify.h>
#include <asm/feature-fixups.h>
-#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
-#define __SUBARCH_HAS_LWSYNC
-#endif
-
#ifndef __ASSEMBLY__
extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 4a12c00..5964145db 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -70,6 +70,7 @@ static inline struct thread_info *current_thread_info(void)
return (struct thread_info *)val;
}
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index b240666..db546c0 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -31,6 +31,7 @@ extern void to_tm(int tim, struct rtc_time * tm);
extern void tick_broadcast_ipi_handler(void);
extern void generic_calibrate_decr(void);
+extern void hdec_interrupt(struct pt_regs *regs);
/* Some sane defaults: 125 MHz timebase, 1GHz processor */
extern unsigned long ppc_proc_freq;
@@ -46,7 +47,7 @@ struct div_result {
/* Accessor functions for the timebase (RTC on 601) registers. */
/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
#ifdef CONFIG_6xx
-#define __USE_RTC() (!cpu_has_feature(CPU_FTR_USE_TB))
+#define __USE_RTC() (cpu_has_feature(CPU_FTR_USE_RTC))
#else
#define __USE_RTC() 0
#endif
@@ -204,6 +205,7 @@ struct cpu_usage {
DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
extern void secondary_cpu_time_init(void);
+extern void __init time_init(void);
DECLARE_PER_CPU(u64, decrementers_next_tb);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 51bfeb8..a62ee66 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -47,9 +47,13 @@
#else
-#define __access_ok(addr, size, segment) \
- (((addr) <= (segment).seg) && \
- (((size) == 0) || (((size) - 1) <= ((segment).seg - (addr)))))
+static inline int __access_ok(unsigned long addr, unsigned long size,
+ mm_segment_t seg)
+{
+ if (addr > seg.seg)
+ return 0;
+ return (size == 0 || size - 1 <= seg.seg - addr);
+}
#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2358f97..2b4c40b2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC64) += vdso64/
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ea5eb91..6bee65f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -221,12 +221,17 @@ int main(void)
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXSLB, paca_struct, exslb);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_PSERIES
OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
+#endif
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
+#endif
OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
@@ -568,6 +573,7 @@ int main(void)
OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar);
OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar);
OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr);
+ OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr);
OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm);
OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr);
OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr);
@@ -650,6 +656,7 @@ int main(void)
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_TID, tid);
+ HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
@@ -759,6 +766,7 @@ int main(void)
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+ OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
STOP_SPR(STOP_PID, pid);
STOP_SPR(STOP_LDBAR, ldbar);
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index c5e5a94..a9f3970 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -226,7 +226,7 @@ BEGIN_FTR_SECTION
beq 1f
END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
lwz r6,CPU_SPEC_FEATURES(r4)
- andi. r0,r6,CPU_FTR_L3_DISABLE_NAP
+ andis. r0,r6,CPU_FTR_L3_DISABLE_NAP@h
beq 1f
li r7,CPU_FTR_CAN_NAP
andc r6,r6,r7
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 462aed9..8d142e5 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -162,7 +162,7 @@ _GLOBAL(__setup_cpu_e5500)
* the feature on the primary core, avoid doing it on the
* secondary core.
*/
- andis. r6, r3, CPU_FTR_EMB_HV@h
+ andi. r6, r3, CPU_FTR_EMB_HV
beq 2f
rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
stw r3, CPU_SPEC_FEATURES(r4)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c40a9fc..c8fc969 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -133,36 +133,6 @@ extern void __restore_cpu_e6500(void);
static struct cpu_spec __initdata cpu_specs[] = {
#ifdef CONFIG_PPC_BOOK3S_64
- { /* Power4 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00350000,
- .cpu_name = "POWER4 (gp)",
- .cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power4",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power4",
- },
- { /* Power4+ */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00380000,
- .cpu_name = "POWER4+ (gq)",
- .cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power4",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power4",
- },
{ /* PPC970 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00390000,
@@ -553,11 +523,30 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
- { /* Power9 DD 2.1 or later (see DD2.0 above) */
+ { /* Power9 DD 2.1 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0201,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_1,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD2.2 or later */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
.cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9_DD2_1,
+ .cpu_features = CPU_FTRS_POWER9_DD2_2,
.cpu_user_features = COMMON_USER_POWER9,
.cpu_user_features2 = COMMON_USER2_POWER9,
.mmu_features = MMU_FTRS_POWER9,
@@ -609,15 +598,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
- .cpu_name = "POWER4 (compatible)",
+ .cpu_name = "POWER5 (compatible)",
.cpu_features = CPU_FTRS_COMPATIBLE,
.cpu_user_features = COMMON_USER_PPC64,
- .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
+ .mmu_features = MMU_FTRS_POWER,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
- .platform = "power4",
+ .platform = "power5",
}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 00b2151..17c8b99 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -238,7 +238,7 @@ static void __maybe_unused crash_kexec_wait_realmode(int cpu)
if (i == cpu)
continue;
- while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+ while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
barrier();
if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
break;
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8ca5d5b7..e88fbb1 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -54,8 +54,7 @@ struct dt_cpu_feature {
};
#define CPU_FTRS_BASE \
- (CPU_FTR_USE_TB | \
- CPU_FTR_LWSYNC | \
+ (CPU_FTR_LWSYNC | \
CPU_FTR_FPU_UNAVAILABLE |\
CPU_FTR_NODSISRALIGN |\
CPU_FTR_NOEXECUTE |\
@@ -84,6 +83,7 @@ static int hv_mode;
static struct {
u64 lpcr;
+ u64 lpcr_clear;
u64 hfscr;
u64 fscr;
} system_registers;
@@ -92,6 +92,8 @@ static void (*init_pmu_registers)(void);
static void __restore_cpu_cpufeatures(void)
{
+ u64 lpcr;
+
/*
* LPCR is restored by the power on engine already. It can be changed
* after early init e.g., by radix enable, and we have no unified API
@@ -104,8 +106,10 @@ static void __restore_cpu_cpufeatures(void)
* The best we can do to accommodate secondary boot and idle restore
* for now is "or" LPCR with existing.
*/
-
- mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= system_registers.lpcr;
+ lpcr &= ~system_registers.lpcr_clear;
+ mtspr(SPRN_LPCR, lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
@@ -325,8 +329,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
+ system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR);
lpcr = mfspr(SPRN_LPCR);
- lpcr &= ~LPCR_ISL;
+ lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
@@ -590,6 +595,8 @@ static struct dt_cpu_feature_match __initdata
{"virtual-page-class-key-protection", feat_enable, 0},
{"transactional-memory", feat_enable_tm, CPU_FTR_TM},
{"transactional-memory-v3", feat_enable_tm, 0},
+ {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
+ {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
{"idle-nap", feat_enable_idle_nap, 0},
{"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
{"idle-stop", feat_enable_idle_stop, 0},
@@ -707,11 +714,28 @@ static __init void cpufeatures_cpu_quirks(void)
*/
if ((version & 0xffffff00) == 0x004e0100)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+ else if ((version & 0xffffefff) == 0x004e0200)
+ ; /* DD2.0 has no feature flag */
else if ((version & 0xffffefff) == 0x004e0201)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ else if ((version & 0xffffefff) == 0x004e0202) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ } else /* DD2.1 and up have DD2_1 */
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- if ((version & 0xffff0000) == 0x004e0000)
+ if ((version & 0xffff0000) == 0x004e0000) {
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
+ }
+
+ /*
+ * PKEY was not in the initial base or feature node
+ * specification, but it should become optional in the next
+ * cpu feature version sequence.
+ */
+ cur_cpu_spec->cpu_features |= CPU_FTR_PKEY;
}
static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 2b9df00..bc640e4 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* Check PHB state */
ret = eeh_ops->get_state(phb_pe, NULL);
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) ||
- (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
- (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
ret = 0;
goto out;
}
@@ -433,7 +431,6 @@ out:
int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
@@ -525,8 +522,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* state, PE is in good state.
*/
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) ||
- ((ret & active_flags) == active_flags)) {
+ (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
eeh_stats.false_positives++;
pe->false_positives++;
rc = 0;
@@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Frozen parent PE ? */
ret = eeh_ops->get_state(parent_pe, NULL);
- if (ret > 0 &&
- (ret & active_flags) != active_flags)
+ if (ret > 0 && !eeh_state_active(ret))
pe = parent_pe;
/* Next parent level */
@@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag)
*/
int eeh_pe_reset_full(struct eeh_pe *pe)
{
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
int type = EEH_RESET_HOT;
unsigned int freset = 0;
@@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
/* Wait until the PE is in a functioning state */
state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if ((state & active_flags) == active_flags)
+ if (eeh_state_active(state))
break;
if (state < 0) {
@@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe)
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
struct pci_device_id *id;
- int flags, ret;
+ int ret;
/* Check PE state */
- flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
ret = eeh_ops->get_state(pe, NULL);
if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
return 0;
/* Unfrozen PE, nothing to do */
- if ((ret & flags) == flags)
+ if (eeh_state_active(ret))
return 0;
/* Frozen PE, check if it needs PE level reset */
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index d4cc266..201943d 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -84,8 +84,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
* @addr: mmio (PIO) phys address or i/o port number
*
* Given an mmio phys address, or a port number, find a pci device
- * that implements this address. Be sure to pci_dev_put the device
- * when finished. I/O port numbers are assumed to be offset
+ * that implements this address. I/O port numbers are assumed to be offset
* from zero (that is, they do *not* have pci_io_addr added in).
* It is safe to call this function within an interrupt.
*/
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 0c0b66f..b8a329f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_frozen;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ !driver->err_handler->error_detected)
+ goto out;
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
@@ -227,8 +227,12 @@ static void *eeh_report_error(void *data, void *userdata)
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
edev->in_error = true;
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+
+out:
+ eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -251,15 +255,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+ device_lock(&dev->dev);
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
if (!driver->err_handler ||
!driver->err_handler->mmio_enabled ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ (edev->mode & EEH_DEV_NO_HANDLER))
+ goto out;
rc = driver->err_handler->mmio_enabled(dev);
@@ -267,7 +270,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+out:
eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -290,20 +296,20 @@ static void *eeh_report_reset(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_enable_irq(dev);
if (!driver->err_handler ||
!driver->err_handler->slot_reset ||
(edev->mode & EEH_DEV_NO_HANDLER) ||
- (!edev->in_error)) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ (!edev->in_error))
+ goto out;
rc = driver->err_handler->slot_reset(dev);
if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -311,7 +317,10 @@ static void *eeh_report_reset(void *data, void *userdata)
if (*res == PCI_ERS_RESULT_DISCONNECT &&
rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+out:
eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -362,10 +371,12 @@ static void *eeh_report_resume(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
was_in_error = edev->in_error;
edev->in_error = false;
@@ -375,18 +386,20 @@ static void *eeh_report_resume(void *data, void *userdata)
!driver->err_handler->resume ||
(edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
edev->mode &= ~EEH_DEV_NO_HANDLER;
- eeh_pcid_put(dev);
- return NULL;
+ goto out;
}
driver->err_handler->resume(dev);
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+out:
+ eeh_pcid_put(dev);
#ifdef CONFIG_PCI_IOV
if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
#endif
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -406,23 +419,26 @@ static void *eeh_report_failure(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_perm_failure;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ !driver->err_handler->error_detected)
+ goto out;
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+out:
+ eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -619,17 +635,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
/**
* eeh_reset_device - Perform actual reset of a pci slot
+ * @driver_eeh_aware: Does the device's driver provide EEH support?
* @pe: EEH PE
* @bus: PCI bus corresponding to the isolcated slot
+ * @rmv_data: Optional, list to record removed devices
*
* This routine must be called to do reset on the indicated PE.
* During the reset, udev might be invoked because those affected
* PCI devices will be removed and then added.
*/
static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
- struct eeh_rmv_data *rmv_data)
+ struct eeh_rmv_data *rmv_data,
+ bool driver_eeh_aware)
{
- struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
@@ -645,16 +663,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* into pci_hp_add_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
- if (bus) {
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- } else {
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- }
- } else if (frozen_bus) {
+ if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+ } else {
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
}
/*
@@ -689,8 +703,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* the device up before the scripts have taken it down,
* potentially weird things happen.
*/
- if (bus) {
- pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+ if (!driver_eeh_aware || rmv_data->removed) {
+ pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+ (driver_eeh_aware ? "partial" : "complete"));
ssleep(5);
/*
@@ -703,19 +718,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
if (pe->type & EEH_PE_VF) {
eeh_add_virt_device(edev, NULL);
} else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ if (!driver_eeh_aware)
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_hp_add_devices(bus);
}
- } else if (frozen_bus && rmv_data->removed) {
- pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
- ssleep(5);
-
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
- eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- if (pe->type & EEH_PE_VF)
- eeh_add_virt_device(edev, NULL);
- else
- pci_hp_add_devices(frozen_bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);
@@ -733,28 +739,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
- * @pe: EEH PE
+ * @pe: EEH PE - which should not be used after we return, as it may
+ * have been invalidated.
*
* Attempts to recover the given PE. If recovery fails or the PE has failed
* too many times, remove the PE.
*
- * Returns true if @pe should no longer be used, else false.
+ * While PHB detects address or data parity errors on particular PCI
+ * slot, the associated PE will be frozen. Besides, DMA's occurring
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
+ * #PERR or other misc PCI-related errors also can trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
*/
-static bool eeh_handle_normal_event(struct eeh_pe *pe)
+void eeh_handle_normal_event(struct eeh_pe *pe)
{
- struct pci_bus *frozen_bus;
+ struct pci_bus *bus;
struct eeh_dev *edev, *tmp;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
- frozen_bus = eeh_pe_bus_get(pe);
- if (!frozen_bus) {
+ bus = eeh_pe_bus_get(pe);
+ if (!bus) {
pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
- return false;
+ return;
}
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
if (pe->freeze_count > eeh_max_freezes) {
@@ -806,7 +826,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
*/
if (result == PCI_ERS_RESULT_NONE) {
pr_info("EEH: Reset with hotplug activity\n");
- rc = eeh_reset_device(pe, frozen_bus, NULL);
+ rc = eeh_reset_device(pe, bus, NULL, false);
if (rc) {
pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc);
@@ -858,7 +878,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
- rc = eeh_reset_device(pe, NULL, &rmv_data);
+ rc = eeh_reset_device(pe, bus, &rmv_data, true);
if (rc) {
pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc);
@@ -891,7 +911,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Notify device driver to resume\n");
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
- return false;
+ goto final;
hard_fail:
/*
@@ -916,23 +936,21 @@ hard_fail:
* all removed devices correctly to avoid access
* the their PCI config any more.
*/
- if (frozen_bus) {
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
- pci_lock_rescan_remove();
- pci_hp_remove_devices(frozen_bus);
- pci_unlock_rescan_remove();
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- /* The passed PE should no longer be used */
- return true;
- }
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
}
- return false;
+final:
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
}
/**
@@ -942,7 +960,7 @@ hard_fail:
* specific PE. Iterates through possible failures and handles them as
* necessary.
*/
-static void eeh_handle_special_event(void)
+void eeh_handle_special_event(void)
{
struct eeh_pe *pe, *phb_pe;
struct pci_bus *bus;
@@ -1005,15 +1023,7 @@ static void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
- /*
- * eeh_handle_normal_event() can make the PE stale if it
- * determines that the PE cannot possibly be recovered.
- * Don't modify the PE state if that's the case.
- */
- if (eeh_handle_normal_event(pe))
- continue;
-
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
list_for_each_entry(hose, &hose_list, list_node) {
@@ -1049,28 +1059,3 @@ static void eeh_handle_special_event(void)
break;
} while (rc != EEH_NEXT_ERR_NONE);
}
-
-/**
- * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE
- *
- * While PHB detects address or data parity errors on particular PCI
- * slot, the associated PE will be frozen. Besides, DMA's occurring
- * to wild addresses (which usually happen due to bugs in device
- * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
- * #PERR or other misc PCI-related errors also can trigger EEH errors.
- *
- * Recovery process consists of unplugging the device driver (which
- * generated hotplug events to userspace), then issuing a PCI #RST to
- * the device, then reconfiguring the PCI config space for all bridges
- * & devices under this slot, and then finally restarting the device
- * drivers (which cause a second set of hotplug events to go out to
- * userspace).
- */
-void eeh_handle_event(struct eeh_pe *pe)
-{
- if (pe)
- eeh_handle_normal_event(pe);
- else
- eeh_handle_special_event();
-}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index accbf8b..61c9356 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy)
/* We might have event without binding PE */
pe = event->pe;
if (pe) {
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
if (pe->type & EEH_PE_PHB)
pr_info("EEH: Detected error on PHB#%x\n",
pe->phb->global_number);
@@ -81,10 +80,9 @@ static int eeh_event_handler(void * dummy)
pr_info("EEH: Detected PCI bus error on "
"PHB#%x-PE#%x\n",
pe->phb->global_number, pe->addr);
- eeh_handle_event(pe);
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_handle_normal_event(pe);
} else {
- eeh_handle_event(NULL);
+ eeh_handle_special_event();
}
kfree(event);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2cb5109..5169560 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -545,7 +545,7 @@ _GLOBAL(_switch)
/* Cancel all explict user streams as they will have no use after context
* switch and will stop the HW from creating streams itself
*/
- DCBT_STOP_ALL_STREAM_IDS(r6)
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
#endif
addi r6,r4,-THREAD /* Convert THREAD to 'current' */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1ecfd8f..ae6a849 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -139,6 +139,21 @@ EXC_COMMON_BEGIN(system_reset_idle_common)
b pnv_powersave_wakeup
#endif
+/*
+ * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * the right thing. We do not want to reconcile because that goes
+ * through irq tracing which we don't want in NMI.
+ *
+ * Save PACAIRQHAPPENED because some code will do a hard disable
+ * (e.g., xmon). So we want to restore this back to where it was
+ * when we return. DAR is unused in the stack, so save it there.
+ */
+#define ADD_RECONCILE_NMI \
+ li r10,IRQS_ALL_DISABLED; \
+ stb r10,PACAIRQSOFTMASK(r13); \
+ lbz r10,PACAIRQHAPPENED(r13); \
+ std r10,_DAR(r1)
+
EXC_COMMON_BEGIN(system_reset_common)
/*
* Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
@@ -157,16 +172,56 @@ EXC_COMMON_BEGIN(system_reset_common)
subi r1,r1,INT_FRAME_SIZE
EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
system_reset, system_reset_exception,
- ADD_NVGPRS;ADD_RECONCILE)
+ ADD_NVGPRS;ADD_RECONCILE_NMI)
+
+ /* This (and MCE) can be simplified with mtmsrd L=1 */
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r0,MSR_RI
+ mfmsr r9
+ andc r9,r9,r0
+ mtmsrd r9,1
/*
- * The stack is no longer in use, decrement in_nmi.
+ * MSR_RI is clear, now we can decrement paca->in_nmi.
*/
lhz r10,PACA_IN_NMI(r13)
subi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- b ret_from_except
+ /*
+ * Restore soft mask settings.
+ */
+ ld r10,_DAR(r1)
+ stb r10,PACAIRQHAPPENED(r13)
+ ld r10,SOFTE(r1)
+ stb r10,PACAIRQSOFTMASK(r13)
+
+ /*
+ * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP.
+ * Should share common bits...
+ */
+
+ /* Move original SRR0 and SRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ mtspr SPRN_SRR1,r9
+ ld r3,_NIP(r1)
+ mtspr SPRN_SRR0,r3
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ REST_GPR(0, r1)
+ REST_8GPRS(2, r1)
+ REST_GPR(10, r1)
+ ld r11,_CCR(r1)
+ mtcr r11
+ REST_GPR(11, r1)
+ REST_2GPRS(12, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+ RFI_TO_USER_OR_KERNEL
#ifdef CONFIG_PPC_PSERIES
/*
@@ -621,7 +676,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10
- beq- 8f /* if bad address, make full stack frame */
+ /*
+ * Large address, check whether we have to allocate new contexts.
+ */
+ beq- 8f
bne- cr5,2f /* if unrecoverable exception, oops */
@@ -629,14 +687,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
bne cr4,1f /* returning to kernel */
-.machine push
-.machine "power4"
mtcrf 0x80,r9
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
RESTORE_CTR(r9, PACA_EXSLB)
RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -649,14 +704,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
RFI_TO_USER
b . /* prevent speculative execution */
1:
-.machine push
-.machine "power4"
mtcrf 0x80,r9
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
RESTORE_CTR(r9, PACA_EXSLB)
RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -685,7 +737,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mr r3,r12
mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10,bad_addr_slb)
+ LOAD_HANDLER(r10, large_addr_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
@@ -700,7 +752,7 @@ EXC_COMMON_BEGIN(unrecov_slb)
bl unrecoverable_exception
b 1b
-EXC_COMMON_BEGIN(bad_addr_slb)
+EXC_COMMON_BEGIN(large_addr_slb)
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
ld r3, PACA_EXSLB+EX_DAR(r13)
@@ -710,7 +762,7 @@ EXC_COMMON_BEGIN(bad_addr_slb)
std r10, _TRAP(r1)
2: bl save_nvgprs
addi r3, r1, STACK_FRAME_OVERHEAD
- bl slb_miss_bad_addr
+ bl slb_miss_large_addr
b ret_from_except
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1273,7 +1325,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
bne+ denorm_assist
#endif
- KVMTEST_PR(0x1500)
+ KVMTEST_HV(0x1500)
EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
@@ -1285,7 +1337,7 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500)
+TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
@@ -1466,7 +1518,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
- DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
@@ -1506,7 +1558,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
- DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index a61151a..6eca15f 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -392,19 +392,20 @@ generic_secondary_common_init:
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
- LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
- ld r13,0(r13) /* Get base vaddr of paca array */
#ifndef CONFIG_SMP
- addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
b kexec_wait /* wait for next kernel if !SMP */
#else
+ LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointe */
+ ld r8,0(r8) /* Get base vaddr of array */
LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
lwz r7,0(r7) /* also the max paca allocated */
li r5,0 /* logical cpu id */
-1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
+1:
+ sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */
+ ldx r13,r9,r8 /* r13 = paca_ptrs[cpu id] */
+ lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
beq 2f
- addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
addi r5,r5,1
cmpw r5,r7 /* Check if more pacas exist */
blt 1b
@@ -756,10 +757,10 @@ _GLOBAL(pmac_secondary_start)
mtmsrd r3 /* RI on */
/* Set up a paca value for this processor. */
- LOAD_REG_ADDR(r4,paca) /* Load paca pointer */
- ld r4,0(r4) /* Get base vaddr of paca array */
- mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
- add r13,r13,r4 /* for this processor. */
+ LOAD_REG_ADDR(r4,paca_ptrs) /* Load paca pointer */
+ ld r4,0(r4) /* Get base vaddr of paca_ptrs array */
+ sldi r5,r24,3 /* get paca_ptrs[] index from cpu id */
+ ldx r13,r5,r4 /* r13 = paca_ptrs[cpu id] */
SET_PACA(r13) /* Save vaddr of paca in an SPRG*/
/* Mark interrupts soft and hard disabled (they might be enabled
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 53b9c1d..4c1012b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -33,6 +33,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
+#include <asm/debug.h>
#include <linux/uaccess.h>
/*
@@ -171,6 +172,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
* 'symbolsize' should satisfy the check below.
*/
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
length_max = 8; /* DABR */
if (cpu_has_feature(CPU_FTR_DAWR)) {
length_max = 512 ; /* 64 doublewords */
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 01e1c19..79d0054 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -325,12 +325,6 @@ enter_winkle:
* r3 - PSSCR value corresponding to the requested stop state.
*/
power_enter_stop:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
- li r4,KVM_HWTHREAD_IN_IDLE
- /* DO THIS IN REAL MODE! See comment above. */
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
@@ -339,6 +333,7 @@ power_enter_stop:
bne .Lhandle_esl_ec_set
PPC_STOP
li r3,0 /* Since we didn't lose state, return 0 */
+ std r3, PACA_REQ_PSSCR(r13)
/*
* pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
@@ -427,13 +422,49 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
/*
* Entered with MSR[EE]=0 and no soft-masked interrupts pending.
* r3 contains desired PSSCR register value.
+ *
+ * Offline (CPU unplug) case also must notify KVM that the CPU is
+ * idle.
*/
+_GLOBAL(power9_offline_stop)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * Tell KVM we're entering idle.
+ * This does not have to be done in real mode because the P9 MMU
+ * is independent per-thread. Some steppings share radix/hash mode
+ * between threads, but in that case KVM has a barrier sync in real
+ * mode before and after switching between radix and hash.
+ */
+ li r4,KVM_HWTHREAD_IN_IDLE
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+ /* fall through */
+
_GLOBAL(power9_idle_stop)
std r3, PACA_REQ_PSSCR(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+BEGIN_FTR_SECTION
+ sync
+ lwz r5, PACA_DONT_STOP(r13)
+ cmpwi r5, 0
+ bne 1f
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+#endif
mtspr SPRN_PSSCR,r3
LOAD_REG_ADDR(r4,power_enter_stop)
b pnv_powersave_common
/* No return */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+1:
+ /*
+ * We get here when TM / thread reconfiguration bug workaround
+ * code wants to get the CPU into SMT4 mode, and therefore
+ * we are being asked not to stop.
+ */
+ li r3, 0
+ std r3, PACA_REQ_PSSCR(r13)
+ blr /* return 0 for wakeup cause / SRR1 value */
+#endif
/*
* On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
@@ -520,6 +551,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ lbz r0,HSTATE_HWTHREAD_STATE(r13)
+ cmpwi r0,KVM_HWTHREAD_IN_KERNEL
+ beq 1f
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
@@ -584,6 +618,8 @@ FTR_SECTION_ELSE_NESTED(71)
mfspr r5, SPRN_PSSCR
rldicl r5,r5,4,60
ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
+ li r0, 0 /* clear requested_psscr to say we're awake */
+ std r0, PACA_REQ_PSSCR(r13)
cmpd cr4,r5,r4
bge cr4,pnv_wakeup_tb_loss /* returns to caller */
@@ -834,6 +870,8 @@ BEGIN_FTR_SECTION
mtspr SPRN_PTCR,r4
ld r4,_RPR(r1)
mtspr SPRN_RPR,r4
+ ld r4,_AMOR(r1)
+ mtspr SPRN_AMOR,r4
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r4,_TSCR(r1)
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index aab456e..5ac84ef 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -45,12 +45,32 @@ u64 ioread64(void __iomem *addr)
{
return readq(addr);
}
+u64 ioread64_lo_hi(void __iomem *addr)
+{
+ return readq(addr);
+}
+u64 ioread64_hi_lo(void __iomem *addr)
+{
+ return readq(addr);
+}
u64 ioread64be(void __iomem *addr)
{
return readq_be(addr);
}
+u64 ioread64be_lo_hi(void __iomem *addr)
+{
+ return readq_be(addr);
+}
+u64 ioread64be_hi_lo(void __iomem *addr)
+{
+ return readq_be(addr);
+}
EXPORT_SYMBOL(ioread64);
+EXPORT_SYMBOL(ioread64_lo_hi);
+EXPORT_SYMBOL(ioread64_hi_lo);
EXPORT_SYMBOL(ioread64be);
+EXPORT_SYMBOL(ioread64be_lo_hi);
+EXPORT_SYMBOL(ioread64be_hi_lo);
#endif /* __powerpc64__ */
void iowrite8(u8 val, void __iomem *addr)
@@ -83,12 +103,32 @@ void iowrite64(u64 val, void __iomem *addr)
{
writeq(val, addr);
}
+void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+ writeq(val, addr);
+}
+void iowrite64_hi_lo(u64 val, void __iomem *addr)
+{
+ writeq(val, addr);
+}
void iowrite64be(u64 val, void __iomem *addr)
{
writeq_be(val, addr);
}
+void iowrite64be_lo_hi(u64 val, void __iomem *addr)
+{
+ writeq_be(val, addr);
+}
+void iowrite64be_hi_lo(u64 val, void __iomem *addr)
+{
+ writeq_be(val, addr);
+}
EXPORT_SYMBOL(iowrite64);
+EXPORT_SYMBOL(iowrite64_lo_hi);
+EXPORT_SYMBOL(iowrite64_hi_lo);
EXPORT_SYMBOL(iowrite64be);
+EXPORT_SYMBOL(iowrite64be_lo_hi);
+EXPORT_SYMBOL(iowrite64be_hi_lo);
#endif /* __powerpc64__ */
/*
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index ca5d5a0..e4c5bf3 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -455,29 +455,33 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
}
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- regs->nip = orig_ret_address;
+
/*
- * Make LR point to the orig_ret_address.
- * When the 'nop' inside the kretprobe_trampoline
- * is optimized, we can do a 'blr' after executing the
- * detour buffer code.
+ * We get here through one of two paths:
+ * 1. by taking a trap -> kprobe_handler() -> here
+ * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here
+ *
+ * When going back through (1), we need regs->nip to be setup properly
+ * as it is used to determine the return address from the trap.
+ * For (2), since nip is not honoured with optprobes, we instead setup
+ * the link register properly so that the subsequent 'blr' in
+ * kretprobe_trampoline jumps back to the right instruction.
+ *
+ * For nip, we should set the address to the previous instruction since
+ * we end up emulating it in kprobe_handler(), which increments the nip
+ * again.
*/
+ regs->nip = orig_ret_address - 4;
regs->link = orig_ret_address;
- reset_current_kprobe();
kretprobe_hash_unlock(current, &flags);
- preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
}
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
+
+ return 0;
}
NOKPROBE_SYMBOL(trampoline_probe_handler);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 49d34d7..1044bf1 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -168,24 +168,25 @@ static void kexec_prepare_cpus_wait(int wait_state)
* are correctly onlined. If somehow we start a CPU on boot with RTAS
* start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
* time, the boot CPU will timeout. If it does eventually execute
- * stuff, the secondary will start up (paca[].cpu_start was written) and
- * get into a peculiar state. If the platform supports
- * smp_ops->take_timebase(), the secondary CPU will probably be spinning
- * in there. If not (i.e. pseries), the secondary will continue on and
- * try to online itself/idle/etc. If it survives that, we need to find
- * these possible-but-not-online-but-should-be CPUs and chaperone them
- * into kexec_smp_wait().
+ * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
+ * written) and get into a peculiar state.
+ * If the platform supports smp_ops->take_timebase(), the secondary CPU
+ * will probably be spinning in there. If not (i.e. pseries), the
+ * secondary will continue on and try to online itself/idle/etc. If it
+ * survives that, we need to find these
+ * possible-but-not-online-but-should-be CPUs and chaperone them into
+ * kexec_smp_wait().
*/
for_each_online_cpu(i) {
if (i == my_cpu)
continue;
- while (paca[i].kexec_state < wait_state) {
+ while (paca_ptrs[i]->kexec_state < wait_state) {
barrier();
if (i != notified) {
printk(KERN_INFO "kexec: waiting for cpu %d "
"(physical %d) to enter %i state\n",
- i, paca[i].hw_cpu_id, wait_state);
+ i, paca_ptrs[i]->hw_cpu_id, wait_state);
notified = i;
}
}
@@ -322,18 +323,24 @@ void default_machine_kexec(struct kimage *image)
kexec_stack.thread_info.cpu = current_thread_info()->cpu;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
- * it. Also poison per_cpu_offset to catch anyone using non-static
- * data.
+ * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+ * non-static data.
*/
memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
- paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
- kexec_paca.paca_index;
+#ifdef CONFIG_PPC_PSERIES
+ kexec_paca.lppaca_ptr = NULL;
+#endif
+ paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
setup_paca(&kexec_paca);
- /* XXX: If anyone does 'dynamic lppacas' this will also need to be
- * switched to a static version!
+ /*
+ * The lppaca should be unregistered at this point so the HV won't
+ * touch it. In the case of a crash, none of the lppacas are
+ * unregistered so there is not much we can do about it here.
*/
+
/*
* On Book3S, the copy must happen with the MMU off if we are either
* using Radix page tables or we are not in an LPAR since we can
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index e4395f9..45e0b7d 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -43,7 +43,7 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
/* We don't support crash kernels yet. */
if (image->type == KEXEC_TYPE_CRASH)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
fops = kexec_file_loaders[i];
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 3280953..fa267e9 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range)
blr
EXPORT_SYMBOL(flush_dcache_range)
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mfmsr r5 /* Disable MMU Data Relocation */
- ori r0,r5,MSR_DR
- xori r0,r0,MSR_DR
- sync
- mtmsr r0
- sync
- isync
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- mtmsr r5 /* Re-enable MMU Data Relocation */
- sync
- isync
- blr
-
_GLOBAL(flush_inval_dcache_range)
ld r10,PPC64_CACHES@toc(r2)
lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 496d639..ba681da 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -207,8 +207,7 @@ int nvram_write_os_partition(struct nvram_os_partition *part,
tmp_index = part->index;
- rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info),
- &tmp_index);
+ rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index);
if (rc <= 0) {
pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
return rc;
@@ -244,9 +243,7 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff,
tmp_index = part->index;
if (part->os_partition) {
- rc = ppc_md.nvram_read((char *)&info,
- sizeof(struct err_log_info),
- &tmp_index);
+ rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index);
if (rc <= 0) {
pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
return rc;
@@ -1173,7 +1170,7 @@ int __init nvram_scan_partitions(void)
"detected: 0-length partition\n");
goto out;
}
- tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL);
err = -ENOMEM;
if (!tmp_part) {
printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 95ffedf..0ee3e6d 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -20,116 +20,105 @@
#include "setup.h"
-#ifdef CONFIG_PPC_BOOK3S
+#ifndef CONFIG_SMP
+#define boot_cpuid 0
+#endif
+
+static void *__init alloc_paca_data(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ unsigned long pa;
+ int nid;
+
+ /*
+ * boot_cpuid paca is allocated very early before cpu_to_node is up.
+ * Set bottom-up mode, because the boot CPU should be on node-0,
+ * which will put its paca in the right place.
+ */
+ if (cpu == boot_cpuid) {
+ nid = -1;
+ memblock_set_bottom_up(true);
+ } else {
+ nid = early_cpu_to_node(cpu);
+ }
+
+ pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
+ if (!pa) {
+ pa = memblock_alloc_base(size, align, limit);
+ if (!pa)
+ panic("cannot allocate paca data");
+ }
+
+ if (cpu == boot_cpuid)
+ memblock_set_bottom_up(false);
+
+ return __va(pa);
+}
+
+#ifdef CONFIG_PPC_PSERIES
/*
- * The structure which the hypervisor knows about - this structure
- * should not cross a page boundary. The vpa_init/register_vpa call
- * is now known to fail if the lppaca structure crosses a page
- * boundary. The lppaca is also used on POWER5 pSeries boxes.
- * The lppaca is 640 bytes long, and cannot readily
- * change since the hypervisor knows its layout, so a 1kB alignment
- * will suffice to ensure that it doesn't cross a page boundary.
+ * See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must must be 1kB in size, L1 cache line aligned,
+ * and not cross 4kB boundary. A 1kB size and 1kB alignment will satisfy
+ * these requirements.
*/
-struct lppaca lppaca[] = {
- [0 ... (NR_LPPACAS-1)] = {
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+ BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+ *lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(sizeof(struct lppaca)),
+ .size = cpu_to_be16(0x400),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
- .page_ins = 0,
- },
+ .page_ins = 0, };
};
-static struct lppaca *extra_lppacas;
-static long __initdata lppaca_size;
-
-static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
-{
- if (nr_cpus <= NR_LPPACAS)
- return;
-
- lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
- (nr_cpus - NR_LPPACAS));
- extra_lppacas = __va(memblock_alloc_base(lppaca_size,
- PAGE_SIZE, limit));
-}
-
-static struct lppaca * __init new_lppaca(int cpu)
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
+ size_t size = 0x400;
- if (cpu < NR_LPPACAS)
- return &lppaca[cpu];
+ BUILD_BUG_ON(size < sizeof(struct lppaca));
+
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ return NULL;
- lp = extra_lppacas + (cpu - NR_LPPACAS);
- *lp = lppaca[0];
+ lp = alloc_paca_data(size, 0x400, limit, cpu);
+ init_lppaca(lp);
return lp;
}
-
-static void __init free_lppacas(void)
-{
- long new_size = 0, nr;
-
- if (!lppaca_size)
- return;
- nr = num_possible_cpus() - NR_LPPACAS;
- if (nr > 0)
- new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
- if (new_size >= lppaca_size)
- return;
-
- memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
- lppaca_size = new_size;
-}
-
-#else
-
-static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
-static inline void free_lppacas(void) { }
-
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3S_64
/*
- * 3 persistent SLBs are registered here. The buffer will be zero
+ * 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
*
* If you make the number of persistent SLB entries dynamic, please also
* update PR KVM to flush and restore them accordingly.
*/
-static struct slb_shadow * __initdata slb_shadow;
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit)
-{
- int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
-
- if (early_radix_enabled())
- return;
-
- slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
- memset(slb_shadow, 0, size);
-}
-
-static struct slb_shadow * __init init_slb_shadow(int cpu)
+static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
{
struct slb_shadow *s;
- if (early_radix_enabled())
- return NULL;
-
- s = &slb_shadow[cpu];
+ if (cpu != boot_cpuid) {
+ /*
+ * Boot CPU comes here before early_radix_enabled
+ * is parsed (e.g., for disable_radix). So allocate
+ * always and this will be fixed up in free_unused_pacas.
+ */
+ if (early_radix_enabled())
+ return NULL;
+ }
- /*
- * When we come through here to initialise boot_paca, the slb_shadow
- * buffers are not allocated yet. That's OK, we'll get one later in
- * boot, but make sure we don't corrupt memory at 0.
- */
- if (!slb_shadow)
- return NULL;
+ s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
+ memset(s, 0, sizeof(*s));
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -137,10 +126,6 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
return s;
}
-#else /* !CONFIG_PPC_BOOK3S_64 */
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-
#endif /* CONFIG_PPC_BOOK3S_64 */
/* The Paca is an array with one entry per processor. Each contains an
@@ -152,14 +137,15 @@ static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
* processors. The processor VPD array needs one entry per physical
* processor (not thread).
*/
-struct paca_struct *paca;
-EXPORT_SYMBOL(paca);
+struct paca_struct **paca_ptrs __read_mostly;
+EXPORT_SYMBOL(paca_ptrs);
void __init initialise_paca(struct paca_struct *new_paca, int cpu)
{
-#ifdef CONFIG_PPC_BOOK3S
- new_paca->lppaca_ptr = new_lppaca(cpu);
-#else
+#ifdef CONFIG_PPC_PSERIES
+ new_paca->lppaca_ptr = NULL;
+#endif
+#ifdef CONFIG_PPC_BOOK3E
new_paca->kernel_pgd = swapper_pg_dir;
#endif
new_paca->lock_token = 0x8000;
@@ -173,7 +159,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
#ifdef CONFIG_PPC_BOOK3S_64
- new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
+ new_paca->slb_shadow_ptr = NULL;
#endif
#ifdef CONFIG_PPC_BOOK3E
@@ -203,12 +189,25 @@ void setup_paca(struct paca_struct *new_paca)
}
-static int __initdata paca_size;
+static int __initdata paca_nr_cpu_ids;
+static int __initdata paca_ptrs_size;
+static int __initdata paca_struct_size;
+
+void __init allocate_paca_ptrs(void)
+{
+ paca_nr_cpu_ids = nr_cpu_ids;
+
+ paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0));
+ memset(paca_ptrs, 0x88, paca_ptrs_size);
+}
-void __init allocate_pacas(void)
+void __init allocate_paca(int cpu)
{
u64 limit;
- int cpu;
+ struct paca_struct *paca;
+
+ BUG_ON(cpu >= paca_nr_cpu_ids);
#ifdef CONFIG_PPC_BOOK3S_64
/*
@@ -220,40 +219,44 @@ void __init allocate_pacas(void)
limit = ppc64_rma_size;
#endif
- paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
-
- paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
- memset(paca, 0, paca_size);
-
- printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n",
- paca_size, nr_cpu_ids, paca);
-
- allocate_lppacas(nr_cpu_ids, limit);
-
- allocate_slb_shadows(nr_cpu_ids, limit);
+ paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
+ limit, cpu);
+ paca_ptrs[cpu] = paca;
+ memset(paca, 0, sizeof(struct paca_struct));
- /* Can't use for_each_*_cpu, as they aren't functional yet */
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- initialise_paca(&paca[cpu], cpu);
+ initialise_paca(paca, cpu);
+#ifdef CONFIG_PPC_PSERIES
+ paca->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+#endif
+ paca_struct_size += sizeof(struct paca_struct);
}
void __init free_unused_pacas(void)
{
- int new_size;
-
- new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+ int new_ptrs_size;
- if (new_size >= paca_size)
- return;
+ new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ if (new_ptrs_size < paca_ptrs_size)
+ memblock_free(__pa(paca_ptrs) + new_ptrs_size,
+ paca_ptrs_size - new_ptrs_size);
- memblock_free(__pa(paca) + new_size, paca_size - new_size);
+ paca_nr_cpu_ids = nr_cpu_ids;
+ paca_ptrs_size = new_ptrs_size;
- printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
- paca_size - new_size);
-
- paca_size = new_size;
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (early_radix_enabled()) {
+ /* Ugly fixup, see new_slb_shadow() */
+ memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+ sizeof(struct slb_shadow));
+ paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
+ }
+#endif
- free_lppacas();
+ printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
+ paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
void copy_mm_to_paca(struct mm_struct *mm)
@@ -265,7 +268,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm->context.slb_addr_limit);
get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_low_slices_psize,
+ &context->low_slices_psize, sizeof(context->low_slices_psize));
memcpy(&get_paca()->mm_ctx_high_slices_psize,
&context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
#else /* CONFIG_PPC_MM_SLICES */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 446c796..fe9733f 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -410,72 +410,22 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
}
/*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
* -- paulus.
*/
-
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap. They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
- resource_size_t *offset,
- enum pci_mmap_state mmap_state)
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- unsigned long io_offset = 0;
- int i, res_bit;
-
- if (hose == NULL)
- return NULL; /* should never happen */
-
- /* If memory, add on the PCI bridge address offset */
- if (mmap_state == pci_mmap_mem) {
-#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
- *offset += hose->pci_mem_offset;
-#endif
- res_bit = IORESOURCE_MEM;
- } else {
- io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
- *offset += io_offset;
- res_bit = IORESOURCE_IO;
- }
-
- /*
- * Check that the offset requested corresponds to one of the
- * resources of the device.
- */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &dev->resource[i];
- int flags = rp->flags;
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ resource_size_t ioaddr = pci_resource_start(pdev, bar);
- /* treat ROM as memory (should be already) */
- if (i == PCI_ROM_RESOURCE)
- flags |= IORESOURCE_MEM;
-
- /* Active and same type? */
- if ((flags & res_bit) == 0)
- continue;
-
- /* In the range of this resource? */
- if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
- continue;
+ if (!hose)
+ return -EINVAL;
- /* found it! construct the final physical address */
- if (mmap_state == pci_mmap_io)
- *offset += hose->io_base_phys - io_offset;
- return rp;
- }
+ /* Convert to an offset within this PCI controller */
+ ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE;
- return NULL;
+ vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT;
+ return 0;
}
/*
@@ -527,42 +477,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
return prot;
}
-
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture. The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, int bar,
- struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
-{
- resource_size_t offset =
- ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
- struct resource *rp;
- int ret;
-
- rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
- if (rp == NULL)
- return -EINVAL;
-
- vma->vm_pgoff = offset >> PAGE_SHIFT;
- if (write_combine)
- vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
- else
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
- return ret;
-}
-
/* This provides legacy IO read access on a bus */
int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
{
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1738c41..1237f13 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -173,7 +173,7 @@ void __msr_check_and_clear(unsigned long bits)
EXPORT_SYMBOL(__msr_check_and_clear);
#ifdef CONFIG_PPC_FPU
-void __giveup_fpu(struct task_struct *tsk)
+static void __giveup_fpu(struct task_struct *tsk)
{
unsigned long msr;
@@ -556,7 +556,7 @@ void restore_math(struct pt_regs *regs)
regs->msr = msr;
}
-void save_all(struct task_struct *tsk)
+static void save_all(struct task_struct *tsk)
{
unsigned long usermsr;
@@ -718,7 +718,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
{
thread->hw_brk.address = 0;
thread->hw_brk.type = 0;
- set_breakpoint(&thread->hw_brk);
+ if (ppc_breakpoint_available())
+ set_breakpoint(&thread->hw_brk);
}
#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -815,9 +816,14 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
if (cpu_has_feature(CPU_FTR_DAWR))
+ // Power8 or later
set_dawr(brk);
- else
+ else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ // Power7 or earlier
set_dabr(brk);
+ else
+ // Shouldn't happen due to higher level checks
+ WARN_ON_ONCE(1);
}
void set_breakpoint(struct arch_hw_breakpoint *brk)
@@ -827,6 +833,18 @@ void set_breakpoint(struct arch_hw_breakpoint *brk)
preempt_enable();
}
+/* Check if we have DAWR or DABR hardware */
+bool ppc_breakpoint_available(void)
+{
+ if (cpu_has_feature(CPU_FTR_DAWR))
+ return true; /* POWER8 DAWR */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return false; /* POWER9 with DAWR disabled */
+ /* DABR: Everything but POWER8 and POWER9 */
+ return true;
+}
+EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
+
#ifdef CONFIG_PPC64
DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4dffef9..9dbed48 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -291,11 +291,11 @@ static inline void identical_pvr_fixup(unsigned long node)
static void __init check_cpu_feature_properties(unsigned long node)
{
- unsigned long i;
+ int i;
struct feature_property *fp = feature_properties;
const __be32 *prop;
- for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
+ for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) {
prop = of_get_flat_dt_prop(node, fp->name, NULL);
if (prop && be32_to_cpup(prop) >= fp->min_value) {
cur_cpu_spec->cpu_features |= fp->cpu_feature;
@@ -365,7 +365,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
DBG("boot cpu: logical %d physical %d\n", found,
be32_to_cpu(intserv[found_thread]));
boot_cpuid = found;
- set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
/*
* PAPR defines "logical" PVR values for cpus that
@@ -403,7 +402,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
else if (!dt_cpu_ftrs_in_use())
cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+ allocate_paca(boot_cpuid);
#endif
+ set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
return 0;
}
@@ -744,7 +745,7 @@ void __init early_init_devtree(void *params)
* FIXME .. and the initrd too? */
move_device_tree();
- allocate_pacas();
+ allocate_paca_ptrs();
DBG("Scanning CPUs ...\n");
@@ -874,5 +875,15 @@ EXPORT_SYMBOL(cpu_to_chip_id);
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
+#ifdef CONFIG_SMP
+ /*
+ * Early firmware scanning must use this rather than
+ * get_hard_smp_processor_id because we don't have pacas allocated
+ * until memory topology is discovered.
+ */
+ if (cpu_to_phys_id != NULL)
+ return (int)phys_id == cpu_to_phys_id[cpu];
+#endif
+
return (int)phys_id == get_hard_smp_processor_id(cpu);
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index acf4b2e..f9d6bef 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -171,7 +171,7 @@ static unsigned long __initdata prom_tce_alloc_start;
static unsigned long __initdata prom_tce_alloc_end;
#endif
-static bool __initdata prom_radix_disable;
+static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
struct platform_support {
bool hash_mmu;
@@ -641,9 +641,19 @@ static void __init early_cmdline_parse(void)
opt = strstr(prom_cmd_line, "disable_radix");
if (opt) {
- prom_debug("Radix disabled from cmdline\n");
- prom_radix_disable = true;
+ opt += 13;
+ if (*opt && *opt == '=') {
+ bool val;
+
+ if (kstrtobool(++opt, &val))
+ prom_radix_disable = false;
+ else
+ prom_radix_disable = val;
+ } else
+ prom_radix_disable = true;
}
+ if (prom_radix_disable)
+ prom_debug("Radix disabled from cmdline\n");
}
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
@@ -1110,7 +1120,8 @@ static void __init prom_check_platform_support(void)
}
}
- if (supported.radix_mmu && supported.radix_gtse) {
+ if (supported.radix_mmu && supported.radix_gtse &&
+ IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
/* Radix preferred - but we require GTSE for now */
prom_debug("Asking for radix with GTSE\n");
ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
@@ -1809,16 +1820,8 @@ static void __init prom_initialize_tce_table(void)
* size to 4 MB. This is enough to map 2GB of PCI DMA space.
* By doing this, we avoid the pitfalls of trying to DMA to
* MMIO space and the DMA alias hole.
- *
- * On POWER4, firmware sets the TCE region by assuming
- * each TCE table is 8MB. Using this memory for anything
- * else will impact performance, so we always allocate 8MB.
- * Anton
*/
- if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p))
- minsize = 8UL << 20;
- else
- minsize = 4UL << 20;
+ minsize = 4UL << 20;
/* Align to the greater of the align or size */
align = max(minalign, minsize);
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 12640f7..acb6b92 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -19,7 +19,7 @@
WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
_end enter_prom memcpy memset reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
+strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index ca72d73..d23cf63 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -41,6 +41,7 @@
#include <asm/switch_to.h>
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
+#include <asm/debug.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -2378,6 +2379,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
struct perf_event_attr attr;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+ bool set_bp = true;
struct arch_hw_breakpoint hw_brk;
#endif
@@ -2411,9 +2413,10 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
hw_brk.address = data & (~HW_BRK_TYPE_DABR);
hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
hw_brk.len = 8;
+ set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
bp = thread->ptrace_bps[0];
- if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
+ if (!set_bp) {
if (bp) {
unregister_hw_breakpoint(bp);
thread->ptrace_bps[0] = NULL;
@@ -2450,6 +2453,9 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
return PTR_ERR(bp);
}
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+ if (set_bp && (!ppc_breakpoint_available()))
+ return -ENODEV;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
task->thread.hw_brk = hw_brk;
#else /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -2904,6 +2910,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
if (child->thread.hw_brk.address)
return -ENOSPC;
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
+
child->thread.hw_brk = brk;
return 1;
@@ -3052,7 +3061,10 @@ long arch_ptrace(struct task_struct *child, long request,
#endif
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
dbginfo.num_instruction_bps = 0;
- dbginfo.num_data_bps = 1;
+ if (ppc_breakpoint_available())
+ dbginfo.num_data_bps = 1;
+ else
+ dbginfo.num_data_bps = 0;
dbginfo.num_condition_regs = 0;
#ifdef CONFIG_PPC64
dbginfo.data_bp_alignment = 8;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
new file mode 100644
index 0000000..bab5a27
--- /dev/null
+++ b/arch/powerpc/kernel/security.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Security related flags and so on.
+//
+// Copyright 2018, Michael Ellerman, IBM Corporation.
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/seq_buf.h>
+
+#include <asm/security_features.h>
+
+
+unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ bool thread_priv;
+
+ thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
+
+ if (rfi_flush || thread_priv) {
+ struct seq_buf s;
+ seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+ seq_buf_printf(&s, "Mitigation: ");
+
+ if (rfi_flush)
+ seq_buf_printf(&s, "RFI Flush");
+
+ if (rfi_flush && thread_priv)
+ seq_buf_printf(&s, ", ");
+
+ if (thread_priv)
+ seq_buf_printf(&s, "L1D private per thread");
+
+ seq_buf_printf(&s, "\n");
+
+ return s.len;
+ }
+
+ if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+ !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ bool bcs, ccd, ori;
+ struct seq_buf s;
+
+ seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+ bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+ ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+ ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31);
+
+ if (bcs || ccd) {
+ seq_buf_printf(&s, "Mitigation: ");
+
+ if (bcs)
+ seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+
+ if (bcs && ccd)
+ seq_buf_printf(&s, ", ");
+
+ if (ccd)
+ seq_buf_printf(&s, "Indirect branch cache disabled");
+ } else
+ seq_buf_printf(&s, "Vulnerable");
+
+ if (ori)
+ seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+
+ seq_buf_printf(&s, "\n");
+
+ return s.len;
+}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d73ec51..0af5c11 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -437,6 +437,8 @@ static void __init cpu_init_thread_core_maps(int tpc)
}
+u32 *cpu_to_phys_id = NULL;
+
/**
* setup_cpu_maps - initialize the following cpu maps:
* cpu_possible_mask
@@ -463,6 +465,10 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
+ cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
+ __alignof__(u32)));
+ memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
@@ -480,6 +486,7 @@ void __init smp_setup_cpu_maps(void)
intserv = of_get_property(dn, "reg", &len);
if (!intserv) {
cpu_be = cpu_to_be32(cpu);
+ /* XXX: what is this? uninitialized?? */
intserv = &cpu_be; /* assume logical == phys */
len = 4;
}
@@ -499,8 +506,8 @@ void __init smp_setup_cpu_maps(void)
"enable-method", "spin-table");
set_cpu_present(cpu, avail);
- set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
set_cpu_possible(cpu, true);
+ cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
cpu++;
}
@@ -835,6 +842,23 @@ static __init void print_system_info(void)
pr_info("-----------------------------------------------------\n");
}
+#ifdef CONFIG_SMP
+static void smp_setup_pacas(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ allocate_paca(cpu);
+ set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
+ }
+
+ memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
+ cpu_to_phys_id = NULL;
+}
+#endif
+
/*
* Called into from start_kernel this initializes memblock, which is used
* to manage page allocation until mem_init is called.
@@ -888,8 +912,8 @@ void __init setup_arch(char **cmdline_p)
/* Check the SMT related command line arguments (ppc64). */
check_smt_enabled();
- /* On BookE, setup per-core TLB data structures. */
- setup_tlb_core_data();
+ /* Parse memory topology */
+ mem_topology_setup();
/*
* Release secondary cpus out of their spinloops at 0x60 now that
@@ -899,6 +923,11 @@ void __init setup_arch(char **cmdline_p)
* so smp_release_cpus() does nothing for them.
*/
#ifdef CONFIG_SMP
+ smp_setup_pacas();
+
+ /* On BookE, setup per-core TLB data structures. */
+ setup_tlb_core_data();
+
smp_release_cpus();
#endif
@@ -919,6 +948,8 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC64
if (!radix_enabled())
init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#elif defined(CONFIG_PPC_8xx)
+ init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
#else
#error "context.addr_limit not initialized."
#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 3fc11e3..d144df5 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -46,13 +46,10 @@ static inline void emergency_stack_init(void) { };
#endif
#ifdef CONFIG_PPC64
-void record_spr_defaults(void);
-#else
-static inline void record_spr_defaults(void) { };
-#endif
-
-#ifdef CONFIG_PPC64
u64 ppc64_bolted_size(void);
+
+/* Default SPR values from firmware/kexec */
+extern unsigned long spr_default_dscr;
#endif
/*
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 51ebc01..7445748 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -39,6 +39,7 @@
#include <asm/udbg.h>
#include <asm/code-patching.h>
#include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
#define DBG(fmt...)
@@ -121,7 +122,7 @@ notrace void __init machine_init(u64 dt_ptr)
}
/* Checks "l2cr=xxxx" command-line option */
-int __init ppc_setup_l2cr(char *str)
+static int __init ppc_setup_l2cr(char *str)
{
if (cpu_has_feature(CPU_FTR_L2CR)) {
unsigned long val = simple_strtoul(str, NULL, 0);
@@ -134,7 +135,7 @@ int __init ppc_setup_l2cr(char *str)
__setup("l2cr=", ppc_setup_l2cr);
/* Checks "l3cr=xxxx" command-line option */
-int __init ppc_setup_l3cr(char *str)
+static int __init ppc_setup_l3cr(char *str)
{
if (cpu_has_feature(CPU_FTR_L3CR)) {
unsigned long val = simple_strtoul(str, NULL, 0);
@@ -180,7 +181,7 @@ EXPORT_SYMBOL(nvram_sync);
#endif /* CONFIG_NVRAM */
-int __init ppc_init(void)
+static int __init ppc_init(void)
{
/* clear the progress line */
if (ppc_md.progress)
@@ -192,7 +193,6 @@ int __init ppc_init(void)
}
return 0;
}
-
arch_initcall(ppc_init);
void __init irqstack_early_init(void)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index c388cc3..66f2b62 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -110,7 +110,7 @@ void __init setup_tlb_core_data(void)
if (cpu_first_thread_sibling(boot_cpuid) == first)
first = boot_cpuid;
- paca[cpu].tcd_ptr = &paca[first].tcd;
+ paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
/*
* If we have threads, we need either tlbsrx.
@@ -254,6 +254,14 @@ static void cpu_ready_for_interrupts(void)
get_paca()->kernel_msr = MSR_KERNEL;
}
+unsigned long spr_default_dscr = 0;
+
+void __init record_spr_defaults(void)
+{
+ if (early_cpu_has_feature(CPU_FTR_DSCR))
+ spr_default_dscr = mfspr(SPRN_DSCR);
+}
+
/*
* Early initialization entry point. This is called by head.S
* with MMU translation disabled. We rely on the "feature" of
@@ -304,7 +312,11 @@ void __init early_setup(unsigned long dt_ptr)
early_init_devtree(__va(dt_ptr));
/* Now we know the logical id of our boot cpu, setup the paca. */
- setup_paca(&paca[boot_cpuid]);
+ if (boot_cpuid != 0) {
+ /* Poison paca_ptrs[0] again if it's not the boot cpu */
+ memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
+ }
+ setup_paca(paca_ptrs[boot_cpuid]);
fixup_boot_paca();
/*
@@ -599,6 +611,21 @@ __init u64 ppc64_bolted_size(void)
#endif
}
+static void *__init alloc_stack(unsigned long limit, int cpu)
+{
+ unsigned long pa;
+
+ pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
+ early_cpu_to_node(cpu), MEMBLOCK_NONE);
+ if (!pa) {
+ pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+ if (!pa)
+ panic("cannot allocate stacks");
+ }
+
+ return __va(pa);
+}
+
void __init irqstack_early_init(void)
{
u64 limit = ppc64_bolted_size();
@@ -610,12 +637,8 @@ void __init irqstack_early_init(void)
* accessed in realmode.
*/
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc_base(THREAD_SIZE,
- THREAD_SIZE, limit));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc_base(THREAD_SIZE,
- THREAD_SIZE, limit));
+ softirq_ctx[i] = alloc_stack(limit, i);
+ hardirq_ctx[i] = alloc_stack(limit, i);
}
}
@@ -623,20 +646,21 @@ void __init irqstack_early_init(void)
void __init exc_lvl_early_init(void)
{
unsigned int i;
- unsigned long sp;
for_each_possible_cpu(i) {
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- critirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].crit_kstack = __va(sp + THREAD_SIZE);
+ void *sp;
+
+ sp = alloc_stack(ULONG_MAX, i);
+ critirq_ctx[i] = sp;
+ paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE;
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- dbgirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
+ sp = alloc_stack(ULONG_MAX, i);
+ dbgirq_ctx[i] = sp;
+ paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE;
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].mc_kstack = __va(sp + THREAD_SIZE);
+ sp = alloc_stack(ULONG_MAX, i);
+ mcheckirq_ctx[i] = sp;
+ paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE;
}
if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
@@ -690,23 +714,24 @@ void __init emergency_stack_init(void)
for_each_possible_cpu(i) {
struct thread_info *ti;
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
}
@@ -762,7 +787,7 @@ void __init setup_per_cpu_areas(void)
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
- paca[cpu].data_offset = __per_cpu_offset[cpu];
+ paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
}
}
#endif
@@ -846,9 +871,6 @@ static void do_nothing(void *unused)
void rfi_flush_enable(bool enable)
{
- if (rfi_flush == enable)
- return;
-
if (enable) {
do_rfi_flush_fixups(enabled_flush_types);
on_each_cpu(do_nothing, NULL, 1);
@@ -863,6 +885,10 @@ static void init_fallback_flush(void)
u64 l1d_size, limit;
int cpu;
+ /* Only allocate the fallback flush area once (at boot time). */
+ if (l1d_flush_fallback_area)
+ return;
+
l1d_size = ppc64_caches.l1d.size;
limit = min(ppc64_bolted_size(), ppc64_rma_size);
@@ -875,23 +901,24 @@ static void init_fallback_flush(void)
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
for_each_possible_cpu(cpu) {
- paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
- paca[cpu].l1d_flush_size = l1d_size;
+ struct paca_struct *paca = paca_ptrs[cpu];
+ paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
+ paca->l1d_flush_size = l1d_size;
}
}
-void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
{
if (types & L1D_FLUSH_FALLBACK) {
- pr_info("rfi-flush: Using fallback displacement flush\n");
+ pr_info("rfi-flush: fallback displacement flush available\n");
init_fallback_flush();
}
if (types & L1D_FLUSH_ORI)
- pr_info("rfi-flush: Using ori type flush\n");
+ pr_info("rfi-flush: ori type flush available\n");
if (types & L1D_FLUSH_MTTRIG)
- pr_info("rfi-flush: Using mttrig type flush\n");
+ pr_info("rfi-flush: mttrig type flush available\n");
enabled_flush_types = types;
@@ -902,13 +929,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
+ bool enable;
+
if (val == 1)
- rfi_flush_enable(true);
+ enable = true;
else if (val == 0)
- rfi_flush_enable(false);
+ enable = false;
else
return -EINVAL;
+ /* Only do anything if we're changing state */
+ if (enable != rfi_flush)
+ rfi_flush_enable(enable);
+
return 0;
}
@@ -927,12 +960,4 @@ static __init int rfi_flush_debugfs_init(void)
}
device_initcall(rfi_flush_debugfs_init);
#endif
-
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
-{
- if (rfi_flush)
- return sprintf(buf, "Mitigation: RFI Flush\n");
-
- return sprintf(buf, "Vulnerable\n");
-}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 7c59d88..a6467f8 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -49,6 +49,11 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
#else /* CONFIG_PPC64 */
+extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+ struct pt_regs *regs);
+extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+ struct pt_regs *regs);
+
static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
struct task_struct *tsk)
{
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a46de00..492f034 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1045,7 +1045,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
struct ucontext __user *new_ctx,
int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
{
- unsigned char tmp;
+ unsigned char tmp __maybe_unused;
int ctx_has_vsx_region = 0;
#ifdef CONFIG_PPC64
@@ -1231,7 +1231,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
{
struct sig_dbg_op op;
int i;
- unsigned char tmp;
+ unsigned char tmp __maybe_unused;
unsigned long new_msr = regs->msr;
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
unsigned long new_dbcr0 = current->thread.debug.dbcr0;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index bbe7634..e16ec7b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -123,8 +123,8 @@ int smp_generic_kick_cpu(int nr)
* cpu_start field to become non-zero After we set cpu_start,
* the processor will continue on to secondary_start
*/
- if (!paca[nr].cpu_start) {
- paca[nr].cpu_start = 1;
+ if (!paca_ptrs[nr]->cpu_start) {
+ paca_ptrs[nr]->cpu_start = 1;
smp_mb();
return 0;
}
@@ -565,19 +565,28 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
}
#endif
+#ifdef CONFIG_NMI_IPI
+static void stop_this_cpu(struct pt_regs *regs)
+#else
static void stop_this_cpu(void *dummy)
+#endif
{
/* Remove this CPU */
set_cpu_online(smp_processor_id(), false);
- local_irq_disable();
+ hard_irq_disable();
+ spin_begin();
while (1)
- ;
+ spin_cpu_relax();
}
void smp_send_stop(void)
{
+#ifdef CONFIG_NMI_IPI
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000);
+#else
smp_call_function(stop_this_cpu, NULL, 0);
+#endif
}
struct thread_info *current_set[NR_CPUS];
@@ -657,7 +666,7 @@ void smp_prepare_boot_cpu(void)
{
BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
- paca[boot_cpuid].__current = current;
+ paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
current_set[boot_cpuid] = task_thread_info(current);
@@ -748,8 +757,8 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
struct thread_info *ti = task_thread_info(idle);
#ifdef CONFIG_PPC64
- paca[cpu].__current = idle;
- paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->__current = idle;
+ paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
ti->cpu = cpu;
secondary_ti = current_set[cpu] = ti;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 04d0bbd..755dc98 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -20,6 +20,7 @@
#include <asm/firmware.h>
#include "cacheinfo.h"
+#include "setup.h"
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -588,21 +589,18 @@ static DEVICE_ATTR(dscr_default, 0600,
static void sysfs_create_dscr_default(void)
{
- int err = 0;
- if (cpu_has_feature(CPU_FTR_DSCR))
- err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
-}
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ int err = 0;
+ int cpu;
-void __init record_spr_defaults(void)
-{
- int cpu;
+ dscr_default = spr_default_dscr;
+ for_each_possible_cpu(cpu)
+ paca_ptrs[cpu]->dscr_default = dscr_default;
- if (cpu_has_feature(CPU_FTR_DSCR)) {
- dscr_default = mfspr(SPRN_DSCR);
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- paca[cpu].dscr_default = dscr_default;
+ err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
}
}
+
#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a32823d..360e71d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -266,6 +266,9 @@ void accumulate_stolen_time(void)
static inline u64 calculate_stolen_time(u64 stop_tb)
{
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return 0;
+
if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
return scan_dispatch_log(stop_tb);
@@ -1234,7 +1237,7 @@ void calibrate_delay(void)
static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
ppc_md.get_rtc_time(tm);
- return rtc_valid_tm(tm);
+ return 0;
}
static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1e48d15..a2ef0c0 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -208,6 +208,12 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
}
raw_local_irq_restore(flags);
+ /*
+ * system_reset_excption handles debugger, crash dump, panic, for 0x100
+ */
+ if (TRAP(regs) == 0x100)
+ return;
+
crash_fadump(regs, "die oops");
if (kexec_should_crash(current))
@@ -272,8 +278,13 @@ void die(const char *str, struct pt_regs *regs, long err)
{
unsigned long flags;
- if (debugger(regs))
- return;
+ /*
+ * system_reset_excption handles debugger, crash dump, panic, for 0x100
+ */
+ if (TRAP(regs) != 0x100) {
+ if (debugger(regs))
+ return;
+ }
flags = oops_begin(regs);
if (__die(str, regs, err))
@@ -460,7 +471,7 @@ static inline int check_io_access(struct pt_regs *regs)
/* single-step stuff */
#define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC)
#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC)
-
+#define clear_br_trace(regs) do {} while(0)
#else
/* On non-4xx, the reason for the machine check or program
exception is in the MSR. */
@@ -473,6 +484,7 @@ static inline int check_io_access(struct pt_regs *regs)
#define single_stepping(regs) ((regs)->msr & MSR_SE)
#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
+#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE)
#endif
#if defined(CONFIG_E500)
@@ -988,6 +1000,7 @@ void single_step_exception(struct pt_regs *regs)
enum ctx_state prev_state = exception_enter();
clear_single_step(regs);
+ clear_br_trace(regs);
if (kprobe_post_handler(regs))
return;
@@ -1495,18 +1508,6 @@ bail:
exception_exit(prev_state);
}
-void slb_miss_bad_addr(struct pt_regs *regs)
-{
- enum ctx_state prev_state = exception_enter();
-
- if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
- else
- bad_page_fault(regs, regs->dar, SIGSEGV);
-
- exception_exit(prev_state);
-}
-
void StackOverflow(struct pt_regs *regs)
{
printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 22b01a3..b44ec10 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -99,26 +99,28 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
+#ifdef CONFIG_PPC32
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_gettimeofday", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_gettime", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_getres", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_get_tbfreq", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_time", NULL
},
+#endif
};
/*
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 85ba80d..4b19da8 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -74,9 +74,15 @@ kvm-hv-y += \
book3s_64_mmu_hv.o \
book3s_64_mmu_radix.o
+kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+ book3s_hv_tm.o
+
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
book3s_hv_rm_xics.o book3s_hv_rm_xive.o
+kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+ book3s_hv_tm_builtin.o
+
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_hmi.o \
@@ -84,6 +90,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
+ $(kvm-book3s_64-builtin-tm-objs-y) \
$(kvm-book3s_64-builtin-xics-objs-y)
endif
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 234531d..97d4a11 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -819,12 +819,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
}
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
- return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
-}
-EXPORT_SYMBOL_GPL(kvm_unmap_hva);
-
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index d2b3ec0..4ad5e28 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -14,7 +14,6 @@
extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
struct kvm_memory_slot *memslot);
-extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
unsigned long end);
extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ef243fe..a670fa5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -877,15 +877,6 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
return 0;
}
-int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
-{
- hva_handler_fn handler;
-
- handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
- kvm_handle_hva(kvm, hva, handler);
- return 0;
-}
-
int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{
hva_handler_fn handler;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 5d9bafe..a57eafe 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -150,7 +150,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
{
int psize = MMU_BASE_PSIZE;
- if (pshift >= PMD_SHIFT)
+ if (pshift >= PUD_SHIFT)
+ psize = MMU_PAGE_1G;
+ else if (pshift >= PMD_SHIFT)
psize = MMU_PAGE_2M;
addr &= ~0xfffUL;
addr |= mmu_psize_defs[psize].ap << 5;
@@ -163,6 +165,17 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
asm volatile("ptesync": : :"memory");
}
+static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+{
+ unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+
+ asm volatile("ptesync": : :"memory");
+ /* RIC=1 PRS=0 R=1 IS=2 */
+ asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
+ : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
+ asm volatile("ptesync": : :"memory");
+}
+
unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
unsigned long clr, unsigned long set,
unsigned long addr, unsigned int shift)
@@ -223,9 +236,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
new_pud = pud_alloc_one(kvm->mm, gpa);
pmd = NULL;
- if (pud && pud_present(*pud))
+ if (pud && pud_present(*pud) && !pud_huge(*pud))
pmd = pmd_offset(pud, gpa);
- else
+ else if (level <= 1)
new_pmd = pmd_alloc_one(kvm->mm, gpa);
if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
@@ -246,6 +259,50 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
new_pud = NULL;
}
pud = pud_offset(pgd, gpa);
+ if (pud_huge(*pud)) {
+ unsigned long hgpa = gpa & PUD_MASK;
+
+ /*
+ * If we raced with another CPU which has just put
+ * a 1GB pte in after we saw a pmd page, try again.
+ */
+ if (level <= 1 && !new_pmd) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ /* Check if we raced and someone else has set the same thing */
+ if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
+ ret = 0;
+ goto out_unlock;
+ }
+ /* Valid 1GB page here already, remove it */
+ old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+ ~0UL, 0, hgpa, PUD_SHIFT);
+ kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
+ if (old & _PAGE_DIRTY) {
+ unsigned long gfn = hgpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (memslot && memslot->dirty_bitmap)
+ kvmppc_update_dirty_map(memslot,
+ gfn, PUD_SIZE);
+ }
+ }
+ if (level == 2) {
+ if (!pud_none(*pud)) {
+ /*
+ * There's a page table page here, but we wanted to
+ * install a large page, so remove and free the page
+ * table page. new_pmd will be NULL since level == 2.
+ */
+ new_pmd = pmd_offset(pud, 0);
+ pud_clear(pud);
+ kvmppc_radix_flush_pwc(kvm, gpa);
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+ ret = 0;
+ goto out_unlock;
+ }
if (pud_none(*pud)) {
if (!new_pmd)
goto out_unlock;
@@ -264,6 +321,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
ret = -EAGAIN;
goto out_unlock;
}
+ /* Check if we raced and someone else has set the same thing */
+ if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
+ ret = 0;
+ goto out_unlock;
+ }
/* Valid 2MB page here already, remove it */
old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
~0UL, 0, lgpa, PMD_SHIFT);
@@ -276,35 +338,43 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
kvmppc_update_dirty_map(memslot,
gfn, PMD_SIZE);
}
- } else if (level == 1 && !pmd_none(*pmd)) {
- /*
- * There's a page table page here, but we wanted
- * to install a large page. Tell the caller and let
- * it try installing a normal page if it wants.
- */
- ret = -EBUSY;
- goto out_unlock;
}
- if (level == 0) {
- if (pmd_none(*pmd)) {
- if (!new_ptep)
- goto out_unlock;
- pmd_populate(kvm->mm, pmd, new_ptep);
- new_ptep = NULL;
- }
- ptep = pte_offset_kernel(pmd, gpa);
- if (pte_present(*ptep)) {
- /* PTE was previously valid, so invalidate it */
- old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
- 0, gpa, 0);
- kvmppc_radix_tlbie_page(kvm, gpa, 0);
- if (old & _PAGE_DIRTY)
- mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+ if (level == 1) {
+ if (!pmd_none(*pmd)) {
+ /*
+ * There's a page table page here, but we wanted to
+ * install a large page, so remove and free the page
+ * table page. new_ptep will be NULL since level == 1.
+ */
+ new_ptep = pte_offset_kernel(pmd, 0);
+ pmd_clear(pmd);
+ kvmppc_radix_flush_pwc(kvm, gpa);
}
- kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
- } else {
kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+ ret = 0;
+ goto out_unlock;
}
+ if (pmd_none(*pmd)) {
+ if (!new_ptep)
+ goto out_unlock;
+ pmd_populate(kvm->mm, pmd, new_ptep);
+ new_ptep = NULL;
+ }
+ ptep = pte_offset_kernel(pmd, gpa);
+ if (pte_present(*ptep)) {
+ /* Check if someone else set the same thing */
+ if (pte_raw(*ptep) == pte_raw(pte)) {
+ ret = 0;
+ goto out_unlock;
+ }
+ /* PTE was previously valid, so invalidate it */
+ old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+ 0, gpa, 0);
+ kvmppc_radix_tlbie_page(kvm, gpa, 0);
+ if (old & _PAGE_DIRTY)
+ mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
ret = 0;
out_unlock:
@@ -325,11 +395,11 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned long mmu_seq, pte_size;
unsigned long gpa, gfn, hva, pfn;
struct kvm_memory_slot *memslot;
- struct page *page = NULL, *pages[1];
- long ret, npages, ok;
- unsigned int writing;
- struct vm_area_struct *vma;
- unsigned long flags;
+ struct page *page = NULL;
+ long ret;
+ bool writing;
+ bool upgrade_write = false;
+ bool *upgrade_p = &upgrade_write;
pte_t pte, *ptep;
unsigned long pgflags;
unsigned int shift, level;
@@ -369,122 +439,131 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
dsisr & DSISR_ISSTORE);
}
- /* used to check for invalidations in progress */
- mmu_seq = kvm->mmu_notifier_seq;
- smp_rmb();
-
writing = (dsisr & DSISR_ISSTORE) != 0;
- hva = gfn_to_hva_memslot(memslot, gfn);
+ if (memslot->flags & KVM_MEM_READONLY) {
+ if (writing) {
+ /* give the guest a DSI */
+ dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
+ kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+ return RESUME_GUEST;
+ }
+ upgrade_p = NULL;
+ }
+
if (dsisr & DSISR_SET_RC) {
/*
* Need to set an R or C bit in the 2nd-level tables;
- * if the relevant bits aren't already set in the linux
- * page tables, fall through to do the gup_fast to
- * set them in the linux page tables too.
+ * since we are just helping out the hardware here,
+ * it is sufficient to do what the hardware does.
*/
- ok = 0;
pgflags = _PAGE_ACCESSED;
if (writing)
pgflags |= _PAGE_DIRTY;
- local_irq_save(flags);
- ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
- if (ptep) {
- pte = READ_ONCE(*ptep);
- if (pte_present(pte) &&
- (pte_val(pte) & pgflags) == pgflags)
- ok = 1;
- }
- local_irq_restore(flags);
- if (ok) {
- spin_lock(&kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
- spin_unlock(&kvm->mmu_lock);
- return RESUME_GUEST;
- }
- /*
- * We are walking the secondary page table here. We can do this
- * without disabling irq.
- */
- ptep = __find_linux_pte(kvm->arch.pgtable,
- gpa, NULL, &shift);
- if (ptep && pte_present(*ptep)) {
- kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
- gpa, shift);
- spin_unlock(&kvm->mmu_lock);
- return RESUME_GUEST;
- }
- spin_unlock(&kvm->mmu_lock);
+ /*
+ * We are walking the secondary page table here. We can do this
+ * without disabling irq.
+ */
+ spin_lock(&kvm->mmu_lock);
+ ptep = __find_linux_pte(kvm->arch.pgtable,
+ gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep) &&
+ (!writing || pte_write(*ptep))) {
+ kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
+ gpa, shift);
+ dsisr &= ~DSISR_SET_RC;
}
+ spin_unlock(&kvm->mmu_lock);
+ if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+ DSISR_PROTFAULT | DSISR_SET_RC)))
+ return RESUME_GUEST;
}
- ret = -EFAULT;
- pfn = 0;
- pte_size = PAGE_SIZE;
- pgflags = _PAGE_READ | _PAGE_EXEC;
- level = 0;
- npages = get_user_pages_fast(hva, 1, writing, pages);
- if (npages < 1) {
- /* Check if it's an I/O mapping */
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
- if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
- (vma->vm_flags & VM_PFNMAP)) {
- pfn = vma->vm_pgoff +
- ((hva - vma->vm_start) >> PAGE_SHIFT);
- pgflags = pgprot_val(vma->vm_page_prot);
- }
- up_read(&current->mm->mmap_sem);
- if (!pfn)
- return -EFAULT;
- } else {
- page = pages[0];
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ /*
+ * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+ * do it with !atomic && !async, which is how we call it.
+ * We always ask for write permission since the common case
+ * is that the page is writable.
+ */
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
pfn = page_to_pfn(page);
- if (PageCompound(page)) {
- pte_size <<= compound_order(compound_head(page));
- /* See if we can insert a 2MB large-page PTE here */
- if (pte_size >= PMD_SIZE &&
- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
- (hva & (PMD_SIZE - PAGE_SIZE))) {
- level = 1;
- pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
- }
+ upgrade_write = true;
+ } else {
+ /* Call KVM generic code to do the slow-path check */
+ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+ writing, upgrade_p);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+ page = NULL;
+ if (pfn_valid(pfn)) {
+ page = pfn_to_page(pfn);
+ if (PageReserved(page))
+ page = NULL;
}
- /* See if we can provide write access */
- if (writing) {
- pgflags |= _PAGE_WRITE;
- } else {
- local_irq_save(flags);
- ptep = find_current_mm_pte(current->mm->pgd,
- hva, NULL, NULL);
- if (ptep && pte_write(*ptep))
- pgflags |= _PAGE_WRITE;
- local_irq_restore(flags);
+ }
+
+ /* See if we can insert a 1GB or 2MB large PTE here */
+ level = 0;
+ if (page && PageCompound(page)) {
+ pte_size = PAGE_SIZE << compound_order(compound_head(page));
+ if (pte_size >= PUD_SIZE &&
+ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+ (hva & (PUD_SIZE - PAGE_SIZE))) {
+ level = 2;
+ pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
+ } else if (pte_size >= PMD_SIZE &&
+ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+ (hva & (PMD_SIZE - PAGE_SIZE))) {
+ level = 1;
+ pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
}
}
/*
* Compute the PTE value that we need to insert.
*/
- pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
- if (pgflags & _PAGE_WRITE)
- pgflags |= _PAGE_DIRTY;
- pte = pfn_pte(pfn, __pgprot(pgflags));
-
- /* Allocate space in the tree and write the PTE */
- ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
- if (ret == -EBUSY) {
+ if (page) {
+ pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
+ _PAGE_ACCESSED;
+ if (writing || upgrade_write)
+ pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
+ pte = pfn_pte(pfn, __pgprot(pgflags));
+ } else {
/*
- * There's already a PMD where wanted to install a large page;
- * for now, fall back to installing a small page.
+ * Read the PTE from the process' radix tree and use that
+ * so we get the attribute bits.
*/
- level = 0;
- pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
- pte = pfn_pte(pfn, __pgprot(pgflags));
- ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+ local_irq_disable();
+ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ pte = *ptep;
+ local_irq_enable();
+ if (shift == PUD_SHIFT &&
+ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+ (hva & (PUD_SIZE - PAGE_SIZE))) {
+ level = 2;
+ } else if (shift == PMD_SHIFT &&
+ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+ (hva & (PMD_SIZE - PAGE_SIZE))) {
+ level = 1;
+ } else if (shift && shift != PAGE_SHIFT) {
+ /* Adjust PFN */
+ unsigned long mask = (1ul << shift) - PAGE_SIZE;
+ pte = __pte(pte_val(pte) | (hva & mask));
+ }
+ if (!(writing || upgrade_write))
+ pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
+ pte = __pte(pte_val(pte) | _PAGE_EXEC);
}
+ /* Allocate space in the tree and write the PTE */
+ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+
if (page) {
- if (!ret && (pgflags & _PAGE_WRITE))
+ if (!ret && (pte_val(pte) & _PAGE_WRITE))
set_page_dirty_lock(page);
put_page(page);
}
@@ -662,6 +741,10 @@ void kvmppc_free_radix(struct kvm *kvm)
for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
if (!pud_present(*pud))
continue;
+ if (pud_huge(*pud)) {
+ pud_clear(pud);
+ continue;
+ }
pmd = pmd_offset(pud, 0);
for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
if (pmd_is_leaf(*pmd)) {
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index c32e9bfe..6651f73 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -450,7 +450,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
/*
* Synchronize with the MMU notifier callbacks in
- * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+ * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.).
* While we have the rmap lock, code running on other CPUs
* cannot finish unmapping the host real page that backs
* this guest real page, so we are OK to access the host
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9cb9448..4d07fca 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,6 +49,7 @@
#include <asm/reg.h>
#include <asm/ppc-opcode.h>
#include <asm/asm-prototypes.h>
+#include <asm/debug.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
@@ -170,7 +171,7 @@ static bool kvmppc_ipi_thread(int cpu)
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
if (cpu >= 0 && cpu < nr_cpu_ids) {
- if (paca[cpu].kvm_hstate.xics_phys) {
+ if (paca_ptrs[cpu]->kvm_hstate.xics_phys) {
xics_wake_cpu(cpu);
return true;
}
@@ -498,7 +499,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
* use 640 bytes of the structure though, so we should accept
* clients that set a size of 640.
*/
- if (len < 640)
+ BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+ if (len < sizeof(struct lppaca))
break;
vpap = &tvcpu->arch.vpa;
err = 0;
@@ -741,6 +743,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
case H_SET_MODE_RESOURCE_SET_DAWR:
if (!kvmppc_power8_compatible(vcpu))
return H_P2;
+ if (!ppc_breakpoint_available())
+ return H_P2;
if (mflags)
return H_UNSUPPORTED_FLAG_START;
if (value2 & DABRX_HYP)
@@ -1206,6 +1210,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
}
break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+ /*
+ * This occurs for various TM-related instructions that
+ * we need to emulate on POWER9 DD2.2. We have already
+ * handled the cases where the guest was in real-suspend
+ * mode and was transitioning to transactional state.
+ */
+ r = kvmhv_p9_tm_emulation(vcpu);
+ break;
+#endif
+
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
break;
@@ -1978,7 +1995,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
* turn off the HFSCR bit, which causes those instructions to trap.
*/
vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
- if (!cpu_has_feature(CPU_FTR_TM))
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ vcpu->arch.hfscr |= HFSCR_TM;
+ else if (!cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr &= ~HFSCR_TM;
if (cpu_has_feature(CPU_FTR_ARCH_300))
vcpu->arch.hfscr &= ~HFSCR_MSGP;
@@ -2140,7 +2159,7 @@ static int kvmppc_grab_hwthread(int cpu)
struct paca_struct *tpaca;
long timeout = 10000;
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
/* Ensure the thread won't go into the kernel if it wakes */
tpaca->kvm_hstate.kvm_vcpu = NULL;
@@ -2173,7 +2192,7 @@ static void kvmppc_release_hwthread(int cpu)
{
struct paca_struct *tpaca;
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
tpaca->kvm_hstate.kvm_vcore = NULL;
@@ -2239,9 +2258,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
vcpu->arch.thread_cpu = cpu;
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
}
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
+ tpaca->kvm_hstate.fake_suspend = 0;
/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
smp_wmb();
tpaca->kvm_hstate.kvm_vcore = vc;
@@ -2264,7 +2284,7 @@ static void kvmppc_wait_for_nap(int n_threads)
* for any threads that still have a non-NULL vcore ptr.
*/
for (i = 1; i < n_threads; ++i)
- if (paca[cpu + i].kvm_hstate.kvm_vcore)
+ if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
break;
if (i == n_threads) {
HMT_medium();
@@ -2274,7 +2294,7 @@ static void kvmppc_wait_for_nap(int n_threads)
}
HMT_medium();
for (i = 1; i < n_threads; ++i)
- if (paca[cpu + i].kvm_hstate.kvm_vcore)
+ if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
@@ -2806,9 +2826,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
for (thr = 0; thr < controlled_threads; ++thr) {
- paca[pcpu + thr].kvm_hstate.tid = thr;
- paca[pcpu + thr].kvm_hstate.napping = 0;
- paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+ struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+ paca->kvm_hstate.tid = thr;
+ paca->kvm_hstate.napping = 0;
+ paca->kvm_hstate.kvm_split_mode = sip;
}
/* Initiate micro-threading (split-core) on POWER8 if required */
@@ -2923,7 +2945,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
} else if (hpt_on_radix) {
/* Wait for all threads to have seen final sync */
for (thr = 1; thr < controlled_threads; ++thr) {
- while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
+ struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+ while (paca->kvm_hstate.kvm_split_mode) {
HMT_low();
barrier();
}
@@ -4351,7 +4375,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.flush_memslot = kvmppc_core_flush_memslot_hv,
.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
.commit_memory_region = kvmppc_core_commit_memory_region_hv,
- .unmap_hva = kvm_unmap_hva_hv,
.unmap_hva_range = kvm_unmap_hva_range_hv,
.age_hva = kvm_age_hva_hv,
.test_age_hva = kvm_test_age_hva_hv,
@@ -4388,7 +4411,7 @@ static int kvm_init_subcore_bitmap(void)
int node = cpu_to_node(first_cpu);
/* Ignore if it is already allocated. */
- if (paca[first_cpu].sibling_subcore_state)
+ if (paca_ptrs[first_cpu]->sibling_subcore_state)
continue;
sibling_subcore_state =
@@ -4403,7 +4426,8 @@ static int kvm_init_subcore_bitmap(void)
for (j = 0; j < threads_per_core; j++) {
int cpu = first_cpu + j;
- paca[cpu].sibling_subcore_state = sibling_subcore_state;
+ paca_ptrs[cpu]->sibling_subcore_state =
+ sibling_subcore_state;
}
}
return 0;
@@ -4430,7 +4454,7 @@ static int kvmppc_book3s_init_hv(void)
/*
* We need a way of accessing the XICS interrupt controller,
- * either directly, via paca[cpu].kvm_hstate.xics_phys, or
+ * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or
* indirectly, via OPAL.
*/
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 49a2c78..de18299 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -251,7 +251,7 @@ void kvmhv_rm_send_ipi(int cpu)
return;
/* Else poke the target with an IPI */
- xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
else
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index dc54373..0e84930 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -79,8 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r5, 0
mtspr SPRN_MMCRA, r5
isync
- ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
- lbz r5, LPPACA_PMCINUSE(r3)
+ lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f86a202..bd63fa8 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -113,8 +113,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_SPRG_VDSO_WRITE,r3
/* Reload the host's PMU registers */
- ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
- lbz r4, LPPACA_PMCINUSE(r3)
+ lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r4, 0
beq 23f /* skip if not */
BEGIN_FTR_SECTION
@@ -786,12 +785,18 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
*/
bl kvmppc_restore_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
#endif
/* Load guest PMU registers */
@@ -885,8 +890,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ld r6, VCPU_DAWRX(r4)
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
+ /*
+ * Handle broken DAWR case by not writing it. This means we
+ * can still store the DAWR register for migration.
+ */
+BEGIN_FTR_SECTION
mtspr SPRN_DAWR, r5
mtspr SPRN_DAWRX, r6
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
mtspr SPRN_CIABR, r7
mtspr SPRN_TAR, r8
ld r5, VCPU_IC(r4)
@@ -914,11 +925,14 @@ BEGIN_FTR_SECTION
mtspr SPRN_ACOP, r6
mtspr SPRN_CSIGR, r7
mtspr SPRN_TACR, r8
+ nop
FTR_SECTION_ELSE
/* POWER9-only registers */
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
+ lbz r8, HSTATE_FAKE_SUSPEND(r13)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
+ rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
ld r7, VCPU_HFSCR(r4)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
@@ -1370,6 +1384,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r3, VCPU_CTR(r9)
std r4, VCPU_XER(r9)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* For softpatch interrupt, go off and do TM instruction emulation */
+ cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+ beq kvmppc_tm_emul
+#endif
+
/* If this is a page table miss then see if it's theirs or ours */
cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
beq kvmppc_hdsi
@@ -1747,12 +1767,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
*/
bl kvmppc_save_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
#endif
/* Increment yield count if they have a VPA */
@@ -1852,6 +1878,10 @@ BEGIN_FTR_SECTION
ld r6, STACK_SLOT_DAWR(r1)
ld r7, STACK_SLOT_DAWRX(r1)
mtspr SPRN_CIABR, r5
+ /*
+ * If the DAWR doesn't work, it's ok to write these here as
+ * this value should always be zero
+ */
mtspr SPRN_DAWR, r6
mtspr SPRN_DAWRX, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
@@ -2055,6 +2085,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtlr r0
blr
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2. This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+kvmppc_tm_emul:
+ /* Save instruction image in HEIR */
+ mfspr r3, SPRN_HEIR
+ stw r3, VCPU_HEIR(r9)
+
+ /*
+ * The cases we want to handle here are those where the guest
+ * is in real suspend mode and is trying to transition to
+ * transactional mode.
+ */
+ lbz r0, HSTATE_FAKE_SUSPEND(r13)
+ cmpwi r0, 0 /* keep exiting guest if in fake suspend */
+ bne guest_exit_cont
+ rldicl r3, r11, 64 - MSR_TS_S_LG, 62
+ cmpwi r3, 1 /* or if not in suspend state */
+ bne guest_exit_cont
+
+ /* Call C code to do the emulation */
+ mr r3, r9
+ bl kvmhv_p9_tm_emulation_early
+ nop
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+ cmpwi r3, 0
+ beq guest_exit_cont /* continue exiting if not handled */
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ b fast_interrupt_c_return /* go back to guest if handled */
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
/*
* Check whether an HDSI is an HPTE not found fault or something else.
* If it is an HPTE not found fault that is due to the guest accessing
@@ -2507,8 +2573,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3,0
blr
+2:
+BEGIN_FTR_SECTION
+ /* POWER9 with disabled DAWR */
+ li r3, H_HARDWARE
+ blr
+END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
-2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
+ rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
rlwimi r5, r4, 2, DAWRX_WT
clrrdi r4, r4, 3
std r4, VCPU_DAWR(r3)
@@ -2588,13 +2660,19 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
*/
ld r9, HSTATE_KVM_VCPU(r13)
bl kvmppc_save_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
#endif
/*
@@ -2701,12 +2779,18 @@ kvm_end_cede:
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
*/
bl kvmppc_restore_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
#endif
/* load up FP state */
@@ -3033,6 +3117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
kvmppc_save_tm:
mflr r0
std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
/* Turn on TM. */
mfmsr r8
@@ -3047,6 +3132,24 @@ kvmppc_save_tm:
std r1, HSTATE_HOST_R1(r13)
li r3, TM_CAUSE_KVM_RESCHED
+BEGIN_FTR_SECTION
+ lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
+ cmpwi r0, 0
+ beq 3f
+ rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
+ beq 4f
+BEGIN_FTR_SECTION_NESTED(96)
+ bl pnv_power9_force_smt4_catch
+END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+ nop
+ b 6f
+3:
+ /* Emulation of the treclaim instruction needs TEXASR before treclaim */
+ mfspr r6, SPRN_TEXASR
+ std r6, VCPU_ORIG_TEXASR(r9)
+6:
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
/* Clear the MSR RI since r1, r13 are all going to be foobar. */
li r5, 0
mtmsrd r5, 1
@@ -3058,6 +3161,43 @@ kvmppc_save_tm:
SET_SCRATCH0(r13)
GET_PACA(r13)
std r9, PACATMSCRATCH(r13)
+
+ /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */
+BEGIN_FTR_SECTION
+ lbz r9, HSTATE_FAKE_SUSPEND(r13)
+ cmpwi r9, 0
+ beq 2f
+ /*
+ * We were in fake suspend, so we are not going to save the
+ * register state as the guest checkpointed state (since
+ * we already have it), therefore we can now use any volatile GPR.
+ */
+ /* Reload stack pointer and TOC. */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+ /* Set MSR RI now we have r1 and r13 back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+ HMT_MEDIUM
+ ld r6, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r6
+BEGIN_FTR_SECTION_NESTED(96)
+ bl pnv_power9_force_smt4_release
+END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+ nop
+
+4:
+ mfspr r3, SPRN_PSSCR
+ /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
+ li r0, PSSCR_FAKE_SUSPEND
+ andc r3, r3, r0
+ mtspr SPRN_PSSCR, r3
+ ld r9, HSTATE_KVM_VCPU(r13)
+ /* Don't save TEXASR, use value from last exit in real suspend state */
+ b 11f
+2:
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
ld r9, HSTATE_KVM_VCPU(r13)
/* Get a few more GPRs free. */
@@ -3128,13 +3268,15 @@ kvmppc_save_tm:
* change these outside of a transaction, so they must always be
* context switched.
*/
+ mfspr r7, SPRN_TEXASR
+ std r7, VCPU_TEXASR(r9)
+11:
mfspr r5, SPRN_TFHAR
mfspr r6, SPRN_TFIAR
- mfspr r7, SPRN_TEXASR
std r5, VCPU_TFHAR(r9)
std r6, VCPU_TFIAR(r9)
- std r7, VCPU_TEXASR(r9)
+ addi r1, r1, PPC_MIN_STKFRM
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
@@ -3169,6 +3311,8 @@ kvmppc_restore_tm:
mtspr SPRN_TFIAR, r6
mtspr SPRN_TEXASR, r7
+ li r0, 0
+ stb r0, HSTATE_FAKE_SUSPEND(r13)
ld r5, VCPU_MSR(r4)
rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
beqlr /* TM not active in guest */
@@ -3183,6 +3327,15 @@ kvmppc_restore_tm:
mtspr SPRN_TEXASR, r7
/*
+ * If we are doing TM emulation for the guest on a POWER9 DD2,
+ * then we don't actually do a trechkpt -- we either set up
+ * fake-suspend mode, or emulate a TM rollback.
+ */
+BEGIN_FTR_SECTION
+ b .Ldo_tm_fake_load
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
+ /*
* We need to load up the checkpointed state for the guest.
* We need to do this early as it will blow away any GPRs, VSRs and
* some SPRs.
@@ -3254,10 +3407,24 @@ kvmppc_restore_tm:
/* Set the MSR RI since we have our registers back. */
li r5, MSR_RI
mtmsrd r5, 1
-
+9:
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
+
+.Ldo_tm_fake_load:
+ cmpwi r5, 1 /* check for suspended state */
+ bgt 10f
+ stb r5, HSTATE_FAKE_SUSPEND(r13)
+ b 9b /* and return */
+10: stdu r1, -PPC_MIN_STKFRM(r1)
+ /* guest is in transactional state, so simulate rollback */
+ mr r3, r4
+ bl kvmhv_emulate_tm_rollback
+ nop
+ ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
+ addi r1, r1, PPC_MIN_STKFRM
+ b 9b
#endif
/*
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
new file mode 100644
index 0000000..bf710ad
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
+{
+ u64 texasr, tfiar;
+ u64 msr = vcpu->arch.shregs.msr;
+
+ tfiar = vcpu->arch.pc & ~0x3ull;
+ texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT;
+ if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr))
+ texasr |= TEXASR_SUSP;
+ if (msr & MSR_PR) {
+ texasr |= TEXASR_PR;
+ tfiar |= 1;
+ }
+ vcpu->arch.tfiar = tfiar;
+ /* Preserve ROT and TL fields of existing TEXASR */
+ vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | texasr;
+}
+
+/*
+ * This gets called on a softpatch interrupt on POWER9 DD2.2 processors.
+ * We expect to find a TM-related instruction to be emulated. The
+ * instruction image is in vcpu->arch.emul_inst. If the guest was in
+ * TM suspended or transactional state, the checkpointed state has been
+ * reclaimed and is in the vcpu struct. The CPU is in virtual mode in
+ * host context.
+ */
+int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
+{
+ u32 instr = vcpu->arch.emul_inst;
+ u64 msr = vcpu->arch.shregs.msr;
+ u64 newmsr, bescr;
+ int ra, rs;
+
+ switch (instr & 0xfc0007ff) {
+ case PPC_INST_RFID:
+ /* XXX do we need to check for PR=0 here? */
+ newmsr = vcpu->arch.shregs.srr1;
+ /* should only get here for Sx -> T1 transition */
+ WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+ MSR_TM_TRANSACTIONAL(newmsr) &&
+ (newmsr & MSR_TM)));
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+ vcpu->arch.cfar = vcpu->arch.pc - 4;
+ vcpu->arch.pc = vcpu->arch.shregs.srr0;
+ return RESUME_GUEST;
+
+ case PPC_INST_RFEBB:
+ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+ /* generate an illegal instruction interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+ /* check EBB facility is available */
+ if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
+ /* generate an illegal instruction interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+ if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
+ /* generate a facility unavailable interrupt */
+ vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+ ((u64)FSCR_EBB_LG << 56);
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+ return RESUME_GUEST;
+ }
+ bescr = vcpu->arch.bescr;
+ /* expect to see a S->T transition requested */
+ WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+ ((bescr >> 30) & 3) == 2));
+ bescr &= ~BESCR_GE;
+ if (instr & (1 << 11))
+ bescr |= BESCR_GE;
+ vcpu->arch.bescr = bescr;
+ msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ vcpu->arch.shregs.msr = msr;
+ vcpu->arch.cfar = vcpu->arch.pc - 4;
+ vcpu->arch.pc = vcpu->arch.ebbrr;
+ return RESUME_GUEST;
+
+ case PPC_INST_MTMSRD:
+ /* XXX do we need to check for PR=0 here? */
+ rs = (instr >> 21) & 0x1f;
+ newmsr = kvmppc_get_gpr(vcpu, rs);
+ /* check this is a Sx -> T1 transition */
+ WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+ MSR_TM_TRANSACTIONAL(newmsr) &&
+ (newmsr & MSR_TM)));
+ /* mtmsrd doesn't change LE */
+ newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+ return RESUME_GUEST;
+
+ case PPC_INST_TSR:
+ /* check for PR=1 and arch 2.06 bit set in PCR */
+ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+ /* generate an illegal instruction interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+ /* check for TM disabled in the HFSCR or MSR */
+ if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+ /* generate an illegal instruction interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+ if (!(msr & MSR_TM)) {
+ /* generate a facility unavailable interrupt */
+ vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+ ((u64)FSCR_TM_LG << 56);
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_FAC_UNAVAIL);
+ return RESUME_GUEST;
+ }
+ /* Set CR0 to indicate previous transactional state */
+ vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ /* L=1 => tresume, L=0 => tsuspend */
+ if (instr & (1 << 21)) {
+ if (MSR_TM_SUSPENDED(msr))
+ msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ } else {
+ if (MSR_TM_TRANSACTIONAL(msr))
+ msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
+ }
+ vcpu->arch.shregs.msr = msr;
+ return RESUME_GUEST;
+
+ case PPC_INST_TRECLAIM:
+ /* check for TM disabled in the HFSCR or MSR */
+ if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+ /* generate an illegal instruction interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+ if (!(msr & MSR_TM)) {
+ /* generate a facility unavailable interrupt */
+ vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+ ((u64)FSCR_TM_LG << 56);
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_FAC_UNAVAIL);
+ return RESUME_GUEST;
+ }
+ /* If no transaction active, generate TM bad thing */
+ if (!MSR_TM_ACTIVE(msr)) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+ return RESUME_GUEST;
+ }
+ /* If failure was not previously recorded, recompute TEXASR */
+ if (!(vcpu->arch.orig_texasr & TEXASR_FS)) {
+ ra = (instr >> 16) & 0x1f;
+ if (ra)
+ ra = kvmppc_get_gpr(vcpu, ra) & 0xff;
+ emulate_tx_failure(vcpu, ra);
+ }
+
+ copy_from_checkpoint(vcpu);
+
+ /* Set CR0 to indicate previous transactional state */
+ vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+ return RESUME_GUEST;
+
+ case PPC_INST_TRECHKPT:
+ /* XXX do we need to check for PR=0 here? */
+ /* check for TM disabled in the HFSCR or MSR */
+ if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+ /* generate an illegal instruction interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+ if (!(msr & MSR_TM)) {
+ /* generate a facility unavailable interrupt */
+ vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+ ((u64)FSCR_TM_LG << 56);
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_FAC_UNAVAIL);
+ return RESUME_GUEST;
+ }
+ /* If transaction active or TEXASR[FS] = 0, bad thing */
+ if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+ return RESUME_GUEST;
+ }
+
+ copy_to_checkpoint(vcpu);
+
+ /* Set CR0 to indicate previous transactional state */
+ vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ vcpu->arch.shregs.msr = msr | MSR_TS_S;
+ return RESUME_GUEST;
+ }
+
+ /* What should we do here? We didn't recognize the instruction */
+ WARN_ON_ONCE(1);
+ return RESUME_GUEST;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
new file mode 100644
index 0000000..d98ccfd
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * This handles the cases where the guest is in real suspend mode
+ * and we want to get back to the guest without dooming the transaction.
+ * The caller has checked that the guest is in real-suspend mode
+ * (MSR[TS] = S and the fake-suspend flag is not set).
+ */
+int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+{
+ u32 instr = vcpu->arch.emul_inst;
+ u64 newmsr, msr, bescr;
+ int rs;
+
+ switch (instr & 0xfc0007ff) {
+ case PPC_INST_RFID:
+ /* XXX do we need to check for PR=0 here? */
+ newmsr = vcpu->arch.shregs.srr1;
+ /* should only get here for Sx -> T1 transition */
+ if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+ return 0;
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+ vcpu->arch.cfar = vcpu->arch.pc - 4;
+ vcpu->arch.pc = vcpu->arch.shregs.srr0;
+ return 1;
+
+ case PPC_INST_RFEBB:
+ /* check for PR=1 and arch 2.06 bit set in PCR */
+ msr = vcpu->arch.shregs.msr;
+ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+ return 0;
+ /* check EBB facility is available */
+ if (!(vcpu->arch.hfscr & HFSCR_EBB) ||
+ ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB)))
+ return 0;
+ bescr = mfspr(SPRN_BESCR);
+ /* expect to see a S->T transition requested */
+ if (((bescr >> 30) & 3) != 2)
+ return 0;
+ bescr &= ~BESCR_GE;
+ if (instr & (1 << 11))
+ bescr |= BESCR_GE;
+ mtspr(SPRN_BESCR, bescr);
+ msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ vcpu->arch.shregs.msr = msr;
+ vcpu->arch.cfar = vcpu->arch.pc - 4;
+ vcpu->arch.pc = mfspr(SPRN_EBBRR);
+ return 1;
+
+ case PPC_INST_MTMSRD:
+ /* XXX do we need to check for PR=0 here? */
+ rs = (instr >> 21) & 0x1f;
+ newmsr = kvmppc_get_gpr(vcpu, rs);
+ msr = vcpu->arch.shregs.msr;
+ /* check this is a Sx -> T1 transition */
+ if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+ return 0;
+ /* mtmsrd doesn't change LE */
+ newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+ return 1;
+
+ case PPC_INST_TSR:
+ /* we know the MSR has the TS field = S (0b01) here */
+ msr = vcpu->arch.shregs.msr;
+ /* check for PR=1 and arch 2.06 bit set in PCR */
+ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+ return 0;
+ /* check for TM disabled in the HFSCR or MSR */
+ if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM))
+ return 0;
+ /* L=1 => tresume => set TS to T (0b10) */
+ if (instr & (1 << 21))
+ vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ /* Set CR0 to 0b0010 */
+ vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000;
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * This is called when we are returning to a guest in TM transactional
+ * state. We roll the guest state back to the checkpointed state.
+ */
+void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
+ vcpu->arch.pc = vcpu->arch.tfhar;
+ copy_from_checkpoint(vcpu);
+ vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000;
+}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3ae7523..d3f304d 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -277,15 +277,6 @@ static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
}
}
-static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
-{
- trace_kvm_unmap_hva(hva);
-
- do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
-
- return 0;
-}
-
static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
unsigned long end)
{
@@ -1773,7 +1764,6 @@ static struct kvmppc_ops kvm_ops_pr = {
.flush_memslot = kvmppc_core_flush_memslot_pr,
.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
.commit_memory_region = kvmppc_core_commit_memory_region_pr,
- .unmap_hva = kvm_unmap_hva_pr,
.unmap_hva_range = kvm_unmap_hva_range_pr,
.age_hva = kvm_age_hva_pr,
.test_age_hva = kvm_test_age_hva_pr,
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 423b213..c878b4f 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -724,7 +724,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
/************* MMU Notifiers *************/
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
trace_kvm_unmap_hva(hva);
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 4d8b4d6..fa888bf 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -45,12 +45,6 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_BOOK3S
/* mtdec lowers the interrupt line when positive. */
kvmppc_core_dequeue_dec(vcpu);
-
- /* POWER4+ triggers a dec interrupt if the value is < 0 */
- if (vcpu->arch.dec & 0x80000000) {
- kvmppc_core_queue_dec(vcpu);
- return;
- }
#endif
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 52c2053..4e38764 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -646,10 +646,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = hv_enabled;
break;
#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case KVM_CAP_PPC_HTM:
r = hv_enabled &&
- (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
+ (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
break;
+#endif
default:
r = 0;
break;
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index 85785a3..2f9a882 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -254,21 +254,6 @@ TRACE_EVENT(kvm_exit,
)
);
-TRACE_EVENT(kvm_unmap_hva,
- TP_PROTO(unsigned long hva),
- TP_ARGS(hva),
-
- TP_STRUCT__entry(
- __field( unsigned long, hva )
- ),
-
- TP_fast_assign(
- __entry->hva = hva;
- ),
-
- TP_printk("unmap hva 0x%lx\n", __entry->hva)
-);
-
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 3c29c90..6539010 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -22,9 +22,11 @@ ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
extra-$(CONFIG_PPC64) += crtsavres.o
endif
+obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
+ memcpy_power7.o
+
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \
- memcpy_64.o memcmp_64.o pmem.o
+ string_64.o memcpy_64.o memcmp_64.o pmem.o
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 4bcc9e7..8d5034f 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -21,7 +21,9 @@ _GLOBAL_TOC(copy_page)
BEGIN_FTR_SECTION
lis r5,PAGE_SIZE@h
FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
b copypage_power7
+#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
ori r5,r5,PAGE_SIZE@l
BEGIN_FTR_SECTION
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index ca5fc8f..8fa73b7 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -42,8 +42,6 @@ _GLOBAL(copypage_power7)
lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32
-.machine push
-.machine "power4"
/* setup read stream 0 */
dcbt 0,r4,0b01000 /* addr from */
dcbt 0,r7,0b01010 /* length and depth from */
@@ -52,7 +50,6 @@ _GLOBAL(copypage_power7)
dcbtst 0,r10,0b01010 /* length and depth to */
eieio
dcbt 0,r8,0b01010 /* all streams GO */
-.machine pop
#ifdef CONFIG_ALTIVEC
mflr r0
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 08da06e..5066773 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -20,11 +20,13 @@
.align 7
_GLOBAL_TOC(__copy_tofrom_user)
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
b __copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#endif
_GLOBAL(__copy_tofrom_user_base)
/* first check for a whole page copy on a page boundary */
cmpldi cr1,r5,16
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index d416a4a..215e476 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -312,8 +312,6 @@ err1; stb r0,0(r3)
lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32
-.machine push
-.machine "power4"
/* setup read stream 0 */
dcbt 0,r6,0b01000 /* addr from */
dcbt 0,r7,0b01010 /* length and depth from */
@@ -322,7 +320,6 @@ err1; stb r0,0(r3)
dcbtst 0,r10,0b01010 /* length and depth to */
eieio
dcbt 0,r8,0b01010 /* all streams GO */
-.machine pop
beq cr1,.Lunwind_stack_nonvmx_copy
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 73697c4..35f80ab 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -153,7 +153,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
patch_instruction(dest + 2, instrs[2]);
}
- printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+ printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index f4d6088..8d8265b 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -19,9 +19,11 @@ BEGIN_FTR_SECTION
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
#ifndef SELFTEST
b memcpy_power7
#endif
+#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
/* dumb little-endian memcpy that will get replaced at runtime */
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 193909a..df7de9d 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -259,15 +259,12 @@ _GLOBAL(memcpy_power7)
lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32
-.machine push
-.machine "power4"
dcbt 0,r6,0b01000
dcbt 0,r7,0b01010
dcbtst 0,r9,0b01000
dcbtst 0,r10,0b01010
eieio
dcbt 0,r8,0b01010 /* GO */
-.machine pop
beq cr1,.Lunwind_stack_nonvmx_copy
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 70274b7..34d68f1 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -280,7 +280,7 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest,
* Copy from userspace to a buffer, using the largest possible
* aligned accesses, up to sizeof(long).
*/
-static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb,
+static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb,
struct pt_regs *regs)
{
int err = 0;
@@ -385,7 +385,7 @@ static nokprobe_inline int write_mem_aligned(unsigned long val,
* Copy from a buffer to userspace, using the largest possible
* aligned accesses, up to sizeof(long).
*/
-static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb,
+static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb,
struct pt_regs *regs)
{
int err = 0;
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 849f50c..cf77d75 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
mtspr(SPRN_M_TW, __pa(pgd) - offset);
/* Update context */
- mtspr(SPRN_M_CASID, id);
+ mtspr(SPRN_M_CASID, id - 1);
/* sync */
mb();
}
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 697b70a..7d0945b 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
return 1;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
vsidkey = SLB_VSID_USER;
break;
case VMALLOC_REGION_ID:
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 866446c..c01d627 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -297,7 +297,12 @@ static bool access_error(bool is_write, bool is_exec,
if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
return true;
-
+ /*
+ * We should ideally do the vma pkey access check here. But in the
+ * fault path, handle_mm_fault() also does the same check. To avoid
+ * these multiple checks, we skip it here and handle access error due
+ * to pkeys later.
+ */
return false;
}
@@ -518,25 +523,16 @@ good_area:
#ifdef CONFIG_PPC_MEM_KEYS
/*
- * if the HPTE is not hashed, hardware will not detect
- * a key fault. Lets check if we failed because of a
- * software detected key fault.
+ * we skipped checking for access error due to key earlier.
+ * Check that using handle_mm_fault error return.
*/
if (unlikely(fault & VM_FAULT_SIGSEGV) &&
- !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
- is_exec, 0)) {
- /*
- * The PGD-PDT...PMD-PTE tree may not have been fully setup.
- * Hence we cannot walk the tree to locate the PTE, to locate
- * the key. Hence let's use vma_pkey() to get the key; instead
- * of get_mm_addr_key().
- */
+ !arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
+
int pkey = vma_pkey(vma);
- if (likely(pkey)) {
- up_read(&mm->mmap_sem);
- return bad_key_fault_exception(regs, address, pkey);
- }
+ up_read(&mm->mmap_sem);
+ return bad_key_fault_exception(regs, address, pkey);
}
#endif /* CONFIG_PPC_MEM_KEYS */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 656933c..1d049c7 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -866,18 +866,6 @@ static void native_flush_hash_range(unsigned long number, int local)
local_irq_restore(flags);
}
-static int native_register_proc_table(unsigned long base, unsigned long page_size,
- unsigned long table_size)
-{
- unsigned long patb1 = base << 25; /* VSID */
-
- patb1 |= (page_size << 5); /* sllp */
- patb1 |= table_size;
-
- partition_tb->patb1 = cpu_to_be64(patb1);
- return 0;
-}
-
void __init hpte_init_native(void)
{
mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
@@ -889,7 +877,4 @@ void __init hpte_init_native(void)
mmu_hash_ops.hpte_clear_all = native_hpte_clear;
mmu_hash_ops.flush_hash_range = native_flush_hash_range;
mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- register_process_table = native_register_proc_table;
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index cf290d41..0bd3790 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -132,9 +132,10 @@ EXPORT_SYMBOL(mmu_hash_ops);
* is provided by the firmware.
*/
-/* Pre-POWER4 CPUs (4k pages only)
+/*
+ * Fallback (4k pages only)
*/
-static struct mmu_psize_def mmu_psize_defaults_old[] = {
+static struct mmu_psize_def mmu_psize_defaults[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
@@ -554,8 +555,8 @@ static void __init htab_scan_page_sizes(void)
mmu_psize_set_default_penc();
/* Default to 4K pages only */
- memcpy(mmu_psize_defs, mmu_psize_defaults_old,
- sizeof(mmu_psize_defaults_old));
+ memcpy(mmu_psize_defs, mmu_psize_defaults,
+ sizeof(mmu_psize_defaults));
/*
* Try to find the available page sizes in the device-tree
@@ -781,7 +782,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
}
}
-int hash__create_section_mapping(unsigned long start, unsigned long end)
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
int rc = htab_bolt_mapping(start, end, __pa(start),
pgprot_val(PAGE_KERNEL), mmu_linear_psize,
@@ -875,6 +876,12 @@ static void __init htab_initialize(void)
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
+ /*
+ * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
+ * to inform the hypervisor that we wish to use the HPT.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ register_process_table(0, 0, 0);
#ifdef CONFIG_FA_DUMP
/*
* If firmware assisted dump is active firmware preserves
@@ -1110,19 +1117,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
#ifdef CONFIG_PPC_MM_SLICES
static unsigned int get_paca_psize(unsigned long addr)
{
- u64 lpsizes;
- unsigned char *hpsizes;
+ unsigned char *psizes;
unsigned long index, mask_index;
if (addr < SLICE_LOW_TOP) {
- lpsizes = get_paca()->mm_ctx_low_slices_psize;
+ psizes = get_paca()->mm_ctx_low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xF;
+ } else {
+ psizes = get_paca()->mm_ctx_high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = get_paca()->mm_ctx_high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
}
#else
@@ -1262,7 +1268,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
}
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
break;
case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
@@ -1527,7 +1533,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Get VSID */
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
if (!vsid)
return;
/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 876da2b..f1153f8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -122,9 +122,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
-#else
-#define HUGEPD_PGD_SHIFT PUD_SHIFT
-#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
/*
@@ -553,9 +550,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
+#ifdef CONFIG_PPC_RADIX_MMU
if (radix_enabled())
return radix__hugetlb_get_unmapped_area(file, addr, len,
pgoff, flags);
+#endif
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
@@ -563,15 +562,14 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
#ifdef CONFIG_PPC_MM_SLICES
- unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
/* With radix we don't use slice, so derive it from vma*/
- if (!radix_enabled())
+ if (!radix_enabled()) {
+ unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
return 1UL << mmu_psize_to_shift(psize);
+ }
#endif
- if (!is_vm_hugetlb_page(vma))
- return PAGE_SIZE;
-
- return huge_page_size(hstate_vma(vma));
+ return vma_kernel_pagesize(vma);
}
static inline bool is_power_of_4(unsigned long x)
@@ -666,15 +664,26 @@ static int __init hugetlbpage_init(void)
shift = mmu_psize_to_shift(psize);
- if (add_huge_page_size(1ULL << shift) < 0)
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (shift > PGDIR_SHIFT)
continue;
-
+ else if (shift > PUD_SHIFT)
+ pdshift = PGDIR_SHIFT;
+ else if (shift > PMD_SHIFT)
+ pdshift = PUD_SHIFT;
+ else
+ pdshift = PMD_SHIFT;
+#else
if (shift < HUGEPD_PUD_SHIFT)
pdshift = PMD_SHIFT;
else if (shift < HUGEPD_PGD_SHIFT)
pdshift = PUD_SHIFT;
else
pdshift = PGDIR_SHIFT;
+#endif
+
+ if (add_huge_page_size(1ULL << shift) < 0)
+ continue;
/*
* if we have pdshift and shift value same, we don't
* use pgt cache for hugepd.
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 6419b33..3e59e5d 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -88,18 +88,13 @@ void MMU_init(void);
int __map_without_bats;
int __map_without_ltlbs;
-/*
- * This tells the system to allow ioremapping memory marked as reserved.
- */
-int __allow_ioremap_reserved;
-
/* max amount of low RAM to map in */
unsigned long __max_low_memory = MAX_LOW_MEM;
/*
* Check for command-line options that affect what MMU_init will do.
*/
-void __init MMU_setup(void)
+static void __init MMU_setup(void)
{
/* Check for nobats option (used in mapin_ram). */
if (strstr(boot_command_line, "nobats")) {
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fdb424a..51ce091 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -68,12 +68,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_BOOK3S_64
-#if H_PGTABLE_RANGE > USER_VSID_RANGE
-#warning Limited user VSID range means pagetable space is wasted
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
phys_addr_t kernstart_addr;
@@ -372,7 +366,7 @@ static int __init parse_disable_radix(char *p)
{
bool val;
- if (strlen(p) == 0)
+ if (!p)
val = true;
else if (kstrtobool(p, &val))
return -EINVAL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index fe8c611..737f8a4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -82,17 +82,7 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
int page_is_ram(unsigned long pfn)
{
-#ifndef CONFIG_PPC64 /* XXX for now */
- return pfn < max_pfn;
-#else
- unsigned long paddr = (pfn << PAGE_SHIFT);
- struct memblock_region *reg;
-
- for_each_memblock(memory, reg)
- if (paddr >= reg->base && paddr < (reg->base + reg->size))
- return 1;
- return 0;
-#endif
+ return memblock_is_memory(__pfn_to_phys(pfn));
}
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -117,7 +107,7 @@ int memory_add_physaddr_to_nid(u64 start)
}
#endif
-int __weak create_section_mapping(unsigned long start, unsigned long end)
+int __weak create_section_mapping(unsigned long start, unsigned long end, int nid)
{
return -ENODEV;
}
@@ -127,7 +117,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
@@ -137,7 +127,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
resize_hpt_for_hotplug(memblock_phys_mem_size());
start = (unsigned long)__va(start);
- rc = create_section_mapping(start, start + size);
+ rc = create_section_mapping(start, start + size, nid);
if (rc) {
pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
start, start + size, rc);
@@ -148,7 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -212,7 +202,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
EXPORT_SYMBOL_GPL(walk_system_ram_range);
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
{
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
min_low_pfn = MEMORY_START >> PAGE_SHIFT;
@@ -224,7 +214,10 @@ void __init initmem_init(void)
* memblock_regions
*/
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+}
+void __init initmem_init(void)
+{
/* XXX need to clip this if using highmem? */
sparse_memory_present_with_active_regions(0);
sparse_init();
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index d503f34..b24ce40 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -39,12 +39,12 @@
#define MIN_GAP (128*1024*1024)
#define MAX_GAP (TASK_SIZE/6*5)
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
@@ -76,9 +76,10 @@ static inline unsigned long stack_maxrandom_size(void)
return (1<<30);
}
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+ struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
/* Values close to RLIM_INFINITY can overflow. */
@@ -196,26 +197,28 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
}
static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
- unsigned long random_factor)
+ unsigned long random_factor,
+ struct rlimit *rlim_stack)
{
- if (mmap_is_legacy()) {
+ if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = radix__arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base(random_factor);
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
}
}
#else
/* dummy */
extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
- unsigned long random_factor);
+ unsigned long random_factor,
+ struct rlimit *rlim_stack);
#endif
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@@ -223,16 +226,17 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
random_factor = arch_mmap_rnd();
if (radix_enabled())
- return radix__arch_pick_mmap_layout(mm, random_factor);
+ return radix__arch_pick_mmap_layout(mm, random_factor,
+ rlim_stack);
/*
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
- if (mmap_is_legacy()) {
+ if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base(random_factor);
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 3f980ba..b75194d 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -94,13 +94,6 @@ static int hash__init_new_context(struct mm_struct *mm)
return index;
/*
- * In the case of exec, use the default limit,
- * otherwise inherit it from the mm we are duplicating.
- */
- if (!mm->context.slb_addr_limit)
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-
- /*
* The old code would re-promote on fork, we don't do that when using
* slices as it could cause problem promoting slices that have been
* forced down to 4K.
@@ -115,7 +108,7 @@ static int hash__init_new_context(struct mm_struct *mm)
* check against 0 is OK.
*/
if (mm->context.id == 0)
- slice_set_user_psize(mm, mmu_virtual_psize);
+ slice_init_new_context_exec(mm);
subpage_prot_init_new_context(mm);
@@ -186,6 +179,19 @@ void __destroy_context(int context_id)
}
EXPORT_SYMBOL_GPL(__destroy_context);
+static void destroy_contexts(mm_context_t *ctx)
+{
+ int index, context_id;
+
+ spin_lock(&mmu_context_lock);
+ for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+ context_id = ctx->extended_id[index];
+ if (context_id)
+ ida_remove(&mmu_context_ida, context_id);
+ }
+ spin_unlock(&mmu_context_lock);
+}
+
#ifdef CONFIG_PPC_64K_PAGES
static void destroy_pagetable_page(struct mm_struct *mm)
{
@@ -224,7 +230,7 @@ void destroy_context(struct mm_struct *mm)
else
subpage_prot_free(mm);
destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
+ destroy_contexts(&mm->context);
mm->context.id = MMU_NO_CONTEXT;
}
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e0a2d8e..4c615fc 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -75,8 +75,7 @@ EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
/*
* Taken from alloc_migrate_target with changes to remove CMA allocations
*/
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
- int **resultp)
+struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
{
gfp_t gfp_mask = GFP_USER;
struct page *new_page;
@@ -112,7 +111,7 @@ static int mm_iommu_move_page_from_cma(struct page *page)
put_page(page); /* Drop the gup reference */
ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
- NULL, 0, MIGRATE_SYNC, MR_CMA);
+ NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
if (ret) {
if (!list_empty(&cma_migrate_pages))
putback_movable_pages(&cma_migrate_pages);
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4554d65..be8f5c9 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -331,6 +331,17 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
pr_hard("initing context for mm @%p\n", mm);
+#ifdef CONFIG_PPC_MM_SLICES
+ /*
+ * We have MMU_NO_CONTEXT set to be ~0. Hence check
+ * explicitly against context.id == 0. This ensures that we properly
+ * initialize context slice details for newly allocated mm's (which will
+ * have id == 0) and don't alter context slice inherited via fork (which
+ * will have id != 0).
+ */
+ if (mm->context.id == 0)
+ slice_init_new_context_exec(mm);
+#endif
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
return 0;
@@ -428,8 +439,8 @@ void __init mmu_context_init(void)
* -- BenH
*/
if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
- first_context = 0;
- last_context = 15;
+ first_context = 1;
+ last_context = 16;
no_selective_tlbil = true;
} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
first_context = 1;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 57fbc55..c4c0a09 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -98,7 +98,6 @@ extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
extern int __map_without_bats;
-extern int __allow_ioremap_reserved;
extern unsigned int rtas_data, rtas_size;
struct hash_pte;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index edd8d0b..57a5029 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -831,18 +831,13 @@ out:
of_node_put(rtas);
}
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
{
- int nid, cpu;
-
- max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
- max_pfn = max_low_pfn;
+ int cpu;
if (parse_numa_properties())
setup_nonnuma();
- memblock_dump_all();
-
/*
* Modify the set of possible NUMA nodes to reflect information
* available about the set of online nodes, and the set of nodes
@@ -853,6 +848,23 @@ void __init initmem_init(void)
find_possible_nodes();
+ setup_node_to_cpumask_map();
+
+ reset_numa_cpu_lookup_table();
+
+ for_each_present_cpu(cpu)
+ numa_setup_cpu(cpu);
+}
+
+void __init initmem_init(void)
+{
+ int nid;
+
+ max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ max_pfn = max_low_pfn;
+
+ memblock_dump_all();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
@@ -863,10 +875,6 @@ void __init initmem_init(void)
sparse_init();
- setup_node_to_cpumask_map();
-
- reset_numa_cpu_lookup_table();
-
/*
* We need the numa_cpu_lookup_table to be accurate for all CPUs,
* even before we online them, so that we can use cpu_to_{node,mem}
@@ -876,8 +884,6 @@ void __init initmem_init(void)
*/
cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
- for_each_present_cpu(cpu)
- numa_setup_cpu(cpu);
}
static int __init early_numa(char *p)
@@ -1105,7 +1111,7 @@ static void setup_cpu_associativity_change_counters(void)
for_each_possible_cpu(cpu) {
int i;
u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+ volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
for (i = 0; i < distance_ref_points_depth; i++)
counts[i] = hypervisor_counts[i];
@@ -1131,7 +1137,7 @@ static int update_cpu_associativity_changes_mask(void)
for_each_possible_cpu(cpu) {
int i, changed = 0;
u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+ volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
for (i = 0; i < distance_ref_points_depth; i++) {
if (hypervisor_counts[i] != counts[i]) {
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 422e802..518518f 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -155,15 +155,15 @@ void mmu_cleanup_all(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int create_section_mapping(unsigned long start, unsigned long end)
+int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid)
{
if (radix_enabled())
- return radix__create_section_mapping(start, end);
+ return radix__create_section_mapping(start, end, nid);
- return hash__create_section_mapping(start, end);
+ return hash__create_section_mapping(start, end, nid);
}
-int remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit remove_section_mapping(unsigned long start, unsigned long end)
{
if (radix_enabled())
return radix__remove_section_mapping(start, end);
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 469808e..199bfda 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -24,6 +24,10 @@
#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
+#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* vmemmap is the starting address of the virtual address space where
@@ -320,7 +324,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
WARN_ON(vsid == 0);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 2e10a96..f1891e2 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -48,20 +48,88 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
return 0;
}
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+ unsigned long region_start, unsigned long region_end)
{
+ unsigned long pa = 0;
void *pt;
- pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+ if (region_start || region_end) /* has region hint */
+ pa = memblock_alloc_range(size, size, region_start, region_end,
+ MEMBLOCK_NONE);
+ else if (nid != -1) /* has node hint */
+ pa = memblock_alloc_base_nid(size, size,
+ MEMBLOCK_ALLOC_ANYWHERE,
+ nid, MEMBLOCK_NONE);
+
+ if (!pa)
+ pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+ BUG_ON(!pa);
+
+ pt = __va(pa);
memset(pt, 0, size);
return pt;
}
-int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
pgprot_t flags,
- unsigned int map_page_size)
+ unsigned int map_page_size,
+ int nid,
+ unsigned long region_start, unsigned long region_end)
{
+ unsigned long pfn = pa >> PAGE_SHIFT;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(ea);
+ if (pgd_none(*pgdp)) {
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
+ region_start, region_end);
+ pgd_populate(&init_mm, pgdp, pudp);
+ }
+ pudp = pud_offset(pgdp, ea);
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
+ region_start, region_end);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
+ }
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+ region_start, region_end);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+ smp_wmb();
+ return 0;
+}
+
+/*
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size,
+ int nid,
+ unsigned long region_start, unsigned long region_end)
+{
+ unsigned long pfn = pa >> PAGE_SHIFT;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
@@ -70,61 +138,48 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
* Make sure task size is correct as per the max adddr
*/
BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
- if (slab_is_available()) {
- pgdp = pgd_offset_k(ea);
- pudp = pud_alloc(&init_mm, pgdp, ea);
- if (!pudp)
- return -ENOMEM;
- if (map_page_size == PUD_SIZE) {
- ptep = (pte_t *)pudp;
- goto set_the_pte;
- }
- pmdp = pmd_alloc(&init_mm, pudp, ea);
- if (!pmdp)
- return -ENOMEM;
- if (map_page_size == PMD_SIZE) {
- ptep = pmdp_ptep(pmdp);
- goto set_the_pte;
- }
- ptep = pte_alloc_kernel(pmdp, ea);
- if (!ptep)
- return -ENOMEM;
- } else {
- pgdp = pgd_offset_k(ea);
- if (pgd_none(*pgdp)) {
- pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
- BUG_ON(pudp == NULL);
- pgd_populate(&init_mm, pgdp, pudp);
- }
- pudp = pud_offset(pgdp, ea);
- if (map_page_size == PUD_SIZE) {
- ptep = (pte_t *)pudp;
- goto set_the_pte;
- }
- if (pud_none(*pudp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- BUG_ON(pmdp == NULL);
- pud_populate(&init_mm, pudp, pmdp);
- }
- pmdp = pmd_offset(pudp, ea);
- if (map_page_size == PMD_SIZE) {
- ptep = pmdp_ptep(pmdp);
- goto set_the_pte;
- }
- if (!pmd_present(*pmdp)) {
- ptep = early_alloc_pgtable(PAGE_SIZE);
- BUG_ON(ptep == NULL);
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
- ptep = pte_offset_kernel(pmdp, ea);
+
+ if (unlikely(!slab_is_available()))
+ return early_map_kernel_page(ea, pa, flags, map_page_size,
+ nid, region_start, region_end);
+
+ /*
+ * Should make page table allocation functions be able to take a
+ * node, so we can place kernel page tables on the right nodes after
+ * boot.
+ */
+ pgdp = pgd_offset_k(ea);
+ pudp = pud_alloc(&init_mm, pgdp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
}
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
set_the_pte:
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
smp_wmb();
return 0;
}
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size)
+{
+ return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
#ifdef CONFIG_STRICT_KERNEL_RWX
void radix__change_memory_range(unsigned long start, unsigned long end,
unsigned long clear)
@@ -211,7 +266,8 @@ static inline void __meminit print_mapping(unsigned long start,
}
static int __meminit create_physical_mapping(unsigned long start,
- unsigned long end)
+ unsigned long end,
+ int nid)
{
unsigned long vaddr, addr, mapping_size = 0;
pgprot_t prot;
@@ -267,7 +323,7 @@ retry:
else
prot = PAGE_KERNEL;
- rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
+ rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
if (rc)
return rc;
}
@@ -276,7 +332,7 @@ retry:
return 0;
}
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
{
unsigned long rts_field;
struct memblock_region *reg;
@@ -286,9 +342,16 @@ static void __init radix_init_pgtable(void)
/*
* Create the linear mapping, using standard page size for now
*/
- for_each_memblock(memory, reg)
+ for_each_memblock(memory, reg) {
+ /*
+ * The memblock allocator is up at this point, so the
+ * page tables will be allocated within the range. No
+ * need or a node (which we don't have yet).
+ */
WARN_ON(create_physical_mapping(reg->base,
- reg->base + reg->size));
+ reg->base + reg->size,
+ -1));
+ }
/* Find out how many PID bits are supported */
if (cpu_has_feature(CPU_FTR_HVMODE)) {
@@ -317,7 +380,7 @@ static void __init radix_init_pgtable(void)
* host.
*/
BUG_ON(PRTB_SIZE_SHIFT > 36);
- process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+ process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
/*
* Fill in the process table.
*/
@@ -575,12 +638,8 @@ void __init radix__early_init_mmu(void)
#ifdef CONFIG_PCI
pci_io_base = ISA_IO_BASE;
#endif
-
- /*
- * For now radix also use the same frag size
- */
- __pte_frag_nr = H_PTE_FRAG_NR;
- __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+ __pte_frag_nr = RADIX_PTE_FRAG_NR;
+ __pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
radix_init_native();
@@ -695,7 +754,7 @@ struct change_mapping_params {
unsigned long aligned_end;
};
-static int stop_machine_change_mapping(void *data)
+static int __meminit stop_machine_change_mapping(void *data)
{
struct change_mapping_params *params =
(struct change_mapping_params *)data;
@@ -705,8 +764,8 @@ static int stop_machine_change_mapping(void *data)
spin_unlock(&init_mm.page_table_lock);
pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(params->aligned_start, params->start);
- create_physical_mapping(params->end, params->aligned_end);
+ create_physical_mapping(params->aligned_start, params->start, -1);
+ create_physical_mapping(params->end, params->aligned_end, -1);
spin_lock(&init_mm.page_table_lock);
return 0;
}
@@ -742,7 +801,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
/*
* clear the pte and potentially split the mapping helper
*/
-static void split_kernel_mapping(unsigned long addr, unsigned long end,
+static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
unsigned long size, pte_t *pte)
{
unsigned long mask = ~(size - 1);
@@ -835,7 +894,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
}
}
-static void remove_pagetable(unsigned long start, unsigned long end)
+static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
unsigned long addr, next;
pud_t *pud_base;
@@ -863,12 +922,12 @@ static void remove_pagetable(unsigned long start, unsigned long end)
radix__flush_tlb_kernel_range(start, end);
}
-int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
- return create_physical_mapping(start, end);
+ return create_physical_mapping(start, end, nid);
}
-int radix__remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
remove_pagetable(start, end);
return 0;
@@ -876,19 +935,30 @@ int radix__remove_section_mapping(unsigned long start, unsigned long end)
#endif /* CONFIG_MEMORY_HOTPLUG */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+ pgprot_t flags, unsigned int map_page_size,
+ int nid)
+{
+ return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
int __meminit radix__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
{
/* Create a PTE encoding */
unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+ int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+ int ret;
+
+ ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
+ BUG_ON(ret);
- BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
-void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
remove_pagetable(start, start + page_size);
}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index d35d9ad..120a49b 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -148,7 +148,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
* mem_init() sets high_memory so only do the check after that.
*/
if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
- !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
+ page_is_ram(__phys_to_pfn(p))) {
printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
(unsigned long long)p, __builtin_return_address(0));
return NULL;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index adf469f..9bf659d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -57,11 +57,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_BOOK3S_64
-#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
-#error TASK_SIZE_USER64 exceeds user VSID range
-#endif
-#endif
#ifdef CONFIG_PPC_BOOK3S_64
/*
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index ba71c54..0eafdf01 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -119,18 +119,15 @@ int pkey_initialize(void)
#else
os_reserved = 0;
#endif
+ initial_allocation_mask = ~0x0;
+ pkey_amr_uamor_mask = ~0x0ul;
+ pkey_iamr_mask = ~0x0ul;
/*
- * Bits are in LE format. NOTE: 1, 0 are reserved.
+ * key 0, 1 are reserved.
* key 0 is the default key, which allows read/write/execute.
* key 1 is recommended not to be used. PowerISA(3.0) page 1015,
* programming note.
*/
- initial_allocation_mask = ~0x0;
-
- /* register mask is in BE format */
- pkey_amr_uamor_mask = ~0x0ul;
- pkey_iamr_mask = ~0x0ul;
-
for (i = 2; i < (pkeys_total - os_reserved); i++) {
initial_allocation_mask &= ~(0x1 << i);
pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
@@ -308,9 +305,9 @@ void thread_pkey_regs_init(struct thread_struct *thread)
if (static_branch_likely(&pkey_disabled))
return;
- write_amr(read_amr() & pkey_amr_uamor_mask);
- write_iamr(read_iamr() & pkey_iamr_mask);
- write_uamor(read_uamor() & pkey_amr_uamor_mask);
+ thread->amr = read_amr() & pkey_amr_uamor_mask;
+ thread->iamr = read_iamr() & pkey_iamr_mask;
+ thread->uamor = read_uamor() & pkey_amr_uamor_mask;
}
static inline bool pkey_allows_readwrite(int pkey)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 13cfe41..66577cc 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -22,6 +22,7 @@
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
+#include <linux/context_tracking.h>
#include <linux/mm_types.h>
#include <asm/udbg.h>
@@ -340,3 +341,110 @@ void slb_initialize(void)
asm volatile("isync":::"memory");
}
+
+static void insert_slb_entry(unsigned long vsid, unsigned long ea,
+ int bpsize, int ssize)
+{
+ unsigned long flags, vsid_data, esid_data;
+ enum slb_index index;
+ int slb_cache_index;
+
+ /*
+ * We are irq disabled, hence should be safe to access PACA.
+ */
+ index = get_paca()->stab_rr;
+
+ /*
+ * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
+ */
+ if (index < (mmu_slb_size - 1))
+ index++;
+ else
+ index = SLB_NUM_BOLTED;
+
+ get_paca()->stab_rr = index;
+
+ flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+ vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
+ ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
+ esid_data = mk_esid_data(ea, ssize, index);
+
+ asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
+ : "memory");
+
+ /*
+ * Now update slb cache entries
+ */
+ slb_cache_index = get_paca()->slb_cache_ptr;
+ if (slb_cache_index < SLB_CACHE_ENTRIES) {
+ /*
+ * We have space in slb cache for optimized switch_slb().
+ * Top 36 bits from esid_data as per ISA
+ */
+ get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
+ get_paca()->slb_cache_ptr++;
+ } else {
+ /*
+ * Our cache is full and the current cache content strictly
+ * doesn't indicate the active SLB conents. Bump the ptr
+ * so that switch_slb() will ignore the cache.
+ */
+ get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+ }
+}
+
+static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long vsid;
+ int bpsize;
+
+ /*
+ * We are always above 1TB, hence use high user segment size.
+ */
+ vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
+ bpsize = get_slice_psize(mm, ea);
+ insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
+}
+
+void slb_miss_large_addr(struct pt_regs *regs)
+{
+ enum ctx_state prev_state = exception_enter();
+ unsigned long ea = regs->dar;
+ int context;
+
+ if (REGION_ID(ea) != USER_REGION_ID)
+ goto slb_bad_addr;
+
+ /*
+ * Are we beyound what the page table layout supports ?
+ */
+ if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+ goto slb_bad_addr;
+
+ /* Lower address should have been handled by asm code */
+ if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
+ goto slb_bad_addr;
+
+ /*
+ * consider this as bad access if we take a SLB miss
+ * on an address above addr limit.
+ */
+ if (ea >= current->mm->context.slb_addr_limit)
+ goto slb_bad_addr;
+
+ context = get_ea_context(&current->mm->context, ea);
+ if (!context)
+ goto slb_bad_addr;
+
+ handle_multi_context_slb_miss(context, ea);
+ exception_exit(prev_state);
+ return;
+
+slb_bad_addr:
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+ else
+ bad_page_fault(regs, ea, SIGSEGV);
+ exception_exit(prev_state);
+}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 2cf5ef3..a83fbd2 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
*/
_GLOBAL(slb_allocate)
/*
- * check for bad kernel/user address
- * (ea & ~REGION_MASK) >= PGTABLE_RANGE
+ * Check if the address falls within the range of the first context, or
+ * if we may need to handle multi context. For the first context we
+ * allocate the slb entry via the fast path below. For large address we
+ * branch out to C-code and see if additional contexts have been
+ * allocated.
+ * The test here is:
+ * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
*/
- rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4)
+ rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
bne- 8f
srdi r9,r3,60 /* get region */
@@ -200,10 +205,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
5:
/*
* Handle lpsizes
- * r9 is get_paca()->context.low_slices_psize, r11 is index
+ * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
*/
- ld r9,PACALOWSLICESPSIZE(r13)
- mr r11,r10
+ srdi r11,r10,1 /* index */
+ addi r9,r11,PACALOWSLICESPSIZE
+ lbzx r9,r13,r9 /* r9 is lpsizes[r11] */
+ rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */
6:
sldi r11,r11,2 /* index * 4 */
/* Extract the psize and multiply to get an array offset */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 23ec2c5..9cd87d1 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -37,32 +37,25 @@
#include <asm/hugetlb.h>
static DEFINE_SPINLOCK(slice_convert_lock);
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
#ifdef DEBUG
int _slice_debug = 1;
-static void slice_print_mask(const char *label, struct slice_mask mask)
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
{
if (!_slice_debug)
return;
- pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
- pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
+ pr_devel("%s low_slice: %*pbl\n", label,
+ (int)SLICE_NUM_LOW, &mask->low_slices);
+ pr_devel("%s high_slice: %*pbl\n", label,
+ (int)SLICE_NUM_HIGH, mask->high_slices);
}
#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
#else
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
#define slice_dbg(fmt...)
#endif
@@ -73,10 +66,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
unsigned long end = start + len - 1;
ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ if (SLICE_NUM_HIGH)
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
if (start < SLICE_LOW_TOP) {
- unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
+ unsigned long mend = min(end,
+ (unsigned long)(SLICE_LOW_TOP - 1));
ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
- (1u << GET_LOW_SLICE_INDEX(start));
@@ -113,11 +108,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
unsigned long start = slice << SLICE_HIGH_SHIFT;
unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
+#ifdef CONFIG_PPC64
/* Hack, so that each addresses is controlled by exactly one
* of the high or low area bitmaps, the first high area starts
* at 4GB, not 0 */
if (start == 0)
start = SLICE_LOW_TOP;
+#endif
return !slice_area_is_free(mm, start, end - start);
}
@@ -128,7 +125,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
unsigned long i;
ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ if (SLICE_NUM_HIGH)
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
for (i = 0; i < SLICE_NUM_LOW; i++)
if (!slice_low_has_vma(mm, i))
@@ -142,53 +140,75 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
__set_bit(i, ret->high_slices);
}
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
- unsigned long high_limit)
+#ifdef CONFIG_PPC_BOOK3S_64
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
{
- unsigned char *hpsizes;
- int index, mask_index;
- unsigned long i;
- u64 lpsizes;
-
- ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+#ifdef CONFIG_PPC_64K_PAGES
+ if (psize == MMU_PAGE_64K)
+ return &mm->context.mask_64k;
+#endif
+ if (psize == MMU_PAGE_4K)
+ return &mm->context.mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_16M)
+ return &mm->context.mask_16m;
+ if (psize == MMU_PAGE_16G)
+ return &mm->context.mask_16g;
+#endif
+ BUG();
+}
+#elif defined(CONFIG_PPC_8xx)
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+ if (psize == mmu_virtual_psize)
+ return &mm->context.mask_base_psize;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_512K)
+ return &mm->context.mask_512k;
+ if (psize == MMU_PAGE_8M)
+ return &mm->context.mask_8m;
+#endif
+ BUG();
+}
+#else
+#error "Must define the slice masks for page sizes supported by the platform"
+#endif
- lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == psize)
- ret->low_slices |= 1u << i;
+static bool slice_check_range_fits(struct mm_struct *mm,
+ const struct slice_mask *available,
+ unsigned long start, unsigned long len)
+{
+ unsigned long end = start + len - 1;
+ u64 low_slices = 0;
- if (high_limit <= SLICE_LOW_TOP)
- return;
+ if (start < SLICE_LOW_TOP) {
+ unsigned long mend = min(end,
+ (unsigned long)(SLICE_LOW_TOP - 1));
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- __set_bit(i, ret->high_slices);
+ low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+ - (1u << GET_LOW_SLICE_INDEX(start));
}
-}
+ if ((low_slices & available->low_slices) != low_slices)
+ return false;
-static int slice_check_fit(struct mm_struct *mm,
- struct slice_mask mask, struct slice_mask available)
-{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
- /*
- * Make sure we just do bit compare only to the max
- * addr limit and not the full bit map size.
- */
- unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
+ if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) {
+ unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
+ unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
+ unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
+ unsigned long i;
- bitmap_and(result, mask.high_slices,
- available.high_slices, slice_count);
+ for (i = start_index; i < start_index + count; i++) {
+ if (!test_bit(i, available->high_slices))
+ return false;
+ }
+ }
- return (mask.low_slices & available.low_slices) == mask.low_slices &&
- bitmap_equal(result, mask.high_slices, slice_count);
+ return true;
}
static void slice_flush_segments(void *parm)
{
+#ifdef CONFIG_PPC64
struct mm_struct *mm = parm;
unsigned long flags;
@@ -200,40 +220,64 @@ static void slice_flush_segments(void *parm)
local_irq_save(flags);
slb_flush_and_rebolt();
local_irq_restore(flags);
+#endif
}
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+static void slice_convert(struct mm_struct *mm,
+ const struct slice_mask *mask, int psize)
{
int index, mask_index;
/* Write the new slice psize bits */
- unsigned char *hpsizes;
- u64 lpsizes;
+ unsigned char *hpsizes, *lpsizes;
+ struct slice_mask *psize_mask, *old_mask;
unsigned long i, flags;
+ int old_psize;
slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
slice_print_mask(" mask", mask);
+ psize_mask = slice_mask_for_size(mm, psize);
+
/* We need to use a spinlock here to protect against
* concurrent 64k -> 4k demotion ...
*/
spin_lock_irqsave(&slice_convert_lock, flags);
lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (mask.low_slices & (1u << i))
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ if (!(mask->low_slices & (1u << i)))
+ continue;
+
+ mask_index = i & 0x1;
+ index = i >> 1;
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ /* Update the slice_mask */
+ old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
+ old_mask = slice_mask_for_size(mm, old_psize);
+ old_mask->low_slices &= ~(1u << i);
+ psize_mask->low_slices |= 1u << i;
+
+ /* Update the sizes array */
+ lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }
hpsizes = mm->context.high_slices_psize;
for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+ if (!test_bit(i, mask->high_slices))
+ continue;
+
mask_index = i & 0x1;
index = i >> 1;
- if (test_bit(i, mask.high_slices))
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
+
+ /* Update the slice_mask */
+ old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
+ old_mask = slice_mask_for_size(mm, old_psize);
+ __clear_bit(i, old_mask->high_slices);
+ __set_bit(i, psize_mask->high_slices);
+
+ /* Update the sizes array */
+ hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
(((unsigned long)psize) << (mask_index * 4));
}
@@ -254,26 +298,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
* 'available' slice_mark.
*/
static bool slice_scan_available(unsigned long addr,
- struct slice_mask available,
- int end,
- unsigned long *boundary_addr)
+ const struct slice_mask *available,
+ int end, unsigned long *boundary_addr)
{
unsigned long slice;
if (addr < SLICE_LOW_TOP) {
slice = GET_LOW_SLICE_INDEX(addr);
*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
- return !!(available.low_slices & (1u << slice));
+ return !!(available->low_slices & (1u << slice));
} else {
slice = GET_HIGH_SLICE_INDEX(addr);
*boundary_addr = (slice + end) ?
((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
- return !!test_bit(slice, available.high_slices);
+ return !!test_bit(slice, available->high_slices);
}
}
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
unsigned long len,
- struct slice_mask available,
+ const struct slice_mask *available,
int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -319,7 +362,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
unsigned long len,
- struct slice_mask available,
+ const struct slice_mask *available,
int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -377,7 +420,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
- struct slice_mask mask, int psize,
+ const struct slice_mask *mask, int psize,
int topdown, unsigned long high_limit)
{
if (topdown)
@@ -386,23 +429,33 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
}
-static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_copy_mask(struct slice_mask *dst,
+ const struct slice_mask *src)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
- dst->low_slices |= src->low_slices;
- bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+ dst->low_slices = src->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
}
-static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_or_mask(struct slice_mask *dst,
+ const struct slice_mask *src1,
+ const struct slice_mask *src2)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
- dst->low_slices &= ~src->low_slices;
+ dst->low_slices = src1->low_slices | src2->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
+}
- bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+static inline void slice_andnot_mask(struct slice_mask *dst,
+ const struct slice_mask *src1,
+ const struct slice_mask *src2)
+{
+ dst->low_slices = src1->low_slices & ~src2->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
}
#ifdef CONFIG_PPC_64K_PAGES
@@ -415,10 +468,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
unsigned long flags, unsigned int psize,
int topdown)
{
- struct slice_mask mask;
struct slice_mask good_mask;
struct slice_mask potential_mask;
- struct slice_mask compat_mask;
+ const struct slice_mask *maskp;
+ const struct slice_mask *compat_maskp = NULL;
int fixed = (flags & MAP_FIXED);
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long page_size = 1UL << pshift;
@@ -442,23 +495,16 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
}
if (high_limit > mm->context.slb_addr_limit) {
+ /*
+ * Increasing the slb_addr_limit does not require
+ * slice mask cache to be recalculated because it should
+ * be already initialised beyond the old address limit.
+ */
mm->context.slb_addr_limit = high_limit;
+
on_each_cpu(slice_flush_segments, mm, 1);
}
- /*
- * init different masks
- */
- mask.low_slices = 0;
- bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
-
- /* silence stupid warning */;
- potential_mask.low_slices = 0;
- bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
-
- compat_mask.low_slices = 0;
- bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
-
/* Sanity checks */
BUG_ON(mm->task_size == 0);
BUG_ON(mm->context.slb_addr_limit == 0);
@@ -481,8 +527,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* First make up a "good" mask of slices that have the right size
* already
*/
- slice_mask_for_size(mm, psize, &good_mask, high_limit);
- slice_print_mask(" good_mask", good_mask);
+ maskp = slice_mask_for_size(mm, psize);
/*
* Here "good" means slices that are already the right page size,
@@ -503,40 +548,47 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* search in good | compat | free, found => convert free.
*/
-#ifdef CONFIG_PPC_64K_PAGES
- /* If we support combo pages, we can allow 64k pages in 4k slices */
- if (psize == MMU_PAGE_64K) {
- slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
+ /*
+ * If we support combo pages, we can allow 64k pages in 4k slices
+ * The mask copies could be avoided in most cases here if we had
+ * a pointer to good mask for the next code to use.
+ */
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
+ compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
if (fixed)
- slice_or_mask(&good_mask, &compat_mask);
+ slice_or_mask(&good_mask, maskp, compat_maskp);
+ else
+ slice_copy_mask(&good_mask, maskp);
+ } else {
+ slice_copy_mask(&good_mask, maskp);
}
-#endif
+
+ slice_print_mask(" good_mask", &good_mask);
+ if (compat_maskp)
+ slice_print_mask(" compat_mask", compat_maskp);
/* First check hint if it's valid or if we have MAP_FIXED */
if (addr != 0 || fixed) {
- /* Build a mask for the requested range */
- slice_range_to_mask(addr, len, &mask);
- slice_print_mask(" mask", mask);
-
/* Check if we fit in the good mask. If we do, we just return,
* nothing else to do
*/
- if (slice_check_fit(mm, mask, good_mask)) {
+ if (slice_check_range_fits(mm, &good_mask, addr, len)) {
slice_dbg(" fits good !\n");
- return addr;
+ newaddr = addr;
+ goto return_addr;
}
} else {
/* Now let's see if we can find something in the existing
* slices for that size
*/
- newaddr = slice_find_area(mm, len, good_mask,
+ newaddr = slice_find_area(mm, len, &good_mask,
psize, topdown, high_limit);
if (newaddr != -ENOMEM) {
/* Found within the good mask, we don't have to setup,
* we thus return directly
*/
slice_dbg(" found area at 0x%lx\n", newaddr);
- return newaddr;
+ goto return_addr;
}
}
/*
@@ -544,12 +596,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* empty and thus can be converted
*/
slice_mask_for_free(mm, &potential_mask, high_limit);
- slice_or_mask(&potential_mask, &good_mask);
- slice_print_mask(" potential", potential_mask);
+ slice_or_mask(&potential_mask, &potential_mask, &good_mask);
+ slice_print_mask(" potential", &potential_mask);
- if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
- slice_dbg(" fits potential !\n");
- goto convert;
+ if (addr != 0 || fixed) {
+ if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
+ slice_dbg(" fits potential !\n");
+ newaddr = addr;
+ goto convert;
+ }
}
/* If we have MAP_FIXED and failed the above steps, then error out */
@@ -562,46 +617,64 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* anywhere in the good area.
*/
if (addr) {
- addr = slice_find_area(mm, len, good_mask,
- psize, topdown, high_limit);
- if (addr != -ENOMEM) {
- slice_dbg(" found area at 0x%lx\n", addr);
- return addr;
+ newaddr = slice_find_area(mm, len, &good_mask,
+ psize, topdown, high_limit);
+ if (newaddr != -ENOMEM) {
+ slice_dbg(" found area at 0x%lx\n", newaddr);
+ goto return_addr;
}
}
/* Now let's see if we can find something in the existing slices
* for that size plus free slices
*/
- addr = slice_find_area(mm, len, potential_mask,
- psize, topdown, high_limit);
+ newaddr = slice_find_area(mm, len, &potential_mask,
+ psize, topdown, high_limit);
#ifdef CONFIG_PPC_64K_PAGES
- if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+ if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) {
/* retry the search with 4k-page slices included */
- slice_or_mask(&potential_mask, &compat_mask);
- addr = slice_find_area(mm, len, potential_mask,
- psize, topdown, high_limit);
+ slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
+ newaddr = slice_find_area(mm, len, &potential_mask,
+ psize, topdown, high_limit);
}
#endif
- if (addr == -ENOMEM)
+ if (newaddr == -ENOMEM)
return -ENOMEM;
- slice_range_to_mask(addr, len, &mask);
- slice_dbg(" found potential area at 0x%lx\n", addr);
- slice_print_mask(" mask", mask);
+ slice_range_to_mask(newaddr, len, &potential_mask);
+ slice_dbg(" found potential area at 0x%lx\n", newaddr);
+ slice_print_mask(" mask", &potential_mask);
convert:
- slice_andnot_mask(&mask, &good_mask);
- slice_andnot_mask(&mask, &compat_mask);
- if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
- slice_convert(mm, mask, psize);
+ /*
+ * Try to allocate the context before we do slice convert
+ * so that we handle the context allocation failure gracefully.
+ */
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+
+ slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+ if (compat_maskp && !fixed)
+ slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+ if (potential_mask.low_slices ||
+ (SLICE_NUM_HIGH &&
+ !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+ slice_convert(mm, &potential_mask, psize);
if (psize > MMU_PAGE_BASE)
on_each_cpu(slice_flush_segments, mm, 1);
}
- return addr;
+ return newaddr;
+return_addr:
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+ return newaddr;
}
EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
@@ -627,94 +700,60 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
- unsigned char *hpsizes;
+ unsigned char *psizes;
int index, mask_index;
- /*
- * Radix doesn't use slice, but can get enabled along with MMU_SLICE
- */
- if (radix_enabled()) {
-#ifdef CONFIG_PPC_64K_PAGES
- return MMU_PAGE_64K;
-#else
- return MMU_PAGE_4K;
-#endif
- }
+ VM_BUG_ON(radix_enabled());
+
if (addr < SLICE_LOW_TOP) {
- u64 lpsizes;
- lpsizes = mm->context.low_slices_psize;
+ psizes = mm->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xf;
+ } else {
+ psizes = mm->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = mm->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
}
EXPORT_SYMBOL_GPL(get_slice_psize);
-/*
- * This is called by hash_page when it needs to do a lazy conversion of
- * an address space from real 64K pages to combo 4K pages (typically
- * when hitting a non cacheable mapping on a processor or hypervisor
- * that won't allow them for 64K pages).
- *
- * This is also called in init_new_context() to change back the user
- * psize from whatever the parent context had it set to
- * N.B. This may be called before mm->context.id has been set.
- *
- * This function will only change the content of the {low,high)_slice_psize
- * masks, it will not flush SLBs as this shall be handled lazily by the
- * caller.
- */
-void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+void slice_init_new_context_exec(struct mm_struct *mm)
{
- int index, mask_index;
- unsigned char *hpsizes;
- unsigned long flags, lpsizes;
- unsigned int old_psize;
- int i;
+ unsigned char *hpsizes, *lpsizes;
+ struct slice_mask *mask;
+ unsigned int psize = mmu_virtual_psize;
- slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+ slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
- VM_BUG_ON(radix_enabled());
- spin_lock_irqsave(&slice_convert_lock, flags);
-
- old_psize = mm->context.user_psize;
- slice_dbg(" old_psize=%d\n", old_psize);
- if (old_psize == psize)
- goto bail;
+ /*
+ * In the case of exec, use the default limit. In the
+ * case of fork it is just inherited from the mm being
+ * duplicated.
+ */
+#ifdef CONFIG_PPC64
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#else
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
mm->context.user_psize = psize;
- wmb();
+ /*
+ * Set all slice psizes to the default.
+ */
lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
- (((unsigned long)psize) << (mask_index * 4));
- }
-
-
-
-
- slice_dbg(" lsps=%lx, hsps=%lx\n",
- (unsigned long)mm->context.low_slices_psize,
- (unsigned long)mm->context.high_slices_psize);
+ memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
- bail:
- spin_unlock_irqrestore(&slice_convert_lock, flags);
+ /*
+ * Slice mask cache starts zeroed, fill the default size cache.
+ */
+ mask = slice_mask_for_size(mm, psize);
+ mask->low_slices = ~0UL;
+ if (SLICE_NUM_HIGH)
+ bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
}
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
@@ -725,7 +764,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
VM_BUG_ON(radix_enabled());
slice_range_to_mask(start, len, &mask);
- slice_convert(mm, mask, psize);
+ slice_convert(mm, &mask, psize);
}
#ifdef CONFIG_HUGETLB_PAGE
@@ -748,33 +787,27 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
* for now as we only use slices with hugetlbfs enabled. This should
* be fixed as the generic code gets fixed.
*/
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
- struct slice_mask mask, available;
+ const struct slice_mask *maskp;
unsigned int psize = mm->context.user_psize;
- unsigned long high_limit = mm->context.slb_addr_limit;
- if (radix_enabled())
- return 0;
+ VM_BUG_ON(radix_enabled());
- slice_range_to_mask(addr, len, &mask);
- slice_mask_for_size(mm, psize, &available, high_limit);
+ maskp = slice_mask_for_size(mm, psize);
#ifdef CONFIG_PPC_64K_PAGES
/* We need to account for 4k slices too */
if (psize == MMU_PAGE_64K) {
- struct slice_mask compat_mask;
- slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
- slice_or_mask(&available, &compat_mask);
+ const struct slice_mask *compat_maskp;
+ struct slice_mask available;
+
+ compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+ slice_or_mask(&available, maskp, compat_maskp);
+ return !slice_check_range_fits(mm, &available, addr, len);
}
#endif
-#if 0 /* too verbose */
- slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
- mm, addr, len);
- slice_print_mask(" mask", mask);
- slice_print_mask(" available", available);
-#endif
- return !slice_check_fit(mm, mask, available);
+ return !slice_check_range_fits(mm, maskp, addr, len);
}
#endif
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a07f537..2fba617 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -98,7 +98,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
rb |= set << PPC_BITLSHIFT(51);
rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -112,7 +112,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
rb = PPC_BIT(53); /* IS = 1 */
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -128,7 +128,7 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -144,7 +144,7 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -668,7 +668,7 @@ void radix__flush_tlb_all(void)
rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
prs = 0; /* partition scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
asm volatile("ptesync": : :"memory");
@@ -706,7 +706,7 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
- unsigned int pid = mm->context.id;
+ unsigned long pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
@@ -734,7 +734,7 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
- if (paca[sib].kvm_hstate.kvm_vcpu)
+ if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
flush = true;
}
if (flush)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 9b23f12..87d71dd 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -89,7 +89,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
/* Build full vaddr */
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 44d67b1..2668cc4 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -208,7 +208,7 @@ prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
/* Create cached_info and set spu_info[spu->number] to point to it.
* spu->number is a system-wide value, not a per-node value.
*/
- info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: create vma_map failed\n",
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
index c579b16..f40e373 100644
--- a/arch/powerpc/oprofile/cell/vma_map.c
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -69,8 +69,8 @@ vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
unsigned int size, unsigned int offset, unsigned int guard_ptr,
unsigned int guard_val)
{
- struct vma_to_fileoffset_map *new =
- kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
+ struct vma_to_fileoffset_map *new = kzalloc(sizeof(*new), GFP_KERNEL);
+
if (!new) {
printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
__func__, __LINE__);
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 57ebc65..82986d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -4,7 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
-obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
+obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
isa207-common.o power8-pmu.o power9-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index f8908ea..3f66fcf 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -198,6 +198,10 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
+
+ if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+ is_kernel_addr(mfspr(SPRN_SDAR)))
+ *addrp = 0;
}
static bool regs_sihv(struct pt_regs *regs)
@@ -457,6 +461,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
/* invalid entry */
continue;
+ /*
+ * BHRB rolling buffer could very much contain the kernel
+ * addresses at this point. Check the privileges before
+ * exporting it to userspace (avoid exposure of regions
+ * where we could have speculative execution)
+ */
+ if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+ is_kernel_addr(addr))
+ continue;
+
/* Branches are read most recent first (ie. mfbhrb 0 is
* the most recent branch).
* There are two types of valid entries:
@@ -1226,6 +1240,7 @@ static void power_pmu_disable(struct pmu *pmu)
*/
write_mmcr0(cpuhw, val);
mb();
+ isync();
/*
* Disable instruction sampling if it was enabled
@@ -1234,12 +1249,26 @@ static void power_pmu_disable(struct pmu *pmu)
mtspr(SPRN_MMCRA,
cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
mb();
+ isync();
}
cpuhw->disabled = 1;
cpuhw->n_added = 0;
ebb_switch_out(mmcr0);
+
+#ifdef CONFIG_PPC64
+ /*
+ * These are readable by userspace, may contain kernel
+ * addresses and are not switched by context switch, so clear
+ * them now to avoid leaking anything to userspace in general
+ * including to another process.
+ */
+ if (ppmu->flags & PPMU_ARCH_207S) {
+ mtspr(SPRN_SDAR, 0);
+ mtspr(SPRN_SIAR, 0);
+ }
+#endif
}
local_irq_restore(flags);
@@ -1810,6 +1839,18 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
return 0;
}
+static bool is_event_blacklisted(u64 ev)
+{
+ int i;
+
+ for (i=0; i < ppmu->n_blacklist_ev; i++) {
+ if (ppmu->blacklist_ev[i] == ev)
+ return true;
+ }
+
+ return false;
+}
+
static int power_pmu_event_init(struct perf_event *event)
{
u64 ev;
@@ -1835,15 +1876,24 @@ static int power_pmu_event_init(struct perf_event *event)
ev = event->attr.config;
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return -EOPNOTSUPP;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
ev = ppmu->generic_events[ev];
break;
case PERF_TYPE_HW_CACHE:
err = hw_perf_cache_event(event->attr.config, &ev);
if (err)
return err;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
break;
case PERF_TYPE_RAW:
ev = event->attr.config;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
break;
default:
return -ENOENT;
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
deleted file mode 100644
index ce6072f..0000000
--- a/arch/powerpc/perf/power4-pmu.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
- *
- * Copyright 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/perf_event.h>
-#include <linux/string.h>
-#include <asm/reg.h>
-#include <asm/cputable.h>
-
-/*
- * Bits in event code for POWER4
- */
-#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
-#define PM_PMC_MSK 0xf
-#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
-#define PM_UNIT_MSK 0xf
-#define PM_LOWER_SH 6
-#define PM_LOWER_MSK 1
-#define PM_LOWER_MSKS 0x40
-#define PM_BYTE_SH 4 /* Byte number of event bus to use */
-#define PM_BYTE_MSK 3
-#define PM_PMCSEL_MSK 7
-
-/*
- * Unit code values
- */
-#define PM_FPU 1
-#define PM_ISU1 2
-#define PM_IFU 3
-#define PM_IDU0 4
-#define PM_ISU1_ALT 6
-#define PM_ISU2 7
-#define PM_IFU_ALT 8
-#define PM_LSU0 9
-#define PM_LSU1 0xc
-#define PM_GPS 0xf
-
-/*
- * Bits in MMCR0 for POWER4
- */
-#define MMCR0_PMC1SEL_SH 8
-#define MMCR0_PMC2SEL_SH 1
-#define MMCR_PMCSEL_MSK 0x1f
-
-/*
- * Bits in MMCR1 for POWER4
- */
-#define MMCR1_TTM0SEL_SH 62
-#define MMCR1_TTC0SEL_SH 61
-#define MMCR1_TTM1SEL_SH 59
-#define MMCR1_TTC1SEL_SH 58
-#define MMCR1_TTM2SEL_SH 56
-#define MMCR1_TTC2SEL_SH 55
-#define MMCR1_TTM3SEL_SH 53
-#define MMCR1_TTC3SEL_SH 52
-#define MMCR1_TTMSEL_MSK 3
-#define MMCR1_TD_CP_DBG0SEL_SH 50
-#define MMCR1_TD_CP_DBG1SEL_SH 48
-#define MMCR1_TD_CP_DBG2SEL_SH 46
-#define MMCR1_TD_CP_DBG3SEL_SH 44
-#define MMCR1_DEBUG0SEL_SH 43
-#define MMCR1_DEBUG1SEL_SH 42
-#define MMCR1_DEBUG2SEL_SH 41
-#define MMCR1_DEBUG3SEL_SH 40
-#define MMCR1_PMC1_ADDER_SEL_SH 39
-#define MMCR1_PMC2_ADDER_SEL_SH 38
-#define MMCR1_PMC6_ADDER_SEL_SH 37
-#define MMCR1_PMC5_ADDER_SEL_SH 36
-#define MMCR1_PMC8_ADDER_SEL_SH 35
-#define MMCR1_PMC7_ADDER_SEL_SH 34
-#define MMCR1_PMC3_ADDER_SEL_SH 33
-#define MMCR1_PMC4_ADDER_SEL_SH 32
-#define MMCR1_PMC3SEL_SH 27
-#define MMCR1_PMC4SEL_SH 22
-#define MMCR1_PMC5SEL_SH 17
-#define MMCR1_PMC6SEL_SH 12
-#define MMCR1_PMC7SEL_SH 7
-#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
-
-static short mmcr1_adder_bits[8] = {
- MMCR1_PMC1_ADDER_SEL_SH,
- MMCR1_PMC2_ADDER_SEL_SH,
- MMCR1_PMC3_ADDER_SEL_SH,
- MMCR1_PMC4_ADDER_SEL_SH,
- MMCR1_PMC5_ADDER_SEL_SH,
- MMCR1_PMC6_ADDER_SEL_SH,
- MMCR1_PMC7_ADDER_SEL_SH,
- MMCR1_PMC8_ADDER_SEL_SH
-};
-
-/*
- * Bits in MMCRA
- */
-#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
-
-/*
- * Layout of constraint bits:
- * 6666555555555544444444443333333333222222222211111111110000000000
- * 3210987654321098765432109876543210987654321098765432109876543210
- * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
- * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
- * \SMPL ||\TTC3SEL
- * |\TTC_IFU_SEL
- * \TTM2SEL0
- *
- * SMPL - SAMPLE_ENABLE constraint
- * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
- *
- * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
- * 55: UC1 error 0x0080_0000_0000_0000
- * 54: FPU events needed 0x0040_0000_0000_0000
- * 53: ISU1 events needed 0x0020_0000_0000_0000
- * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
- *
- * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
- * 51: UC2 error 0x0008_0000_0000_0000
- * 50: FPU events needed 0x0004_0000_0000_0000
- * 49: IFU events needed 0x0002_0000_0000_0000
- * 48: LSU0 events needed 0x0001_0000_0000_0000
- *
- * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
- * 47: UC3 error 0x8000_0000_0000
- * 46: LSU0 events needed 0x4000_0000_0000
- * 45: IFU events needed 0x2000_0000_0000
- * 44: IDU0|ISU2 events needed 0x1000_0000_0000
- * 43: ISU1 events needed 0x0800_0000_0000
- *
- * TTM2SEL0
- * 42: 0 = IDU0 events needed
- * 1 = ISU2 events needed 0x0400_0000_0000
- *
- * TTC_IFU_SEL
- * 41: 0 = IFU.U events needed
- * 1 = IFU.L events needed 0x0200_0000_0000
- *
- * TTC3SEL
- * 40: 0 = LSU1.U events needed
- * 1 = LSU1.L events needed 0x0100_0000_0000
- *
- * PS1
- * 39: PS1 error 0x0080_0000_0000
- * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
- *
- * PS2
- * 35: PS2 error 0x0008_0000_0000
- * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
- *
- * B0
- * 28-31: Byte 0 event source 0xf000_0000
- * 1 = FPU
- * 2 = ISU1
- * 3 = IFU
- * 4 = IDU0
- * 7 = ISU2
- * 9 = LSU0
- * c = LSU1
- * f = GPS
- *
- * B1, B2, B3
- * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
- *
- * P8
- * 15: P8 error 0x8000
- * 14-15: Count of events needing PMC8
- *
- * P1..P7
- * 0-13: Count of events needing PMC1..PMC7
- *
- * Note: this doesn't allow events using IFU.U to be combined with events
- * using IFU.L, though that is feasible (using TTM0 and TTM2). However
- * there are no listed events for IFU.L (they are debug events not
- * verified for performance monitoring) so this shouldn't cause a
- * problem.
- */
-
-static struct unitinfo {
- unsigned long value, mask;
- int unit;
- int lowerbit;
-} p4_unitinfo[16] = {
- [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
- [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
- [PM_ISU1_ALT] =
- { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
- [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
- [PM_IFU_ALT] =
- { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
- [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
- [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
- [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
- [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
- [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
-};
-
-static unsigned char direct_marked_event[8] = {
- (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
- (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
- (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
- (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
- (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
- (1<<3) | (1<<4) | (1<<5),
- /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
- (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
- (1<<4), /* PMC8: PM_MRK_LSU_FIN */
-};
-
-/*
- * Returns 1 if event counts things relating to marked instructions
- * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
- */
-static int p4_marked_instr_event(u64 event)
-{
- int pmc, psel, unit, byte, bit;
- unsigned int mask;
-
- pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
- psel = event & PM_PMCSEL_MSK;
- if (pmc) {
- if (direct_marked_event[pmc - 1] & (1 << psel))
- return 1;
- if (psel == 0) /* add events */
- bit = (pmc <= 4)? pmc - 1: 8 - pmc;
- else if (psel == 6) /* decode events */
- bit = 4;
- else
- return 0;
- } else
- bit = psel;
-
- byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
- unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
- mask = 0;
- switch (unit) {
- case PM_LSU1:
- if (event & PM_LOWER_MSKS)
- mask = 1 << 28; /* byte 7 bit 4 */
- else
- mask = 6 << 24; /* byte 3 bits 1 and 2 */
- break;
- case PM_LSU0:
- /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
- mask = 0x083dff00;
- }
- return (mask >> (byte * 8 + bit)) & 1;
-}
-
-static int p4_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
-{
- int pmc, byte, unit, lower, sh;
- unsigned long mask = 0, value = 0;
- int grp = -1;
-
- pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
- if (pmc) {
- if (pmc > 8)
- return -1;
- sh = (pmc - 1) * 2;
- mask |= 2 << sh;
- value |= 1 << sh;
- grp = ((pmc - 1) >> 1) & 1;
- }
- unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
- if (unit) {
- lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
-
- /*
- * Bus events on bytes 0 and 2 can be counted
- * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
- */
- if (!pmc)
- grp = byte & 1;
-
- if (!p4_unitinfo[unit].unit)
- return -1;
- mask |= p4_unitinfo[unit].mask;
- value |= p4_unitinfo[unit].value;
- sh = p4_unitinfo[unit].lowerbit;
- if (sh > 1)
- value |= (unsigned long)lower << sh;
- else if (lower != sh)
- return -1;
- unit = p4_unitinfo[unit].unit;
-
- /* Set byte lane select field */
- mask |= 0xfULL << (28 - 4 * byte);
- value |= (unsigned long)unit << (28 - 4 * byte);
- }
- if (grp == 0) {
- /* increment PMC1/2/5/6 field */
- mask |= 0x8000000000ull;
- value |= 0x1000000000ull;
- } else {
- /* increment PMC3/4/7/8 field */
- mask |= 0x800000000ull;
- value |= 0x100000000ull;
- }
-
- /* Marked instruction events need sample_enable set */
- if (p4_marked_instr_event(event)) {
- mask |= 1ull << 56;
- value |= 1ull << 56;
- }
-
- /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
- if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
- mask |= 1ull << 56;
-
- *maskp = mask;
- *valp = value;
- return 0;
-}
-
-static unsigned int ppc_inst_cmpl[] = {
- 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
-};
-
-static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
-{
- int i, j, na;
-
- alt[0] = event;
- na = 1;
-
- /* 2 possibilities for PM_GRP_DISP_REJECT */
- if (event == 0x8003 || event == 0x0224) {
- alt[1] = event ^ (0x8003 ^ 0x0224);
- return 2;
- }
-
- /* 2 possibilities for PM_ST_MISS_L1 */
- if (event == 0x0c13 || event == 0x0c23) {
- alt[1] = event ^ (0x0c13 ^ 0x0c23);
- return 2;
- }
-
- /* several possibilities for PM_INST_CMPL */
- for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
- if (event == ppc_inst_cmpl[i]) {
- for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
- if (j != i)
- alt[na++] = ppc_inst_cmpl[j];
- break;
- }
- }
-
- return na;
-}
-
-static int p4_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
-{
- unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
- unsigned int pmc, unit, byte, psel, lower;
- unsigned int ttm, grp;
- unsigned int pmc_inuse = 0;
- unsigned int pmc_grp_use[2];
- unsigned char busbyte[4];
- unsigned char unituse[16];
- unsigned int unitlower = 0;
- int i;
-
- if (n_ev > 8)
- return -1;
-
- /* First pass to count resource use */
- pmc_grp_use[0] = pmc_grp_use[1] = 0;
- memset(busbyte, 0, sizeof(busbyte));
- memset(unituse, 0, sizeof(unituse));
- for (i = 0; i < n_ev; ++i) {
- pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
- if (pmc) {
- if (pmc_inuse & (1 << (pmc - 1)))
- return -1;
- pmc_inuse |= 1 << (pmc - 1);
- /* count 1/2/5/6 vs 3/4/7/8 use */
- ++pmc_grp_use[((pmc - 1) >> 1) & 1];
- }
- unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
- lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
- if (unit) {
- if (!pmc)
- ++pmc_grp_use[byte & 1];
- if (unit == 6 || unit == 8)
- /* map alt ISU1/IFU codes: 6->2, 8->3 */
- unit = (unit >> 1) - 1;
- if (busbyte[byte] && busbyte[byte] != unit)
- return -1;
- busbyte[byte] = unit;
- lower <<= unit;
- if (unituse[unit] && lower != (unitlower & lower))
- return -1;
- unituse[unit] = 1;
- unitlower |= lower;
- }
- }
- if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
- return -1;
-
- /*
- * Assign resources and set multiplexer selects.
- *
- * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
- * Each TTMx can only select one unit, but since
- * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
- * we have some choices.
- */
- if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
- unituse[6] = 1; /* Move 2 to 6 */
- unituse[2] = 0;
- }
- if (unituse[3] & (unituse[1] | unituse[2])) {
- unituse[8] = 1; /* Move 3 to 8 */
- unituse[3] = 0;
- unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
- }
- /* Check only one unit per TTMx */
- if (unituse[1] + unituse[2] + unituse[3] > 1 ||
- unituse[4] + unituse[6] + unituse[7] > 1 ||
- unituse[8] + unituse[9] > 1 ||
- (unituse[5] | unituse[10] | unituse[11] |
- unituse[13] | unituse[14]))
- return -1;
-
- /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
- mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
- << MMCR1_TTM0SEL_SH;
- mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
- << MMCR1_TTM1SEL_SH;
- mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
-
- /* Set TTCxSEL fields. */
- if (unitlower & 0xe)
- mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
- if (unitlower & 0xf0)
- mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
- if (unitlower & 0xf00)
- mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
- if (unitlower & 0x7000)
- mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
-
- /* Set byte lane select fields. */
- for (byte = 0; byte < 4; ++byte) {
- unit = busbyte[byte];
- if (!unit)
- continue;
- if (unit == 0xf) {
- /* special case for GPS */
- mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
- } else {
- if (!unituse[unit])
- ttm = unit - 1; /* 2->1, 3->2 */
- else
- ttm = unit >> 2;
- mmcr1 |= (unsigned long)ttm
- << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
- }
- }
-
- /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
- for (i = 0; i < n_ev; ++i) {
- pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
- unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
- psel = event[i] & PM_PMCSEL_MSK;
- if (!pmc) {
- /* Bus event or 00xxx direct event (off or cycles) */
- if (unit)
- psel |= 0x10 | ((byte & 2) << 2);
- for (pmc = 0; pmc < 8; ++pmc) {
- if (pmc_inuse & (1 << pmc))
- continue;
- grp = (pmc >> 1) & 1;
- if (unit) {
- if (grp == (byte & 1))
- break;
- } else if (pmc_grp_use[grp] < 4) {
- ++pmc_grp_use[grp];
- break;
- }
- }
- pmc_inuse |= 1 << pmc;
- } else {
- /* Direct event */
- --pmc;
- if (psel == 0 && (byte & 2))
- /* add events on higher-numbered bus */
- mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
- else if (psel == 6 && byte == 3)
- /* seem to need to set sample_enable here */
- mmcra |= MMCRA_SAMPLE_ENABLE;
- psel |= 8;
- }
- if (pmc <= 1)
- mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
- else
- mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
- if (pmc == 7) /* PMC8 */
- mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
- hwc[i] = pmc;
- if (p4_marked_instr_event(event[i]))
- mmcra |= MMCRA_SAMPLE_ENABLE;
- }
-
- if (pmc_inuse & 1)
- mmcr0 |= MMCR0_PMC1CE;
- if (pmc_inuse & 0xfe)
- mmcr0 |= MMCR0_PMCjCE;
-
- mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
-
- /* Return MMCRx values */
- mmcr[0] = mmcr0;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
- return 0;
-}
-
-static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
-{
- /*
- * Setting the PMCxSEL field to 0 disables PMC x.
- * (Note that pmc is 0-based here, not 1-based.)
- */
- if (pmc <= 1) {
- mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
- } else {
- mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
- if (pmc == 7)
- mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
- }
-}
-
-static int p4_generic_events[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = 7,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
- [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
-};
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-/*
- * Table of generalized cache-related events.
- * 0 means not supported, -1 means nonsensical, other values
- * are event codes.
- */
-static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
- [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0x8c10, 0x3c10 },
- [C(OP_WRITE)] = { 0x7c10, 0xc13 },
- [C(OP_PREFETCH)] = { 0xc35, 0 },
- },
- [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { 0, 0 },
- },
- [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0 },
- [C(OP_WRITE)] = { 0, 0 },
- [C(OP_PREFETCH)] = { 0xc34, 0 },
- },
- [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0x904 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0x900 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0x330, 0x331 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { -1, -1 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
-};
-
-static struct power_pmu power4_pmu = {
- .name = "POWER4/4+",
- .n_counter = 8,
- .max_alternatives = 5,
- .add_fields = 0x0000001100005555ul,
- .test_adder = 0x0011083300000000ul,
- .compute_mmcr = p4_compute_mmcr,
- .get_constraint = p4_get_constraint,
- .get_alternatives = p4_get_alternatives,
- .disable_pmc = p4_disable_pmc,
- .n_generic = ARRAY_SIZE(p4_generic_events),
- .generic_events = p4_generic_events,
- .cache_events = &power4_cache_events,
- .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
-};
-
-static int __init init_power4_pmu(void)
-{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
- return -ENODEV;
-
- return register_power_pmu(&power4_pmu);
-}
-
-early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index e99c6bf..7de344b 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -69,3 +69,31 @@ EVENT(PM_BR_CMPL_ALT, 0x10012)
EVENT(PM_BR_2PATH, 0x20036)
/* ALternate branch event that are not strongly biased */
EVENT(PM_BR_2PATH_ALT, 0x40036)
+
+/* Blacklisted events */
+EVENT(PM_MRK_ST_DONE_L2, 0x10134)
+EVENT(PM_RADIX_PWC_L1_HIT, 0x1f056)
+EVENT(PM_FLOP_CMPL, 0x100f4)
+EVENT(PM_MRK_NTF_FIN, 0x20112)
+EVENT(PM_RADIX_PWC_L2_HIT, 0x2d024)
+EVENT(PM_IFETCH_THROTTLE, 0x3405e)
+EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER, 0x3e15c)
+EVENT(PM_RADIX_PWC_L3_HIT, 0x3f056)
+EVENT(PM_RUN_CYC_SMT2_MODE, 0x3006c)
+EVENT(PM_TM_TX_PASS_RUN_INST, 0x4e014)
+EVENT(PM_DISP_HELD_SYNC_HOLD, 0x4003c)
+EVENT(PM_DTLB_MISS_16G, 0x1c058)
+EVENT(PM_DERAT_MISS_2M, 0x1c05a)
+EVENT(PM_DTLB_MISS_2M, 0x1c05c)
+EVENT(PM_MRK_DTLB_MISS_1G, 0x1d15c)
+EVENT(PM_DTLB_MISS_4K, 0x2c056)
+EVENT(PM_DERAT_MISS_1G, 0x2c05a)
+EVENT(PM_MRK_DERAT_MISS_2M, 0x2d152)
+EVENT(PM_MRK_DTLB_MISS_4K, 0x2d156)
+EVENT(PM_MRK_DTLB_MISS_16G, 0x2d15e)
+EVENT(PM_DTLB_MISS_64K, 0x3c056)
+EVENT(PM_MRK_DERAT_MISS_1G, 0x3d152)
+EVENT(PM_MRK_DTLB_MISS_64K, 0x3d156)
+EVENT(PM_DTLB_MISS_16M, 0x4c056)
+EVENT(PM_DTLB_MISS_1G, 0x4c05a)
+EVENT(PM_MRK_DTLB_MISS_16M, 0x4c15e)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 24b5b5b..2ca0b33 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -101,9 +101,45 @@ enum {
#define POWER9_MMCRA_IFM2 0x0000000080000000UL
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
+/* Nasty Power9 specific hack */
+#define PVR_POWER9_CUMULUS 0x00002000
+
/* PowerISA v2.07 format attribute structure*/
extern struct attribute_group isa207_pmu_format_group;
+int p9_dd21_bl_ev[] = {
+ PM_MRK_ST_DONE_L2,
+ PM_RADIX_PWC_L1_HIT,
+ PM_FLOP_CMPL,
+ PM_MRK_NTF_FIN,
+ PM_RADIX_PWC_L2_HIT,
+ PM_IFETCH_THROTTLE,
+ PM_MRK_L2_TM_ST_ABORT_SISTER,
+ PM_RADIX_PWC_L3_HIT,
+ PM_RUN_CYC_SMT2_MODE,
+ PM_TM_TX_PASS_RUN_INST,
+ PM_DISP_HELD_SYNC_HOLD,
+};
+
+int p9_dd22_bl_ev[] = {
+ PM_DTLB_MISS_16G,
+ PM_DERAT_MISS_2M,
+ PM_DTLB_MISS_2M,
+ PM_MRK_DTLB_MISS_1G,
+ PM_DTLB_MISS_4K,
+ PM_DERAT_MISS_1G,
+ PM_MRK_DERAT_MISS_2M,
+ PM_MRK_DTLB_MISS_4K,
+ PM_MRK_DTLB_MISS_16G,
+ PM_DTLB_MISS_64K,
+ PM_MRK_DERAT_MISS_1G,
+ PM_MRK_DTLB_MISS_64K,
+ PM_DISP_HELD_SYNC_HOLD,
+ PM_DTLB_MISS_16M,
+ PM_DTLB_MISS_1G,
+ PM_MRK_DTLB_MISS_16M,
+};
+
/* Table of alternatives, sorted by column 0 */
static const unsigned int power9_event_alternatives[][MAX_ALT] = {
{ PM_INST_DISP, PM_INST_DISP_ALT },
@@ -446,12 +482,24 @@ static struct power_pmu power9_pmu = {
static int __init init_power9_pmu(void)
{
int rc = 0;
+ unsigned int pvr = mfspr(SPRN_PVR);
/* Comes from cpu_specs[] */
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9"))
return -ENODEV;
+ /* Blacklist events */
+ if (!(pvr & PVR_POWER9_CUMULUS)) {
+ if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 1)) {
+ power9_pmu.blacklist_ev = p9_dd21_bl_ev;
+ power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev);
+ } else if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 2)) {
+ power9_pmu.blacklist_ev = p9_dd22_bl_ev;
+ power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev);
+ }
+ }
+
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
/*
* Since PM_INST_CMPL may not provide right counts in all
diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c
index d50417e..96aaae6 100644
--- a/arch/powerpc/platforms/4xx/msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -223,7 +223,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
- msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
+ msi = kzalloc(sizeof(*msi), GFP_KERNEL);
if (!msi) {
dev_err(&dev->dev, "No memory for MSI structure\n");
return -ENOMEM;
@@ -241,7 +241,8 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
if (!msi_irqs)
return -ENODEV;
- if (ppc4xx_setup_pcieh_hw(dev, res, msi))
+ err = ppc4xx_setup_pcieh_hw(dev, res, msi);
+ if (err)
goto error_out;
err = ppc4xx_msi_init_allocator(dev, msi);
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 85d9e37..69d9f60 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -339,7 +339,7 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
if (IS_ERR_VALUE(offset))
continue;
- ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL);
+ ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL);
if (!ocm_blk) {
printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block");
rh_free(ocm_reg->rh, offset);
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index f51fd35..7e966f4 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -147,7 +147,7 @@ static void qoriq_cpu_kill(unsigned int cpu)
for (i = 0; i < 500; i++) {
if (is_cpu_dead(cpu)) {
#ifdef CONFIG_PPC64
- paca[cpu].cpu_start = 0;
+ paca_ptrs[cpu]->cpu_start = 0;
#endif
return;
}
@@ -328,7 +328,7 @@ static int smp_85xx_kick_cpu(int nr)
return ret;
done:
- paca[nr].cpu_start = 1;
+ paca_ptrs[nr]->cpu_start = 1;
generic_set_cpu_up(nr);
return ret;
@@ -409,14 +409,14 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
}
if (disable_threadbit) {
- while (paca[disable_cpu].kexec_state < KEXEC_STATE_REAL_MODE) {
+ while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) {
barrier();
now = mftb();
if (!notified && now - start > 1000000) {
pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n",
__func__, smp_processor_id(),
disable_cpu,
- paca[disable_cpu].kexec_state);
+ paca_ptrs[disable_cpu]->kexec_state);
notified = true;
}
}
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index e1274db..2188d69 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -217,13 +217,7 @@ void __noreturn mpc8xx_restart(char *cmd)
static void cpm_cascade(struct irq_desc *desc)
{
- struct irq_chip *chip = irq_desc_get_chip(desc);
- int cascade_irq = cpm_get_irq();
-
- if (cascade_irq >= 0)
- generic_handle_irq(cascade_irq);
-
- chip->irq_eoi(&desc->irq_data);
+ generic_handle_irq(cpm_get_irq());
}
/* Initialize the internal interrupt controllers. The number of
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a429d85..67d3125 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -61,7 +61,7 @@ choice
help
There are two families of 64 bit PowerPC chips supported.
The most common ones are the desktop and server CPUs
- (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...)
+ (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...)
The other are the "embedded" processors compliant with the
"Book 3E" variant of the architecture
@@ -87,7 +87,6 @@ endchoice
choice
prompt "CPU selection"
depends on PPC64
- default POWER8_CPU if CPU_LITTLE_ENDIAN
default GENERIC_CPU
help
This will create a kernel which is optimised for a particular CPU.
@@ -96,17 +95,18 @@ choice
If unsure, select Generic.
config GENERIC_CPU
- bool "Generic"
+ bool "Generic (POWER4 and above)"
depends on !CPU_LITTLE_ENDIAN
+config GENERIC_CPU
+ bool "Generic (POWER8 and above)"
+ depends on CPU_LITTLE_ENDIAN
+ select ARCH_HAS_FAST_MULTIPLIER
+
config CELL_CPU
bool "Cell Broadband Engine"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
-config POWER4_CPU
- bool "POWER4"
- depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
-
config POWER5_CPU
bool "POWER5"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
@@ -125,6 +125,11 @@ config POWER8_CPU
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+config POWER9_CPU
+ bool "POWER9"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+
config E5500_CPU
bool "Freescale e5500"
depends on E500
@@ -326,6 +331,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
default y if PPC_BOOK3S_64
+ default y if PPC_8xx && HUGETLB_PAGE
default n
config PPC_HAVE_PMU_SUPPORT
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 6ea3f24..326d34e 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -342,7 +342,7 @@ static int axon_msi_probe(struct platform_device *device)
pr_devel("axon_msi: setting up dn %pOF\n", dn);
- msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
+ msic = kzalloc(sizeof(*msic), GFP_KERNEL);
if (!msic) {
printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
dn);
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index f84d52a..1aeac57 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -83,7 +83,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
pcpu = get_hard_smp_processor_id(lcpu);
/* Fixup atomic count: it exited inside IRQ handler. */
- task_thread_info(paca[lcpu].__current)->preempt_count = 0;
+ task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0;
/*
* If the RTAS start-cpu token does not exist then presume the
@@ -126,7 +126,7 @@ static int smp_cell_kick_cpu(int nr)
* cpu_start field to become non-zero After we set cpu_start,
* the processor will continue on to secondary_start
*/
- paca[nr].cpu_start = 1;
+ paca_ptrs[nr]->cpu_start = 1;
return 0;
}
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index d1e61e2..1200d0d 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -133,7 +133,7 @@ int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n",
np);
- priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv) {
pr_err("SPIDERPCI-IOWA:"
"Can't allocate struct spiderpci_iowa_private");
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
index b847e94..d9de848 100644
--- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -36,7 +36,7 @@ int spu_alloc_lscsa(struct spu_state *csa)
struct spu_lscsa *lscsa;
unsigned char *p;
- lscsa = vzalloc(sizeof(struct spu_lscsa));
+ lscsa = vzalloc(sizeof(*lscsa));
if (!lscsa)
return -ENOMEM;
csa->lscsa = lscsa;
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index ade8382..7206f3f 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -132,7 +132,7 @@ static void __flipper_quiesce(void __iomem *io_base)
out_be32(io_base + FLIPPER_ICR, 0xffffffff);
}
-struct irq_domain * __init flipper_pic_init(struct device_node *np)
+static struct irq_domain * __init flipper_pic_init(struct device_node *np)
{
struct device_node *pi;
struct irq_domain *irq_domain = NULL;
diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
index 7feb325..5c7e7ce 100644
--- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
+++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
@@ -169,7 +169,7 @@ static int ug_getc(void)
/*
* Transmits a character.
*/
-void ug_udbg_putc(char ch)
+static void ug_udbg_putc(char ch)
{
ug_putc(ch);
}
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 3fd683e..8bb46dc 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -44,6 +44,7 @@
#define HW_GPIO_BASE(idx) (idx * 0x20)
#define HW_GPIO_OUT(idx) (HW_GPIO_BASE(idx) + 0)
#define HW_GPIO_DIR(idx) (HW_GPIO_BASE(idx) + 4)
+#define HW_GPIO_OWNER (HW_GPIO_BASE(1) + 0x1c)
#define HW_GPIO_SHUTDOWN (1<<1)
#define HW_GPIO_SLOT_LED (1<<5)
@@ -79,21 +80,9 @@ void __init wii_memory_fixups(void)
BUG_ON(memblock.memory.cnt != 2);
BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
- /* trim unaligned tail */
- memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
- (phys_addr_t)ULLONG_MAX);
-
- /* determine hole, add & reserve them */
+ /* determine hole */
wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
wii_hole_size = p[1].base - wii_hole_start;
- memblock_add(wii_hole_start, wii_hole_size);
- memblock_reserve(wii_hole_start, wii_hole_size);
-
- BUG_ON(memblock.memory.cnt != 1);
- __memblock_dump_all();
-
- /* allow ioremapping the address space in the hole */
- __allow_ioremap_reserved = 1;
}
unsigned long __init wii_mmu_mapin_mem2(unsigned long top)
@@ -176,6 +165,12 @@ static void wii_power_off(void)
local_irq_disable();
if (hw_gpio) {
+ /*
+ * set the owner of the shutdown pin to ARM, because it is
+ * accessed through the registers for the ARM, below
+ */
+ clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN);
+
/* make sure that the poweroff GPIO is configured as output */
setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN);
@@ -239,7 +234,7 @@ static int __init wii_device_probe(void)
if (!machine_is(wii))
return 0;
- of_platform_bus_probe(NULL, wii_of_bus, NULL);
+ of_platform_populate(NULL, wii_of_bus, NULL, NULL);
return 0;
}
device_initcall(wii_device_probe);
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 3408f31..fa89f30 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -492,7 +492,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
const u32 *psteps, *prate, *addrp;
u32 steps;
- host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL);
+ host = kzalloc(sizeof(*host), GFP_KERNEL);
if (host == NULL) {
printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n",
np);
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index df3c93b..e0462fe 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -643,7 +643,7 @@ static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata,
while (length >= 12) {
/* Allocate a structure */
- func = kzalloc(sizeof(struct pmf_function), GFP_KERNEL);
+ func = kzalloc(sizeof(*func), GFP_KERNEL);
if (func == NULL)
goto bail;
kref_init(&func->ref);
@@ -719,7 +719,7 @@ int pmf_register_driver(struct device_node *np,
return -EBUSY;
}
- dev = kzalloc(sizeof(struct pmf_device), GFP_KERNEL);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (dev == NULL) {
DBG("pmf: no memory !\n");
return -ENOMEM;
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 6c9d519..703a350 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -16,5 +16,4 @@ obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
-obj-$(CONFIG_PPC_FTW) += nx-ftw.o
obj-$(CONFIG_OCXL_BASE) += ocxl.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 33c86c1..ddfc354 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1425,11 +1425,8 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
dev_pe = dev_pe->parent;
while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
int ret;
- int active_flags = (EEH_STATE_MMIO_ACTIVE |
- EEH_STATE_DMA_ACTIVE);
-
ret = eeh_ops->get_state(dev_pe, NULL);
- if (ret <= 0 || (ret & active_flags) == active_flags) {
+ if (ret <= 0 || eeh_state_active(ret)) {
dev_pe = dev_pe->parent;
continue;
}
@@ -1463,7 +1460,6 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
struct eeh_pe *phb_pe, *parent_pe;
__be64 frozen_pe_no;
__be16 err_type, severity;
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
long rc;
int state, ret = EEH_NEXT_ERR_NONE;
@@ -1626,8 +1622,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
/* Frozen parent PE ? */
state = eeh_ops->get_state(parent_pe, NULL);
- if (state > 0 &&
- (state & active_flags) != active_flags)
+ if (state > 0 && !eeh_state_active(state))
*pe = parent_pe;
/* Next parent level */
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 443d5ca..1f12ab1 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -24,6 +24,7 @@
#include <asm/code-patching.h>
#include <asm/smp.h>
#include <asm/runlatch.h>
+#include <asm/dbell.h>
#include "powernv.h"
#include "subcore.h"
@@ -80,7 +81,7 @@ static int pnv_save_sprs_for_deep_states(void)
for_each_possible_cpu(cpu) {
uint64_t pir = get_hard_smp_processor_id(cpu);
- uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+ uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
if (rc != 0)
@@ -173,12 +174,12 @@ static void pnv_alloc_idle_core_states(void)
for (j = 0; j < threads_per_core; j++) {
int cpu = first_cpu + j;
- paca[cpu].core_idle_state_ptr = core_idle_state;
- paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
- paca[cpu].thread_mask = 1 << j;
+ paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
+ paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
+ paca_ptrs[cpu]->thread_mask = 1 << j;
if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
continue;
- paca[cpu].thread_sibling_pacas =
+ paca_ptrs[cpu]->thread_sibling_pacas =
kmalloc_node(paca_ptr_array_size,
GFP_KERNEL, node);
}
@@ -387,6 +388,78 @@ void power9_idle(void)
power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * This is used in working around bugs in thread reconfiguration
+ * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
+ * memory and the way that XER[SO] is checkpointed.
+ * This function forces the core into SMT4 in order by asking
+ * all other threads not to stop, and sending a message to any
+ * that are in a stop state.
+ * Must be called with preemption disabled.
+ */
+void pnv_power9_force_smt4_catch(void)
+{
+ int cpu, cpu0, thr;
+ int awake_threads = 1; /* this thread is awake */
+ int poke_threads = 0;
+ int need_awake = threads_per_core;
+
+ cpu = smp_processor_id();
+ cpu0 = cpu & ~(threads_per_core - 1);
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (cpu != cpu0 + thr)
+ atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
+ }
+ /* order setting dont_stop vs testing requested_psscr */
+ mb();
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (!paca_ptrs[cpu0+thr]->requested_psscr)
+ ++awake_threads;
+ else
+ poke_threads |= (1 << thr);
+ }
+
+ /* If at least 3 threads are awake, the core is in SMT4 already */
+ if (awake_threads < need_awake) {
+ /* We have to wake some threads; we'll use msgsnd */
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (poke_threads & (1 << thr)) {
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
+ paca_ptrs[cpu0+thr]->hw_cpu_id);
+ }
+ }
+ /* now spin until at least 3 threads are awake */
+ do {
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if ((poke_threads & (1 << thr)) &&
+ !paca_ptrs[cpu0+thr]->requested_psscr) {
+ ++awake_threads;
+ poke_threads &= ~(1 << thr);
+ }
+ }
+ } while (awake_threads < need_awake);
+ }
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
+
+void pnv_power9_force_smt4_release(void)
+{
+ int cpu, cpu0, thr;
+
+ cpu = smp_processor_id();
+ cpu0 = cpu & ~(threads_per_core - 1);
+
+ /* clear all the dont_stop flags */
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (cpu != cpu0 + thr)
+ atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
+ }
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
#ifdef CONFIG_HOTPLUG_CPU
static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
@@ -434,7 +507,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
psscr = mfspr(SPRN_PSSCR);
psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
pnv_deepest_stop_psscr_val;
- srr1 = power9_idle_stop(psscr);
+ srr1 = power9_offline_stop(psscr);
} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
(idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
@@ -749,7 +822,8 @@ static int __init pnv_init_idle_states(void)
for (i = 0; i < threads_per_core; i++) {
int j = base_cpu + i;
- paca[j].thread_sibling_pacas[idx] = &paca[cpu];
+ paca_ptrs[j]->thread_sibling_pacas[idx] =
+ paca_ptrs[cpu];
}
}
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 0a253b6..69a4f9e 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -410,6 +410,11 @@ struct npu_context {
void *priv;
};
+struct mmio_atsd_reg {
+ struct npu *npu;
+ int reg;
+};
+
/*
* Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
* if none are available.
@@ -419,7 +424,7 @@ static int get_mmio_atsd_reg(struct npu *npu)
int i;
for (i = 0; i < npu->mmio_atsd_count; i++) {
- if (!test_and_set_bit(i, &npu->mmio_atsd_usage))
+ if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
return i;
}
@@ -428,86 +433,90 @@ static int get_mmio_atsd_reg(struct npu *npu)
static void put_mmio_atsd_reg(struct npu *npu, int reg)
{
- clear_bit(reg, &npu->mmio_atsd_usage);
+ clear_bit_unlock(reg, &npu->mmio_atsd_usage);
}
/* MMIO ATSD register offsets */
#define XTS_ATSD_AVA 1
#define XTS_ATSD_STAT 2
-static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
- unsigned long va)
+static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
+ unsigned long launch, unsigned long va)
{
- int mmio_atsd_reg;
-
- do {
- mmio_atsd_reg = get_mmio_atsd_reg(npu);
- cpu_relax();
- } while (mmio_atsd_reg < 0);
+ struct npu *npu = mmio_atsd_reg->npu;
+ int reg = mmio_atsd_reg->reg;
__raw_writeq(cpu_to_be64(va),
- npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA);
+ npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
eieio();
- __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]);
-
- return mmio_atsd_reg;
+ __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]);
}
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+ unsigned long pid, bool flush)
{
+ int i;
unsigned long launch;
- /* IS set to invalidate matching PID */
- launch = PPC_BIT(12);
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ /* IS set to invalidate matching PID */
+ launch = PPC_BIT(12);
- /* PRS set to process-scoped */
- launch |= PPC_BIT(13);
+ /* PRS set to process-scoped */
+ launch |= PPC_BIT(13);
- /* AP */
- launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+ /* AP */
+ launch |= (u64)
+ mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
+ /* PID */
+ launch |= pid << PPC_BITLSHIFT(38);
- /* No flush */
- launch |= !flush << PPC_BITLSHIFT(39);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
- /* Invalidating the entire process doesn't use a va */
- return mmio_launch_invalidate(npu, launch, 0);
+ /* Invalidating the entire process doesn't use a va */
+ mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
+ }
}
-static int mmio_invalidate_va(struct npu *npu, unsigned long va,
- unsigned long pid, bool flush)
+static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+ unsigned long va, unsigned long pid, bool flush)
{
+ int i;
unsigned long launch;
- /* IS set to invalidate target VA */
- launch = 0;
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
- /* PRS set to process scoped */
- launch |= PPC_BIT(13);
+ /* IS set to invalidate target VA */
+ launch = 0;
- /* AP */
- launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+ /* PRS set to process scoped */
+ launch |= PPC_BIT(13);
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
+ /* AP */
+ launch |= (u64)
+ mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
- /* No flush */
- launch |= !flush << PPC_BITLSHIFT(39);
+ /* PID */
+ launch |= pid << PPC_BITLSHIFT(38);
- return mmio_launch_invalidate(npu, launch, va);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
+ mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
+ }
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
-struct mmio_atsd_reg {
- struct npu *npu;
- int reg;
-};
-
static void mmio_invalidate_wait(
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
{
struct npu *npu;
int i, reg;
@@ -522,16 +531,67 @@ static void mmio_invalidate_wait(
reg = mmio_atsd_reg[i].reg;
while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
cpu_relax();
+ }
+}
- put_mmio_atsd_reg(npu, reg);
+/*
+ * Acquires all the address translation shootdown (ATSD) registers required to
+ * launch an ATSD on all links this npu_context is active on.
+ */
+static void acquire_atsd_reg(struct npu_context *npu_context,
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+ int i, j;
+ struct npu *npu;
+ struct pci_dev *npdev;
+ struct pnv_phb *nphb;
+ for (i = 0; i <= max_npu2_index; i++) {
+ mmio_atsd_reg[i].reg = -1;
+ for (j = 0; j < NV_MAX_LINKS; j++) {
+ /*
+ * There are no ordering requirements with respect to
+ * the setup of struct npu_context, but to ensure
+ * consistent behaviour we need to ensure npdev[][] is
+ * only read once.
+ */
+ npdev = READ_ONCE(npu_context->npdev[i][j]);
+ if (!npdev)
+ continue;
+
+ nphb = pci_bus_to_host(npdev->bus)->private_data;
+ npu = &nphb->npu;
+ mmio_atsd_reg[i].npu = npu;
+ mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+ while (mmio_atsd_reg[i].reg < 0) {
+ mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+ cpu_relax();
+ }
+ break;
+ }
+ }
+}
+
+/*
+ * Release previously acquired ATSD registers. To avoid deadlocks the registers
+ * must be released in the same order they were acquired above in
+ * acquire_atsd_reg.
+ */
+static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+ int i;
+
+ for (i = 0; i <= max_npu2_index; i++) {
/*
- * The GPU requires two flush ATSDs to ensure all entries have
- * been flushed. We use PID 0 as it will never be used for a
- * process on the GPU.
+ * We can't rely on npu_context->npdev[][] being the same here
+ * as when acquire_atsd_reg() was called, hence we use the
+ * values stored in mmio_atsd_reg during the acquire phase
+ * rather than re-reading npdev[][].
*/
- if (flush)
- mmio_invalidate_pid(npu, 0, true);
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
}
}
@@ -542,10 +602,6 @@ static void mmio_invalidate_wait(
static void mmio_invalidate(struct npu_context *npu_context, int va,
unsigned long address, bool flush)
{
- int i, j;
- struct npu *npu;
- struct pnv_phb *nphb;
- struct pci_dev *npdev;
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
@@ -561,37 +617,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
* Loop over all the NPUs this process is active on and launch
* an invalidate.
*/
- for (i = 0; i <= max_npu2_index; i++) {
- mmio_atsd_reg[i].reg = -1;
- for (j = 0; j < NV_MAX_LINKS; j++) {
- npdev = npu_context->npdev[i][j];
- if (!npdev)
- continue;
-
- nphb = pci_bus_to_host(npdev->bus)->private_data;
- npu = &nphb->npu;
- mmio_atsd_reg[i].npu = npu;
-
- if (va)
- mmio_atsd_reg[i].reg =
- mmio_invalidate_va(npu, address, pid,
- flush);
- else
- mmio_atsd_reg[i].reg =
- mmio_invalidate_pid(npu, pid, flush);
-
- /*
- * The NPU hardware forwards the shootdown to all GPUs
- * so we only have to launch one shootdown per NPU.
- */
- break;
- }
+ acquire_atsd_reg(npu_context, mmio_atsd_reg);
+ if (va)
+ mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+ else
+ mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+
+ mmio_invalidate_wait(mmio_atsd_reg);
+ if (flush) {
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have
+ * been flushed. We use PID 0 as it will never be used for a
+ * process on the GPU.
+ */
+ mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+ mmio_invalidate_wait(mmio_atsd_reg);
+ mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+ mmio_invalidate_wait(mmio_atsd_reg);
}
-
- mmio_invalidate_wait(mmio_atsd_reg, flush);
- if (flush)
- /* Wait for the flush to complete */
- mmio_invalidate_wait(mmio_atsd_reg, false);
+ release_atsd_reg(mmio_atsd_reg);
}
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -680,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
+ nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
+ if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
+ &nvlink_index)))
+ return ERR_PTR(-ENODEV);
+
if (!mm || mm->context.id == 0) {
/*
* Kernel thread contexts are not supported and context id 0 is
@@ -707,26 +756,40 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
*/
npu_context = mm->context.npu_context;
if (!npu_context) {
+ rc = -ENOMEM;
npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
- if (!npu_context)
- return ERR_PTR(-ENOMEM);
+ if (npu_context) {
+ kref_init(&npu_context->kref);
+ npu_context->mm = mm;
+ npu_context->mn.ops = &nv_nmmu_notifier_ops;
+ rc = __mmu_notifier_register(&npu_context->mn, mm);
+ }
+
+ if (rc) {
+ kfree(npu_context);
+ opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+ PCI_DEVID(gpdev->bus->number,
+ gpdev->devfn));
+ return ERR_PTR(rc);
+ }
mm->context.npu_context = npu_context;
- npu_context->mm = mm;
- npu_context->mn.ops = &nv_nmmu_notifier_ops;
- __mmu_notifier_register(&npu_context->mn, mm);
- kref_init(&npu_context->kref);
} else {
- kref_get(&npu_context->kref);
+ WARN_ON(!kref_get_unless_zero(&npu_context->kref));
}
npu_context->release_cb = cb;
npu_context->priv = priv;
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
- &nvlink_index)))
- return ERR_PTR(-ENODEV);
- npu_context->npdev[npu->index][nvlink_index] = npdev;
+
+ /*
+ * npdev is a pci_dev pointer setup by the PCI code. We assign it to
+ * npdev[][] to indicate to the mmu notifiers that an invalidation
+ * should also be sent over this nvlink. The notifiers don't use any
+ * other fields in npu_context, so we just need to ensure that when they
+ * deference npu_context->npdev[][] it is either a valid pointer or
+ * NULL.
+ */
+ WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
if (!nphb->npu.nmmu_flush) {
/*
@@ -778,7 +841,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
return;
- npu_context->npdev[npu->index][nvlink_index] = NULL;
+ WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
kref_put(&npu_context->kref, pnv_npu2_release_context);
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 2fa3ac8..b370151 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -303,26 +303,9 @@ invalid_img:
return rc;
}
-/* Return CPUs to OPAL before starting FW update */
-static void flash_return_cpu(void *info)
-{
- int cpu = smp_processor_id();
-
- if (!cpu_online(cpu))
- return;
-
- /* Disable IRQ */
- hard_irq_disable();
-
- /* Return the CPU to OPAL */
- opal_return_cpu();
-}
-
/* This gets called just before system reboots */
-void opal_flash_term_callback(void)
+void opal_flash_update_print_message(void)
{
- struct cpumask mask;
-
if (update_flash_data.status != FLASH_IMG_READY)
return;
@@ -333,15 +316,6 @@ void opal_flash_term_callback(void)
/* Small delay to help getting the above message out */
msleep(500);
-
- /* Return secondary CPUs to firmware */
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
- if (!cpumask_empty(&mask))
- smp_call_function_many(&mask,
- flash_return_cpu, NULL, false);
- /* Hard disable interrupts */
- hard_irq_disable();
}
/*
@@ -418,12 +392,12 @@ static int alloc_image_buf(char *buffer, size_t count)
void *addr;
int size;
- if (count < sizeof(struct image_header_t)) {
+ if (count < sizeof(image_header)) {
pr_warn("FLASH: Invalid candidate image\n");
return -EINVAL;
}
- memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t));
+ memcpy(&image_header, (void *)buffer, sizeof(image_header));
image_data.size = be32_to_cpu(image_header.size);
pr_debug("FLASH: Candidate image size = %u\n", image_data.size);
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index c9e1a4f..4efc95b 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -314,7 +314,7 @@ static int opal_handle_hmi_event(struct notifier_block *nb,
pr_err("HMI: out of memory, Opal message event not handled\n");
return -ENOMEM;
}
- memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));
+ memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt));
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
list_add(&msg_node->list, &opal_hmi_evt_list);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index f6f55ab..2a14fda 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -110,11 +110,11 @@ static int imc_get_mem_addr_nest(struct device_node *node,
if (nr_chips <= 0)
return -ENODEV;
- base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL);
+ base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL);
if (!base_addr_arr)
return -ENOMEM;
- chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL);
+ chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL);
if (!chipid_arr)
return -ENOMEM;
@@ -125,8 +125,8 @@ static int imc_get_mem_addr_nest(struct device_node *node,
nr_chips))
goto error;
- pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info),
- GFP_KERNEL);
+ pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info),
+ GFP_KERNEL);
if (!pmu_ptr->mem_info)
goto error;
@@ -161,7 +161,7 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
u32 offset;
/* memory for pmu */
- pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);
+ pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
if (!pmu_ptr)
return -ENOMEM;
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 8ddc1ac..dcb42bc 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -112,7 +112,7 @@ static int opal_memory_err_event(struct notifier_block *nb,
"handled\n");
return -ENOMEM;
}
- memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+ memcpy(&msg_node->msg, msg, sizeof(msg_node->msg));
spin_lock_irqsave(&opal_mem_err_lock, flags);
list_add(&msg_node->list, &opal_memory_err_list);
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index 9db4398..ba2ff06 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -59,6 +59,10 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
}
+
+ if (rc)
+ return -EIO;
+
*index += count;
return count;
}
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
index 7313b7f..74986b3 100644
--- a/arch/powerpc/platforms/powernv/opal-psr.c
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -136,7 +136,7 @@ void __init opal_psr_init(void)
return;
}
- psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr),
+ psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs),
GFP_KERNEL);
if (!psr_attrs)
return;
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index 7e5a235..541c9ea 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -166,13 +166,13 @@ void __init opal_sensor_groups_init(void)
if (!nr_attrs)
continue;
- sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr),
+ sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs),
GFP_KERNEL);
if (!sgs[i].sgattrs)
goto out_sgs_sgattrs;
sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
- sizeof(struct attribute *),
+ sizeof(*sgs[i].sg.attrs),
GFP_KERNEL);
if (!sgs[i].sg.attrs) {
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 1b2936b..3da30c2 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -323,3 +323,5 @@ OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 81c0a94..22d5e11 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -46,7 +46,7 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
__func__, dev);
return SCOM_MAP_INVALID;
}
- m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
+ m = kmalloc(sizeof(*m), GFP_KERNEL);
if (!m)
return NULL;
m->chip = be32_to_cpup(gcid);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index c151827..48fbb41 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -490,9 +490,12 @@ void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
* opal to trigger checkstop explicitly for error analysis.
* The FSP PRD component would have already got notified
* about this error through other channels.
+ * 4. We are running on a newer skiboot that by default does
+ * not cause a checkstop, drops us back to the kernel to
+ * extract context and state at the time of the error.
*/
- ppc_md.restart(NULL);
+ panic(msg);
}
int opal_machine_check(struct pt_regs *regs)
@@ -821,6 +824,9 @@ static int __init opal_init(void)
/* Create i2c platform devices */
opal_pdev_init("ibm,opal-i2c");
+ /* Handle non-volatile memory devices */
+ opal_pdev_init("pmem-region");
+
/* Setup a heatbeat thread if requested by OPAL */
opal_init_heartbeat();
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c
index 94498a0..cee003d 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -16,14 +16,6 @@
#include "pci.h"
-struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
- return of_node_get(hose->dn);
-}
-EXPORT_SYMBOL(pnv_pci_get_phb_node);
-
int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index a6c92c7..3f9c69d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2681,14 +2681,23 @@ static struct pnv_ioda_pe *gpe_table_group_to_npe(
static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
int num, struct iommu_table *tbl)
{
+ struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
+ int num2 = (num == 0) ? 1 : 0;
long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
if (ret)
return ret;
- ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl);
- if (ret)
+ if (table_group->tables[num2])
+ pnv_npu_unset_window(npe, num2);
+
+ ret = pnv_npu_set_window(npe, num, tbl);
+ if (ret) {
pnv_pci_ioda2_unset_window(table_group, num);
+ if (table_group->tables[num2])
+ pnv_npu_set_window(npe, num2,
+ table_group->tables[num2]);
+ }
return ret;
}
@@ -2697,12 +2706,24 @@ static long pnv_pci_ioda2_npu_unset_window(
struct iommu_table_group *table_group,
int num)
{
+ struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
+ int num2 = (num == 0) ? 1 : 0;
long ret = pnv_pci_ioda2_unset_window(table_group, num);
if (ret)
return ret;
- return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num);
+ if (!npe->table_group.tables[num])
+ return 0;
+
+ ret = pnv_npu_unset_window(npe, num);
+ if (ret)
+ return ret;
+
+ if (table_group->tables[num2])
+ ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]);
+
+ return ret;
}
static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
@@ -3843,7 +3864,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb_id = be64_to_cpup(prop64);
pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
- phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
+ phb = memblock_virt_alloc(sizeof(*phb), 0);
/* Allocate PCI controller */
phb->hose = hose = pcibios_alloc_controller(np);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 69d102c..b265ecc 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/iommu.h>
+#include <linux/sched/mm.h>
#include <asm/sections.h>
#include <asm/io.h>
@@ -38,6 +39,7 @@
#include "pci.h"
static DEFINE_MUTEX(p2p_mutex);
+static DEFINE_MUTEX(tunnel_mutex);
int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
{
@@ -1092,6 +1094,139 @@ out:
}
EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
+struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ return of_node_get(hose->dn);
+}
+EXPORT_SYMBOL(pnv_pci_get_phb_node);
+
+int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind)
+{
+ struct device_node *np;
+ const __be32 *prop;
+ struct pnv_ioda_pe *pe;
+ uint16_t window_id;
+ int rc;
+
+ if (!radix_enabled())
+ return -ENXIO;
+
+ if (!(np = pnv_pci_get_phb_node(dev)))
+ return -ENXIO;
+
+ prop = of_get_property(np, "ibm,phb-indications", NULL);
+ of_node_put(np);
+
+ if (!prop || !prop[1])
+ return -ENXIO;
+
+ *asnind = (u64)be32_to_cpu(prop[1]);
+ pe = pnv_ioda_get_pe(dev);
+ if (!pe)
+ return -ENODEV;
+
+ /* Increase real window size to accept as_notify messages. */
+ window_id = (pe->pe_number << 1 ) + 1;
+ rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number,
+ window_id, pe->tce_bypass_base,
+ (uint64_t)1 << 48);
+ return opal_error_code(rc);
+}
+EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel);
+
+int pnv_pci_disable_tunnel(struct pci_dev *dev)
+{
+ struct pnv_ioda_pe *pe;
+
+ pe = pnv_ioda_get_pe(dev);
+ if (!pe)
+ return -ENODEV;
+
+ /* Restore default real window size. */
+ pnv_pci_ioda2_set_bypass(pe, true);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel);
+
+int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
+{
+ __be64 val;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ u64 tunnel_bar;
+ int rc;
+
+ if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR))
+ return -ENXIO;
+ if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
+ return -ENXIO;
+
+ hose = pci_bus_to_host(dev->bus);
+ phb = hose->private_data;
+
+ mutex_lock(&tunnel_mutex);
+ rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val);
+ if (rc != OPAL_SUCCESS) {
+ rc = -EIO;
+ goto out;
+ }
+ tunnel_bar = be64_to_cpu(val);
+ if (enable) {
+ /*
+ * Only one device per PHB can use atomics.
+ * Our policy is first-come, first-served.
+ */
+ if (tunnel_bar) {
+ if (tunnel_bar != addr)
+ rc = -EBUSY;
+ else
+ rc = 0; /* Setting same address twice is ok */
+ goto out;
+ }
+ } else {
+ /*
+ * The device that owns atomics and wants to release
+ * them must pass the same address with enable == 0.
+ */
+ if (tunnel_bar != addr) {
+ rc = -EPERM;
+ goto out;
+ }
+ addr = 0x0ULL;
+ }
+ rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr);
+ rc = opal_error_code(rc);
+out:
+ mutex_unlock(&tunnel_mutex);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar);
+
+#ifdef CONFIG_PPC64 /* for thread.tidr */
+int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid,
+ u32 *tid)
+{
+ struct mm_struct *mm = NULL;
+
+ if (task == NULL)
+ return -EINVAL;
+
+ mm = get_task_mm(task);
+ if (mm == NULL)
+ return -EINVAL;
+
+ *pid = mm->context.id;
+ mmput(mm);
+
+ *tid = task->thread.tidr;
+ *lpid = mfspr(SPRN_LPID);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info);
+#endif
+
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 092715b..ef8c9ce 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -38,57 +38,92 @@
#include <asm/smp.h>
#include <asm/tm.h>
#include <asm/setup.h>
+#include <asm/security_features.h>
#include "powernv.h"
+
+static bool fw_feature_is(const char *state, const char *name,
+ struct device_node *fw_features)
+{
+ struct device_node *np;
+ bool rc = false;
+
+ np = of_get_child_by_name(fw_features, name);
+ if (np) {
+ rc = of_property_read_bool(np, state);
+ of_node_put(np);
+ }
+
+ return rc;
+}
+
+static void init_fw_feat_flags(struct device_node *np)
+{
+ if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
+ security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+ if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
+ security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+ if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
+ security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+ if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
+ security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+ if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
+ security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+ if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
+ security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+ /*
+ * The features below are enabled by default, so we instead look to see
+ * if firmware has *disabled* them, and clear them if so.
+ */
+ if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
+ security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+ if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+ if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
+ if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
+ security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
static void pnv_setup_rfi_flush(void)
{
struct device_node *np, *fw_features;
enum l1d_flush_type type;
- int enable;
+ bool enable;
/* Default to fallback in case fw-features are not available */
type = L1D_FLUSH_FALLBACK;
- enable = 1;
np = of_find_node_by_name(NULL, "ibm,opal");
fw_features = of_get_child_by_name(np, "fw-features");
of_node_put(np);
if (fw_features) {
- np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
- if (np && of_property_read_bool(np, "enabled"))
- type = L1D_FLUSH_MTTRIG;
+ init_fw_feat_flags(fw_features);
+ of_node_put(fw_features);
- of_node_put(np);
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+ type = L1D_FLUSH_MTTRIG;
- np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
- if (np && of_property_read_bool(np, "enabled"))
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
type = L1D_FLUSH_ORI;
-
- of_node_put(np);
-
- /* Enable unless firmware says NOT to */
- enable = 2;
- np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
- if (np && of_property_read_bool(np, "disabled"))
- enable--;
-
- of_node_put(np);
-
- np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
- if (np && of_property_read_bool(np, "disabled"))
- enable--;
-
- np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
- if (np && of_property_read_bool(np, "disabled"))
- enable = 0;
-
- of_node_put(np);
- of_node_put(fw_features);
}
- setup_rfi_flush(type, enable > 0);
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
+
+ setup_rfi_flush(type, enable);
}
static void __init pnv_setup_arch(void)
@@ -166,17 +201,12 @@ static void pnv_prepare_going_down(void)
*/
opal_event_shutdown();
- /* Soft disable interrupts */
- local_irq_disable();
+ /* Print flash update message if one is scheduled. */
+ opal_flash_update_print_message();
- /*
- * Return secondary CPUs to firwmare if a flash update
- * is pending otherwise we will get all sort of error
- * messages about CPU being stuck etc.. This will also
- * have the side effect of hard disabling interrupts so
- * past this point, the kernel is effectively dead.
- */
- opal_flash_term_callback();
+ smp_send_stop();
+
+ hard_irq_disable();
}
static void __noreturn pnv_restart(char *cmd)
@@ -258,7 +288,7 @@ static void pnv_kexec_wait_secondaries_down(void)
if (i != notified) {
printk(KERN_INFO "kexec: waiting for cpu %d "
"(physical %d) to enter OPAL\n",
- i, paca[i].hw_cpu_id);
+ i, paca_ptrs[i]->hw_cpu_id);
notified = i;
}
@@ -270,7 +300,7 @@ static void pnv_kexec_wait_secondaries_down(void)
if (timeout-- == 0) {
printk(KERN_ERR "kexec: timed out waiting for "
"cpu %d (physical %d) to enter OPAL\n",
- i, paca[i].hw_cpu_id);
+ i, paca_ptrs[i]->hw_cpu_id);
break;
}
}
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 9664c84..19af6de 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -80,7 +80,7 @@ static int pnv_smp_kick_cpu(int nr)
* If we already started or OPAL is not supported, we just
* kick the CPU via the PACA
*/
- if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
+ if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
goto kick;
/*
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 596ae2e..4556300 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -280,7 +280,7 @@ void update_subcore_sibling_mask(void)
int offset = (tid / threads_per_subcore) * threads_per_subcore;
int mask = sibling_mask_first_cpu << offset;
- paca[cpu].subcore_sibling_mask = mask;
+ paca_ptrs[cpu]->subcore_sibling_mask = mask;
}
}
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
index ca22f1e..4f7276e 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -166,19 +166,20 @@ void vas_window_init_dbgdir(struct vas_window *window)
return;
-free_name:
- kfree(window->dbgname);
- window->dbgname = NULL;
-
remove_dir:
debugfs_remove_recursive(window->dbgdir);
window->dbgdir = NULL;
+
+free_name:
+ kfree(window->dbgname);
+ window->dbgname = NULL;
}
void vas_instance_init_dbgdir(struct vas_instance *vinst)
{
struct dentry *d;
+ vas_init_dbgdir();
if (!vas_debugfs)
return;
@@ -201,8 +202,18 @@ free_name:
vinst->dbgdir = NULL;
}
+/*
+ * Set up the "root" VAS debugfs dir. Return if we already set it up
+ * (or failed to) in an earlier instance of VAS.
+ */
void vas_init_dbgdir(void)
{
+ static bool first_time = true;
+
+ if (!first_time)
+ return;
+
+ first_time = false;
vas_debugfs = debugfs_create_dir("vas", NULL);
if (IS_ERR(vas_debugfs))
vas_debugfs = NULL;
diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h
new file mode 100644
index 0000000..a449b9f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-trace.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vas
+
+#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _VAS_TRACE_H
+#include <linux/tracepoint.h>
+#include <linux/sched.h>
+#include <asm/vas.h>
+
+TRACE_EVENT( vas_rx_win_open,
+
+ TP_PROTO(struct task_struct *tsk,
+ int vasid,
+ int cop,
+ struct vas_rx_win_attr *rxattr),
+
+ TP_ARGS(tsk, vasid, cop, rxattr),
+
+ TP_STRUCT__entry(
+ __field(struct task_struct *, tsk)
+ __field(int, pid)
+ __field(int, cop)
+ __field(int, vasid)
+ __field(struct vas_rx_win_attr *, rxattr)
+ __field(int, lnotify_lpid)
+ __field(int, lnotify_pid)
+ __field(int, lnotify_tid)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = tsk->pid;
+ __entry->vasid = vasid;
+ __entry->cop = cop;
+ __entry->lnotify_lpid = rxattr->lnotify_lpid;
+ __entry->lnotify_pid = rxattr->lnotify_pid;
+ __entry->lnotify_tid = rxattr->lnotify_tid;
+ ),
+
+ TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d",
+ __entry->pid, __entry->vasid, __entry->cop,
+ __entry->lnotify_lpid, __entry->lnotify_pid,
+ __entry->lnotify_tid)
+);
+
+TRACE_EVENT( vas_tx_win_open,
+
+ TP_PROTO(struct task_struct *tsk,
+ int vasid,
+ int cop,
+ struct vas_tx_win_attr *txattr),
+
+ TP_ARGS(tsk, vasid, cop, txattr),
+
+ TP_STRUCT__entry(
+ __field(struct task_struct *, tsk)
+ __field(int, pid)
+ __field(int, cop)
+ __field(int, vasid)
+ __field(struct vas_tx_win_attr *, txattr)
+ __field(int, lpid)
+ __field(int, pidr)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = tsk->pid;
+ __entry->vasid = vasid;
+ __entry->cop = cop;
+ __entry->lpid = txattr->lpid;
+ __entry->pidr = txattr->pidr;
+ ),
+
+ TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d",
+ __entry->pid, __entry->vasid, __entry->cop,
+ __entry->lpid, __entry->pidr)
+);
+
+TRACE_EVENT( vas_paste_crb,
+
+ TP_PROTO(struct task_struct *tsk,
+ struct vas_window *win),
+
+ TP_ARGS(tsk, win),
+
+ TP_STRUCT__entry(
+ __field(struct task_struct *, tsk)
+ __field(struct vas_window *, win)
+ __field(int, pid)
+ __field(int, vasid)
+ __field(int, winid)
+ __field(unsigned long, paste_kaddr)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = tsk->pid;
+ __entry->vasid = win->vinst->vas_id;
+ __entry->winid = win->winid;
+ __entry->paste_kaddr = (unsigned long)win->paste_kaddr
+ ),
+
+ TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx\n",
+ __entry->pid, __entry->vasid, __entry->winid,
+ __entry->paste_kaddr)
+);
+
+#endif /* _VAS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv
+#define TRACE_INCLUDE_FILE vas-trace
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index b7c53a5..ff9f488 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -21,6 +21,9 @@
#include "vas.h"
#include "copy-paste.h"
+#define CREATE_TRACE_POINTS
+#include "vas-trace.h"
+
/*
* Compute the paste address region for the window @window using the
* ->paste_base_addr and ->paste_win_id_shift we got from device tree.
@@ -880,6 +883,8 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
struct vas_winctx winctx;
struct vas_instance *vinst;
+ trace_vas_rx_win_open(current, vasid, cop, rxattr);
+
if (!rx_win_args_valid(cop, rxattr))
return ERR_PTR(-EINVAL);
@@ -1008,6 +1013,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
struct vas_winctx winctx;
struct vas_instance *vinst;
+ trace_vas_tx_win_open(current, vasid, cop, attr);
+
if (!tx_win_args_valid(cop, attr))
return ERR_PTR(-EINVAL);
@@ -1100,6 +1107,8 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
void *addr;
uint64_t val;
+ trace_vas_paste_crb(current, txwin);
+
/*
* Only NX windows are supported for now and hardware assumes
* report-enable flag is set for NX windows. Ensure software
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index aebbe95..5a2b24c 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -160,8 +160,6 @@ static int __init vas_init(void)
int found = 0;
struct device_node *dn;
- vas_init_dbgdir();
-
platform_driver_register(&vas_driver);
for_each_compatible_node(dn, NULL, "ibm,vas") {
@@ -169,8 +167,10 @@ static int __init vas_init(void)
found++;
}
- if (!found)
+ if (!found) {
+ platform_driver_unregister(&vas_driver);
return -ENODEV;
+ }
pr_devel("Found %d instances\n", found);
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 7f870ec..8c7009d 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -524,8 +524,7 @@ static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
int result;
struct dma_chunk *c;
- c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
-
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c) {
result = -ENOMEM;
goto fail_alloc;
@@ -570,8 +569,7 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__,
phys_addr, ps3_mm_phys_to_lpar(phys_addr), len);
- c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
-
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c) {
result = -ENOMEM;
goto fail_alloc;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 652d3e96..6ef77ca 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -234,7 +234,7 @@ static void pseries_cpu_die(unsigned int cpu)
* done here. Change isolate state to Isolate and
* change allocation-state to Unusable.
*/
- paca[cpu].cpu_start = 0;
+ paca_ptrs[cpu]->cpu_start = 0;
}
/*
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index eeb1342..3fe1267 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -23,7 +23,12 @@
void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
{
- /* Don't risk a hypervisor call if we're crashing */
+ /*
+ * Don't risk a hypervisor call if we're crashing
+ * XXX: Why? The hypervisor is not crashing. It might be better
+ * to at least attempt unregister to avoid the hypervisor stepping
+ * on our memory.
+ */
if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
int ret;
int cpu = smp_processor_id();
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 0ee4a46..adb996e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -99,7 +99,7 @@ void vpa_init(int cpu)
* reports that. All SPLPAR support SLB shadow buffer.
*/
if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
- addr = __pa(paca[cpu].slb_shadow_ptr);
+ addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr);
ret = register_slb_shadow(hwcpu, addr);
if (ret)
pr_err("WARNING: SLB shadow buffer registration for "
@@ -111,7 +111,7 @@ void vpa_init(int cpu)
/*
* Register dispatch trace log, if one has been allocated.
*/
- pp = &paca[cpu];
+ pp = paca_ptrs[cpu];
dtl = pp->dispatch_log;
if (dtl) {
pp->dtl_ridx = 0;
@@ -306,14 +306,14 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
want_v = hpte_encode_avpn(vpn, psize, ssize);
- pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
- want_v, slot, flags, psize);
-
flags = (newpp & 7) | H_AVPN;
if (mmu_has_feature(MMU_FTR_KERNEL_RO))
/* Move pp0 into bit 8 (IBM 55) */
flags |= (newpp & HPTE_R_PP0) >> 55;
+ pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
+ want_v, slot, flags, psize);
+
lpar_rc = plpar_pte_protect(flags, slot, want_v);
if (lpar_rc == H_NOT_FOUND) {
@@ -726,15 +726,18 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
return 0;
}
-/* Actually only used for radix, so far */
static int pseries_lpar_register_process_table(unsigned long base,
unsigned long page_size, unsigned long table_size)
{
long rc;
- unsigned long flags = PROC_TABLE_NEW;
+ unsigned long flags = 0;
+ if (table_size)
+ flags |= PROC_TABLE_NEW;
if (radix_enabled())
flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
+ else
+ flags |= PROC_TABLE_HPT_SLB;
for (;;) {
rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
page_size, table_size);
@@ -760,6 +763,7 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+ register_process_table = pseries_lpar_register_process_table;
if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 0f7fb71..8a8033a 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -348,6 +348,9 @@ void post_mobility_fixup(void)
printk(KERN_ERR "Post-mobility device tree update "
"failed: %d\n", rc);
+ /* Possibly switch to a new RFI flush type */
+ pseries_setup_rfi_flush();
+
return;
}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1ae1d9f..60db2ee 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -27,6 +27,14 @@ extern int pSeries_machine_check_exception(struct pt_regs *regs);
#ifdef CONFIG_SMP
extern void smp_init_pseries(void);
+
+/* Get state of physical CPU from query_cpu_stopped */
+int smp_query_cpu_stopped(unsigned int pcpu);
+#define QCSS_STOPPED 0
+#define QCSS_STOPPING 1
+#define QCSS_NOT_STOPPED 2
+#define QCSS_HARDWARE_ERROR -1
+#define QCSS_HARDWARE_BUSY -2
#else
static inline void smp_init_pseries(void) { };
#endif
@@ -100,4 +108,6 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
+void pseries_setup_rfi_flush(void);
+
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1a52762..b55ad42 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -68,6 +68,7 @@
#include <asm/plpar_wrappers.h>
#include <asm/kexec.h>
#include <asm/isa-bridge.h>
+#include <asm/security_features.h>
#include "pseries.h"
@@ -246,7 +247,7 @@ static int alloc_dispatch_logs(void)
return 0;
for_each_possible_cpu(cpu) {
- pp = &paca[cpu];
+ pp = paca_ptrs[cpu];
dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
if (!dtl) {
pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
@@ -459,36 +460,78 @@ static void __init find_and_init_phbs(void)
of_pci_check_probe_only();
}
-static void pseries_setup_rfi_flush(void)
+static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
+{
+ /*
+ * The features below are disabled by default, so we instead look to see
+ * if firmware has *enabled* them, and set them if so.
+ */
+ if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
+ security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+ if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
+ security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+ if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+ security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+ if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+ security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+ if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
+ security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+ if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
+ security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+ /*
+ * The features below are enabled by default, so we instead look to see
+ * if firmware has *disabled* them, and clear them if so.
+ */
+ if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
+ security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+ if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+ if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+ security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
+void pseries_setup_rfi_flush(void)
{
struct h_cpu_char_result result;
enum l1d_flush_type types;
bool enable;
long rc;
- /* Enable by default */
- enable = true;
+ /*
+ * Set features to the defaults assumed by init_cpu_char_feature_flags()
+ * so it can set/clear again any features that might have changed after
+ * migration, and in case the hypercall fails and it is not even called.
+ */
+ powerpc_security_features = SEC_FTR_DEFAULT;
rc = plpar_get_cpu_characteristics(&result);
- if (rc == H_SUCCESS) {
- types = L1D_FLUSH_NONE;
+ if (rc == H_SUCCESS)
+ init_cpu_char_feature_flags(&result);
- if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
- types |= L1D_FLUSH_MTTRIG;
- if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
- types |= L1D_FLUSH_ORI;
+ /*
+ * We're the guest so this doesn't apply to us, clear it to simplify
+ * handling of it elsewhere.
+ */
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
- /* Use fallback if nothing set in hcall */
- if (types == L1D_FLUSH_NONE)
- types = L1D_FLUSH_FALLBACK;
+ types = L1D_FLUSH_FALLBACK;
- if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
- (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
- enable = false;
- } else {
- /* Default to fallback if case hcall is not available */
- types = L1D_FLUSH_FALLBACK;
- }
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+ types |= L1D_FLUSH_MTTRIG;
+
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+ types |= L1D_FLUSH_ORI;
+
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
setup_rfi_flush(types, enable);
}
@@ -739,7 +782,7 @@ static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
/* PAPR says we can't set HYP */
dawrx &= ~DAWRX_HYP;
- return plapr_set_watchpoint0(dawr, dawrx);
+ return plpar_set_watchpoint0(dawr, dawrx);
}
#define CMO_CHARACTERISTICS_TOKEN 44
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 2e18482..3df4612 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -110,7 +110,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
}
/* Fixup atomic count: it exited inside IRQ handler. */
- task_thread_info(paca[lcpu].__current)->preempt_count = 0;
+ task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0;
#ifdef CONFIG_HOTPLUG_CPU
if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
goto out;
@@ -165,7 +165,7 @@ static int smp_pSeries_kick_cpu(int nr)
* cpu_start field to become non-zero After we set cpu_start,
* the processor will continue on to secondary_start
*/
- paca[nr].cpu_start = 1;
+ paca_ptrs[nr]->cpu_start = 1;
#ifdef CONFIG_HOTPLUG_CPU
set_preferred_offline_state(nr, CPU_STATE_ONLINE);
@@ -215,7 +215,7 @@ static int pseries_cause_nmi_ipi(int cpu)
hwcpu = get_hard_smp_processor_id(cpu);
}
- if (plapr_signal_sys_reset(hwcpu) == H_SUCCESS)
+ if (plpar_signal_sys_reset(hwcpu) == H_SUCCESS)
return 1;
return 0;
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index a6198d4..5ca3e22 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -38,6 +38,7 @@
#include <linux/suspend.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
+#include <linux/kmemleak.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 7306780..1d4e0ef6 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -626,7 +626,7 @@ static inline u32 mpic_physmask(u32 cpumask)
int i;
u32 mask = 0;
- for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
+ for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1)
mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
return mask;
}
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index c4dae27..6243a7e 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/kernel.h>
+#include <linux/kmemleak.h>
#include <linux/bitmap.h>
#include <linux/bootmem.h>
#include <asm/msi_bitmap.h>
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 1459f4e..37bfbc5 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -164,7 +164,7 @@ void icp_native_cause_ipi_rm(int cpu)
* Just like the cause_ipi functions, it is required to
* include a full barrier before causing the IPI.
*/
- xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
mb();
__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 40c0611..34590150 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -246,7 +246,7 @@ notrace void xmon_xive_do_dump(int cpu)
u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi,
val & XIVE_ESB_VAL_P ? 'P' : 'p',
- val & XIVE_ESB_VAL_P ? 'Q' : 'q');
+ val & XIVE_ESB_VAL_Q ? 'Q' : 'q');
}
#endif
}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 82e1a3e..a0842f1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -41,6 +41,7 @@
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
+#include <asm/plpar_wrappers.h>
#include <asm/cputable.h>
#include <asm/rtas.h>
#include <asm/sstep.h>
@@ -61,12 +62,6 @@
#include <asm/paca.h>
#endif
-#if defined(CONFIG_PPC_SPLPAR)
-#include <asm/plpar_wrappers.h>
-#else
-static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; };
-#endif
-
#include "nonstdio.h"
#include "dis-asm.h"
@@ -328,7 +323,7 @@ static void write_ciabr(unsigned long ciabr)
mtspr(SPRN_CIABR, ciabr);
return;
}
- plapr_set_ciabr(ciabr);
+ plpar_set_ciabr(ciabr);
}
/**
@@ -1273,6 +1268,16 @@ static long check_bp_loc(unsigned long addr)
return 1;
}
+/* Force enable xmon if not already enabled */
+static inline void force_enable_xmon(void)
+{
+ /* Enable xmon hooks if needed */
+ if (!xmon_on) {
+ printf("xmon: Enabling debugger hooks\n");
+ xmon_on = 1;
+ }
+}
+
static char *breakpoint_help_string =
"Breakpoint command usage:\n"
"b show breakpoints\n"
@@ -1297,6 +1302,10 @@ bpt_cmds(void)
static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
int mode;
case 'd': /* bd - hardware data breakpoint */
+ if (!ppc_breakpoint_available()) {
+ printf("Hardware data breakpoint not supported on this cpu\n");
+ break;
+ }
mode = 7;
cmd = inchar();
if (cmd == 'r')
@@ -1315,6 +1324,8 @@ bpt_cmds(void)
dabr.address &= ~HW_BRK_TYPE_DABR;
dabr.enabled = mode | BP_DABR;
}
+
+ force_enable_xmon();
break;
case 'i': /* bi - hardware instr breakpoint */
@@ -1335,6 +1346,7 @@ bpt_cmds(void)
if (bp != NULL) {
bp->enabled |= BP_CIABR;
iabr = bp;
+ force_enable_xmon();
}
break;
#endif
@@ -1399,8 +1411,10 @@ bpt_cmds(void)
if (!check_bp_loc(a))
break;
bp = new_breakpoint(a);
- if (bp != NULL)
+ if (bp != NULL) {
bp->enabled |= BP_TRAP;
+ force_enable_xmon();
+ }
break;
}
}
@@ -2327,7 +2341,7 @@ static void dump_one_paca(int cpu)
catch_memory_errors = 1;
sync();
- p = &paca[cpu];
+ p = paca_ptrs[cpu];
printf("paca for cpu 0x%x @ %px:\n", cpu, p);
@@ -3649,11 +3663,35 @@ device_initcall(setup_xmon_sysrq);
#endif /* CONFIG_MAGIC_SYSRQ */
#ifdef CONFIG_DEBUG_FS
+static void clear_all_bpt(void)
+{
+ int i;
+
+ /* clear/unpatch all breakpoints */
+ remove_bpts();
+ remove_cpu_bpts();
+
+ /* Disable all breakpoints */
+ for (i = 0; i < NBPTS; ++i)
+ bpts[i].enabled = 0;
+
+ /* Clear any data or iabr breakpoints */
+ if (iabr || dabr.enabled) {
+ iabr = NULL;
+ dabr.enabled = 0;
+ }
+
+ printf("xmon: All breakpoints cleared\n");
+}
+
static int xmon_dbgfs_set(void *data, u64 val)
{
xmon_on = !!val;
xmon_init(xmon_on);
+ /* make sure all breakpoints removed when disabling */
+ if (!xmon_on)
+ clear_all_bpt();
return 0;
}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index eaee708..32a0d5b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -120,6 +120,7 @@ config S390
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_DEVICES if !SMP
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_FIND_FIRST_BIT
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
@@ -576,7 +577,7 @@ choice
config EXPOLINE_OFF
bool "spectre_v2=off"
-config EXPOLINE_MEDIUM
+config EXPOLINE_AUTO
bool "spectre_v2=auto"
config EXPOLINE_FULL
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 2ced323..c79936d 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -47,9 +47,6 @@ cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14
cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
-#KBUILD_IMAGE is necessary for make rpm
-KBUILD_IMAGE :=arch/s390/boot/image
-
#
# Prevent tail-call optimizations, to get clearer backtraces:
#
@@ -84,7 +81,7 @@ ifdef CONFIG_EXPOLINE
CC_FLAGS_EXPOLINE += -mfunction-return=thunk
CC_FLAGS_EXPOLINE += -mindirect-branch-table
export CC_FLAGS_EXPOLINE
- cflags-y += $(CC_FLAGS_EXPOLINE)
+ cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE
endif
endif
@@ -126,6 +123,9 @@ tools := arch/s390/tools
all: image bzImage
+#KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg...
+KBUILD_IMAGE := $(boot)/bzImage
+
install: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 26d6a94..5766f7b 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -29,11 +29,16 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS)
$(call if_changed,ld)
-sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p'
+TRIM_HEAD_SIZE := 0x11000
-quiet_cmd_sizes = GEN $@
+sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p'
+
+quiet_cmd_sizes = GEN $@
cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
+quiet_cmd_trim_head = TRIM $@
+ cmd_trim_head = tail -c +$$(($(TRIM_HEAD_SIZE) + 1)) $< > $@
+
$(obj)/sizes.h: vmlinux
$(call if_changed,sizes)
@@ -43,10 +48,13 @@ $(obj)/head.o: $(obj)/sizes.h
CFLAGS_misc.o += -I$(objtree)/$(obj)
$(obj)/misc.o: $(obj)/sizes.h
-OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
-$(obj)/vmlinux.bin: vmlinux
+OBJCOPYFLAGS_vmlinux.bin.full := -R .comment -S
+$(obj)/vmlinux.bin.full: vmlinux
$(call if_changed,objcopy)
+$(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full
+ $(call if_changed,trim_head)
+
vmlinux.bin.all-y := $(obj)/vmlinux.bin
suffix-$(CONFIG_KERNEL_GZIP) := gz
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
index 231d149..9f94eca 100644
--- a/arch/s390/boot/compressed/head.S
+++ b/arch/s390/boot/compressed/head.S
@@ -23,12 +23,10 @@ ENTRY(startup_continue)
aghi %r15,-160
brasl %r14,decompress_kernel
# Set up registers for memory mover. We move the decompressed image to
- # 0x11000, starting at offset 0x11000 in the decompressed image so
- # that code living at 0x11000 in the image will end up at 0x11000 in
- # memory.
+ # 0x11000, where startup_continue of the decompressed image is supposed
+ # to be.
lgr %r4,%r2
lg %r2,.Loffset-.LPG1(%r13)
- la %r4,0(%r2,%r4)
lg %r3,.Lmvsize-.LPG1(%r13)
lgr %r5,%r3
# Move the memory mover someplace safe so it doesn't overwrite itself.
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index cecf38b..63838a1 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -27,8 +27,8 @@
/* Symbols defined by linker scripts */
extern char input_data[];
extern int input_len;
-extern char _text, _end;
-extern char _bss, _ebss;
+extern char _end[];
+extern char _bss[], _ebss[];
static void error(char *m);
@@ -144,7 +144,7 @@ unsigned long decompress_kernel(void)
{
void *output, *kernel_end;
- output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE);
+ output = (void *) ALIGN((unsigned long) _end + HEAP_SIZE, PAGE_SIZE);
kernel_end = output + SZ__bss_start;
check_ipl_parmblock((void *) 0, (unsigned long) kernel_end);
@@ -166,8 +166,8 @@ unsigned long decompress_kernel(void)
* Clear bss section. free_mem_ptr and free_mem_end_ptr need to be
* initialized afterwards since they reside in bss.
*/
- memset(&_bss, 0, &_ebss - &_bss);
- free_mem_ptr = (unsigned long) &_end;
+ memset(_bss, 0, _ebss - _bss);
+ free_mem_ptr = (unsigned long) _end;
free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
__decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 8150132..d43c2db 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -52,6 +52,7 @@ SECTIONS
/* Sections to be discarded */
/DISCARD/ : {
*(.eh_frame)
+ *(__ex_table)
*(*__ksymtab*)
}
}
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index d607987..fa9b7dd 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -1047,6 +1047,7 @@ static struct aead_alg gcm_aes_aead = {
static struct crypto_alg *aes_s390_algs_ptr[5];
static int aes_s390_algs_num;
+static struct aead_alg *aes_s390_aead_alg;
static int aes_s390_register_alg(struct crypto_alg *alg)
{
@@ -1065,7 +1066,8 @@ static void aes_s390_fini(void)
if (ctrblk)
free_page((unsigned long) ctrblk);
- crypto_unregister_aead(&gcm_aes_aead);
+ if (aes_s390_aead_alg)
+ crypto_unregister_aead(aes_s390_aead_alg);
}
static int __init aes_s390_init(void)
@@ -1123,6 +1125,7 @@ static int __init aes_s390_init(void)
ret = crypto_register_aead(&gcm_aes_aead);
if (ret)
goto out_err;
+ aes_s390_aead_alg = &gcm_aes_aead;
}
return 0;
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
new file mode 100644
index 0000000..955d620
--- /dev/null
+++ b/arch/s390/include/asm/alternative-asm.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ALTERNATIVE_ASM_H
+#define _ASM_S390_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Check the length of an instruction sequence. The length may not be larger
+ * than 254 bytes and it has to be divisible by 2.
+ */
+.macro alt_len_check start,end
+ .if ( \end - \start ) > 254
+ .error "cpu alternatives does not support instructions blocks > 254 bytes\n"
+ .endif
+ .if ( \end - \start ) % 2
+ .error "cpu alternatives instructions length is odd\n"
+ .endif
+.endm
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+ .long \orig_start - .
+ .long \alt_start - .
+ .word \feature
+ .byte \orig_end - \orig_start
+ .byte \alt_end - \alt_start
+.endm
+
+/*
+ * Fill up @bytes with nops. The macro emits 6-byte nop instructions
+ * for the bulk of the area, possibly followed by a 4-byte and/or
+ * a 2-byte nop if the size of the area is not divisible by 6.
+ */
+.macro alt_pad_fill bytes
+ .fill ( \bytes ) / 6, 6, 0xc0040000
+ .fill ( \bytes ) % 6 / 4, 4, 0x47000000
+ .fill ( \bytes ) % 6 % 4 / 2, 2, 0x0700
+.endm
+
+/*
+ * Fill up @bytes with nops. If the number of bytes is larger
+ * than 6, emit a jg instruction to branch over all nops, then
+ * fill an area of size (@bytes - 6) with nop instructions.
+ */
+.macro alt_pad bytes
+ .if ( \bytes > 0 )
+ .if ( \bytes > 6 )
+ jg . + \bytes
+ alt_pad_fill \bytes - 6
+ .else
+ alt_pad_fill \bytes
+ .endif
+ .endif
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr
+771: .popsection
+772: \oldinstr
+773: alt_len_check 770b, 771b
+ alt_len_check 772b, 773b
+ alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) )
+774: .pushsection .altinstructions,"a"
+ alt_entry 772b, 774b, 770b, 771b, \feature
+ .popsection
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr1
+771: \newinstr2
+772: .popsection
+773: \oldinstr
+774: alt_len_check 770b, 771b
+ alt_len_check 771b, 772b
+ alt_len_check 773b, 774b
+ .if ( 771b - 770b > 772b - 771b )
+ alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) )
+ .else
+ alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) )
+ .endif
+775: .pushsection .altinstructions,"a"
+ alt_entry 773b, 775b, 770b, 771b,\feature1
+ alt_entry 773b, 775b, 771b, 772b,\feature2
+ .popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 633f8da..20bce13 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -230,5 +230,5 @@ int ccw_device_siosl(struct ccw_device *);
extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
-struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int);
+struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
#endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
index 4773f74..20e0d22 100644
--- a/arch/s390/include/asm/chpid.h
+++ b/arch/s390/include/asm/chpid.h
@@ -9,7 +9,7 @@
#include <uapi/asm/chpid.h>
#include <asm/cio.h>
-struct channel_path_desc {
+struct channel_path_desc_fmt0 {
u8 flags;
u8 lsn;
u8 desc;
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index dc84a01..847a042 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -227,7 +227,7 @@ struct esw_eadm {
* a field is valid; a field not being valid is always passed as %0.
* If a unit check occurred, @ecw may contain sense data; this is retrieved
* by the common I/O layer itself if the device doesn't support concurrent
- * sense (so that the device driver never needs to perform basic sene itself).
+ * sense (so that the device driver never needs to perform basic sense itself).
* For unsolicited interrupts, the irb is passed as-is (expect for sense data,
* if applicable).
*/
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index dd08db4..f58d17e 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -29,12 +29,12 @@
/* CPU measurement facility support */
static inline int cpum_cf_avail(void)
{
- return MACHINE_HAS_LPP && test_facility(67);
+ return test_facility(40) && test_facility(67);
}
static inline int cpum_sf_avail(void)
{
- return MACHINE_HAS_LPP && test_facility(68);
+ return test_facility(40) && test_facility(68);
}
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index fb56fa3..0563fd3 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -32,8 +32,10 @@ struct css_general_char {
u32 fcx : 1; /* bit 88 */
u32 : 19;
u32 alt_ssi : 1; /* bit 108 */
- u32:1;
- u32 narf:1; /* bit 110 */
+ u32 : 1;
+ u32 narf : 1; /* bit 110 */
+ u32 : 12;
+ u32 util_str : 1;/* bit 123 */
} __packed;
extern struct css_general_char css_general_characteristics;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index afb0f08..81cdb6b 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -294,6 +294,7 @@ struct kvm_vcpu_stat {
u64 exit_userspace;
u64 exit_null;
u64 exit_external_request;
+ u64 exit_io_request;
u64 exit_external_interrupt;
u64 exit_stop_request;
u64 exit_validity;
@@ -310,16 +311,29 @@ struct kvm_vcpu_stat {
u64 exit_program_interruption;
u64 exit_instr_and_program;
u64 exit_operation_exception;
+ u64 deliver_ckc;
+ u64 deliver_cputm;
u64 deliver_external_call;
u64 deliver_emergency_signal;
u64 deliver_service_signal;
- u64 deliver_virtio_interrupt;
+ u64 deliver_virtio;
u64 deliver_stop_signal;
u64 deliver_prefix_signal;
u64 deliver_restart_signal;
- u64 deliver_program_int;
- u64 deliver_io_int;
+ u64 deliver_program;
+ u64 deliver_io;
+ u64 deliver_machine_check;
u64 exit_wait_state;
+ u64 inject_ckc;
+ u64 inject_cputm;
+ u64 inject_external_call;
+ u64 inject_emergency_signal;
+ u64 inject_mchk;
+ u64 inject_pfault_init;
+ u64 inject_program;
+ u64 inject_restart;
+ u64 inject_set_prefix;
+ u64 inject_stop_signal;
u64 instruction_epsw;
u64 instruction_gs;
u64 instruction_io_other;
@@ -644,7 +658,12 @@ struct kvm_vcpu_arch {
};
struct kvm_vm_stat {
- ulong remote_tlb_flush;
+ u64 inject_io;
+ u64 inject_float_mchk;
+ u64 inject_pfault_done;
+ u64 inject_service_signal;
+ u64 inject_virtio;
+ u64 remote_tlb_flush;
};
struct kvm_arch_memory_slot {
@@ -792,6 +811,7 @@ struct kvm_arch{
int css_support;
int use_irqchip;
int use_cmma;
+ int use_pfmfi;
int user_cpu_state_ctrl;
int user_sigp;
int user_stsi;
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index 74eeec9..cbc7c3a 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -193,6 +193,11 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
}
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
static inline bool kvm_check_and_clear_guest_paused(void)
{
return false;
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index db35c41a..c639c95 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -22,8 +22,8 @@ typedef struct {
unsigned int has_pgste:1;
/* The mmu context uses storage keys. */
unsigned int use_skey:1;
- /* The mmu context uses CMMA. */
- unsigned int use_cmma:1;
+ /* The mmu context uses CMM. */
+ unsigned int uses_cmm:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 6c8ce15..324f6f4 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -31,7 +31,7 @@ static inline int init_new_context(struct task_struct *tsk,
(current->mm && current->mm->context.alloc_pgste);
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
- mm->context.use_cmma = 0;
+ mm->context.uses_cmm = 0;
#endif
switch (mm->context.asce_limit) {
case _REGION2_SIZE:
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 7df48e5..35bf28f 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -6,12 +6,10 @@
#include <linux/types.h>
-extern int nospec_call_disable;
-extern int nospec_return_disable;
+extern int nospec_disable;
void nospec_init_branches(void);
-void nospec_call_revert(s32 *start, s32 *end);
-void nospec_return_revert(s32 *start, s32 *end);
+void nospec_revert(s32 *start, s32 *end);
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index c7b4333..f0f9bcf 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -151,4 +151,7 @@ void vmem_map_init(void);
void *vmem_crst_alloc(unsigned long val);
pte_t *vmem_pte_alloc(void);
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages);
+void base_asce_free(unsigned long asce);
+
#endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 79b7ffa..c00f7b0 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -390,10 +390,10 @@ static inline int scsw_cmd_is_valid_key(union scsw *scsw)
}
/**
- * scsw_cmd_is_valid_sctl - check fctl field validity
+ * scsw_cmd_is_valid_sctl - check sctl field validity
* @scsw: pointer to scsw
*
- * Return non-zero if the fctl field of the specified command mode scsw is
+ * Return non-zero if the sctl field of the specified command mode scsw is
* valid, zero otherwise.
*/
static inline int scsw_cmd_is_valid_sctl(union scsw *scsw)
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 2eb0c8a..124154f 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -25,7 +25,6 @@
#define MACHINE_FLAG_DIAG44 _BITUL(6)
#define MACHINE_FLAG_EDAT1 _BITUL(7)
#define MACHINE_FLAG_EDAT2 _BITUL(8)
-#define MACHINE_FLAG_LPP _BITUL(9)
#define MACHINE_FLAG_TOPOLOGY _BITUL(10)
#define MACHINE_FLAG_TE _BITUL(11)
#define MACHINE_FLAG_TLB_LC _BITUL(12)
@@ -66,7 +65,6 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
-#define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 451c601..832be5c 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -68,25 +68,27 @@ typedef struct dasd_information2_t {
#define DASD_FORMAT_CDL 2
/*
* values to be used for dasd_information_t.features
- * 0x00: default features
- * 0x01: readonly (ro)
- * 0x02: use diag discipline (diag)
- * 0x04: set the device initially online (internal use only)
- * 0x08: enable ERP related logging
- * 0x10: allow I/O to fail on lost paths
- * 0x20: allow I/O to fail when a lock was stolen
- * 0x40: give access to raw eckd data
- * 0x80: enable discard support
+ * 0x100: default features
+ * 0x001: readonly (ro)
+ * 0x002: use diag discipline (diag)
+ * 0x004: set the device initially online (internal use only)
+ * 0x008: enable ERP related logging
+ * 0x010: allow I/O to fail on lost paths
+ * 0x020: allow I/O to fail when a lock was stolen
+ * 0x040: give access to raw eckd data
+ * 0x080: enable discard support
+ * 0x100: enable autodisable for IFCC errors (default)
*/
-#define DASD_FEATURE_DEFAULT 0x00
-#define DASD_FEATURE_READONLY 0x01
-#define DASD_FEATURE_USEDIAG 0x02
-#define DASD_FEATURE_INITIAL_ONLINE 0x04
-#define DASD_FEATURE_ERPLOG 0x08
-#define DASD_FEATURE_FAILFAST 0x10
-#define DASD_FEATURE_FAILONSLCK 0x20
-#define DASD_FEATURE_USERAW 0x40
-#define DASD_FEATURE_DISCARD 0x80
+#define DASD_FEATURE_READONLY 0x001
+#define DASD_FEATURE_USEDIAG 0x002
+#define DASD_FEATURE_INITIAL_ONLINE 0x004
+#define DASD_FEATURE_ERPLOG 0x008
+#define DASD_FEATURE_FAILFAST 0x010
+#define DASD_FEATURE_FAILONSLCK 0x020
+#define DASD_FEATURE_USERAW 0x040
+#define DASD_FEATURE_DISCARD 0x080
+#define DASD_FEATURE_PATH_AUTODISABLE 0x100
+#define DASD_FEATURE_DEFAULT DASD_FEATURE_PATH_AUTODISABLE
#define DASD_PARTN_BITS 2
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7f27e3d..b06a6f7 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -61,11 +61,11 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
+obj-y += nospec-branch.o
extra-y += head.o head64.o vmlinux.lds
-obj-$(CONFIG_EXPOLINE) += nospec-branch.o
-CFLAGS_REMOVE_expoline.o += $(CC_FLAGS_EXPOLINE)
+CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 2247613..8e1f2ae 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -2,6 +2,7 @@
#include <linux/module.h>
#include <asm/alternative.h>
#include <asm/facility.h>
+#include <asm/nospec-branch.h>
#define MAX_PATCH_LEN (255 - 1)
@@ -15,29 +16,6 @@ static int __init disable_alternative_instructions(char *str)
early_param("noaltinstr", disable_alternative_instructions);
-static int __init nobp_setup_early(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (rc)
- return rc;
- if (enabled && test_facility(82))
- __set_facility(82, S390_lowcore.alt_stfle_fac_list);
- else
- __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
- return 0;
-}
-early_param("nobp", nobp_setup_early);
-
-static int __init nospec_setup_early(char *str)
-{
- __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
- return 0;
-}
-early_param("nospec", nospec_setup_early);
-
struct brcl_insn {
u16 opc;
s32 disp;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 587b195..cfe2c45 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -63,6 +63,7 @@ int main(void)
OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
+ OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]);
BLANK();
/* timeval/timezone offsets for use by vdso */
OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index ac707a9..b00b515 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -67,7 +67,7 @@ static noinline __init void init_kernel_storage_key(void)
#if PAGE_DEFAULT_KEY
unsigned long end_pfn, init_pfn;
- end_pfn = PFN_UP(__pa(&_end));
+ end_pfn = PFN_UP(__pa(_end));
for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++)
page_set_storage_key(init_pfn << PAGE_SHIFT,
@@ -242,8 +242,6 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
if (test_facility(3))
S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
- if (test_facility(40))
- S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
if (test_facility(50) && test_facility(73)) {
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
__ctl_set_bit(0, 55);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a5621ea..3f22f13 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/ctl_reg.h>
@@ -57,6 +58,8 @@ _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
_CIF_ASCE_SECONDARY | _CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
+_LPP_OFFSET = __LC_LPP
+
#define BASED(name) name-cleanup_critical(%r13)
.macro TRACE_IRQS_ON
@@ -162,65 +165,22 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
.endm
.macro BPOFF
- .pushsection .altinstr_replacement, "ax"
-660: .long 0xb2e8c000
- .popsection
-661: .long 0x47000000
- .pushsection .altinstructions, "a"
- .long 661b - .
- .long 660b - .
- .word 82
- .byte 4
- .byte 4
- .popsection
+ ALTERNATIVE "", ".long 0xb2e8c000", 82
.endm
.macro BPON
- .pushsection .altinstr_replacement, "ax"
-662: .long 0xb2e8d000
- .popsection
-663: .long 0x47000000
- .pushsection .altinstructions, "a"
- .long 663b - .
- .long 662b - .
- .word 82
- .byte 4
- .byte 4
- .popsection
+ ALTERNATIVE "", ".long 0xb2e8d000", 82
.endm
.macro BPENTER tif_ptr,tif_mask
- .pushsection .altinstr_replacement, "ax"
-662: .word 0xc004, 0x0000, 0x0000 # 6 byte nop
- .word 0xc004, 0x0000, 0x0000 # 6 byte nop
- .popsection
-664: TSTMSK \tif_ptr,\tif_mask
- jz . + 8
- .long 0xb2e8d000
- .pushsection .altinstructions, "a"
- .long 664b - .
- .long 662b - .
- .word 82
- .byte 12
- .byte 12
- .popsection
+ ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .long 0xb2e8d000", \
+ "", 82
.endm
.macro BPEXIT tif_ptr,tif_mask
TSTMSK \tif_ptr,\tif_mask
- .pushsection .altinstr_replacement, "ax"
-662: jnz . + 8
- .long 0xb2e8d000
- .popsection
-664: jz . + 8
- .long 0xb2e8c000
- .pushsection .altinstructions, "a"
- .long 664b - .
- .long 662b - .
- .word 82
- .byte 8
- .byte 8
- .popsection
+ ALTERNATIVE "jz .+8; .long 0xb2e8c000", \
+ "jnz .+8; .long 0xb2e8d000", 82
.endm
#ifdef CONFIG_EXPOLINE
@@ -323,10 +283,8 @@ ENTRY(__switch_to)
aghi %r3,__TASK_pid
mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
- jz 0f
- .insn s,0xb2800000,__LC_LPP # set program parameter
-0: BR_R1USE_R14
+ ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
+ BR_R1USE_R14
.L__critical_start:
@@ -339,10 +297,10 @@ ENTRY(__switch_to)
ENTRY(sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
lg %r12,__LC_CURRENT
- stg %r2,__SF_EMPTY(%r15) # save control block pointer
- stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
- xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0
- mvc __SF_EMPTY+24(8,%r15),__TI_flags(%r12) # copy thread flags
+ stg %r2,__SF_SIE_CONTROL(%r15) # save control block pointer
+ stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
+ mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ?
jno .Lsie_load_guest_gprs
brasl %r14,load_fpu_regs # load guest fp/vx regs
@@ -353,18 +311,18 @@ ENTRY(sie64a)
jz .Lsie_gmap
lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
.Lsie_gmap:
- lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
tm __SIE_PROG20+3(%r14),3 # last exit...
jnz .Lsie_skip
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsie_skip # exit if fp/vx regs changed
- BPEXIT __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_entry:
sie 0(%r14)
.Lsie_exit:
BPOFF
- BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_skip:
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
@@ -383,7 +341,7 @@ ENTRY(sie64a)
nopr 7
.globl sie_exit
sie_exit:
- lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
+ lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
xgr %r0,%r0 # clear guest registers to
xgr %r1,%r1 # prevent speculative use
@@ -392,11 +350,11 @@ sie_exit:
xgr %r4,%r4
xgr %r5,%r5
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lg %r2,__SF_EMPTY+16(%r15) # return exit reason code
+ lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
BR_R1USE_R14
.Lsie_fault:
lghi %r14,-EFAULT
- stg %r14,__SF_EMPTY+16(%r15) # set exit reason code
+ stg %r14,__SF_SIE_REASON(%r15) # set exit reason code
j sie_exit
EX_TABLE(.Lrewind_pad6,.Lsie_fault)
@@ -685,7 +643,7 @@ ENTRY(pgm_check_handler)
slg %r14,BASED(.Lsie_critical_start)
clg %r14,BASED(.Lsie_critical_length)
jhe 0f
- lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
@@ -1285,10 +1243,8 @@ ENTRY(mcck_int_handler)
# PSW restart interrupt handler
#
ENTRY(restart_int_handler)
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
- jz 0f
- .insn s,0xb2800000,__LC_LPP
-0: stg %r15,__LC_SAVE_AREA_RESTART
+ ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
+ stg %r15,__LC_SAVE_AREA_RESTART
lg %r15,__LC_RESTART_STACK
aghi %r15,-__PT_SIZE # create pt_regs on stack
xc 0(__PT_SIZE,%r15),0(%r15)
@@ -1397,8 +1353,8 @@ cleanup_critical:
clg %r9,BASED(.Lsie_crit_mcck_length)
jh 1f
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-1: BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
- lg %r9,__SF_EMPTY(%r15) # get control block pointer
+1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 1fc6d1f..5a83be9 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -159,7 +159,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
me->core_layout.size += me->arch.got_size;
me->arch.plt_offset = me->core_layout.size;
if (me->arch.plt_size) {
- if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_call_disable)
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable)
me->arch.plt_size += PLT_ENTRY_SIZE;
me->core_layout.size += me->arch.plt_size;
}
@@ -318,8 +318,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
info->plt_offset;
ip[0] = 0x0d10e310; /* basr 1,0 */
ip[1] = 0x100a0004; /* lg 1,10(1) */
- if (IS_ENABLED(CONFIG_EXPOLINE) &&
- !nospec_call_disable) {
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
unsigned int *ij;
ij = me->core_layout.base +
me->arch.plt_offset +
@@ -440,7 +439,7 @@ int module_finalize(const Elf_Ehdr *hdr,
void *aseg;
if (IS_ENABLED(CONFIG_EXPOLINE) &&
- !nospec_call_disable && me->arch.plt_size) {
+ !nospec_disable && me->arch.plt_size) {
unsigned int *ij;
ij = me->core_layout.base + me->arch.plt_offset +
@@ -467,11 +466,11 @@ int module_finalize(const Elf_Ehdr *hdr,
if (IS_ENABLED(CONFIG_EXPOLINE) &&
(!strcmp(".nospec_call_table", secname)))
- nospec_call_revert(aseg, aseg + s->sh_size);
+ nospec_revert(aseg, aseg + s->sh_size);
if (IS_ENABLED(CONFIG_EXPOLINE) &&
(!strcmp(".nospec_return_table", secname)))
- nospec_return_revert(aseg, aseg + s->sh_size);
+ nospec_revert(aseg, aseg + s->sh_size);
}
jump_label_apply_nops(me);
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index c7a6276..8c867b4 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -15,7 +15,7 @@
#include <linux/hardirq.h>
#include <linux/log2.h>
#include <linux/kprobes.h>
-#include <linux/slab.h>
+#include <linux/kmemleak.h>
#include <linux/time.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 9aff72d..14867ec 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -1,32 +1,108 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
+#include <linux/device.h>
#include <asm/nospec-branch.h>
-int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
-int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL);
+static int __init nobp_setup_early(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ if (enabled && test_facility(82)) {
+ /*
+ * The user explicitely requested nobp=1, enable it and
+ * disable the expoline support.
+ */
+ __set_facility(82, S390_lowcore.alt_stfle_fac_list);
+ if (IS_ENABLED(CONFIG_EXPOLINE))
+ nospec_disable = 1;
+ } else {
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ }
+ return 0;
+}
+early_param("nobp", nobp_setup_early);
+
+static int __init nospec_setup_early(char *str)
+{
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ return 0;
+}
+early_param("nospec", nospec_setup_early);
+
+static int __init nospec_report(void)
+{
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ pr_info("Spectre V2 mitigation: execute trampolines.\n");
+ if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+ pr_info("Spectre V2 mitigation: limited branch prediction.\n");
+ return 0;
+}
+arch_initcall(nospec_report);
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ return sprintf(buf, "Mitigation: execute trampolines\n");
+ if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+ return sprintf(buf, "Mitigation: limited branch prediction.\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+#endif
+
+#ifdef CONFIG_EXPOLINE
+
+int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
static int __init nospectre_v2_setup_early(char *str)
{
- nospec_call_disable = 1;
- nospec_return_disable = 1;
+ nospec_disable = 1;
return 0;
}
early_param("nospectre_v2", nospectre_v2_setup_early);
+static int __init spectre_v2_auto_early(void)
+{
+ if (IS_ENABLED(CC_USING_EXPOLINE)) {
+ /*
+ * The kernel has been compiled with expolines.
+ * Keep expolines enabled and disable nobp.
+ */
+ nospec_disable = 0;
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ }
+ /*
+ * If the kernel has not been compiled with expolines the
+ * nobp setting decides what is done, this depends on the
+ * CONFIG_KERNEL_NP option and the nobp/nospec parameters.
+ */
+ return 0;
+}
+#ifdef CONFIG_EXPOLINE_AUTO
+early_initcall(spectre_v2_auto_early);
+#endif
+
static int __init spectre_v2_setup_early(char *str)
{
if (str && !strncmp(str, "on", 2)) {
- nospec_call_disable = 0;
- nospec_return_disable = 0;
- }
- if (str && !strncmp(str, "off", 3)) {
- nospec_call_disable = 1;
- nospec_return_disable = 1;
- }
- if (str && !strncmp(str, "auto", 4)) {
- nospec_call_disable = 0;
- nospec_return_disable = 1;
+ nospec_disable = 0;
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
}
+ if (str && !strncmp(str, "off", 3))
+ nospec_disable = 1;
+ if (str && !strncmp(str, "auto", 4))
+ spectre_v2_auto_early();
return 0;
}
early_param("spectre_v2", spectre_v2_setup_early);
@@ -79,15 +155,9 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
}
}
-void __init_or_module nospec_call_revert(s32 *start, s32 *end)
-{
- if (nospec_call_disable)
- __nospec_revert(start, end);
-}
-
-void __init_or_module nospec_return_revert(s32 *start, s32 *end)
+void __init_or_module nospec_revert(s32 *start, s32 *end)
{
- if (nospec_return_disable)
+ if (nospec_disable)
__nospec_revert(start, end);
}
@@ -95,6 +165,8 @@ extern s32 __nospec_call_start[], __nospec_call_end[];
extern s32 __nospec_return_start[], __nospec_return_end[];
void __init nospec_init_branches(void)
{
- nospec_call_revert(__nospec_call_start, __nospec_call_end);
- nospec_return_revert(__nospec_return_start, __nospec_return_end);
+ nospec_revert(__nospec_call_start, __nospec_call_end);
+ nospec_revert(__nospec_return_start, __nospec_return_end);
}
+
+#endif /* CONFIG_EXPOLINE */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index a6a91f0..7b58a71 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -221,6 +221,8 @@ static void __init conmode_default(void)
SET_CONSOLE_SCLP;
#endif
}
+ if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
+ conswitchp = &dummy_con;
}
#ifdef CONFIG_CRASH_DUMP
@@ -413,12 +415,12 @@ static void __init setup_resources(void)
struct memblock_region *reg;
int j;
- code_resource.start = (unsigned long) &_text;
- code_resource.end = (unsigned long) &_etext - 1;
- data_resource.start = (unsigned long) &_etext;
- data_resource.end = (unsigned long) &_edata - 1;
- bss_resource.start = (unsigned long) &__bss_start;
- bss_resource.end = (unsigned long) &__bss_stop - 1;
+ code_resource.start = (unsigned long) _text;
+ code_resource.end = (unsigned long) _etext - 1;
+ data_resource.start = (unsigned long) _etext;
+ data_resource.end = (unsigned long) _edata - 1;
+ bss_resource.start = (unsigned long) __bss_start;
+ bss_resource.end = (unsigned long) __bss_stop - 1;
for_each_memblock(memory, reg) {
res = memblock_virt_alloc(sizeof(*res), 8);
@@ -667,7 +669,7 @@ static void __init check_initrd(void)
*/
static void __init reserve_kernel(void)
{
- unsigned long start_pfn = PFN_UP(__pa(&_end));
+ unsigned long start_pfn = PFN_UP(__pa(_end));
#ifdef CONFIG_DMA_API_DEBUG
/*
@@ -888,9 +890,9 @@ void __init setup_arch(char **cmdline_p)
/* Is init_mm really needed? */
init_mm.start_code = PAGE_OFFSET;
- init_mm.end_code = (unsigned long) &_etext;
- init_mm.end_data = (unsigned long) &_edata;
- init_mm.brk = (unsigned long) &_end;
+ init_mm.end_code = (unsigned long) _etext;
+ init_mm.end_data = (unsigned long) _edata;
+ init_mm.brk = (unsigned long) _end;
parse_early_param();
#ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a4a9fe1..2f8f7d7 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -27,7 +27,6 @@
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
-#include <linux/kmemleak.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index ce329c8..75b7b30 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -153,8 +153,8 @@ int pfn_is_nosave(unsigned long pfn)
{
unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
- unsigned long end_rodata_pfn = PFN_DOWN(__pa(&__end_rodata)) - 1;
- unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
+ unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1;
+ unsigned long stext_pfn = PFN_DOWN(__pa(_stext));
/* Always save lowcore pages (LC protection might be enabled). */
if (pfn <= LC_PAGES)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index c24bfa7..8e2b864 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1050,8 +1050,7 @@ shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
- /* fallthrough */
- }
+ } /* fallthrough */
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -1077,8 +1076,7 @@ shadow_r3t:
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
- /* fallthrough */
- }
+ } /* fallthrough */
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -1113,8 +1111,7 @@ shadow_sgt:
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
- /* fallthrough */
- }
+ } /* fallthrough */
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 07c6e81..a389fa8 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -50,18 +50,6 @@ u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
return ilen;
}
-static int handle_noop(struct kvm_vcpu *vcpu)
-{
- switch (vcpu->arch.sie_block->icptcode) {
- case 0x10:
- vcpu->stat.exit_external_request++;
- break;
- default:
- break; /* nothing */
- }
- return 0;
-}
-
static int handle_stop(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -465,8 +453,11 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
switch (vcpu->arch.sie_block->icptcode) {
case ICPT_EXTREQ:
+ vcpu->stat.exit_external_request++;
+ return 0;
case ICPT_IOREQ:
- return handle_noop(vcpu);
+ vcpu->stat.exit_io_request++;
+ return 0;
case ICPT_INST:
rc = handle_instruction(vcpu);
break;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index b04616b..37d06e0 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -391,6 +391,7 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc;
+ vcpu->stat.deliver_cputm++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
0, 0);
@@ -410,6 +411,7 @@ static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc;
+ vcpu->stat.deliver_ckc++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
0, 0);
@@ -595,6 +597,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_MCHK,
mchk.cr14, mchk.mcic);
+ vcpu->stat.deliver_machine_check++;
rc = __write_machine_check(vcpu, &mchk);
}
return rc;
@@ -710,7 +713,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
pgm_info.code, ilen);
- vcpu->stat.deliver_program_int++;
+ vcpu->stat.deliver_program++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
@@ -899,7 +902,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 4,
"deliver: virtio parm: 0x%x,parm64: 0x%llx",
inti->ext.ext_params, inti->ext.ext_params2);
- vcpu->stat.deliver_virtio_interrupt++;
+ vcpu->stat.deliver_virtio++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
inti->type,
inti->ext.ext_params,
@@ -975,7 +978,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
inti->io.subchannel_id >> 1 & 0x3,
inti->io.subchannel_nr);
- vcpu->stat.deliver_io_int++;
+ vcpu->stat.deliver_io++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
inti->type,
((__u32)inti->io.subchannel_id << 16) |
@@ -1004,7 +1007,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc);
memset(&io, 0, sizeof(io));
io.io_int_word = isc_to_int_word(isc);
- vcpu->stat.deliver_io_int++;
+ vcpu->stat.deliver_io++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_INT_IO(1, 0, 0, 0),
((__u32)io.subchannel_id << 16) |
@@ -1268,6 +1271,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ vcpu->stat.inject_program++;
VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
irq->u.pgm.code, 0);
@@ -1309,6 +1313,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ vcpu->stat.inject_pfault_init++;
VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
irq->u.ext.ext_params2);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
@@ -1327,6 +1332,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
uint16_t src_id = irq->u.extcall.code;
+ vcpu->stat.inject_external_call++;
VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
src_id);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
@@ -1351,6 +1357,7 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
+ vcpu->stat.inject_set_prefix++;
VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
irq->u.prefix.address);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
@@ -1371,6 +1378,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
struct kvm_s390_stop_info *stop = &li->irq.stop;
int rc = 0;
+ vcpu->stat.inject_stop_signal++;
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
@@ -1395,6 +1403,7 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ vcpu->stat.inject_restart++;
VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
@@ -1407,6 +1416,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ vcpu->stat.inject_emergency_signal++;
VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
irq->u.emerg.code);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
@@ -1427,6 +1437,7 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
+ vcpu->stat.inject_mchk++;
VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
irq->u.mchk.mcic);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
@@ -1457,6 +1468,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ vcpu->stat.inject_ckc++;
VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
0, 0);
@@ -1470,6 +1482,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ vcpu->stat.inject_cputm++;
VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
0, 0);
@@ -1596,6 +1609,7 @@ static int __inject_service(struct kvm *kvm,
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ kvm->stat.inject_service_signal++;
spin_lock(&fi->lock);
fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
/*
@@ -1621,6 +1635,7 @@ static int __inject_virtio(struct kvm *kvm,
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ kvm->stat.inject_virtio++;
spin_lock(&fi->lock);
if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
spin_unlock(&fi->lock);
@@ -1638,6 +1653,7 @@ static int __inject_pfault_done(struct kvm *kvm,
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ kvm->stat.inject_pfault_done++;
spin_lock(&fi->lock);
if (fi->counters[FIRQ_CNTR_PFAULT] >=
(ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
@@ -1657,6 +1673,7 @@ static int __inject_float_mchk(struct kvm *kvm,
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ kvm->stat.inject_float_mchk++;
spin_lock(&fi->lock);
fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
fi->mchk.mcic |= inti->mchk.mcic;
@@ -1672,6 +1689,7 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
struct list_head *list;
int isc;
+ kvm->stat.inject_io++;
isc = int_word_to_isc(inti->io.io_int_word);
if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 339ac09..64c9862 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -57,6 +57,7 @@
(KVM_MAX_VCPUS + LOCAL_IRQS))
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "userspace_handled", VCPU_STAT(exit_userspace) },
@@ -64,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_validity", VCPU_STAT(exit_validity) },
{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
{ "exit_external_request", VCPU_STAT(exit_external_request) },
+ { "exit_io_request", VCPU_STAT(exit_io_request) },
{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_pei", VCPU_STAT(exit_pei) },
@@ -78,16 +80,34 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
+ { "deliver_ckc", VCPU_STAT(deliver_ckc) },
+ { "deliver_cputm", VCPU_STAT(deliver_cputm) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
- { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
+ { "deliver_virtio", VCPU_STAT(deliver_virtio) },
{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
- { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
- { "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },
+ { "deliver_program", VCPU_STAT(deliver_program) },
+ { "deliver_io", VCPU_STAT(deliver_io) },
+ { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "inject_ckc", VCPU_STAT(inject_ckc) },
+ { "inject_cputm", VCPU_STAT(inject_cputm) },
+ { "inject_external_call", VCPU_STAT(inject_external_call) },
+ { "inject_float_mchk", VM_STAT(inject_float_mchk) },
+ { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
+ { "inject_io", VM_STAT(inject_io) },
+ { "inject_mchk", VCPU_STAT(inject_mchk) },
+ { "inject_pfault_done", VM_STAT(inject_pfault_done) },
+ { "inject_program", VCPU_STAT(inject_program) },
+ { "inject_restart", VCPU_STAT(inject_restart) },
+ { "inject_service_signal", VM_STAT(inject_service_signal) },
+ { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
+ { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
+ { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
+ { "inject_virtio", VM_STAT(inject_virtio) },
{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
{ "instruction_gs", VCPU_STAT(instruction_gs) },
{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
@@ -152,13 +172,33 @@ static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
-/* upper facilities limit for kvm */
-unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
-unsigned long kvm_s390_fac_list_mask_size(void)
+/*
+ * For now we handle at most 16 double words as this is what the s390 base
+ * kernel handles and stores in the prefix page. If we ever need to go beyond
+ * this, this requires changes to code, but the external uapi can stay.
+ */
+#define SIZE_INTERNAL 16
+
+/*
+ * Base feature mask that defines default mask for facilities. Consists of the
+ * defines in FACILITIES_KVM and the non-hypervisor managed bits.
+ */
+static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
+/*
+ * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
+ * and defines the facilities that can be enabled via a cpu model.
+ */
+static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
+
+static unsigned long kvm_s390_fac_size(void)
{
- BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
- return ARRAY_SIZE(kvm_s390_fac_list_mask);
+ BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
+ BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
+ BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
+ sizeof(S390_lowcore.stfle_fac_list));
+
+ return SIZE_INTERNAL;
}
/* available cpu features supported by kvm */
@@ -679,6 +719,8 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
mutex_lock(&kvm->lock);
if (!kvm->created_vcpus) {
kvm->arch.use_cmma = 1;
+ /* Not compatible with cmma. */
+ kvm->arch.use_pfmfi = 0;
ret = 0;
}
mutex_unlock(&kvm->lock);
@@ -1583,7 +1625,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
return -EINVAL;
/* CMMA is disabled or was not used, or the buffer has length zero */
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
- if (!bufsize || !kvm->mm->context.use_cmma) {
+ if (!bufsize || !kvm->mm->context.uses_cmm) {
memset(args, 0, sizeof(*args));
return 0;
}
@@ -1660,7 +1702,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
/*
* This function sets the CMMA attributes for the given pages. If the input
* buffer has zero length, no action is taken, otherwise the attributes are
- * set and the mm->context.use_cmma flag is set.
+ * set and the mm->context.uses_cmm flag is set.
*/
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
const struct kvm_s390_cmma_log *args)
@@ -1710,9 +1752,9 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&kvm->mm->mmap_sem);
- if (!kvm->mm->context.use_cmma) {
+ if (!kvm->mm->context.uses_cmm) {
down_write(&kvm->mm->mmap_sem);
- kvm->mm->context.use_cmma = 1;
+ kvm->mm->context.uses_cmm = 1;
up_write(&kvm->mm->mmap_sem);
}
out:
@@ -1967,20 +2009,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.sie_page2)
goto out_err;
- /* Populate the facility mask initially. */
- memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
- sizeof(S390_lowcore.stfle_fac_list));
- for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
- if (i < kvm_s390_fac_list_mask_size())
- kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
- else
- kvm->arch.model.fac_mask[i] = 0UL;
- }
-
- /* Populate the facility list initially. */
kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
- memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
- S390_ARCH_FAC_LIST_SIZE_BYTE);
+
+ for (i = 0; i < kvm_s390_fac_size(); i++) {
+ kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
+ (kvm_s390_fac_base[i] |
+ kvm_s390_fac_ext[i]);
+ kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
+ kvm_s390_fac_base[i];
+ }
/* we are always in czam mode - even on pre z14 machines */
set_kvm_facility(kvm->arch.model.fac_mask, 138);
@@ -2028,6 +2065,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.css_support = 0;
kvm->arch.use_irqchip = 0;
+ kvm->arch.use_pfmfi = sclp.has_pfmfi;
kvm->arch.epoch = 0;
spin_lock_init(&kvm->arch.start_stop_lock);
@@ -2454,8 +2492,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
if (!vcpu->arch.sie_block->cbrlo)
return -ENOMEM;
-
- vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
return 0;
}
@@ -2491,7 +2527,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
if (test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= ECB_TE;
- if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
+ if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
if (test_kvm_facility(vcpu->kvm, 130))
vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
@@ -3023,7 +3059,7 @@ retry:
if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
/*
- * Disable CMMA virtualization; we will emulate the ESSA
+ * Disable CMM virtualization; we will emulate the ESSA
* instruction manually, in order to provide additional
* functionalities needed for live migration.
*/
@@ -3033,11 +3069,11 @@ retry:
if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
/*
- * Re-enable CMMA virtualization if CMMA is available and
- * was used.
+ * Re-enable CMM virtualization if CMMA is available and
+ * CMM has been used.
*/
if ((vcpu->kvm->arch.use_cmma) &&
- (vcpu->kvm->mm->context.use_cmma))
+ (vcpu->kvm->mm->context.uses_cmm))
vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
goto retry;
}
@@ -4044,7 +4080,7 @@ static int __init kvm_s390_init(void)
}
for (i = 0; i < 16; i++)
- kvm_s390_fac_list_mask[i] |=
+ kvm_s390_fac_base[i] |=
S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f55ac0e..1b5621f 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -294,8 +294,6 @@ void exit_sie(struct kvm_vcpu *vcpu);
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
-unsigned long kvm_s390_fac_list_mask_size(void);
-extern unsigned long kvm_s390_fac_list_mask[];
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f0b4185..ebfa044 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1078,9 +1078,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
* value really needs to be written to; if the value is
* already correct, we do nothing and avoid the lock.
*/
- if (vcpu->kvm->mm->context.use_cmma == 0) {
+ if (vcpu->kvm->mm->context.uses_cmm == 0) {
down_write(&vcpu->kvm->mm->mmap_sem);
- vcpu->kvm->mm->context.use_cmma = 1;
+ vcpu->kvm->mm->context.uses_cmm = 1;
up_write(&vcpu->kvm->mm->mmap_sem);
}
/*
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 507f23b..7cdea2e 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -24,8 +24,8 @@ enum address_markers_idx {
static struct addr_marker address_markers[] = {
[IDENTITY_NR] = {0, "Identity Mapping"},
- [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
- [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"},
+ [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
+ [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
[VMEMMAP_NR] = {0, "vmemmap Area"},
[VMALLOC_NR] = {0, "vmalloc Area"},
[MODULES_NR] = {0, "Modules Area"},
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 831bdcf..0a7627c 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -37,11 +37,11 @@ static unsigned long stack_maxrandom_size(void)
#define MIN_GAP (32*1024*1024)
#define MAX_GAP (STACK_TOP/6*5)
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
@@ -56,9 +56,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd)
return TASK_UNMAPPED_BASE + rnd;
}
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+ struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
@@ -184,7 +185,7 @@ check_asce_limit:
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@@ -195,11 +196,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
- if (mmap_is_legacy()) {
+ if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = mmap_base_legacy(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base(random_factor);
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index cb36415..562f729 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -6,8 +6,9 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/mm.h>
#include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
@@ -366,3 +367,293 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
if ((*batch)->nr == MAX_TABLE_BATCH)
tlb_flush_mmu(tlb);
}
+
+/*
+ * Base infrastructure required to generate basic asces, region, segment,
+ * and page tables that do not make use of enhanced features like EDAT1.
+ */
+
+static struct kmem_cache *base_pgt_cache;
+
+static unsigned long base_pgt_alloc(void)
+{
+ u64 *table;
+
+ table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
+ if (table)
+ memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
+ return (unsigned long) table;
+}
+
+static void base_pgt_free(unsigned long table)
+{
+ kmem_cache_free(base_pgt_cache, (void *) table);
+}
+
+static unsigned long base_crst_alloc(unsigned long val)
+{
+ unsigned long table;
+
+ table = __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init((unsigned long *)table, val);
+ return table;
+}
+
+static void base_crst_free(unsigned long table)
+{
+ free_pages(table, CRST_ALLOC_ORDER);
+}
+
+#define BASE_ADDR_END_FUNC(NAME, SIZE) \
+static inline unsigned long base_##NAME##_addr_end(unsigned long addr, \
+ unsigned long end) \
+{ \
+ unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1); \
+ \
+ return (next - 1) < (end - 1) ? next : end; \
+}
+
+BASE_ADDR_END_FUNC(page, _PAGE_SIZE)
+BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
+BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
+BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
+BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
+
+static inline unsigned long base_lra(unsigned long address)
+{
+ unsigned long real;
+
+ asm volatile(
+ " lra %0,0(%1)\n"
+ : "=d" (real) : "a" (address) : "cc");
+ return real;
+}
+
+static int base_page_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *pte, next;
+
+ if (!alloc)
+ return 0;
+ pte = (unsigned long *) origin;
+ pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
+ do {
+ next = base_page_addr_end(addr, end);
+ *pte = base_lra(addr);
+ } while (pte++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_segment_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *ste, next, table;
+ int rc;
+
+ ste = (unsigned long *) origin;
+ ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+ do {
+ next = base_segment_addr_end(addr, end);
+ if (*ste & _SEGMENT_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_pgt_alloc();
+ if (!table)
+ return -ENOMEM;
+ *ste = table | _SEGMENT_ENTRY;
+ }
+ table = *ste & _SEGMENT_ENTRY_ORIGIN;
+ rc = base_page_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_pgt_free(table);
+ cond_resched();
+ } while (ste++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region3_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rtte, next, table;
+ int rc;
+
+ rtte = (unsigned long *) origin;
+ rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
+ do {
+ next = base_region3_addr_end(addr, end);
+ if (*rtte & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rtte = table | _REGION3_ENTRY;
+ }
+ table = *rtte & _REGION_ENTRY_ORIGIN;
+ rc = base_segment_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rtte++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region2_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rste, next, table;
+ int rc;
+
+ rste = (unsigned long *) origin;
+ rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
+ do {
+ next = base_region2_addr_end(addr, end);
+ if (*rste & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rste = table | _REGION2_ENTRY;
+ }
+ table = *rste & _REGION_ENTRY_ORIGIN;
+ rc = base_region3_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rste++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region1_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rfte, next, table;
+ int rc;
+
+ rfte = (unsigned long *) origin;
+ rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
+ do {
+ next = base_region1_addr_end(addr, end);
+ if (*rfte & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rfte = table | _REGION1_ENTRY;
+ }
+ table = *rfte & _REGION_ENTRY_ORIGIN;
+ rc = base_region2_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rfte++, addr = next, addr < end);
+ return 0;
+}
+
+/**
+ * base_asce_free - free asce and tables returned from base_asce_alloc()
+ * @asce: asce to be freed
+ *
+ * Frees all region, segment, and page tables that were allocated with a
+ * corresponding base_asce_alloc() call.
+ */
+void base_asce_free(unsigned long asce)
+{
+ unsigned long table = asce & _ASCE_ORIGIN;
+
+ if (!asce)
+ return;
+ switch (asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_SEGMENT:
+ base_segment_walk(table, 0, _REGION3_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION3:
+ base_region3_walk(table, 0, _REGION2_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION2:
+ base_region2_walk(table, 0, _REGION1_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION1:
+ base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+ break;
+ }
+ base_crst_free(table);
+}
+
+static int base_pgt_cache_init(void)
+{
+ static DEFINE_MUTEX(base_pgt_cache_mutex);
+ unsigned long sz = _PAGE_TABLE_SIZE;
+
+ if (base_pgt_cache)
+ return 0;
+ mutex_lock(&base_pgt_cache_mutex);
+ if (!base_pgt_cache)
+ base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
+ mutex_unlock(&base_pgt_cache_mutex);
+ return base_pgt_cache ? 0 : -ENOMEM;
+}
+
+/**
+ * base_asce_alloc - create kernel mapping without enhanced DAT features
+ * @addr: virtual start address of kernel mapping
+ * @num_pages: number of consecutive pages
+ *
+ * Generate an asce, including all required region, segment and page tables,
+ * that can be used to access the virtual kernel mapping. The difference is
+ * that the returned asce does not make use of any enhanced DAT features like
+ * e.g. large pages. This is required for some I/O functions that pass an
+ * asce, like e.g. some service call requests.
+ *
+ * Note: the returned asce may NEVER be attached to any cpu. It may only be
+ * used for I/O requests. tlb entries that might result because the
+ * asce was attached to a cpu won't be cleared.
+ */
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
+{
+ unsigned long asce, table, end;
+ int rc;
+
+ if (base_pgt_cache_init())
+ return 0;
+ end = addr + num_pages * PAGE_SIZE;
+ if (end <= _REGION3_SIZE) {
+ table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_segment_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
+ } else if (end <= _REGION2_SIZE) {
+ table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region3_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ } else if (end <= _REGION1_SIZE) {
+ table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region2_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ } else {
+ table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region1_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
+ }
+ if (rc) {
+ base_asce_free(asce);
+ asce = 0;
+ }
+ return asce;
+}
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 424a1ba..90a8c9e 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -62,6 +62,13 @@ static struct facility_def facility_defs[] = {
}
},
{
+ /*
+ * FACILITIES_KVM contains the list of facilities that are part
+ * of the default facility mask and list that are passed to the
+ * initial CPU model. If no CPU model is used, this, together
+ * with the non-hypervisor managed bits, is the maximum list of
+ * guest facilities supported by KVM.
+ */
.name = "FACILITIES_KVM",
.bits = (int[]){
0, /* N3 instructions */
@@ -89,6 +96,19 @@ static struct facility_def facility_defs[] = {
-1 /* END */
}
},
+ {
+ /*
+ * FACILITIES_KVM_CPUMODEL contains the list of facilities
+ * that can be enabled by CPU model code if the host supports
+ * it. These facilities are not passed to the guest without
+ * CPU model support.
+ */
+
+ .name = "FACILITIES_KVM_CPUMODEL",
+ .bits = (int[]){
+ -1 /* END */
+ }
+ },
};
static void print_facility_list(struct facility_def *def)
diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index 2c4771e..d7d232d 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -25,6 +25,7 @@
#include <linux/io.h>
#include <linux/clk.h>
#include <linux/errno.h>
+#include <linux/gpio/machine.h>
#include <mach/sh7785lcr.h>
#include <cpu/sh7785.h>
#include <asm/heartbeat.h>
@@ -243,8 +244,15 @@ static struct resource i2c_resources[] = {
},
};
+static struct gpiod_lookup_table i2c_gpio_table = {
+ .dev_id = "i2c.0",
+ .table = {
+ GPIO_LOOKUP("pfc-sh7757", 0, "reset-gpios", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static struct i2c_pca9564_pf_platform_data i2c_platform_data = {
- .gpio = 0,
.i2c_clock_speed = I2C_PCA_CON_330kHz,
.timeout = HZ,
};
@@ -283,6 +291,7 @@ static int __init sh7785lcr_devices_setup(void)
i2c_device.num_resources = ARRAY_SIZE(i2c_proto_resources);
}
+ gpiod_add_lookup_table(&i2c_gpio_table);
return platform_add_devices(sh7785lcr_devices,
ARRAY_SIZE(sh7785lcr_devices));
}
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index 627ce8e..c15cac9 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -104,12 +104,7 @@ static void error(char *x)
while(1); /* Halt */
}
-unsigned long __stack_chk_guard;
-
-void __stack_chk_guard_setup(void)
-{
- __stack_chk_guard = 0x000a0dff;
-}
+const unsigned long __stack_chk_guard = 0x000a0dff;
void __stack_chk_fail(void)
{
@@ -130,8 +125,6 @@ void decompress_kernel(void)
{
unsigned long output_addr;
- __stack_chk_guard_setup();
-
#ifdef CONFIG_SUPERH64
output_addr = (CONFIG_MEMORY_START + 0x2000);
#else
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 58aaa4f..eee9114 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -112,7 +112,7 @@ static void sh4_flush_dcache_page(void *arg)
struct page *page = arg;
unsigned long addr = (unsigned long)page_address(page);
#ifndef CONFIG_SMP
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = page_mapping_file(page);
if (mapping && !mapping_mapped(mapping))
clear_bit(PG_dcache_clean, &page->flags);
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 6cd2aa3..ed25eba 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -136,7 +136,7 @@ static void __flush_dcache_page(unsigned long phys)
static void sh7705_flush_dcache_page(void *arg)
{
struct page *page = arg;
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = page_mapping_file(page);
if (mapping && !mapping_mapped(mapping))
clear_bit(PG_dcache_clean, &page->flags);
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 358fe4e..4d36969 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -801,27 +801,12 @@ SUN_PI_(lda [%l4] ASI_M_MMUREGS, %l5) ! read sfsr last
RESTORE_ALL
.align 4
- .globl sys_nis_syscall
-sys_nis_syscall:
- mov %o7, %l5
- add %sp, STACKFRAME_SZ, %o0 ! pt_regs *regs arg
- call c_sys_nis_syscall
- mov %l5, %o7
-
sunos_execv:
.globl sunos_execv
b sys_execve
clr %i2
.align 4
- .globl sys_sparc_pipe
-sys_sparc_pipe:
- mov %o7, %l5
- add %sp, STACKFRAME_SZ, %o0 ! pt_regs *regs arg
- call sparc_pipe
- mov %l5, %o7
-
- .align 4
.globl sys_sigstack
sys_sigstack:
mov %o7, %l5
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index d66dde8..713670e 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -22,7 +22,6 @@
#include <linux/seq_file.h>
#include <linux/ftrace.h>
#include <linux/irq.h>
-#include <linux/kmemleak.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 220d0f3..41b20ed 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -664,12 +664,12 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
printk("PCI: Scanning PBM %s\n", node->full_name);
pci_add_resource_offset(&resources, &pbm->io_space,
- pbm->io_space.start);
+ pbm->io_offset);
pci_add_resource_offset(&resources, &pbm->mem_space,
- pbm->mem_space.start);
+ pbm->mem_offset);
if (pbm->mem64_space.flags)
pci_add_resource_offset(&resources, &pbm->mem64_space,
- pbm->mem_space.start);
+ pbm->mem64_offset);
pbm->busn.start = pbm->pci_first_busno;
pbm->busn.end = pbm->pci_last_busno;
pbm->busn.flags = IORESOURCE_BUS;
diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c
index 1e10fb26..38d46bc 100644
--- a/arch/sparc/kernel/pci_common.c
+++ b/arch/sparc/kernel/pci_common.c
@@ -344,26 +344,6 @@ static void pci_register_legacy_regions(struct resource *io_res,
p->end = p->start + 0x1ffffUL;
p->flags = IORESOURCE_BUSY;
request_resource(mem_res, p);
-
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p)
- return;
-
- p->name = "System ROM";
- p->start = mem_res->start + 0xf0000UL;
- p->end = p->start + 0xffffUL;
- p->flags = IORESOURCE_BUSY;
- request_resource(mem_res, p);
-
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p)
- return;
-
- p->name = "Video ROM";
- p->start = mem_res->start + 0xc0000UL;
- p->end = p->start + 0x7fffUL;
- p->flags = IORESOURCE_BUSY;
- request_resource(mem_res, p);
}
static void pci_register_iommu_region(struct pci_pbm_info *pbm)
@@ -397,6 +377,8 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
int i, saw_mem, saw_io;
int num_pbm_ranges;
+ /* Corresponding generic code in of_pci_get_host_bridge_resources() */
+
saw_mem = saw_io = 0;
pbm_ranges = of_get_property(pbm->op->dev.of_node, "ranges", &i);
if (!pbm_ranges) {
@@ -411,13 +393,16 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
for (i = 0; i < num_pbm_ranges; i++) {
const struct linux_prom_pci_ranges *pr = &pbm_ranges[i];
- unsigned long a, size;
+ unsigned long a, size, region_a;
u32 parent_phys_hi, parent_phys_lo;
+ u32 child_phys_mid, child_phys_lo;
u32 size_hi, size_lo;
int type;
parent_phys_hi = pr->parent_phys_hi;
parent_phys_lo = pr->parent_phys_lo;
+ child_phys_mid = pr->child_phys_mid;
+ child_phys_lo = pr->child_phys_lo;
if (tlb_type == hypervisor)
parent_phys_hi &= 0x0fffffff;
@@ -427,6 +412,8 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
type = (pr->child_phys_hi >> 24) & 0x3;
a = (((unsigned long)parent_phys_hi << 32UL) |
((unsigned long)parent_phys_lo << 0UL));
+ region_a = (((unsigned long)child_phys_mid << 32UL) |
+ ((unsigned long)child_phys_lo << 0UL));
size = (((unsigned long)size_hi << 32UL) |
((unsigned long)size_lo << 0UL));
@@ -441,6 +428,7 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
pbm->io_space.start = a;
pbm->io_space.end = a + size - 1UL;
pbm->io_space.flags = IORESOURCE_IO;
+ pbm->io_offset = a - region_a;
saw_io = 1;
break;
@@ -449,6 +437,7 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
pbm->mem_space.start = a;
pbm->mem_space.end = a + size - 1UL;
pbm->mem_space.flags = IORESOURCE_MEM;
+ pbm->mem_offset = a - region_a;
saw_mem = 1;
break;
@@ -457,6 +446,7 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
pbm->mem64_space.start = a;
pbm->mem64_space.end = a + size - 1UL;
pbm->mem64_space.flags = IORESOURCE_MEM;
+ pbm->mem64_offset = a - region_a;
saw_mem = 1;
break;
@@ -472,14 +462,22 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
prom_halt();
}
- printk("%s: PCI IO[%llx] MEM[%llx]",
- pbm->name,
- pbm->io_space.start,
- pbm->mem_space.start);
+ if (pbm->io_space.flags)
+ printk("%s: PCI IO %pR offset %llx\n",
+ pbm->name, &pbm->io_space, pbm->io_offset);
+ if (pbm->mem_space.flags)
+ printk("%s: PCI MEM %pR offset %llx\n",
+ pbm->name, &pbm->mem_space, pbm->mem_offset);
+ if (pbm->mem64_space.flags && pbm->mem_space.flags) {
+ if (pbm->mem64_space.start <= pbm->mem_space.end)
+ pbm->mem64_space.start = pbm->mem_space.end + 1;
+ if (pbm->mem64_space.start > pbm->mem64_space.end)
+ pbm->mem64_space.flags = 0;
+ }
+
if (pbm->mem64_space.flags)
- printk(" MEM64[%llx]",
- pbm->mem64_space.start);
- printk("\n");
+ printk("%s: PCI MEM64 %pR offset %llx\n",
+ pbm->name, &pbm->mem64_space, pbm->mem64_offset);
pbm->io_space.name = pbm->mem_space.name = pbm->name;
pbm->mem64_space.name = pbm->name;
diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h
index ac17296..4e3d151 100644
--- a/arch/sparc/kernel/pci_impl.h
+++ b/arch/sparc/kernel/pci_impl.h
@@ -100,6 +100,10 @@ struct pci_pbm_info {
struct resource mem_space;
struct resource mem64_space;
struct resource busn;
+ /* offset */
+ resource_size_t io_offset;
+ resource_size_t mem_offset;
+ resource_size_t mem64_offset;
/* Base of PCI Config space, can be per-PBM or shared. */
unsigned long config_space;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index c50182c..d3ea1f3 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -929,9 +929,9 @@ static inline void __local_flush_dcache_page(struct page *page)
#ifdef DCACHE_ALIASING_POSSIBLE
__flush_dcache_page(page_address(page),
((tlb_type == spitfire) &&
- page_mapping(page) != NULL));
+ page_mapping_file(page) != NULL));
#else
- if (page_mapping(page) != NULL &&
+ if (page_mapping_file(page) != NULL &&
tlb_type == spitfire)
__flush_icache_page(__pa(page_address(page)));
#endif
@@ -958,7 +958,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
if (tlb_type == spitfire) {
data0 = ((u64)&xcall_flush_dcache_page_spitfire);
- if (page_mapping(page) != NULL)
+ if (page_mapping_file(page) != NULL)
data0 |= ((u64)1 << 32);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
@@ -994,7 +994,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
pg_addr = page_address(page);
if (tlb_type == spitfire) {
data0 = ((u64)&xcall_flush_dcache_page_spitfire);
- if (page_mapping(page) != NULL)
+ if (page_mapping_file(page) != NULL)
data0 |= ((u64)1 << 32);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index 7e7011a..489ffab 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -13,44 +13,6 @@
.text
-#define SIGN1(STUB,SYSCALL,REG1) \
- .align 32; \
- .globl STUB; \
-STUB: sethi %hi(SYSCALL), %g1; \
- jmpl %g1 + %lo(SYSCALL), %g0; \
- sra REG1, 0, REG1
-
-#define SIGN2(STUB,SYSCALL,REG1,REG2) \
- .align 32; \
- .globl STUB; \
-STUB: sethi %hi(SYSCALL), %g1; \
- sra REG1, 0, REG1; \
- jmpl %g1 + %lo(SYSCALL), %g0; \
- sra REG2, 0, REG2
-
-#define SIGN3(STUB,SYSCALL,REG1,REG2,REG3) \
- .align 32; \
- .globl STUB; \
-STUB: sra REG1, 0, REG1; \
- sethi %hi(SYSCALL), %g1; \
- sra REG2, 0, REG2; \
- jmpl %g1 + %lo(SYSCALL), %g0; \
- sra REG3, 0, REG3
-
-SIGN1(sys32_readahead, compat_sys_readahead, %o0)
-SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4)
-SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5)
-SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1)
-SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1)
-SIGN1(sys32_io_submit, compat_sys_io_submit, %o1)
-SIGN1(sys32_mq_open, compat_sys_mq_open, %o1)
-SIGN1(sys32_select, compat_sys_select, %o0)
-SIGN1(sys32_futex, compat_sys_futex, %o1)
-SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0)
-SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0)
-SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0)
-SIGN2(sys32_renameat2, sys_renameat2, %o0, %o2)
-
.globl sys32_mmap2
sys32_mmap2:
sethi %hi(sys_mmap), %g1
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index f166e5b..b5da3bf 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -52,20 +52,14 @@
#include "systbls.h"
-asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low)
+COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, path, u32, high, u32, low)
{
- if ((int)high < 0)
- return -EINVAL;
- else
- return ksys_truncate(path, (high << 32) | low);
+ return ksys_truncate(path, ((u64)high << 32) | low);
}
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low)
+COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, u32, high, u32, low)
{
- if ((int)high < 0)
- return -EINVAL;
- else
- return ksys_ftruncate(fd, (high << 32) | low);
+ return ksys_ftruncate(fd, ((u64)high << 32) | low);
}
static int cp_compat_stat64(struct kstat *stat,
@@ -98,8 +92,8 @@ static int cp_compat_stat64(struct kstat *stat,
return err;
}
-asmlinkage long compat_sys_stat64(const char __user * filename,
- struct compat_stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(stat64, const char __user *, filename,
+ struct compat_stat64 __user *, statbuf)
{
struct kstat stat;
int error = vfs_stat(filename, &stat);
@@ -109,8 +103,8 @@ asmlinkage long compat_sys_stat64(const char __user * filename,
return error;
}
-asmlinkage long compat_sys_lstat64(const char __user * filename,
- struct compat_stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(lstat64, const char __user *, filename,
+ struct compat_stat64 __user *, statbuf)
{
struct kstat stat;
int error = vfs_lstat(filename, &stat);
@@ -120,8 +114,8 @@ asmlinkage long compat_sys_lstat64(const char __user * filename,
return error;
}
-asmlinkage long compat_sys_fstat64(unsigned int fd,
- struct compat_stat64 __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(fstat64, unsigned int, fd,
+ struct compat_stat64 __user *, statbuf)
{
struct kstat stat;
int error = vfs_fstat(fd, &stat);
@@ -131,9 +125,9 @@ asmlinkage long compat_sys_fstat64(unsigned int fd,
return error;
}
-asmlinkage long compat_sys_fstatat64(unsigned int dfd,
- const char __user *filename,
- struct compat_stat64 __user * statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(fstatat64, unsigned int, dfd,
+ const char __user *, filename,
+ struct compat_stat64 __user *, statbuf, int, flag)
{
struct kstat stat;
int error;
@@ -194,61 +188,50 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig,
return ret;
}
-asmlinkage compat_ssize_t sys32_pread64(unsigned int fd,
- char __user *ubuf,
- compat_size_t count,
- unsigned long poshi,
- unsigned long poslo)
+COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, ubuf,
+ compat_size_t, count, u32, poshi, u32, poslo)
{
- return ksys_pread64(fd, ubuf, count, (poshi << 32) | poslo);
+ return ksys_pread64(fd, ubuf, count, ((u64)poshi << 32) | poslo);
}
-asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd,
- char __user *ubuf,
- compat_size_t count,
- unsigned long poshi,
- unsigned long poslo)
+COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, char __user *, ubuf,
+ compat_size_t, count, u32, poshi, u32, poslo)
{
- return ksys_pwrite64(fd, ubuf, count, (poshi << 32) | poslo);
+ return ksys_pwrite64(fd, ubuf, count, ((u64)poshi << 32) | poslo);
}
-asmlinkage long compat_sys_readahead(int fd,
- unsigned long offhi,
- unsigned long offlo,
- compat_size_t count)
+COMPAT_SYSCALL_DEFINE4(readahead, int, fd, u32, offhi, u32, offlo,
+ compat_size_t, count)
{
- return ksys_readahead(fd, (offhi << 32) | offlo, count);
+ return ksys_readahead(fd, ((u64)offhi << 32) | offlo, count);
}
-long compat_sys_fadvise64(int fd,
- unsigned long offhi,
- unsigned long offlo,
- compat_size_t len, int advice)
+COMPAT_SYSCALL_DEFINE5(fadvise64, int, fd, u32, offhi, u32, offlo,
+ compat_size_t, len, int, advice)
{
- return ksys_fadvise64_64(fd, (offhi << 32) | offlo, len, advice);
+ return ksys_fadvise64_64(fd, ((u64)offhi << 32) | offlo, len, advice);
}
-long compat_sys_fadvise64_64(int fd,
- unsigned long offhi, unsigned long offlo,
- unsigned long lenhi, unsigned long lenlo,
- int advice)
+COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, u32, offhi, u32, offlo,
+ u32, lenhi, u32, lenlo, int, advice)
{
return ksys_fadvise64_64(fd,
- (offhi << 32) | offlo,
- (lenhi << 32) | lenlo,
+ ((u64)offhi << 32) | offlo,
+ ((u64)lenhi << 32) | lenlo,
advice);
}
-long sys32_sync_file_range(unsigned int fd, unsigned long off_high, unsigned long off_low, unsigned long nb_high, unsigned long nb_low, unsigned int flags)
+COMPAT_SYSCALL_DEFINE6(sync_file_range, unsigned int, fd, u32, off_high, u32, off_low,
+ u32, nb_high, u32, nb_low, unsigned int, flags)
{
return ksys_sync_file_range(fd,
- (off_high << 32) | off_low,
- (nb_high << 32) | nb_low,
- flags);
+ ((u64)off_high << 32) | off_low,
+ ((u64)nb_high << 32) | nb_low,
+ flags);
}
-asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
- u32 lenhi, u32 lenlo)
+COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, u32, offhi, u32, offlo,
+ u32, lenhi, u32, lenlo)
{
return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
((loff_t)lenhi << 32) | lenlo);
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index d980da4..e8c3cb6 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -34,7 +34,7 @@
/* XXX Make this per-binary type, this way we can detect the type of
* XXX a binary. Every Sparc executable calls this very early on.
*/
-asmlinkage unsigned long sys_getpagesize(void)
+SYSCALL_DEFINE0(getpagesize)
{
return PAGE_SIZE; /* Possibly older binaries want 8192 on sun4's? */
}
@@ -73,7 +73,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
* sys_pipe() is the normal C calling standard for creating
* a pipe. It's not the way unix traditionally does this, though.
*/
-asmlinkage long sparc_pipe(struct pt_regs *regs)
+SYSCALL_DEFINE0(sparc_pipe)
{
int fd[2];
int error;
@@ -81,7 +81,7 @@ asmlinkage long sparc_pipe(struct pt_regs *regs)
error = do_pipe_flags(fd, 0);
if (error)
goto out;
- regs->u_regs[UREG_I1] = fd[1];
+ current_pt_regs()->u_regs[UREG_I1] = fd[1];
error = fd[0];
out:
return error;
@@ -98,9 +98,9 @@ int sparc_mmap_check(unsigned long addr, unsigned long len)
/* Linux version of mmap */
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags, unsigned long fd,
- unsigned long pgoff)
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags, unsigned long, fd,
+ unsigned long, pgoff)
{
/* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE
we have. */
@@ -108,17 +108,17 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
pgoff >> (PAGE_SHIFT - 12));
}
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags, unsigned long fd,
- unsigned long off)
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags, unsigned long, fd,
+ unsigned long, off)
{
/* no alignment check? */
return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
}
-long sparc_remap_file_pages(unsigned long start, unsigned long size,
- unsigned long prot, unsigned long pgoff,
- unsigned long flags)
+SYSCALL_DEFINE5(sparc_remap_file_pages, unsigned long, start, unsigned long, size,
+ unsigned long, prot, unsigned long, pgoff,
+ unsigned long, flags)
{
/* This works on an existing mmap so we don't need to validate
* the range as that was done at the original mmap call.
@@ -127,11 +127,10 @@ long sparc_remap_file_pages(unsigned long start, unsigned long size,
(pgoff >> (PAGE_SHIFT - 12)), flags);
}
-/* we come to here via sys_nis_syscall so it can setup the regs argument */
-asmlinkage unsigned long
-c_sys_nis_syscall (struct pt_regs *regs)
+SYSCALL_DEFINE0(nis_syscall)
{
static int count = 0;
+ struct pt_regs *regs = current_pt_regs();
if (count++ > 5)
return -ENOSYS;
@@ -202,7 +201,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig,
return ret;
}
-asmlinkage long sys_getdomainname(char __user *name, int len)
+SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
{
int nlen, err;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index ebb84dc..9ef8de6 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -39,7 +39,7 @@
/* #define DEBUG_UNIMP_SYSCALL */
-asmlinkage unsigned long sys_getpagesize(void)
+SYSCALL_DEFINE0(getpagesize)
{
return PAGE_SIZE;
}
@@ -276,7 +276,7 @@ static unsigned long mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = mmap_rnd();
unsigned long gap;
@@ -285,7 +285,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
- gap = rlimit(RLIMIT_STACK);
+ gap = rlim_stack->rlim_cur;
if (!test_thread_flag(TIF_32BIT) ||
(current->personality & ADDR_COMPAT_LAYOUT) ||
gap == RLIM_INFINITY ||
@@ -310,7 +310,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* sys_pipe() is the normal C calling standard for creating
* a pipe. It's not the way unix traditionally does this, though.
*/
-SYSCALL_DEFINE1(sparc_pipe_real, struct pt_regs *, regs)
+SYSCALL_DEFINE0(sparc_pipe)
{
int fd[2];
int error;
@@ -318,7 +318,7 @@ SYSCALL_DEFINE1(sparc_pipe_real, struct pt_regs *, regs)
error = do_pipe_flags(fd, 0);
if (error)
goto out;
- regs->u_regs[UREG_I1] = fd[1];
+ current_pt_regs()->u_regs[UREG_I1] = fd[1];
error = fd[0];
out:
return error;
@@ -480,10 +480,10 @@ SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len,
return sys_mremap(addr, old_len, new_len, flags, new_addr);
}
-/* we come to here via sys_nis_syscall so it can setup the regs argument */
-asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs)
+SYSCALL_DEFINE0(nis_syscall)
{
static int count;
+ struct pt_regs *regs = current_pt_regs();
/* Don't make the system unusable, if someone goes stuck */
if (count++ > 5)
@@ -523,8 +523,6 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs)
exception_exit(prev_state);
}
-extern void check_pending(int signum);
-
SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
{
int nlen, err;
@@ -608,9 +606,9 @@ SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
return 0;
}
-asmlinkage long sparc_memory_ordering(unsigned long model,
- struct pt_regs *regs)
+SYSCALL_DEFINE1(memory_ordering, unsigned long, model)
{
+ struct pt_regs *regs = current_pt_regs();
if (model >= 3)
return -EINVAL;
regs->tstate = (regs->tstate & ~TSTATE_MM) | (model << 14);
@@ -644,7 +642,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
return ret;
}
-asmlinkage long sys_kern_features(void)
+SYSCALL_DEFINE0(kern_features)
{
return KERN_FEATURE_MIXED_MODE_STACK;
}
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index c5f9ec8..db42b4f 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -27,15 +27,6 @@ sys32_execveat:
#endif
.align 32
-sys_sparc_pipe:
- ba,pt %xcc, sys_sparc_pipe_real
- add %sp, PTREGS_OFF, %o0
-sys_nis_syscall:
- ba,pt %xcc, c_sys_nis_syscall
- add %sp, PTREGS_OFF, %o0
-sys_memory_ordering:
- ba,pt %xcc, sparc_memory_ordering
- add %sp, PTREGS_OFF, %o1
#ifdef CONFIG_COMPAT
sys32_sigstack:
ba,pt %xcc, do_sys32_sigstack
diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h
index 5a01cfe..bf01426 100644
--- a/arch/sparc/kernel/systbls.h
+++ b/arch/sparc/kernel/systbls.h
@@ -9,9 +9,9 @@
#include <asm/utrap.h>
-asmlinkage unsigned long sys_getpagesize(void);
-asmlinkage long sparc_pipe(struct pt_regs *regs);
-asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs);
+asmlinkage long sys_getpagesize(void);
+asmlinkage long sys_sparc_pipe(void);
+asmlinkage long sys_nis_syscall(void);
asmlinkage long sys_getdomainname(char __user *name, int len);
void do_rt_sigreturn(struct pt_regs *regs);
asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
@@ -23,7 +23,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs);
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff);
-long sparc_remap_file_pages(unsigned long start, unsigned long size,
+long sys_sparc_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
@@ -46,16 +46,15 @@ asmlinkage long sys_utrap_install(utrap_entry_t type,
utrap_handler_t new_d,
utrap_handler_t __user *old_p,
utrap_handler_t __user *old_d);
-asmlinkage long sparc_memory_ordering(unsigned long model,
- struct pt_regs *regs);
+asmlinkage long sys_memory_ordering(unsigned long model);
asmlinkage void sparc64_set_context(struct pt_regs *regs);
asmlinkage void sparc64_get_context(struct pt_regs *regs);
-asmlinkage long sys32_truncate64(const char __user * path,
- unsigned long high,
- unsigned long low);
-asmlinkage long sys32_ftruncate64(unsigned int fd,
- unsigned long high,
- unsigned long low);
+asmlinkage long compat_sys_truncate64(const char __user * path,
+ u32 high,
+ u32 low);
+asmlinkage long compat_sys_ftruncate64(unsigned int fd,
+ u32 high,
+ u32 low);
struct compat_stat64;
asmlinkage long compat_sys_stat64(const char __user * filename,
struct compat_stat64 __user *statbuf);
@@ -66,31 +65,31 @@ asmlinkage long compat_sys_fstat64(unsigned int fd,
asmlinkage long compat_sys_fstatat64(unsigned int dfd,
const char __user *filename,
struct compat_stat64 __user * statbuf, int flag);
-asmlinkage compat_ssize_t sys32_pread64(unsigned int fd,
+asmlinkage long compat_sys_pread64(unsigned int fd,
char __user *ubuf,
compat_size_t count,
- unsigned long poshi,
- unsigned long poslo);
-asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd,
+ u32 poshi,
+ u32 poslo);
+asmlinkage long compat_sys_pwrite64(unsigned int fd,
char __user *ubuf,
compat_size_t count,
- unsigned long poshi,
- unsigned long poslo);
+ u32 poshi,
+ u32 poslo);
asmlinkage long compat_sys_readahead(int fd,
- unsigned long offhi,
- unsigned long offlo,
+ unsigned offhi,
+ unsigned offlo,
compat_size_t count);
long compat_sys_fadvise64(int fd,
- unsigned long offhi,
- unsigned long offlo,
+ unsigned offhi,
+ unsigned offlo,
compat_size_t len, int advice);
long compat_sys_fadvise64_64(int fd,
- unsigned long offhi, unsigned long offlo,
- unsigned long lenhi, unsigned long lenlo,
+ unsigned offhi, unsigned offlo,
+ unsigned lenhi, unsigned lenlo,
int advice);
-long sys32_sync_file_range(unsigned int fd,
- unsigned long off_high, unsigned long off_low,
- unsigned long nb_high, unsigned long nb_low,
+long compat_sys_sync_file_range(unsigned int fd,
+ unsigned off_high, unsigned off_low,
+ unsigned nb_high, unsigned nb_low,
unsigned int flags);
asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
u32 lenhi, u32 lenlo);
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 731b25d..12bee14 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -55,7 +55,7 @@ sys_call_table:
/*175*/ .long sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr
/*180*/ .long sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_sigpending, sys_ni_syscall
/*185*/ .long sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_newuname
-/*190*/ .long sys_init_module, sys_personality, sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl
+/*190*/ .long sys_init_module, sys_personality, sys_sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl
/*195*/ .long sys_epoll_wait, sys_ioprio_set, sys_getppid, sys_sparc_sigaction, sys_sgetmask
/*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, sys_old_readdir
/*205*/ .long sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 293c1cb..387ef99 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -32,12 +32,12 @@ sys_call_table32:
/*50*/ .word sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, compat_sys_ioctl
.word sys_reboot, sys32_mmap2, sys_symlink, sys_readlink, sys32_execve
/*60*/ .word sys_umask, sys_chroot, compat_sys_newfstat, compat_sys_fstat64, sys_getpagesize
- .word sys_msync, sys_vfork, sys32_pread64, sys32_pwrite64, sys_geteuid
+ .word sys_msync, sys_vfork, compat_sys_pread64, compat_sys_pwrite64, sys_geteuid
/*70*/ .word sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect
- .word sys_madvise, sys_vhangup, sys32_truncate64, sys_mincore, sys_getgroups16
-/*80*/ .word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, sys32_ftruncate64
+ .word sys_madvise, sys_vhangup, compat_sys_truncate64, sys_mincore, sys_getgroups16
+/*80*/ .word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, compat_sys_ftruncate64
.word sys_swapon, compat_sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
-/*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid
+/*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, compat_sys_select, sys_setfsgid
.word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
/*100*/ .word sys_getpriority, sys32_rt_sigreturn, compat_sys_rt_sigaction, compat_sys_rt_sigprocmask, compat_sys_rt_sigpending
.word compat_sys_rt_sigtimedwait, compat_sys_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid
@@ -47,7 +47,7 @@ sys_call_table32:
.word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
/*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown
.word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
-/*140*/ .word sys_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit
+/*140*/ .word sys_sendfile64, sys_nis_syscall, compat_sys_futex, sys_gettid, compat_sys_getrlimit
.word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
/*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
.word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
@@ -60,20 +60,20 @@ sys_call_table32:
/*190*/ .word sys_init_module, sys_sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl
.word sys_epoll_wait, sys_ioprio_set, sys_getppid, compat_sys_sparc_sigaction, sys_sgetmask
/*200*/ .word sys_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_sys_old_readdir
- .word sys32_readahead, sys32_socketcall, sys_syslog, compat_sys_lookup_dcookie, sys32_fadvise64
-/*210*/ .word sys32_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, compat_sys_sysinfo
+ .word compat_sys_readahead, sys32_socketcall, sys_syslog, compat_sys_lookup_dcookie, compat_sys_fadvise64
+/*210*/ .word compat_sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, compat_sys_sysinfo
.word compat_sys_ipc, sys32_sigreturn, sys_clone, sys_ioprio_get, compat_sys_adjtimex
/*220*/ .word compat_sys_sigprocmask, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
.word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
-/*230*/ .word sys32_select, compat_sys_time, sys_splice, compat_sys_stime, compat_sys_statfs64
+/*230*/ .word compat_sys_select, compat_sys_time, sys_splice, compat_sys_stime, compat_sys_statfs64
.word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
/*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
.word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, compat_sys_sched_rr_get_interval, compat_sys_nanosleep
/*250*/ .word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
- .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
-/*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
+ .word compat_sys_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, compat_sys_clock_nanosleep
+/*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_sys_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
.word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
-/*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
+/*270*/ .word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, compat_sys_mq_open, sys_mq_unlink
.word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
/*280*/ .word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat
.word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64
@@ -88,7 +88,7 @@ sys_call_table32:
/*330*/ .word compat_sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
.word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
/*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
- .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
+ .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2
/*360*/ .word sys_statx
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index cb9ebac..8aeb1aa 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -206,9 +206,9 @@ inline void flush_dcache_page_impl(struct page *page)
#ifdef DCACHE_ALIASING_POSSIBLE
__flush_dcache_page(page_address(page),
((tlb_type == spitfire) &&
- page_mapping(page) != NULL));
+ page_mapping_file(page) != NULL));
#else
- if (page_mapping(page) != NULL &&
+ if (page_mapping_file(page) != NULL &&
tlb_type == spitfire)
__flush_icache_page(__pa(page_address(page)));
#endif
@@ -490,7 +490,7 @@ void flush_dcache_page(struct page *page)
this_cpu = get_cpu();
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (mapping && !mapping_mapped(mapping)) {
int dirty = test_bit(PG_dcache_dirty, &page->flags);
if (dirty) {
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b5cfab7..3d72d2d 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -128,7 +128,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
goto no_cache_flush;
/* A real file page? */
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (!mapping)
goto no_cache_flush;
diff --git a/arch/unicore32/include/asm/cacheflush.h b/arch/unicore32/include/asm/cacheflush.h
index a5e08e2..1d9132b 100644
--- a/arch/unicore32/include/asm/cacheflush.h
+++ b/arch/unicore32/include/asm/cacheflush.h
@@ -170,10 +170,8 @@ extern void flush_cache_page(struct vm_area_struct *vma,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
-#define flush_dcache_mmap_lock(mapping) \
- spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
- spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping) do { } while (0)
+#define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_icache_user_range(vma, page, addr, len) \
flush_dcache_page(page)
diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h
index 3bb0a29..66bb9f6 100644
--- a/arch/unicore32/include/asm/memory.h
+++ b/arch/unicore32/include/asm/memory.h
@@ -20,12 +20,6 @@
#include <mach/memory.h>
/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
-/*
* PAGE_OFFSET - the virtual address of the start of the kernel image
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
diff --git a/arch/unicore32/mm/flush.c b/arch/unicore32/mm/flush.c
index 6d4c096..74f4d63 100644
--- a/arch/unicore32/mm/flush.c
+++ b/arch/unicore32/mm/flush.c
@@ -83,7 +83,7 @@ void flush_dcache_page(struct page *page)
if (page == ZERO_PAGE(0))
return;
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (mapping && !mapping_mapped(mapping))
clear_bit(PG_dcache_clean, &page->flags);
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 4f5a532..0c94b7b 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -503,7 +503,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
if (page == ZERO_PAGE(0))
return;
- mapping = page_mapping(page);
+ mapping = page_mapping_file(page);
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
__flush_dcache_page(mapping, page);
if (mapping)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 2edc49e..cfecc22 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -21,7 +21,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/hypervisor.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
@@ -88,11 +88,15 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
+struct hv_vp_assist_page **hv_vp_assist_page;
+EXPORT_SYMBOL_GPL(hv_vp_assist_page);
+
u32 hv_max_vp_index;
static int hv_cpu_init(unsigned int cpu)
{
u64 msr_vp_index;
+ struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
hv_get_vp_index(msr_vp_index);
@@ -101,6 +105,22 @@ static int hv_cpu_init(unsigned int cpu)
if (msr_vp_index > hv_max_vp_index)
hv_max_vp_index = msr_vp_index;
+ if (!hv_vp_assist_page)
+ return 0;
+
+ if (!*hvp)
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+
+ if (*hvp) {
+ u64 val;
+
+ val = vmalloc_to_pfn(*hvp);
+ val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
+ HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
+ }
+
return 0;
}
@@ -198,6 +218,9 @@ static int hv_cpu_die(unsigned int cpu)
struct hv_reenlightenment_control re_ctrl;
unsigned int new_cpu;
+ if (hv_vp_assist_page && hv_vp_assist_page[cpu])
+ wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
+
if (hv_reenlightenment_cb == NULL)
return 0;
@@ -224,6 +247,7 @@ void hyperv_init(void)
{
u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
+ int cpuhp;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
@@ -241,9 +265,17 @@ void hyperv_init(void)
if (!hv_vp_index)
return;
- if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
- hv_cpu_init, hv_cpu_die) < 0)
+ hv_vp_assist_page = kcalloc(num_possible_cpus(),
+ sizeof(*hv_vp_assist_page), GFP_KERNEL);
+ if (!hv_vp_assist_page) {
+ ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
goto free_vp_index;
+ }
+
+ cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ hv_cpu_init, hv_cpu_die);
+ if (cpuhp < 0)
+ goto free_vp_assist_page;
/*
* Setup the hypercall page and enable hypercalls.
@@ -256,7 +288,7 @@ void hyperv_init(void)
hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- goto free_vp_index;
+ goto remove_cpuhp_state;
}
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -304,6 +336,11 @@ register_msr_cs:
return;
+remove_cpuhp_state:
+ cpuhp_remove_state(cpuhp);
+free_vp_assist_page:
+ kfree(hv_vp_assist_page);
+ hv_vp_assist_page = NULL;
free_vp_index:
kfree(hv_vp_index);
hv_vp_index = NULL;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6c0c3a3..416cb0e 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -1,6 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_X86_HYPERV_H
-#define _ASM_X86_HYPERV_H
+
+/*
+ * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
+ * Specification (TLFS):
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
+ */
+
+#ifndef _ASM_X86_HYPERV_TLFS_H
+#define _ASM_X86_HYPERV_TLFS_H
#include <linux/types.h>
@@ -14,6 +21,7 @@
#define HYPERV_CPUID_FEATURES 0x40000003
#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000
#define HYPERV_CPUID_MIN 0x40000005
@@ -159,6 +167,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
+/* Recommend using enlightened VMCS */
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14)
+
/*
* Crash notification flag.
*/
@@ -192,7 +203,7 @@
#define HV_X64_MSR_EOI 0x40000070
#define HV_X64_MSR_ICR 0x40000071
#define HV_X64_MSR_TPR 0x40000072
-#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
/* Define synthetic interrupt controller model specific registers. */
#define HV_X64_MSR_SCONTROL 0x40000080
@@ -240,6 +251,55 @@
#define HV_X64_MSR_CRASH_PARAMS \
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
+/*
+ * Declare the MSR used to setup pages used to communicate with the hypervisor.
+ */
+union hv_x64_msr_hypercall_contents {
+ u64 as_uint64;
+ struct {
+ u64 enable:1;
+ u64 reserved:11;
+ u64 guest_physical_address:52;
+ };
+};
+
+/*
+ * TSC page layout.
+ */
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+ u64 reserved2[509];
+};
+
+/*
+ * The guest OS needs to register the guest ID with the hypervisor.
+ * The guest ID is a 64 bit entity and the structure of this ID is
+ * specified in the Hyper-V specification:
+ *
+ * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
+ *
+ * While the current guideline does not specify how Linux guest ID(s)
+ * need to be generated, our plan is to publish the guidelines for
+ * Linux and other guest operating systems that currently are hosted
+ * on Hyper-V. The implementation here conforms to this yet
+ * unpublished guidelines.
+ *
+ *
+ * Bit(s)
+ * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
+ * 62:56 - Os Type; Linux is 0x100
+ * 55:48 - Distro specific identification
+ * 47:16 - Linux kernel version number
+ * 15:0 - Distro specific identification
+ *
+ *
+ */
+
+#define HV_LINUX_VENDOR_ID 0x8100
+
/* TSC emulation after migration */
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
@@ -278,10 +338,13 @@ struct hv_tsc_emulation_status {
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
+#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff
#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
@@ -301,12 +364,22 @@ enum HV_GENERIC_SET_FORMAT {
HV_GENERIC_SET_ALL,
};
+#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
+
/* hypercall status code */
#define HV_STATUS_SUCCESS 0
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
@@ -321,6 +394,8 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
#define HV_SYNIC_SINT_COUNT (16)
/* Define the expected SynIC version. */
#define HV_SYNIC_VERSION_1 (0x1)
+/* Valid SynIC vectors are 16-255. */
+#define HV_SYNIC_FIRST_VALID_VECTOR (16)
#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
@@ -415,6 +490,216 @@ struct hv_timer_message_payload {
__u64 delivery_time; /* When the message was delivered */
};
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+ __u32 apic_assist;
+ __u32 reserved;
+ __u64 vtl_control[2];
+ __u64 nested_enlightenments_control[2];
+ __u32 enlighten_vmentry;
+ __u64 current_nested_vmcs;
+};
+
+struct hv_enlightened_vmcs {
+ u32 revision_id;
+ u32 abort;
+
+ u16 host_es_selector;
+ u16 host_cs_selector;
+ u16 host_ss_selector;
+ u16 host_ds_selector;
+ u16 host_fs_selector;
+ u16 host_gs_selector;
+ u16 host_tr_selector;
+
+ u64 host_ia32_pat;
+ u64 host_ia32_efer;
+
+ u64 host_cr0;
+ u64 host_cr3;
+ u64 host_cr4;
+
+ u64 host_ia32_sysenter_esp;
+ u64 host_ia32_sysenter_eip;
+ u64 host_rip;
+ u32 host_ia32_sysenter_cs;
+
+ u32 pin_based_vm_exec_control;
+ u32 vm_exit_controls;
+ u32 secondary_vm_exec_control;
+
+ u64 io_bitmap_a;
+ u64 io_bitmap_b;
+ u64 msr_bitmap;
+
+ u16 guest_es_selector;
+ u16 guest_cs_selector;
+ u16 guest_ss_selector;
+ u16 guest_ds_selector;
+ u16 guest_fs_selector;
+ u16 guest_gs_selector;
+ u16 guest_ldtr_selector;
+ u16 guest_tr_selector;
+
+ u32 guest_es_limit;
+ u32 guest_cs_limit;
+ u32 guest_ss_limit;
+ u32 guest_ds_limit;
+ u32 guest_fs_limit;
+ u32 guest_gs_limit;
+ u32 guest_ldtr_limit;
+ u32 guest_tr_limit;
+ u32 guest_gdtr_limit;
+ u32 guest_idtr_limit;
+
+ u32 guest_es_ar_bytes;
+ u32 guest_cs_ar_bytes;
+ u32 guest_ss_ar_bytes;
+ u32 guest_ds_ar_bytes;
+ u32 guest_fs_ar_bytes;
+ u32 guest_gs_ar_bytes;
+ u32 guest_ldtr_ar_bytes;
+ u32 guest_tr_ar_bytes;
+
+ u64 guest_es_base;
+ u64 guest_cs_base;
+ u64 guest_ss_base;
+ u64 guest_ds_base;
+ u64 guest_fs_base;
+ u64 guest_gs_base;
+ u64 guest_ldtr_base;
+ u64 guest_tr_base;
+ u64 guest_gdtr_base;
+ u64 guest_idtr_base;
+
+ u64 padding64_1[3];
+
+ u64 vm_exit_msr_store_addr;
+ u64 vm_exit_msr_load_addr;
+ u64 vm_entry_msr_load_addr;
+
+ u64 cr3_target_value0;
+ u64 cr3_target_value1;
+ u64 cr3_target_value2;
+ u64 cr3_target_value3;
+
+ u32 page_fault_error_code_mask;
+ u32 page_fault_error_code_match;
+
+ u32 cr3_target_count;
+ u32 vm_exit_msr_store_count;
+ u32 vm_exit_msr_load_count;
+ u32 vm_entry_msr_load_count;
+
+ u64 tsc_offset;
+ u64 virtual_apic_page_addr;
+ u64 vmcs_link_pointer;
+
+ u64 guest_ia32_debugctl;
+ u64 guest_ia32_pat;
+ u64 guest_ia32_efer;
+
+ u64 guest_pdptr0;
+ u64 guest_pdptr1;
+ u64 guest_pdptr2;
+ u64 guest_pdptr3;
+
+ u64 guest_pending_dbg_exceptions;
+ u64 guest_sysenter_esp;
+ u64 guest_sysenter_eip;
+
+ u32 guest_activity_state;
+ u32 guest_sysenter_cs;
+
+ u64 cr0_guest_host_mask;
+ u64 cr4_guest_host_mask;
+ u64 cr0_read_shadow;
+ u64 cr4_read_shadow;
+ u64 guest_cr0;
+ u64 guest_cr3;
+ u64 guest_cr4;
+ u64 guest_dr7;
+
+ u64 host_fs_base;
+ u64 host_gs_base;
+ u64 host_tr_base;
+ u64 host_gdtr_base;
+ u64 host_idtr_base;
+ u64 host_rsp;
+
+ u64 ept_pointer;
+
+ u16 virtual_processor_id;
+ u16 padding16[3];
+
+ u64 padding64_2[5];
+ u64 guest_physical_address;
+
+ u32 vm_instruction_error;
+ u32 vm_exit_reason;
+ u32 vm_exit_intr_info;
+ u32 vm_exit_intr_error_code;
+ u32 idt_vectoring_info_field;
+ u32 idt_vectoring_error_code;
+ u32 vm_exit_instruction_len;
+ u32 vmx_instruction_info;
+
+ u64 exit_qualification;
+ u64 exit_io_instruction_ecx;
+ u64 exit_io_instruction_esi;
+ u64 exit_io_instruction_edi;
+ u64 exit_io_instruction_eip;
+
+ u64 guest_linear_address;
+ u64 guest_rsp;
+ u64 guest_rflags;
+
+ u32 guest_interruptibility_info;
+ u32 cpu_based_vm_exec_control;
+ u32 exception_bitmap;
+ u32 vm_entry_controls;
+ u32 vm_entry_intr_info_field;
+ u32 vm_entry_exception_error_code;
+ u32 vm_entry_instruction_len;
+ u32 tpr_threshold;
+
+ u64 guest_rip;
+
+ u32 hv_clean_fields;
+ u32 hv_padding_32;
+ u32 hv_synthetic_controls;
+ u32 hv_enlightenments_control;
+ u32 hv_vp_id;
+
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 padding64_4[4];
+ u64 guest_bndcfgs;
+ u64 padding64_5[7];
+ u64 xss_exit_bitmap;
+ u64 padding64_6[7];
+};
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
+
#define HV_STIMER_ENABLE (1ULL << 0)
#define HV_STIMER_PERIODIC (1ULL << 1)
#define HV_STIMER_LAZY (1ULL << 2)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b605a5b..949c977 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -34,6 +34,7 @@
#include <asm/msr-index.h>
#include <asm/asm.h>
#include <asm/kvm_page_track.h>
+#include <asm/hyperv-tlfs.h>
#define KVM_MAX_VCPUS 288
#define KVM_SOFT_MAX_VCPUS 240
@@ -73,6 +74,7 @@
#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20)
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
+#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -498,6 +500,7 @@ struct kvm_vcpu_arch {
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
bool apicv_active;
+ bool load_eoi_exitmap_pending;
DECLARE_BITMAP(ioapic_handled_vectors, 256);
unsigned long apic_attention;
int32_t apic_arb_prio;
@@ -571,7 +574,7 @@ struct kvm_vcpu_arch {
} exception;
struct kvm_queued_interrupt {
- bool pending;
+ bool injected;
bool soft;
u8 nr;
} interrupt;
@@ -754,6 +757,12 @@ struct kvm_hv {
u64 hv_crash_ctl;
HV_REFERENCE_TSC_PAGE tsc_ref;
+
+ struct idr conn_to_evt;
+
+ u64 hv_reenlightenment_control;
+ u64 hv_tsc_emulation_control;
+ u64 hv_tsc_emulation_status;
};
enum kvm_irqchip_mode {
@@ -762,15 +771,6 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
-struct kvm_sev_info {
- bool active; /* SEV enabled guest */
- unsigned int asid; /* ASID used for this guest */
- unsigned int handle; /* SEV firmware handle */
- int fd; /* SEV device fd */
- unsigned long pages_locked; /* Number of pages locked */
- struct list_head regions_list; /* List of registered regions */
-};
-
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -800,13 +800,13 @@ struct kvm_arch {
struct mutex apic_map_lock;
struct kvm_apic_map *apic_map;
- unsigned int tss_addr;
bool apic_access_page_done;
gpa_t wall_clock;
- bool ept_identity_pagetable_done;
- gpa_t ept_identity_map_addr;
+ bool mwait_in_guest;
+ bool hlt_in_guest;
+ bool pause_in_guest;
unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
@@ -849,17 +849,8 @@ struct kvm_arch {
bool disabled_lapic_found;
- /* Struct members for AVIC */
- u32 avic_vm_id;
- u32 ldr_mode;
- struct page *avic_logical_id_table_page;
- struct page *avic_physical_id_table_page;
- struct hlist_node hnode;
-
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
-
- struct kvm_sev_info sev_info;
};
struct kvm_vm_stat {
@@ -936,6 +927,8 @@ struct kvm_x86_ops {
bool (*cpu_has_high_real_mode_segbase)(void);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
+ struct kvm *(*vm_alloc)(void);
+ void (*vm_free)(struct kvm *);
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
@@ -1007,6 +1000,7 @@ struct kvm_x86_ops {
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
+ int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
int (*get_lpage_level)(void);
@@ -1109,6 +1103,17 @@ struct kvm_arch_async_pf {
extern struct kvm_x86_ops *kvm_x86_ops;
+#define __KVM_HAVE_ARCH_VM_ALLOC
+static inline struct kvm *kvm_arch_alloc_vm(void)
+{
+ return kvm_x86_ops->vm_alloc();
+}
+
+static inline void kvm_arch_free_vm(struct kvm *kvm)
+{
+ return kvm_x86_ops->vm_free(kvm);
+}
+
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
@@ -1187,6 +1192,8 @@ enum emulation_result {
#define EMULTYPE_SKIP (1 << 2)
#define EMULTYPE_RETRY (1 << 3)
#define EMULTYPE_NO_REEXECUTE (1 << 4)
+#define EMULTYPE_NO_UD_ON_FAIL (1 << 5)
+#define EMULTYPE_VMWARE (1 << 6)
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
@@ -1204,8 +1211,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
struct x86_emulate_ctxt;
-int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
-int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 7b407dd..3aea265 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -88,6 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
#ifdef CONFIG_KVM_GUEST
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
+unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
@@ -115,6 +116,11 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
}
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
static inline u32 kvm_read_and_reset_pf_reason(void)
{
return 0;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index e73c4d0..b90e796 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,90 +6,23 @@
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <asm/io.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
-/*
- * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
- * is set by CPUID(HVCPUID_VERSION_FEATURES).
- */
-enum hv_cpuid_function {
- HVCPUID_VERSION_FEATURES = 0x00000001,
- HVCPUID_VENDOR_MAXFUNCTION = 0x40000000,
- HVCPUID_INTERFACE = 0x40000001,
-
- /*
- * The remaining functions depend on the value of
- * HVCPUID_INTERFACE
- */
- HVCPUID_VERSION = 0x40000002,
- HVCPUID_FEATURES = 0x40000003,
- HVCPUID_ENLIGHTENMENT_INFO = 0x40000004,
- HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005,
-};
-
struct ms_hyperv_info {
u32 features;
u32 misc_features;
u32 hints;
+ u32 nested_features;
u32 max_vp_index;
u32 max_lp_index;
};
extern struct ms_hyperv_info ms_hyperv;
-/*
- * Declare the MSR used to setup pages used to communicate with the hypervisor.
- */
-union hv_x64_msr_hypercall_contents {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:11;
- u64 guest_physical_address:52;
- };
-};
/*
- * TSC page layout.
- */
-
-struct ms_hyperv_tsc_page {
- volatile u32 tsc_sequence;
- u32 reserved1;
- volatile u64 tsc_scale;
- volatile s64 tsc_offset;
- u64 reserved2[509];
-};
-
-/*
- * The guest OS needs to register the guest ID with the hypervisor.
- * The guest ID is a 64 bit entity and the structure of this ID is
- * specified in the Hyper-V specification:
- *
- * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
- *
- * While the current guideline does not specify how Linux guest ID(s)
- * need to be generated, our plan is to publish the guidelines for
- * Linux and other guest operating systems that currently are hosted
- * on Hyper-V. The implementation here conforms to this yet
- * unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0 - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID 0x8100
-
-/*
- * Generate the guest ID based on the guideline described above.
+ * Generate the guest ID.
*/
static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
@@ -228,14 +161,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
-#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_REP_START_OFFSET 48
-#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
-
/* Fast hypercall with 8 bytes of input and no output */
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
@@ -307,6 +232,15 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
*/
extern u32 *hv_vp_index;
extern u32 hv_max_vp_index;
+extern struct hv_vp_assist_page **hv_vp_assist_page;
+
+static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
+{
+ if (!hv_vp_assist_page)
+ return NULL;
+
+ return hv_vp_assist_page[cpu];
+}
/**
* hv_cpu_number_to_vp_number() - Map CPU to VP.
@@ -343,6 +277,10 @@ static inline void hyperv_setup_mmu_ops(void) {}
static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
static inline void clear_hv_tscchange_cb(void) {}
static inline void hyperv_stop_tsc_emulation(void) {};
+static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
+{
+ return NULL;
+}
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index c9084de..53d5b1b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -353,7 +353,21 @@
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
+#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL
+#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2)
+#define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4)
+#define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6)
+#define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8)
+#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10)
+
#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR
+#define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2)
+#define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4)
+#define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6)
+#define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8)
+#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10)
+
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
#define MSR_F15H_PTSC 0xc0010280
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b0ccd48..4fa4206 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -407,9 +407,19 @@ union irq_stack_union {
DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
DECLARE_INIT_PER_CPU(irq_stack_union);
+static inline unsigned long cpu_kernelmode_gs_base(int cpu)
+{
+ return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+}
+
DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
extern asmlinkage void ignore_sysret(void);
+
+#if IS_ENABLED(CONFIG_KVM)
+/* Save actual FS/GS selectors and bases to current->thread */
+void save_fsgs_for_kvm(void);
+#endif
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
/*
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 0487ac0..93b462e 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -60,7 +60,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 intercept_dr;
u32 intercept_exceptions;
u64 intercept;
- u8 reserved_1[42];
+ u8 reserved_1[40];
+ u16 pause_filter_thresh;
u16 pause_filter_count;
u64 iopm_base_pa;
u64 msrpm_base_pa;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 199e15b..ce8b4da 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -122,12 +122,14 @@ struct x86_init_pci {
* @guest_late_init: guest late init
* @x2apic_available: X2APIC detection
* @init_mem_mapping: setup early mappings during init_mem_mapping()
+ * @init_after_bootmem: guest init after boot allocator is finished
*/
struct x86_hyper_init {
void (*init_platform)(void);
void (*guest_late_init)(void);
bool (*x2apic_available)(void);
void (*init_mem_mapping)(void);
+ void (*init_after_bootmem)(void);
};
/**
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index f3a9604..c535c2f 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -354,8 +354,25 @@ struct kvm_xcrs {
__u64 padding[16];
};
-/* definition of registers in kvm_run */
+#define KVM_SYNC_X86_REGS (1UL << 0)
+#define KVM_SYNC_X86_SREGS (1UL << 1)
+#define KVM_SYNC_X86_EVENTS (1UL << 2)
+
+#define KVM_SYNC_X86_VALID_FIELDS \
+ (KVM_SYNC_X86_REGS| \
+ KVM_SYNC_X86_SREGS| \
+ KVM_SYNC_X86_EVENTS)
+
+/* kvm_sync_regs struct included by kvm_run struct */
struct kvm_sync_regs {
+ /* Members of this structure are potentially malicious.
+ * Care must be taken by code reading, esp. interpreting,
+ * data fields from them inside KVM to prevent TOCTOU and
+ * double-fetch types of vulnerabilities.
+ */
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu_events events;
};
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 6cfa9c8..4c851eb 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -3,15 +3,16 @@
#define _UAPI_ASM_X86_KVM_PARA_H
#include <linux/types.h>
-#include <asm/hyperv.h>
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
-/* This CPUID returns a feature bitmap in eax. Before enabling a particular
- * paravirtualization, the appropriate feature bit should be checked.
+/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
+ * a particular paravirtualization, the appropriate feature bit should
+ * be checked in eax. The performance hint feature bit should be checked
+ * in edx.
*/
#define KVM_CPUID_FEATURES 0x40000001
#define KVM_FEATURE_CLOCKSOURCE 0
@@ -28,6 +29,8 @@
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
+#define KVM_HINTS_DEDICATED 0
+
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
*/
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 348cf48..4702fbd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -487,7 +487,7 @@ void load_percpu_segment(int cpu)
loadsegment(fs, __KERNEL_PERCPU);
#else
__loadsegment_simple(gs, 0);
- wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+ wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#endif
load_stack_canary_segment();
}
@@ -1398,6 +1398,7 @@ __setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
/*
* The following percpu variables are hot. Align current_task to
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4488cf0..031082c 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -22,7 +22,7 @@
#include <linux/kexec.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <asm/desc.h>
#include <asm/irq_regs.h>
@@ -216,8 +216,8 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
ms_hyperv.features, ms_hyperv.hints);
- ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS);
- ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS);
+ ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
+ ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
@@ -225,11 +225,12 @@ static void __init ms_hyperv_init_platform(void)
/*
* Extract host information.
*/
- if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) {
- hv_host_info_eax = cpuid_eax(HVCPUID_VERSION);
- hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION);
- hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION);
- hv_host_info_edx = cpuid_edx(HVCPUID_VERSION);
+ if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >=
+ HYPERV_CPUID_VERSION) {
+ hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
+ hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
+ hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
+ hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);
pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n",
hv_host_info_eax, hv_host_info_ebx >> 16,
@@ -243,6 +244,11 @@ static void __init ms_hyperv_init_platform(void)
x86_platform.calibrate_cpu = hv_get_tsc_khz;
}
+ if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) {
+ ms_hyperv.nested_features =
+ cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
+ }
+
#ifdef CONFIG_X86_LOCAL_APIC
if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index fae86e3..7867417 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -454,6 +454,13 @@ static void __init sev_map_percpu_data(void)
}
#ifdef CONFIG_SMP
+static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
+{
+ native_smp_prepare_cpus(max_cpus);
+ if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+ static_branch_disable(&virt_spin_lock_key);
+}
+
static void __init kvm_smp_prepare_boot_cpu(void)
{
/*
@@ -546,6 +553,7 @@ static void __init kvm_guest_init(void)
}
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
@@ -556,6 +564,7 @@ static void __init kvm_guest_init(void)
kvm_setup_vsyscall_timeinfo();
#ifdef CONFIG_SMP
+ smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
@@ -605,6 +614,11 @@ unsigned int kvm_arch_para_features(void)
return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}
+unsigned int kvm_arch_para_hints(void)
+{
+ return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
+}
+
static uint32_t __init kvm_detect(void)
{
return kvm_cpuid_base();
@@ -635,6 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void)
int cpu;
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
@@ -730,6 +745,9 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
+ if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+ return;
+
__pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1443711..77625b6 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -6,7 +6,6 @@
#include <linux/bootmem.h>
#include <linux/gfp.h>
#include <linux/pci.h>
-#include <linux/kmemleak.h>
#include <asm/proto.h>
#include <asm/dma.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9eb448c..4b100fe 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -205,6 +205,20 @@ static __always_inline void save_fsgs(struct task_struct *task)
save_base_legacy(task, task->thread.gsindex, GS);
}
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * While a process is running,current->thread.fsbase and current->thread.gsbase
+ * may not match the corresponding CPU registers (see save_base_legacy()). KVM
+ * wants an efficient way to save and restore FSBASE and GSBASE.
+ * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE.
+ */
+void save_fsgs_for_kvm(void)
+{
+ save_fsgs(current);
+}
+EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
+#endif
+
static __always_inline void loadseg(enum which_selector which,
unsigned short sel)
{
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ebda84a..3ab8676 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
.guest_late_init = x86_init_noop,
.x2apic_available = bool_x86_init_noop,
.init_mem_mapping = x86_init_noop,
+ .init_after_bootmem = x86_init_noop,
},
.acpi = {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b671fc2..82055b9 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
return -EINVAL;
}
+ best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+ if (kvm_hlt_in_guest(vcpu->kvm) && best &&
+ (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
+ best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+
/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -370,7 +375,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
- F(TOPOEXT);
+ F(TOPOEXT) | F(PERFCTR_CORE);
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d91eaeb..b3705ae 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,6 +30,7 @@
#include "x86.h"
#include "tss.h"
#include "mmu.h"
+#include "pmu.h"
/*
* Operand types
@@ -2887,6 +2888,9 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
return ctxt->ops->cpl(ctxt) > iopl;
}
+#define VMWARE_PORT_VMPORT (0x5658)
+#define VMWARE_PORT_VMRPC (0x5659)
+
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
u16 port, u16 len)
{
@@ -2898,6 +2902,14 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
unsigned mask = (1 << len) - 1;
unsigned long base;
+ /*
+ * VMware allows access to these ports even if denied
+ * by TSS I/O permission bitmap. Mimic behavior.
+ */
+ if (enable_vmware_backdoor &&
+ ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
+ return true;
+
ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
if (!tr_seg.p)
return false;
@@ -4282,6 +4294,13 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
+ /*
+ * VMware allows access to these Pseduo-PMCs even when read via RDPMC
+ * in Ring3 when CR4.PCE=0.
+ */
+ if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
+ return X86EMUL_CONTINUE;
+
if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
ctxt->ops->check_pmc(ctxt, rcx))
return emulate_gp(ctxt, 0);
@@ -4498,6 +4517,10 @@ static const struct gprefix pfx_0f_2b = {
ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
};
+static const struct gprefix pfx_0f_10_0f_11 = {
+ I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
+};
+
static const struct gprefix pfx_0f_28_0f_29 = {
I(Aligned, em_mov), I(Aligned, em_mov), N, N,
};
@@ -4709,7 +4732,9 @@ static const struct opcode twobyte_table[256] = {
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
/* 0x10 - 0x1F */
- N, N, N, N, N, N, N, N,
+ GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
+ GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
+ N, N, N, N, N, N,
D(ImplicitOps | ModRM | SrcMem | NoAccess),
N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
/* 0x20 - 0x2F */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index dc97f25..98618e3 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -29,6 +29,7 @@
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
+#include <linux/eventfd.h>
#include <asm/apicdef.h>
#include <trace/events/kvm.h>
@@ -74,13 +75,38 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
return false;
}
+static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
+ int vector)
+{
+ if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
+ return;
+
+ if (synic_has_vector_connected(synic, vector))
+ __set_bit(vector, synic->vec_bitmap);
+ else
+ __clear_bit(vector, synic->vec_bitmap);
+
+ if (synic_has_vector_auto_eoi(synic, vector))
+ __set_bit(vector, synic->auto_eoi_bitmap);
+ else
+ __clear_bit(vector, synic->auto_eoi_bitmap);
+}
+
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
u64 data, bool host)
{
- int vector;
+ int vector, old_vector;
+ bool masked;
vector = data & HV_SYNIC_SINT_VECTOR_MASK;
- if (vector < 16 && !host)
+ masked = data & HV_SYNIC_SINT_MASKED;
+
+ /*
+ * Valid vectors are 16-255, however, nested Hyper-V attempts to write
+ * default '0x10000' value on boot and this should not #GP. We need to
+ * allow zero-initing the register from host as well.
+ */
+ if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
return 1;
/*
* Guest may configure multiple SINTs to use the same vector, so
@@ -88,18 +114,13 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
* bitmap of vectors with auto-eoi behavior. The bitmaps are
* updated here, and atomically queried on fast paths.
*/
+ old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
atomic64_set(&synic->sint[sint], data);
- if (synic_has_vector_connected(synic, vector))
- __set_bit(vector, synic->vec_bitmap);
- else
- __clear_bit(vector, synic->vec_bitmap);
+ synic_update_vector(synic, old_vector);
- if (synic_has_vector_auto_eoi(synic, vector))
- __set_bit(vector, synic->auto_eoi_bitmap);
- else
- __clear_bit(vector, synic->auto_eoi_bitmap);
+ synic_update_vector(synic, vector);
/* Load SynIC vectors into EOI exit bitmap */
kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
@@ -736,6 +757,9 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_RESET:
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
r = true;
break;
}
@@ -981,6 +1005,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
kvm_make_request(KVM_REQ_HV_RESET, vcpu);
}
break;
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ hv->hv_reenlightenment_control = data;
+ break;
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ hv->hv_tsc_emulation_control = data;
+ break;
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
+ hv->hv_tsc_emulation_status = data;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1009,17 +1042,17 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
hv->vp_index = (u32)data;
break;
- case HV_X64_MSR_APIC_ASSIST_PAGE: {
+ case HV_X64_MSR_VP_ASSIST_PAGE: {
u64 gfn;
unsigned long addr;
- if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+ if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
hv->hv_vapic = data;
if (kvm_lapic_enable_pv_eoi(vcpu, 0))
return 1;
break;
}
- gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+ gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
if (kvm_is_error_hva(addr))
return 1;
@@ -1105,6 +1138,15 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_RESET:
data = 0;
break;
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ data = hv->hv_reenlightenment_control;
+ break;
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ data = hv->hv_tsc_emulation_control;
+ break;
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
+ data = hv->hv_tsc_emulation_status;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -1129,7 +1171,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
- case HV_X64_MSR_APIC_ASSIST_PAGE:
+ case HV_X64_MSR_VP_ASSIST_PAGE:
data = hv->hv_vapic;
break;
case HV_X64_MSR_VP_RUNTIME:
@@ -1226,10 +1268,47 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
return 1;
}
+static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
+{
+ struct eventfd_ctx *eventfd;
+
+ if (unlikely(!fast)) {
+ int ret;
+ gpa_t gpa = param;
+
+ if ((gpa & (__alignof__(param) - 1)) ||
+ offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
+ return HV_STATUS_INVALID_ALIGNMENT;
+
+ ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
+ if (ret < 0)
+ return HV_STATUS_INVALID_ALIGNMENT;
+ }
+
+ /*
+ * Per spec, bits 32-47 contain the extra "flag number". However, we
+ * have no use for it, and in all known usecases it is zero, so just
+ * report lookup failure if it isn't.
+ */
+ if (param & 0xffff00000000ULL)
+ return HV_STATUS_INVALID_PORT_ID;
+ /* remaining bits are reserved-zero */
+ if (param & ~KVM_HYPERV_CONN_ID_MASK)
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ /* conn_to_evt is protected by vcpu->kvm->srcu */
+ eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
+ if (!eventfd)
+ return HV_STATUS_INVALID_PORT_ID;
+
+ eventfd_signal(eventfd, 1);
+ return HV_STATUS_SUCCESS;
+}
+
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
- u64 param, ingpa, outgpa, ret;
- uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+ u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
+ uint16_t code, rep_idx, rep_cnt;
bool fast, longmode;
/*
@@ -1268,7 +1347,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
/* Hypercall continuation is not supported yet */
if (rep_cnt || rep_idx) {
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ ret = HV_STATUS_INVALID_HYPERCALL_CODE;
goto set_result;
}
@@ -1276,11 +1355,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
kvm_vcpu_on_spin(vcpu, true);
break;
- case HVCALL_POST_MESSAGE:
case HVCALL_SIGNAL_EVENT:
+ ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
+ if (ret != HV_STATUS_INVALID_PORT_ID)
+ break;
+ /* maybe userspace knows this conn_id: fall through */
+ case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
if (!vcpu_to_synic(vcpu)->active) {
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
}
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
@@ -1292,12 +1375,79 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
kvm_hv_hypercall_complete_userspace;
return 0;
default:
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
}
set_result:
- ret = res | (((u64)rep_done & 0xfff) << 32);
kvm_hv_hypercall_set_result(vcpu, ret);
return 1;
}
+
+void kvm_hv_init_vm(struct kvm *kvm)
+{
+ mutex_init(&kvm->arch.hyperv.hv_lock);
+ idr_init(&kvm->arch.hyperv.conn_to_evt);
+}
+
+void kvm_hv_destroy_vm(struct kvm *kvm)
+{
+ struct eventfd_ctx *eventfd;
+ int i;
+
+ idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
+ eventfd_ctx_put(eventfd);
+ idr_destroy(&kvm->arch.hyperv.conn_to_evt);
+}
+
+static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
+{
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+ struct eventfd_ctx *eventfd;
+ int ret;
+
+ eventfd = eventfd_ctx_fdget(fd);
+ if (IS_ERR(eventfd))
+ return PTR_ERR(eventfd);
+
+ mutex_lock(&hv->hv_lock);
+ ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
+ GFP_KERNEL);
+ mutex_unlock(&hv->hv_lock);
+
+ if (ret >= 0)
+ return 0;
+
+ if (ret == -ENOSPC)
+ ret = -EEXIST;
+ eventfd_ctx_put(eventfd);
+ return ret;
+}
+
+static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
+{
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+ struct eventfd_ctx *eventfd;
+
+ mutex_lock(&hv->hv_lock);
+ eventfd = idr_remove(&hv->conn_to_evt, conn_id);
+ mutex_unlock(&hv->hv_lock);
+
+ if (!eventfd)
+ return -ENOENT;
+
+ synchronize_srcu(&kvm->srcu);
+ eventfd_ctx_put(eventfd);
+ return 0;
+}
+
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
+{
+ if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
+ (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
+ return -EINVAL;
+
+ if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
+ return kvm_hv_eventfd_deassign(kvm, args->conn_id);
+ return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
+}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index e637631..837465d 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -88,4 +88,8 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock);
+void kvm_hv_init_vm(struct kvm *kvm);
+void kvm_hv_destroy_vm(struct kvm *kvm);
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
+
#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index f171051..faa2648 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -73,8 +73,19 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
*/
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
{
+ /*
+ * FIXME: interrupt.injected represents an interrupt that it's
+ * side-effects have already been applied (e.g. bit from IRR
+ * already moved to ISR). Therefore, it is incorrect to rely
+ * on interrupt.injected to know if there is a pending
+ * interrupt in the user-mode LAPIC.
+ * This leads to nVMX/nSVM not be able to distinguish
+ * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
+ * pending interrupt or should re-inject an injected
+ * interrupt.
+ */
if (!lapic_in_kernel(v))
- return v->arch.interrupt.pending;
+ return v->arch.interrupt.injected;
if (kvm_cpu_has_extint(v))
return 1;
@@ -91,8 +102,19 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
+ /*
+ * FIXME: interrupt.injected represents an interrupt that it's
+ * side-effects have already been applied (e.g. bit from IRR
+ * already moved to ISR). Therefore, it is incorrect to rely
+ * on interrupt.injected to know if there is a pending
+ * interrupt in the user-mode LAPIC.
+ * This leads to nVMX/nSVM not be able to distinguish
+ * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
+ * pending interrupt or should re-inject an injected
+ * interrupt.
+ */
if (!lapic_in_kernel(v))
- return v->arch.interrupt.pending;
+ return v->arch.interrupt.injected;
if (kvm_cpu_has_extint(v))
return 1;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f500293..9619dcc 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -41,7 +41,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
if (!test_bit(VCPU_EXREG_PDPTR,
(unsigned long *)&vcpu->arch.regs_avail))
- kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
+ kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR);
return vcpu->arch.walk_mmu->pdptrs[index];
}
@@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
{
vcpu->arch.hflags &= ~HF_GUEST_MASK;
+
+ if (vcpu->arch.load_eoi_exitmap_pending) {
+ vcpu->arch.load_eoi_exitmap_pending = false;
+ kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+ }
}
static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 391dda8..70dcb55 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -321,8 +321,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;
+ /*
+ * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
+ * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
+ * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
+ * version first and level-triggered interrupts never get EOIed in
+ * IOAPIC.
+ */
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
- if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
+ if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+ !ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 56c3601..edce055 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -109,7 +109,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+ return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
}
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 763bb3b..8494dba 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3031,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
return RET_PF_RETRY;
}
- return RET_PF_EMULATE;
+ return -EFAULT;
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 5abae72..6288e9d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -452,14 +452,21 @@ error:
* done by is_rsvd_bits_set() above.
*
* We set up the value of exit_qualification to inject:
- * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
+ * [2:0] - Derive from the access bits. The exit_qualification might be
+ * out of date if it is serving an EPT misconfiguration.
* [5:3] - Calculated by the page walk of the guest EPT page tables
* [7:8] - Derived from [7:8] of real exit_qualification
*
* The other bits are set to 0.
*/
if (!(errcode & PFERR_RSVD_MASK)) {
- vcpu->arch.exit_qualification &= 0x187;
+ vcpu->arch.exit_qualification &= 0x180;
+ if (write_fault)
+ vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE;
+ if (user_fault)
+ vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ;
+ if (fetch_fault)
+ vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR;
vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
}
#endif
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 026db42..58ead7d 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -244,12 +244,49 @@ int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
}
+bool is_vmware_backdoor_pmc(u32 pmc_idx)
+{
+ switch (pmc_idx) {
+ case VMWARE_BACKDOOR_PMC_HOST_TSC:
+ case VMWARE_BACKDOOR_PMC_REAL_TIME:
+ case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
+ return true;
+ }
+ return false;
+}
+
+static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
+{
+ u64 ctr_val;
+
+ switch (idx) {
+ case VMWARE_BACKDOOR_PMC_HOST_TSC:
+ ctr_val = rdtsc();
+ break;
+ case VMWARE_BACKDOOR_PMC_REAL_TIME:
+ ctr_val = ktime_get_boot_ns();
+ break;
+ case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
+ ctr_val = ktime_get_boot_ns() +
+ vcpu->kvm->arch.kvmclock_offset;
+ break;
+ default:
+ return 1;
+ }
+
+ *data = ctr_val;
+ return 0;
+}
+
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
bool fast_mode = idx & (1u << 31);
struct kvm_pmc *pmc;
u64 ctr_val;
+ if (is_vmware_backdoor_pmc(idx))
+ return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
+
pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx);
if (!pmc)
return 1;
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index a9a62b9..ba8898e 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -9,6 +9,10 @@
/* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */
#define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf)
+#define VMWARE_BACKDOOR_PMC_HOST_TSC 0x10000
+#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
+#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
+
struct kvm_event_hw_type_mapping {
u8 eventsel;
u8 unit_mask;
@@ -114,6 +118,8 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+bool is_vmware_backdoor_pmc(u32 pmc_idx);
+
extern struct kvm_pmu_ops intel_pmu_ops;
extern struct kvm_pmu_ops amd_pmu_ops;
#endif /* __KVM_X86_PMU_H */
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index cd94443..1495a73 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -19,6 +19,21 @@
#include "lapic.h"
#include "pmu.h"
+enum pmu_type {
+ PMU_TYPE_COUNTER = 0,
+ PMU_TYPE_EVNTSEL,
+};
+
+enum index {
+ INDEX_ZERO = 0,
+ INDEX_ONE,
+ INDEX_TWO,
+ INDEX_THREE,
+ INDEX_FOUR,
+ INDEX_FIVE,
+ INDEX_ERROR,
+};
+
/* duplicated from amd_perfmon_event_map, K7 and above should work. */
static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
@@ -31,6 +46,88 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
};
+static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
+{
+ struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
+ if (type == PMU_TYPE_COUNTER)
+ return MSR_F15H_PERF_CTR;
+ else
+ return MSR_F15H_PERF_CTL;
+ } else {
+ if (type == PMU_TYPE_COUNTER)
+ return MSR_K7_PERFCTR0;
+ else
+ return MSR_K7_EVNTSEL0;
+ }
+}
+
+static enum index msr_to_index(u32 msr)
+{
+ switch (msr) {
+ case MSR_F15H_PERF_CTL0:
+ case MSR_F15H_PERF_CTR0:
+ case MSR_K7_EVNTSEL0:
+ case MSR_K7_PERFCTR0:
+ return INDEX_ZERO;
+ case MSR_F15H_PERF_CTL1:
+ case MSR_F15H_PERF_CTR1:
+ case MSR_K7_EVNTSEL1:
+ case MSR_K7_PERFCTR1:
+ return INDEX_ONE;
+ case MSR_F15H_PERF_CTL2:
+ case MSR_F15H_PERF_CTR2:
+ case MSR_K7_EVNTSEL2:
+ case MSR_K7_PERFCTR2:
+ return INDEX_TWO;
+ case MSR_F15H_PERF_CTL3:
+ case MSR_F15H_PERF_CTR3:
+ case MSR_K7_EVNTSEL3:
+ case MSR_K7_PERFCTR3:
+ return INDEX_THREE;
+ case MSR_F15H_PERF_CTL4:
+ case MSR_F15H_PERF_CTR4:
+ return INDEX_FOUR;
+ case MSR_F15H_PERF_CTL5:
+ case MSR_F15H_PERF_CTR5:
+ return INDEX_FIVE;
+ default:
+ return INDEX_ERROR;
+ }
+}
+
+static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
+ enum pmu_type type)
+{
+ switch (msr) {
+ case MSR_F15H_PERF_CTL0:
+ case MSR_F15H_PERF_CTL1:
+ case MSR_F15H_PERF_CTL2:
+ case MSR_F15H_PERF_CTL3:
+ case MSR_F15H_PERF_CTL4:
+ case MSR_F15H_PERF_CTL5:
+ case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
+ if (type != PMU_TYPE_EVNTSEL)
+ return NULL;
+ break;
+ case MSR_F15H_PERF_CTR0:
+ case MSR_F15H_PERF_CTR1:
+ case MSR_F15H_PERF_CTR2:
+ case MSR_F15H_PERF_CTR3:
+ case MSR_F15H_PERF_CTR4:
+ case MSR_F15H_PERF_CTR5:
+ case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
+ if (type != PMU_TYPE_COUNTER)
+ return NULL;
+ break;
+ default:
+ return NULL;
+ }
+
+ return &pmu->gp_counters[msr_to_index(msr)];
+}
+
static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
u8 event_select,
u8 unit_mask)
@@ -64,7 +161,18 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
- return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + pmc_idx, MSR_K7_EVNTSEL0);
+ unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER);
+ struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
+ /*
+ * The idx is contiguous. The MSRs are not. The counter MSRs
+ * are interleaved with the event select MSRs.
+ */
+ pmc_idx *= 2;
+ }
+
+ return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
}
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
@@ -96,8 +204,8 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int ret = false;
- ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) ||
- get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+ ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) ||
+ get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
return ret;
}
@@ -107,14 +215,14 @@ static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
- /* MSR_K7_PERFCTRn */
- pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
+ /* MSR_PERFCTRn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
*data = pmc_read_counter(pmc);
return 0;
}
- /* MSR_K7_EVNTSELn */
- pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+ /* MSR_EVNTSELn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
if (pmc) {
*data = pmc->eventsel;
return 0;
@@ -130,14 +238,14 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u32 msr = msr_info->index;
u64 data = msr_info->data;
- /* MSR_K7_PERFCTRn */
- pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
+ /* MSR_PERFCTRn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
pmc->counter += data - pmc_read_counter(pmc);
return 0;
}
- /* MSR_K7_EVNTSELn */
- pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+ /* MSR_EVNTSELn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
if (pmc) {
if (data == pmc->eventsel)
return 0;
@@ -154,7 +262,11 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+ pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
+ else
+ pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
pmu->reserved_bits = 0xffffffff00200000ull;
/* not applicable to AMD; but clean them to prevent any fall out */
@@ -169,7 +281,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- for (i = 0; i < AMD64_NUM_COUNTERS ; i++) {
+ BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+
+ for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
@@ -181,7 +295,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
+ for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
struct kvm_pmc *pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9d2043f..b58787d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -131,6 +131,28 @@ static const u32 host_save_user_msrs[] = {
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
+struct kvm_sev_info {
+ bool active; /* SEV enabled guest */
+ unsigned int asid; /* ASID used for this guest */
+ unsigned int handle; /* SEV firmware handle */
+ int fd; /* SEV device fd */
+ unsigned long pages_locked; /* Number of pages locked */
+ struct list_head regions_list; /* List of registered regions */
+};
+
+struct kvm_svm {
+ struct kvm kvm;
+
+ /* Struct members for AVIC */
+ u32 avic_vm_id;
+ u32 ldr_mode;
+ struct page *avic_logical_id_table_page;
+ struct page *avic_physical_id_table_page;
+ struct hlist_node hnode;
+
+ struct kvm_sev_info sev_info;
+};
+
struct kvm_vcpu;
struct nested_state {
@@ -276,6 +298,54 @@ static bool npt_enabled = true;
static bool npt_enabled;
#endif
+/*
+ * These 2 parameters are used to config the controls for Pause-Loop Exiting:
+ * pause_filter_count: On processors that support Pause filtering(indicated
+ * by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter
+ * count value. On VMRUN this value is loaded into an internal counter.
+ * Each time a pause instruction is executed, this counter is decremented
+ * until it reaches zero at which time a #VMEXIT is generated if pause
+ * intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause
+ * Intercept Filtering for more details.
+ * This also indicate if ple logic enabled.
+ *
+ * pause_filter_thresh: In addition, some processor families support advanced
+ * pause filtering (indicated by CPUID Fn8000_000A_EDX) upper bound on
+ * the amount of time a guest is allowed to execute in a pause loop.
+ * In this mode, a 16-bit pause filter threshold field is added in the
+ * VMCB. The threshold value is a cycle count that is used to reset the
+ * pause counter. As with simple pause filtering, VMRUN loads the pause
+ * count value from VMCB into an internal counter. Then, on each pause
+ * instruction the hardware checks the elapsed number of cycles since
+ * the most recent pause instruction against the pause filter threshold.
+ * If the elapsed cycle count is greater than the pause filter threshold,
+ * then the internal pause count is reloaded from the VMCB and execution
+ * continues. If the elapsed cycle count is less than the pause filter
+ * threshold, then the internal pause count is decremented. If the count
+ * value is less than zero and PAUSE intercept is enabled, a #VMEXIT is
+ * triggered. If advanced pause filtering is supported and pause filter
+ * threshold field is set to zero, the filter will operate in the simpler,
+ * count only mode.
+ */
+
+static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
+module_param(pause_filter_thresh, ushort, 0444);
+
+static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
+module_param(pause_filter_count, ushort, 0444);
+
+/* Default doubles per-vcpu window every exit. */
+static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
+module_param(pause_filter_count_grow, ushort, 0444);
+
+/* Default resets per-vcpu window every exit to pause_filter_count. */
+static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(pause_filter_count_shrink, ushort, 0444);
+
+/* Default is to compute the maximum so we can never overflow. */
+static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
+module_param(pause_filter_count_max, ushort, 0444);
+
/* allow nested paging (virtualized MMU) for all guests */
static int npt = true;
module_param(npt, int, S_IRUGO);
@@ -352,6 +422,12 @@ struct enc_region {
unsigned long size;
};
+
+static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
+{
+ return container_of(kvm, struct kvm_svm, kvm);
+}
+
static inline bool svm_sev_enabled(void)
{
return max_sev_asid;
@@ -359,14 +435,14 @@ static inline bool svm_sev_enabled(void)
static inline bool sev_guest(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return sev->active;
}
static inline int sev_get_asid(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return sev->asid;
}
@@ -1083,7 +1159,7 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm)
}
/* Note:
- * This hash table is used to map VM_ID to a struct kvm_arch,
+ * This hash table is used to map VM_ID to a struct kvm_svm,
* when handling AMD IOMMU GALOG notification to schedule in
* a particular vCPU.
*/
@@ -1100,7 +1176,7 @@ static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
static int avic_ga_log_notifier(u32 ga_tag)
{
unsigned long flags;
- struct kvm_arch *ka = NULL;
+ struct kvm_svm *kvm_svm;
struct kvm_vcpu *vcpu = NULL;
u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
@@ -1108,13 +1184,10 @@ static int avic_ga_log_notifier(u32 ga_tag)
pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
- hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
- struct kvm *kvm = container_of(ka, struct kvm, arch);
- struct kvm_arch *vm_data = &kvm->arch;
-
- if (vm_data->avic_vm_id != vm_id)
+ hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
+ if (kvm_svm->avic_vm_id != vm_id)
continue;
- vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+ vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
break;
}
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
@@ -1172,6 +1245,42 @@ err:
return rc;
}
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ int old = control->pause_filter_count;
+
+ control->pause_filter_count = __grow_ple_window(old,
+ pause_filter_count,
+ pause_filter_count_grow,
+ pause_filter_count_max);
+
+ if (control->pause_filter_count != old)
+ mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+
+ trace_kvm_ple_window_grow(vcpu->vcpu_id,
+ control->pause_filter_count, old);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ int old = control->pause_filter_count;
+
+ control->pause_filter_count =
+ __shrink_ple_window(old,
+ pause_filter_count,
+ pause_filter_count_shrink,
+ pause_filter_count);
+ if (control->pause_filter_count != old)
+ mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+
+ trace_kvm_ple_window_shrink(vcpu->vcpu_id,
+ control->pause_filter_count, old);
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1202,6 +1311,14 @@ static __init int svm_hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 32;
}
+ /* Check for pause filtering support */
+ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+ pause_filter_count = 0;
+ pause_filter_thresh = 0;
+ } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
+ pause_filter_thresh = 0;
+ }
+
if (nested) {
printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
@@ -1328,10 +1445,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
static void avic_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb;
- struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
- phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page));
- phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page));
+ phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
+ phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
@@ -1363,6 +1480,14 @@ static void init_vmcb(struct vcpu_svm *svm)
set_exception_intercept(svm, MC_VECTOR);
set_exception_intercept(svm, AC_VECTOR);
set_exception_intercept(svm, DB_VECTOR);
+ /*
+ * Guest access to VMware backdoor ports could legitimately
+ * trigger #GP because of TSS I/O permission bitmap.
+ * We intercept those #GP and allow access to them anyway
+ * as VMware does.
+ */
+ if (enable_vmware_backdoor)
+ set_exception_intercept(svm, GP_VECTOR);
set_intercept(svm, INTERCEPT_INTR);
set_intercept(svm, INTERCEPT_NMI);
@@ -1371,7 +1496,6 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_RDPMC);
set_intercept(svm, INTERCEPT_CPUID);
set_intercept(svm, INTERCEPT_INVD);
- set_intercept(svm, INTERCEPT_HLT);
set_intercept(svm, INTERCEPT_INVLPG);
set_intercept(svm, INTERCEPT_INVLPGA);
set_intercept(svm, INTERCEPT_IOIO_PROT);
@@ -1389,11 +1513,14 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_XSETBV);
set_intercept(svm, INTERCEPT_RSM);
- if (!kvm_mwait_in_guest()) {
+ if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
set_intercept(svm, INTERCEPT_MONITOR);
set_intercept(svm, INTERCEPT_MWAIT);
}
+ if (!kvm_hlt_in_guest(svm->vcpu.kvm))
+ set_intercept(svm, INTERCEPT_HLT);
+
control->iopm_base_pa = __sme_set(iopm_base);
control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
control->int_ctl = V_INTR_MASKING_MASK;
@@ -1449,9 +1576,13 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->nested.vmcb = 0;
svm->vcpu.arch.hflags = 0;
- if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
- control->pause_filter_count = 3000;
+ if (pause_filter_count) {
+ control->pause_filter_count = pause_filter_count;
+ if (pause_filter_thresh)
+ control->pause_filter_thresh = pause_filter_thresh;
set_intercept(svm, INTERCEPT_PAUSE);
+ } else {
+ clr_intercept(svm, INTERCEPT_PAUSE);
}
if (kvm_vcpu_apicv_active(&svm->vcpu))
@@ -1488,12 +1619,12 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
unsigned int index)
{
u64 *avic_physical_id_table;
- struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
return NULL;
- avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page);
+ avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
return &avic_physical_id_table[index];
}
@@ -1576,7 +1707,7 @@ static void __sev_asid_free(int asid)
static void sev_asid_free(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
__sev_asid_free(sev->asid);
}
@@ -1616,7 +1747,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned long ulen, unsigned long *n,
int write)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
unsigned long npages, npinned, size;
unsigned long locked, lock_limit;
struct page **pages;
@@ -1667,7 +1798,7 @@ err:
static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
unsigned long npages)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
release_pages(pages, npages);
kvfree(pages);
@@ -1705,9 +1836,20 @@ static void __unregister_enc_region_locked(struct kvm *kvm,
kfree(region);
}
+static struct kvm *svm_vm_alloc(void)
+{
+ struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);
+ return &kvm_svm->kvm;
+}
+
+static void svm_vm_free(struct kvm *kvm)
+{
+ kfree(to_kvm_svm(kvm));
+}
+
static void sev_vm_destroy(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct list_head *head = &sev->regions_list;
struct list_head *pos, *q;
@@ -1736,18 +1878,18 @@ static void sev_vm_destroy(struct kvm *kvm)
static void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
- struct kvm_arch *vm_data = &kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
if (!avic)
return;
- if (vm_data->avic_logical_id_table_page)
- __free_page(vm_data->avic_logical_id_table_page);
- if (vm_data->avic_physical_id_table_page)
- __free_page(vm_data->avic_physical_id_table_page);
+ if (kvm_svm->avic_logical_id_table_page)
+ __free_page(kvm_svm->avic_logical_id_table_page);
+ if (kvm_svm->avic_physical_id_table_page)
+ __free_page(kvm_svm->avic_physical_id_table_page);
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
- hash_del(&vm_data->hnode);
+ hash_del(&kvm_svm->hnode);
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
@@ -1761,10 +1903,10 @@ static int avic_vm_init(struct kvm *kvm)
{
unsigned long flags;
int err = -ENOMEM;
- struct kvm_arch *vm_data = &kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+ struct kvm_svm *k2;
struct page *p_page;
struct page *l_page;
- struct kvm_arch *ka;
u32 vm_id;
if (!avic)
@@ -1775,7 +1917,7 @@ static int avic_vm_init(struct kvm *kvm)
if (!p_page)
goto free_avic;
- vm_data->avic_physical_id_table_page = p_page;
+ kvm_svm->avic_physical_id_table_page = p_page;
clear_page(page_address(p_page));
/* Allocating logical APIC ID table (4KB) */
@@ -1783,7 +1925,7 @@ static int avic_vm_init(struct kvm *kvm)
if (!l_page)
goto free_avic;
- vm_data->avic_logical_id_table_page = l_page;
+ kvm_svm->avic_logical_id_table_page = l_page;
clear_page(page_address(l_page));
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
@@ -1795,15 +1937,13 @@ static int avic_vm_init(struct kvm *kvm)
}
/* Is it still in use? Only possible if wrapped at least once */
if (next_vm_id_wrapped) {
- hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
- struct kvm *k2 = container_of(ka, struct kvm, arch);
- struct kvm_arch *vd2 = &k2->arch;
- if (vd2->avic_vm_id == vm_id)
+ hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
+ if (k2->avic_vm_id == vm_id)
goto again;
}
}
- vm_data->avic_vm_id = vm_id;
- hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
+ kvm_svm->avic_vm_id = vm_id;
+ hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
return 0;
@@ -2535,14 +2675,7 @@ static int bp_interception(struct vcpu_svm *svm)
static int ud_interception(struct vcpu_svm *svm)
{
- int er;
-
- er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
- if (er == EMULATE_USER_EXIT)
- return 0;
- if (er != EMULATE_DONE)
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
- return 1;
+ return handle_ud(&svm->vcpu);
}
static int ac_interception(struct vcpu_svm *svm)
@@ -2551,6 +2684,23 @@ static int ac_interception(struct vcpu_svm *svm)
return 1;
}
+static int gp_interception(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ u32 error_code = svm->vmcb->control.exit_info_1;
+ int er;
+
+ WARN_ON_ONCE(!enable_vmware_backdoor);
+
+ er = emulate_instruction(vcpu,
+ EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
+ else if (er != EMULATE_DONE)
+ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+ return 1;
+}
+
static bool is_erratum_383(void)
{
int err, i;
@@ -2639,7 +2789,7 @@ static int io_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
- int size, in, string, ret;
+ int size, in, string;
unsigned port;
++svm->vcpu.stat.io_exits;
@@ -2651,16 +2801,8 @@ static int io_interception(struct vcpu_svm *svm)
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
svm->next_rip = svm->vmcb->control.exit_info_2;
- ret = kvm_skip_emulated_instruction(&svm->vcpu);
- /*
- * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
- * KVM_EXIT_DEBUG here.
- */
- if (in)
- return kvm_fast_pio_in(vcpu, size, port) && ret;
- else
- return kvm_fast_pio_out(vcpu, size, port) && ret;
+ return kvm_fast_pio(&svm->vcpu, size, port, in);
}
static int nmi_interception(struct vcpu_svm *svm)
@@ -4233,6 +4375,9 @@ static int pause_interception(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
bool in_kernel = (svm_get_cpl(vcpu) == 0);
+ if (pause_filter_thresh)
+ grow_ple_window(vcpu);
+
kvm_vcpu_on_spin(vcpu, in_kernel);
return 1;
}
@@ -4323,7 +4468,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
- struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
int index;
u32 *logical_apic_id_table;
int dlid = GET_APIC_LOGICAL_ID(ldr);
@@ -4345,7 +4490,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
index = (cluster << 2) + apic;
}
- logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
+ logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
return &logical_apic_id_table[index];
}
@@ -4425,7 +4570,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
u32 mod = (dfr >> 28) & 0xf;
@@ -4434,11 +4579,11 @@ static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
* If this changes, we need to flush the AVIC logical
* APID id table.
*/
- if (vm_data->ldr_mode == mod)
+ if (kvm_svm->ldr_mode == mod)
return 0;
- clear_page(page_address(vm_data->avic_logical_id_table_page));
- vm_data->ldr_mode = mod;
+ clear_page(page_address(kvm_svm->avic_logical_id_table_page));
+ kvm_svm->ldr_mode = mod;
if (svm->ldr_reg)
avic_handle_ldr_update(vcpu);
@@ -4558,6 +4703,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
[SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
+ [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
[SVM_EXIT_SMI] = nop_on_interception,
@@ -4606,6 +4752,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
+ pr_err("%-20s%d\n", "pause filter threshold:",
+ control->pause_filter_thresh);
pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
@@ -5073,7 +5221,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
/* Try to enable guest_mode in IRTE */
pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
AVIC_HPA_MASK);
- pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
+ pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
svm->vcpu.vcpu_id);
pi.is_guest_mode = true;
pi.vcpu_data = &vcpu_info;
@@ -5237,6 +5385,11 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
+static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+{
+ return 0;
+}
+
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5538,14 +5691,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
- kvm_before_handle_nmi(&svm->vcpu);
+ kvm_before_interrupt(&svm->vcpu);
stgi();
/* Any pending NMI will happen here */
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
- kvm_after_handle_nmi(&svm->vcpu);
+ kvm_after_interrupt(&svm->vcpu);
sync_cr8_to_lapic(vcpu);
@@ -5921,6 +6074,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
+ if (pause_filter_thresh)
+ shrink_ple_window(vcpu);
}
static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
@@ -6037,7 +6192,7 @@ static int sev_asid_new(void)
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
int asid, ret;
ret = -EBUSY;
@@ -6102,14 +6257,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error)
static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return __sev_issue_cmd(sev->fd, id, data, error);
}
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_start *start;
struct kvm_sev_launch_start params;
void *dh_blob, *session_blob;
@@ -6207,7 +6362,7 @@ static int get_num_contig_pages(int idx, struct page **inpages,
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data *data;
struct page **inpages;
@@ -6283,7 +6438,7 @@ e_free:
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *measure = (void __user *)(uintptr_t)argp->data;
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_measure *data;
struct kvm_sev_launch_measure params;
void __user *p = NULL;
@@ -6351,7 +6506,7 @@ e_free:
static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_finish *data;
int ret;
@@ -6371,7 +6526,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_guest_status params;
struct sev_data_guest_status *data;
int ret;
@@ -6403,7 +6558,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
unsigned long dst, int size,
int *error, bool enc)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_dbg *data;
int ret;
@@ -6635,7 +6790,7 @@ err:
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_secret *data;
struct kvm_sev_launch_secret params;
struct page **pages;
@@ -6759,7 +6914,7 @@ out:
static int svm_register_enc_region(struct kvm *kvm,
struct kvm_enc_region *range)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct enc_region *region;
int ret = 0;
@@ -6801,7 +6956,7 @@ e_free:
static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct list_head *head = &sev->regions_list;
struct enc_region *i;
@@ -6859,6 +7014,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_free = svm_free_vcpu,
.vcpu_reset = svm_vcpu_reset,
+ .vm_alloc = svm_vm_alloc,
+ .vm_free = svm_vm_free,
.vm_init = avic_vm_init,
.vm_destroy = svm_vm_destroy,
@@ -6925,6 +7082,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,
+ .set_identity_map_addr = svm_set_identity_map_addr,
.get_tdp_level = get_npt_level,
.get_mt_mask = svm_get_mt_mask,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 92496b9..aafcc98 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -52,9 +52,11 @@
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
+#include <asm/mshyperv.h>
#include "trace.h"
#include "pmu.h"
+#include "vmx_evmcs.h"
#define __ex(x) __kvm_handle_fault_on_reboot(x)
#define __ex_clear(x, reg) \
@@ -130,13 +132,15 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
-#define KVM_VM_CR0_ALWAYS_ON \
- (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
+#define KVM_VM_CR0_ALWAYS_ON \
+ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
+ X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
+#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -165,34 +169,33 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
* Time is measured based on a counter that runs at the same rate as the TSC,
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
-#define KVM_VMX_DEFAULT_PLE_GAP 128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
-#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
-#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
-#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
- INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
-static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
-module_param(ple_gap, int, S_IRUGO);
-
-static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
-module_param(ple_window, int, S_IRUGO);
+static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
+module_param(ple_window, uint, 0444);
/* Default doubles per-vcpu window every exit. */
-static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
-module_param(ple_window_grow, int, S_IRUGO);
+static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, uint, 0444);
/* Default resets per-vcpu window every exit to ple_window. */
-static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
-module_param(ple_window_shrink, int, S_IRUGO);
+static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, uint, 0444);
/* Default is to compute the maximum so we can never overflow. */
-static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
-static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
-module_param(ple_window_max, int, S_IRUGO);
+static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, uint, 0444);
extern const ulong vmx_return;
+struct kvm_vmx {
+ struct kvm kvm;
+
+ unsigned int tss_addr;
+ bool ept_identity_pagetable_done;
+ gpa_t ept_identity_map_addr;
+};
+
#define NR_AUTOLOAD_MSRS 8
struct vmcs {
@@ -424,6 +427,35 @@ struct __packed vmcs12 {
*/
#define VMCS12_MAX_FIELD_INDEX 0x17
+struct nested_vmx_msrs {
+ /*
+ * We only store the "true" versions of the VMX capability MSRs. We
+ * generate the "non-true" versions by setting the must-be-1 bits
+ * according to the SDM.
+ */
+ u32 procbased_ctls_low;
+ u32 procbased_ctls_high;
+ u32 secondary_ctls_low;
+ u32 secondary_ctls_high;
+ u32 pinbased_ctls_low;
+ u32 pinbased_ctls_high;
+ u32 exit_ctls_low;
+ u32 exit_ctls_high;
+ u32 entry_ctls_low;
+ u32 entry_ctls_high;
+ u32 misc_low;
+ u32 misc_high;
+ u32 ept_caps;
+ u32 vpid_caps;
+ u64 basic;
+ u64 cr0_fixed0;
+ u64 cr0_fixed1;
+ u64 cr4_fixed0;
+ u64 cr4_fixed1;
+ u64 vmcs_enum;
+ u64 vmfunc_controls;
+};
+
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -475,32 +507,7 @@ struct nested_vmx {
u16 vpid02;
u16 last_vpid;
- /*
- * We only store the "true" versions of the VMX capability MSRs. We
- * generate the "non-true" versions by setting the must-be-1 bits
- * according to the SDM.
- */
- u32 nested_vmx_procbased_ctls_low;
- u32 nested_vmx_procbased_ctls_high;
- u32 nested_vmx_secondary_ctls_low;
- u32 nested_vmx_secondary_ctls_high;
- u32 nested_vmx_pinbased_ctls_low;
- u32 nested_vmx_pinbased_ctls_high;
- u32 nested_vmx_exit_ctls_low;
- u32 nested_vmx_exit_ctls_high;
- u32 nested_vmx_entry_ctls_low;
- u32 nested_vmx_entry_ctls_high;
- u32 nested_vmx_misc_low;
- u32 nested_vmx_misc_high;
- u32 nested_vmx_ept_caps;
- u32 nested_vmx_vpid_caps;
- u64 nested_vmx_basic;
- u64 nested_vmx_cr0_fixed0;
- u64 nested_vmx_cr0_fixed1;
- u64 nested_vmx_cr4_fixed0;
- u64 nested_vmx_cr4_fixed1;
- u64 nested_vmx_vmcs_enum;
- u64 nested_vmx_vmfunc_controls;
+ struct nested_vmx_msrs msrs;
/* SMM related state */
struct {
@@ -691,6 +698,11 @@ enum segment_cache_field {
SEG_FIELD_NR = 4
};
+static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+{
+ return container_of(kvm, struct kvm_vmx, kvm);
+}
+
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_vmx, vcpu);
@@ -953,6 +965,7 @@ static struct vmcs_config {
u32 cpu_based_2nd_exec_ctrl;
u32 vmexit_ctrl;
u32 vmentry_ctrl;
+ struct nested_vmx_msrs nested;
} vmcs_config;
static struct vmx_capability {
@@ -999,6 +1012,169 @@ static const u32 vmx_msr_index[] = {
MSR_EFER, MSR_TSC_AUX, MSR_STAR,
};
+DEFINE_STATIC_KEY_FALSE(enable_evmcs);
+
+#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
+
+#define KVM_EVMCS_VERSION 1
+
+#if IS_ENABLED(CONFIG_HYPERV)
+static bool __read_mostly enlightened_vmcs = true;
+module_param(enlightened_vmcs, bool, 0444);
+
+static inline void evmcs_write64(unsigned long field, u64 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u64 *)((char *)current_evmcs + offset) = value;
+
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline void evmcs_write32(unsigned long field, u32 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u32 *)((char *)current_evmcs + offset) = value;
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline void evmcs_write16(unsigned long field, u16 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u16 *)((char *)current_evmcs + offset) = value;
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline u64 evmcs_read64(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u64 *)((char *)current_evmcs + offset);
+}
+
+static inline u32 evmcs_read32(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u32 *)((char *)current_evmcs + offset);
+}
+
+static inline u16 evmcs_read16(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u16 *)((char *)current_evmcs + offset);
+}
+
+static void evmcs_load(u64 phys_addr)
+{
+ struct hv_vp_assist_page *vp_ap =
+ hv_get_vp_assist_page(smp_processor_id());
+
+ vp_ap->current_nested_vmcs = phys_addr;
+ vp_ap->enlighten_vmentry = 1;
+}
+
+static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
+{
+ /*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ * POSTED_INTR_NV = 0x00000002,
+ * GUEST_INTR_STATUS = 0x00000810,
+ * APIC_ACCESS_ADDR = 0x00002014,
+ * POSTED_INTR_DESC_ADDR = 0x00002016,
+ * EOI_EXIT_BITMAP0 = 0x0000201c,
+ * EOI_EXIT_BITMAP1 = 0x0000201e,
+ * EOI_EXIT_BITMAP2 = 0x00002020,
+ * EOI_EXIT_BITMAP3 = 0x00002022,
+ */
+ vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+ vmcs_conf->cpu_based_2nd_exec_ctrl &=
+ ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+ vmcs_conf->cpu_based_2nd_exec_ctrl &=
+ ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmcs_conf->cpu_based_2nd_exec_ctrl &=
+ ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
+
+ /*
+ * GUEST_PML_INDEX = 0x00000812,
+ * PML_ADDRESS = 0x0000200e,
+ */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
+
+ /* VM_FUNCTION_CONTROL = 0x00002018, */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
+
+ /*
+ * EPTP_LIST_ADDRESS = 0x00002024,
+ * VMREAD_BITMAP = 0x00002026,
+ * VMWRITE_BITMAP = 0x00002028,
+ */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
+
+ /*
+ * TSC_MULTIPLIER = 0x00002032,
+ */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
+
+ /*
+ * PLE_GAP = 0x00004020,
+ * PLE_WINDOW = 0x00004022,
+ */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+
+ /*
+ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ */
+ vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
+ /*
+ * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ */
+ vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+ vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+
+ /*
+ * Currently unsupported in KVM:
+ * GUEST_IA32_RTIT_CTL = 0x00002814,
+ */
+}
+#else /* !IS_ENABLED(CONFIG_HYPERV) */
+static inline void evmcs_write64(unsigned long field, u64 value) {}
+static inline void evmcs_write32(unsigned long field, u32 value) {}
+static inline void evmcs_write16(unsigned long field, u16 value) {}
+static inline u64 evmcs_read64(unsigned long field) { return 0; }
+static inline u32 evmcs_read32(unsigned long field) { return 0; }
+static inline u16 evmcs_read16(unsigned long field) { return 0; }
+static inline void evmcs_load(u64 phys_addr) {}
+static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
+#endif /* IS_ENABLED(CONFIG_HYPERV) */
+
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -1031,6 +1207,11 @@ static inline bool is_invalid_opcode(u32 intr_info)
return is_exception_n(intr_info, UD_VECTOR);
}
+static inline bool is_gp_fault(u32 intr_info)
+{
+ return is_exception_n(intr_info, GP_VECTOR);
+}
+
static inline bool is_external_interrupt(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1320,7 +1501,7 @@ static inline bool report_flexpriority(void)
static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
{
- return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
+ return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
}
static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
@@ -1341,6 +1522,16 @@ static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
PIN_BASED_VMX_PREEMPTION_TIMER;
}
+static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
+{
+ return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
+}
+
+static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
+{
+ return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1479,6 +1670,9 @@ static void vmcs_load(struct vmcs *vmcs)
u64 phys_addr = __pa(vmcs);
u8 error;
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_load(phys_addr);
+
asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc", "memory");
@@ -1652,18 +1846,24 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
static __always_inline u16 vmcs_read16(unsigned long field)
{
vmcs_check16(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_read16(field);
return __vmcs_readl(field);
}
static __always_inline u32 vmcs_read32(unsigned long field)
{
vmcs_check32(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_read32(field);
return __vmcs_readl(field);
}
static __always_inline u64 vmcs_read64(unsigned long field)
{
vmcs_check64(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_read64(field);
#ifdef CONFIG_X86_64
return __vmcs_readl(field);
#else
@@ -1674,6 +1874,8 @@ static __always_inline u64 vmcs_read64(unsigned long field)
static __always_inline unsigned long vmcs_readl(unsigned long field)
{
vmcs_checkl(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_read64(field);
return __vmcs_readl(field);
}
@@ -1697,18 +1899,27 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
static __always_inline void vmcs_write16(unsigned long field, u16 value)
{
vmcs_check16(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write16(field, value);
+
__vmcs_writel(field, value);
}
static __always_inline void vmcs_write32(unsigned long field, u32 value)
{
vmcs_check32(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write32(field, value);
+
__vmcs_writel(field, value);
}
static __always_inline void vmcs_write64(unsigned long field, u64 value)
{
vmcs_check64(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write64(field, value);
+
__vmcs_writel(field, value);
#ifndef CONFIG_X86_64
asm volatile ("");
@@ -1719,6 +1930,9 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
{
vmcs_checkl(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write64(field, value);
+
__vmcs_writel(field, value);
}
@@ -1726,6 +1940,9 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_clear_bits does not support 64-bit fields");
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write32(field, evmcs_read32(field) & ~mask);
+
__vmcs_writel(field, __vmcs_readl(field) & ~mask);
}
@@ -1733,6 +1950,9 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_set_bits does not support 64-bit fields");
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write32(field, evmcs_read32(field) | mask);
+
__vmcs_writel(field, __vmcs_readl(field) | mask);
}
@@ -1864,6 +2084,14 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
+ /*
+ * Guest access to VMware backdoor ports could legitimately
+ * trigger #GP because of TSS I/O permission bitmap.
+ * We intercept those #GP and allow access to them anyway
+ * as VMware does.
+ */
+ if (enable_vmware_backdoor)
+ eb |= (1u << GP_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -2129,6 +2357,9 @@ static unsigned long segment_base(u16 selector)
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+#ifdef CONFIG_X86_64
+ int cpu = raw_smp_processor_id();
+#endif
int i;
if (vmx->host_state.loaded)
@@ -2141,7 +2372,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
*/
vmx->host_state.ldt_sel = kvm_read_ldt();
vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
+
+#ifdef CONFIG_X86_64
+ save_fsgs_for_kvm();
+ vmx->host_state.fs_sel = current->thread.fsindex;
+ vmx->host_state.gs_sel = current->thread.gsindex;
+#else
savesegment(fs, vmx->host_state.fs_sel);
+ savesegment(gs, vmx->host_state.gs_sel);
+#endif
if (!(vmx->host_state.fs_sel & 7)) {
vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
vmx->host_state.fs_reload_needed = 0;
@@ -2149,7 +2388,6 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
vmcs_write16(HOST_FS_SELECTOR, 0);
vmx->host_state.fs_reload_needed = 1;
}
- savesegment(gs, vmx->host_state.gs_sel);
if (!(vmx->host_state.gs_sel & 7))
vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
else {
@@ -2160,20 +2398,16 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
savesegment(ds, vmx->host_state.ds_sel);
savesegment(es, vmx->host_state.es_sel);
-#endif
-#ifdef CONFIG_X86_64
- vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
- vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
-#else
- vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
- vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
-#endif
+ vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
+ vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
-#ifdef CONFIG_X86_64
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ vmx->msr_host_kernel_gs_base = current->thread.gsbase;
if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#else
+ vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
+ vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
#endif
if (boot_cpu_has(X86_FEATURE_MPX))
rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
@@ -2532,6 +2766,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
return 0;
}
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Ensure that we clear the HLT state in the VMCS. We don't need to
+ * explicitly skip the instruction because if the HLT state is set,
+ * then the instruction is already executing and RIP has already been
+ * advanced.
+ */
+ if (kvm_hlt_in_guest(vcpu->kvm) &&
+ vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+ vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
static void vmx_queue_exception(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2554,6 +2801,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
return;
}
+ WARN_ON_ONCE(vmx->emulation_required);
+
if (kvm_exception_is_soft(nr)) {
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmx->vcpu.arch.event_exit_inst_len);
@@ -2562,6 +2811,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
intr_info |= INTR_TYPE_HARD_EXCEPTION;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+
+ vmx_clear_hlt(vcpu);
}
static bool vmx_rdtscp_supported(void)
@@ -2689,8 +2940,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
+static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
{
+ if (!nested) {
+ memset(msrs, 0, sizeof(*msrs));
+ return;
+ }
+
/*
* Note that as a general rule, the high half of the MSRs (bits in
* the control fields which may be 1) should be initialized by the
@@ -2708,70 +2964,68 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
/* pin-based controls */
rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
- vmx->nested.nested_vmx_pinbased_ctls_low,
- vmx->nested.nested_vmx_pinbased_ctls_high);
- vmx->nested.nested_vmx_pinbased_ctls_low |=
+ msrs->pinbased_ctls_low,
+ msrs->pinbased_ctls_high);
+ msrs->pinbased_ctls_low |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
- vmx->nested.nested_vmx_pinbased_ctls_high &=
+ msrs->pinbased_ctls_high &=
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
- PIN_BASED_VIRTUAL_NMIS;
- vmx->nested.nested_vmx_pinbased_ctls_high |=
+ PIN_BASED_VIRTUAL_NMIS |
+ (apicv ? PIN_BASED_POSTED_INTR : 0);
+ msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
- if (kvm_vcpu_apicv_active(&vmx->vcpu))
- vmx->nested.nested_vmx_pinbased_ctls_high |=
- PIN_BASED_POSTED_INTR;
/* exit controls */
rdmsr(MSR_IA32_VMX_EXIT_CTLS,
- vmx->nested.nested_vmx_exit_ctls_low,
- vmx->nested.nested_vmx_exit_ctls_high);
- vmx->nested.nested_vmx_exit_ctls_low =
+ msrs->exit_ctls_low,
+ msrs->exit_ctls_high);
+ msrs->exit_ctls_low =
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
- vmx->nested.nested_vmx_exit_ctls_high &=
+ msrs->exit_ctls_high &=
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
- vmx->nested.nested_vmx_exit_ctls_high |=
+ msrs->exit_ctls_high |=
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
if (kvm_mpx_supported())
- vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+ msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* We support free control of debug control saving. */
- vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
- vmx->nested.nested_vmx_entry_ctls_low,
- vmx->nested.nested_vmx_entry_ctls_high);
- vmx->nested.nested_vmx_entry_ctls_low =
+ msrs->entry_ctls_low,
+ msrs->entry_ctls_high);
+ msrs->entry_ctls_low =
VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
- vmx->nested.nested_vmx_entry_ctls_high &=
+ msrs->entry_ctls_high &=
#ifdef CONFIG_X86_64
VM_ENTRY_IA32E_MODE |
#endif
VM_ENTRY_LOAD_IA32_PAT;
- vmx->nested.nested_vmx_entry_ctls_high |=
+ msrs->entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
if (kvm_mpx_supported())
- vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+ msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* We support free control of debug control loading. */
- vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
- vmx->nested.nested_vmx_procbased_ctls_low,
- vmx->nested.nested_vmx_procbased_ctls_high);
- vmx->nested.nested_vmx_procbased_ctls_low =
+ msrs->procbased_ctls_low,
+ msrs->procbased_ctls_high);
+ msrs->procbased_ctls_low =
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
- vmx->nested.nested_vmx_procbased_ctls_high &=
+ msrs->procbased_ctls_high &=
CPU_BASED_VIRTUAL_INTR_PENDING |
CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
@@ -2791,12 +3045,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* can use it to avoid exits to L1 - even when L0 runs L2
* without MSR bitmaps.
*/
- vmx->nested.nested_vmx_procbased_ctls_high |=
+ msrs->procbased_ctls_high |=
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
CPU_BASED_USE_MSR_BITMAPS;
/* We support free control of CR3 access interception. */
- vmx->nested.nested_vmx_procbased_ctls_low &=
+ msrs->procbased_ctls_low &=
~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
/*
@@ -2804,10 +3058,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* depend on CPUID bits, they are added later by vmx_cpuid_update.
*/
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
- vmx->nested.nested_vmx_secondary_ctls_low,
- vmx->nested.nested_vmx_secondary_ctls_high);
- vmx->nested.nested_vmx_secondary_ctls_low = 0;
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ msrs->secondary_ctls_low,
+ msrs->secondary_ctls_high);
+ msrs->secondary_ctls_low = 0;
+ msrs->secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
@@ -2817,33 +3071,33 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT;
- vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
+ msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
if (cpu_has_vmx_ept_execute_only())
- vmx->nested.nested_vmx_ept_caps |=
+ msrs->ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT;
- vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+ msrs->ept_caps &= vmx_capability.ept;
+ msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
VMX_EPT_1GB_PAGE_BIT;
if (enable_ept_ad_bits) {
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_PML;
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+ msrs->ept_caps |= VMX_EPT_AD_BIT;
}
}
if (cpu_has_vmx_vmfunc()) {
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_VMFUNC;
/*
* Advertise EPTP switching unconditionally
* since we emulate it
*/
if (enable_ept)
- vmx->nested.nested_vmx_vmfunc_controls =
+ msrs->vmfunc_controls =
VMX_VMFUNC_EPTP_SWITCHING;
}
@@ -2854,25 +3108,25 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* not failing the single-context invvpid, and it is worse.
*/
if (enable_vpid) {
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_VPID;
- vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+ msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
VMX_VPID_EXTENT_SUPPORTED_MASK;
}
if (enable_unrestricted_guest)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_UNRESTRICTED_GUEST;
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC,
- vmx->nested.nested_vmx_misc_low,
- vmx->nested.nested_vmx_misc_high);
- vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
- vmx->nested.nested_vmx_misc_low |=
+ msrs->misc_low,
+ msrs->misc_high);
+ msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
+ msrs->misc_low |=
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
VMX_MISC_ACTIVITY_HLT;
- vmx->nested.nested_vmx_misc_high = 0;
+ msrs->misc_high = 0;
/*
* This MSR reports some information about VMX support. We
@@ -2880,14 +3134,14 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* guest, and the VMCS structure we give it - not about the
* VMX support of the underlying hardware.
*/
- vmx->nested.nested_vmx_basic =
+ msrs->basic =
VMCS12_REVISION |
VMX_BASIC_TRUE_CTLS |
((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
if (cpu_has_vmx_basic_inout())
- vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
+ msrs->basic |= VMX_BASIC_INOUT;
/*
* These MSRs specify bits which the guest must keep fixed on
@@ -2896,15 +3150,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
*/
#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
- vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
- vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
+ msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
+ msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
/* These MSRs specify bits which the guest must keep fixed off. */
- rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
- rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
+ rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
/* highest index: VMX_PREEMPTION_TIMER_VALUE */
- vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
+ msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
}
/*
@@ -2941,7 +3195,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
/* reserved */
BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
- u64 vmx_basic = vmx->nested.nested_vmx_basic;
+ u64 vmx_basic = vmx->nested.msrs.basic;
if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
return -EINVAL;
@@ -2960,7 +3214,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
return -EINVAL;
- vmx->nested.nested_vmx_basic = data;
+ vmx->nested.msrs.basic = data;
return 0;
}
@@ -2972,24 +3226,24 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
switch (msr_index) {
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
- lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
- highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
+ lowp = &vmx->nested.msrs.pinbased_ctls_low;
+ highp = &vmx->nested.msrs.pinbased_ctls_high;
break;
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
- lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
- highp = &vmx->nested.nested_vmx_procbased_ctls_high;
+ lowp = &vmx->nested.msrs.procbased_ctls_low;
+ highp = &vmx->nested.msrs.procbased_ctls_high;
break;
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
- lowp = &vmx->nested.nested_vmx_exit_ctls_low;
- highp = &vmx->nested.nested_vmx_exit_ctls_high;
+ lowp = &vmx->nested.msrs.exit_ctls_low;
+ highp = &vmx->nested.msrs.exit_ctls_high;
break;
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
- lowp = &vmx->nested.nested_vmx_entry_ctls_low;
- highp = &vmx->nested.nested_vmx_entry_ctls_high;
+ lowp = &vmx->nested.msrs.entry_ctls_low;
+ highp = &vmx->nested.msrs.entry_ctls_high;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
- lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
- highp = &vmx->nested.nested_vmx_secondary_ctls_high;
+ lowp = &vmx->nested.msrs.secondary_ctls_low;
+ highp = &vmx->nested.msrs.secondary_ctls_high;
break;
default:
BUG();
@@ -3020,13 +3274,13 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
GENMASK_ULL(13, 9) | BIT_ULL(31);
u64 vmx_misc;
- vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low,
- vmx->nested.nested_vmx_misc_high);
+ vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+ vmx->nested.msrs.misc_high);
if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
return -EINVAL;
- if ((vmx->nested.nested_vmx_pinbased_ctls_high &
+ if ((vmx->nested.msrs.pinbased_ctls_high &
PIN_BASED_VMX_PREEMPTION_TIMER) &&
vmx_misc_preemption_timer_rate(data) !=
vmx_misc_preemption_timer_rate(vmx_misc))
@@ -3041,8 +3295,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
return -EINVAL;
- vmx->nested.nested_vmx_misc_low = data;
- vmx->nested.nested_vmx_misc_high = data >> 32;
+ vmx->nested.msrs.misc_low = data;
+ vmx->nested.msrs.misc_high = data >> 32;
return 0;
}
@@ -3050,15 +3304,15 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
{
u64 vmx_ept_vpid_cap;
- vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
- vmx->nested.nested_vmx_vpid_caps);
+ vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
+ vmx->nested.msrs.vpid_caps);
/* Every bit is either reserved or a feature bit. */
if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
return -EINVAL;
- vmx->nested.nested_vmx_ept_caps = data;
- vmx->nested.nested_vmx_vpid_caps = data >> 32;
+ vmx->nested.msrs.ept_caps = data;
+ vmx->nested.msrs.vpid_caps = data >> 32;
return 0;
}
@@ -3068,10 +3322,10 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
switch (msr_index) {
case MSR_IA32_VMX_CR0_FIXED0:
- msr = &vmx->nested.nested_vmx_cr0_fixed0;
+ msr = &vmx->nested.msrs.cr0_fixed0;
break;
case MSR_IA32_VMX_CR4_FIXED0:
- msr = &vmx->nested.nested_vmx_cr4_fixed0;
+ msr = &vmx->nested.msrs.cr4_fixed0;
break;
default:
BUG();
@@ -3135,7 +3389,7 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_IA32_VMX_EPT_VPID_CAP:
return vmx_restore_vmx_ept_vpid_cap(vmx, data);
case MSR_IA32_VMX_VMCS_ENUM:
- vmx->nested.nested_vmx_vmcs_enum = data;
+ vmx->nested.msrs.vmcs_enum = data;
return 0;
default:
/*
@@ -3146,77 +3400,75 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
}
/* Returns 0 on success, non-0 otherwise. */
-static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
switch (msr_index) {
case MSR_IA32_VMX_BASIC:
- *pdata = vmx->nested.nested_vmx_basic;
+ *pdata = msrs->basic;
break;
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_pinbased_ctls_low,
- vmx->nested.nested_vmx_pinbased_ctls_high);
+ msrs->pinbased_ctls_low,
+ msrs->pinbased_ctls_high);
if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
case MSR_IA32_VMX_PROCBASED_CTLS:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_procbased_ctls_low,
- vmx->nested.nested_vmx_procbased_ctls_high);
+ msrs->procbased_ctls_low,
+ msrs->procbased_ctls_high);
if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
case MSR_IA32_VMX_EXIT_CTLS:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_exit_ctls_low,
- vmx->nested.nested_vmx_exit_ctls_high);
+ msrs->exit_ctls_low,
+ msrs->exit_ctls_high);
if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
case MSR_IA32_VMX_ENTRY_CTLS:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_entry_ctls_low,
- vmx->nested.nested_vmx_entry_ctls_high);
+ msrs->entry_ctls_low,
+ msrs->entry_ctls_high);
if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_MISC:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_misc_low,
- vmx->nested.nested_vmx_misc_high);
+ msrs->misc_low,
+ msrs->misc_high);
break;
case MSR_IA32_VMX_CR0_FIXED0:
- *pdata = vmx->nested.nested_vmx_cr0_fixed0;
+ *pdata = msrs->cr0_fixed0;
break;
case MSR_IA32_VMX_CR0_FIXED1:
- *pdata = vmx->nested.nested_vmx_cr0_fixed1;
+ *pdata = msrs->cr0_fixed1;
break;
case MSR_IA32_VMX_CR4_FIXED0:
- *pdata = vmx->nested.nested_vmx_cr4_fixed0;
+ *pdata = msrs->cr4_fixed0;
break;
case MSR_IA32_VMX_CR4_FIXED1:
- *pdata = vmx->nested.nested_vmx_cr4_fixed1;
+ *pdata = msrs->cr4_fixed1;
break;
case MSR_IA32_VMX_VMCS_ENUM:
- *pdata = vmx->nested.nested_vmx_vmcs_enum;
+ *pdata = msrs->vmcs_enum;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_secondary_ctls_low,
- vmx->nested.nested_vmx_secondary_ctls_high);
+ msrs->secondary_ctls_low,
+ msrs->secondary_ctls_high);
break;
case MSR_IA32_VMX_EPT_VPID_CAP:
- *pdata = vmx->nested.nested_vmx_ept_caps |
- ((u64)vmx->nested.nested_vmx_vpid_caps << 32);
+ *pdata = msrs->ept_caps |
+ ((u64)msrs->vpid_caps << 32);
break;
case MSR_IA32_VMX_VMFUNC:
- *pdata = vmx->nested.nested_vmx_vmfunc_controls;
+ *pdata = msrs->vmfunc_controls;
break;
default:
return 1;
@@ -3235,7 +3487,16 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
{
- return 1;
+ switch (msr->index) {
+ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ if (!nested)
+ return 1;
+ return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
+ default:
+ return 1;
+ }
+
+ return 0;
}
/*
@@ -3309,7 +3570,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
return 1;
- return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
+ return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
+ &msr_info->data);
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
return 1;
@@ -3602,6 +3864,14 @@ static int hardware_enable(void)
if (cr4_read_shadow() & X86_CR4_VMXE)
return -EBUSY;
+ /*
+ * This can happen if we hot-added a CPU but failed to allocate
+ * VP assist page for it.
+ */
+ if (static_branch_unlikely(&enable_evmcs) &&
+ !hv_get_vp_assist_page(cpu))
+ return -EFAULT;
+
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
@@ -3700,6 +3970,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
u32 _vmexit_control = 0;
u32 _vmentry_control = 0;
+ memset(vmcs_conf, 0, sizeof(*vmcs_conf));
min = CPU_BASED_HLT_EXITING |
#ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING |
@@ -3710,13 +3981,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
+ CPU_BASED_MWAIT_EXITING |
+ CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING |
CPU_BASED_RDPMC_EXITING;
- if (!kvm_mwait_in_guest())
- min |= CPU_BASED_MWAIT_EXITING |
- CPU_BASED_MONITOR_EXITING;
-
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -3835,7 +4104,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmcs_conf->size = vmx_msr_high & 0x1fff;
vmcs_conf->order = get_order(vmcs_conf->size);
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
- vmcs_conf->revision_id = vmx_msr_low;
+
+ /* KVM supports Enlightened VMCS v1 only */
+ if (static_branch_unlikely(&enable_evmcs))
+ vmcs_conf->revision_id = KVM_EVMCS_VERSION;
+ else
+ vmcs_conf->revision_id = vmx_msr_low;
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
@@ -3843,6 +4117,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmcs_conf->vmexit_ctrl = _vmexit_control;
vmcs_conf->vmentry_ctrl = _vmentry_control;
+ if (static_branch_unlikely(&enable_evmcs))
+ evmcs_sanitize_exec_ctrls(vmcs_conf);
+
cpu_has_load_ia32_efer =
allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
VM_ENTRY_LOAD_IA32_EFER)
@@ -4162,6 +4439,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
@@ -4177,13 +4455,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
* Very old userspace does not call KVM_SET_TSS_ADDR before entering
* vcpu. Warn the user that an update is overdue.
*/
- if (!vcpu->kvm->arch.tss_addr)
+ if (!kvm_vmx->tss_addr)
printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
"called before entering vcpu\n");
vmx_segment_cache_clear(vmx);
- vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
+ vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
@@ -4291,7 +4569,7 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
{
- if (enable_ept && is_paging(vcpu))
+ if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
}
@@ -4339,11 +4617,11 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
- u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
- u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
+ if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_UNRESTRICTED_GUEST &&
nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
@@ -4353,16 +4631,16 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
- u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
- u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
return fixed_bits_valid(val, fixed0, fixed1);
}
static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
- u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0;
- u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1;
+ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
return fixed_bits_valid(val, fixed0, fixed1);
}
@@ -4428,7 +4706,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif
- if (enable_ept)
+ if (enable_ept && !enable_unrestricted_guest)
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -4469,10 +4747,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(vcpu, cr3);
vmcs_write64(EPT_POINTER, eptp);
- if (is_paging(vcpu) || is_guest_mode(vcpu))
+ if (enable_unrestricted_guest || is_paging(vcpu) ||
+ is_guest_mode(vcpu))
guest_cr3 = kvm_read_cr3(vcpu);
else
- guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
+ guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr;
ept_load_pdptrs(vcpu);
}
@@ -4487,11 +4766,15 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* is in force while we are in guest mode. Do not let guests control
* this bit, even if host CR4.MCE == 0.
*/
- unsigned long hw_cr4 =
- (cr4_read_shadow() & X86_CR4_MCE) |
- (cr4 & ~X86_CR4_MCE) |
- (to_vmx(vcpu)->rmode.vm86_active ?
- KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+ unsigned long hw_cr4;
+
+ hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
+ if (enable_unrestricted_guest)
+ hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
+ else if (to_vmx(vcpu)->rmode.vm86_active)
+ hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
+ else
+ hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
@@ -4517,16 +4800,17 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
vcpu->arch.cr4 = cr4;
- if (enable_ept) {
- if (!is_paging(vcpu)) {
- hw_cr4 &= ~X86_CR4_PAE;
- hw_cr4 |= X86_CR4_PSE;
- } else if (!(cr4 & X86_CR4_PAE)) {
- hw_cr4 &= ~X86_CR4_PAE;
+
+ if (!enable_unrestricted_guest) {
+ if (enable_ept) {
+ if (!is_paging(vcpu)) {
+ hw_cr4 &= ~X86_CR4_PAE;
+ hw_cr4 |= X86_CR4_PSE;
+ } else if (!(cr4 & X86_CR4_PAE)) {
+ hw_cr4 &= ~X86_CR4_PAE;
+ }
}
- }
- if (!enable_unrestricted_guest && !is_paging(vcpu))
/*
* SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
* hardware. To emulate this behavior, SMEP/SMAP/PKU needs
@@ -4538,7 +4822,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* If enable_unrestricted_guest, the CPU automatically
* disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
*/
- hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+ if (!is_paging(vcpu))
+ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+ }
vmcs_writel(CR4_READ_SHADOW, cr4);
vmcs_writel(GUEST_CR4, hw_cr4);
@@ -4906,7 +5192,7 @@ static int init_rmode_tss(struct kvm *kvm)
int idx, r;
idx = srcu_read_lock(&kvm->srcu);
- fn = kvm->arch.tss_addr >> PAGE_SHIFT;
+ fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -4932,22 +5218,23 @@ out:
static int init_rmode_identity_map(struct kvm *kvm)
{
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
int i, idx, r = 0;
kvm_pfn_t identity_map_pfn;
u32 tmp;
- /* Protect kvm->arch.ept_identity_pagetable_done. */
+ /* Protect kvm_vmx->ept_identity_pagetable_done. */
mutex_lock(&kvm->slots_lock);
- if (likely(kvm->arch.ept_identity_pagetable_done))
+ if (likely(kvm_vmx->ept_identity_pagetable_done))
goto out2;
- if (!kvm->arch.ept_identity_map_addr)
- kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
- identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+ if (!kvm_vmx->ept_identity_map_addr)
+ kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+ identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
- kvm->arch.ept_identity_map_addr, PAGE_SIZE);
+ kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
if (r < 0)
goto out2;
@@ -4964,7 +5251,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
if (r < 0)
goto out;
}
- kvm->arch.ept_identity_pagetable_done = true;
+ kvm_vmx->ept_identity_pagetable_done = true;
out:
srcu_read_unlock(&kvm->srcu, idx);
@@ -5500,6 +5787,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
exec_control |= CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_INVLPG_EXITING;
+ if (kvm_mwait_in_guest(vmx->vcpu.kvm))
+ exec_control &= ~(CPU_BASED_MWAIT_EXITING |
+ CPU_BASED_MONITOR_EXITING);
+ if (kvm_hlt_in_guest(vmx->vcpu.kvm))
+ exec_control &= ~CPU_BASED_HLT_EXITING;
return exec_control;
}
@@ -5533,7 +5825,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
}
if (!enable_unrestricted_guest)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
- if (!ple_gap)
+ if (kvm_pause_in_guest(vmx->vcpu.kvm))
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
if (!kvm_vcpu_apicv_active(vcpu))
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -5565,10 +5857,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (xsaves_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_XSAVES;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_XSAVES;
}
}
@@ -5580,10 +5872,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (rdtscp_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_RDTSCP;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_RDTSCP;
}
}
@@ -5601,10 +5893,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (invpcid_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_INVPCID;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_ENABLE_INVPCID;
}
}
@@ -5616,10 +5908,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (rdrand_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_RDRAND_EXITING;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_RDRAND_EXITING;
}
}
@@ -5631,10 +5923,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (rdseed_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_RDSEED_EXITING;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_RDSEED_EXITING;
}
}
@@ -5696,7 +5988,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
}
- if (ple_gap) {
+ if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
vmcs_write32(PLE_GAP, ple_gap);
vmx->ple_window = ple_window;
vmx->ple_window_dirty = true;
@@ -5861,6 +6153,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
update_exception_bitmap(vcpu);
vpid_sync_context(vmx->vpid);
+ if (init_event)
+ vmx_clear_hlt(vcpu);
}
/*
@@ -5885,8 +6179,7 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
{
- return get_vmcs12(vcpu)->pin_based_vm_exec_control &
- PIN_BASED_NMI_EXITING;
+ return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
}
static void enable_irq_window(struct kvm_vcpu *vcpu)
@@ -5932,6 +6225,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
} else
intr |= INTR_TYPE_EXT_INTR;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+
+ vmx_clear_hlt(vcpu);
}
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -5962,6 +6257,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+
+ vmx_clear_hlt(vcpu);
}
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -6024,14 +6321,23 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
+ if (enable_unrestricted_guest)
+ return 0;
+
ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
PAGE_SIZE * 3);
if (ret)
return ret;
- kvm->arch.tss_addr = addr;
+ to_kvm_vmx(kvm)->tss_addr = addr;
return init_rmode_tss(kvm);
}
+static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+{
+ to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
+ return 0;
+}
+
static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
{
switch (vec) {
@@ -6134,19 +6440,24 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_nmi(intr_info))
return 1; /* already handled by vmx_vcpu_run() */
- if (is_invalid_opcode(intr_info)) {
- er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
- if (er == EMULATE_USER_EXIT)
- return 0;
- if (er != EMULATE_DONE)
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
+ if (is_invalid_opcode(intr_info))
+ return handle_ud(vcpu);
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
+ WARN_ON_ONCE(!enable_vmware_backdoor);
+ er = emulate_instruction(vcpu,
+ EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
+ else if (er != EMULATE_DONE)
+ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+ return 1;
+ }
+
/*
* The #PF with PFEC.RSVD = 1 indicates the guest is accessing
* MMIO, it is better to report an internal error.
@@ -6232,28 +6543,22 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
static int handle_io(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
- int size, in, string, ret;
+ int size, in, string;
unsigned port;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
string = (exit_qualification & 16) != 0;
- in = (exit_qualification & 8) != 0;
++vcpu->stat.io_exits;
- if (string || in)
+ if (string)
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
+ in = (exit_qualification & 8) != 0;
- ret = kvm_skip_emulated_instruction(vcpu);
-
- /*
- * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
- * KVM_EXIT_DEBUG here.
- */
- return kvm_fast_pio_out(vcpu, size, port) && ret;
+ return kvm_fast_pio(vcpu, size, port, in);
}
static void
@@ -6344,6 +6649,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
err = handle_set_cr0(vcpu, val);
return kvm_complete_insn_gp(vcpu, err);
case 3:
+ WARN_ON_ONCE(enable_unrestricted_guest);
err = kvm_set_cr3(vcpu, val);
return kvm_complete_insn_gp(vcpu, err);
case 4:
@@ -6376,6 +6682,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
case 1: /*mov from cr*/
switch (cr) {
case 3:
+ WARN_ON_ONCE(enable_unrestricted_guest);
val = kvm_read_cr3(vcpu);
kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
@@ -6769,7 +7076,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
- int ret;
gpa_t gpa;
/*
@@ -6797,17 +7103,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
NULL, 0) == EMULATE_DONE;
}
- ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
- if (ret >= 0)
- return ret;
-
- /* It is the real ept misconfig */
- WARN_ON(1);
-
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
- vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
-
- return 0;
+ return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
}
static int handle_nmi_window(struct kvm_vcpu *vcpu)
@@ -6830,6 +7126,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
bool intr_window_requested;
unsigned count = 130;
+ /*
+ * We should never reach the point where we are emulating L2
+ * due to invalid guest state as that means we incorrectly
+ * allowed a nested VMEntry with an invalid vmcs12.
+ */
+ WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
+
cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
@@ -6848,12 +7151,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
goto out;
}
- if (err != EMULATE_DONE) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
- return 0;
- }
+ if (err != EMULATE_DONE)
+ goto emulation_error;
+
+ if (vmx->emulation_required && !vmx->rmode.vm86_active &&
+ vcpu->arch.exception.pending)
+ goto emulation_error;
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
@@ -6869,34 +7172,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
out:
return ret;
-}
-
-static int __grow_ple_window(int val)
-{
- if (ple_window_grow < 1)
- return ple_window;
-
- val = min(val, ple_window_actual_max);
-
- if (ple_window_grow < ple_window)
- val *= ple_window_grow;
- else
- val += ple_window_grow;
-
- return val;
-}
-static int __shrink_ple_window(int val, int modifier, int minimum)
-{
- if (modifier < 1)
- return ple_window;
-
- if (modifier < ple_window)
- val /= modifier;
- else
- val -= modifier;
-
- return max(val, minimum);
+emulation_error:
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return 0;
}
static void grow_ple_window(struct kvm_vcpu *vcpu)
@@ -6904,7 +7185,9 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
int old = vmx->ple_window;
- vmx->ple_window = __grow_ple_window(old);
+ vmx->ple_window = __grow_ple_window(old, ple_window,
+ ple_window_grow,
+ ple_window_max);
if (vmx->ple_window != old)
vmx->ple_window_dirty = true;
@@ -6917,8 +7200,9 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
int old = vmx->ple_window;
- vmx->ple_window = __shrink_ple_window(old,
- ple_window_shrink, ple_window);
+ vmx->ple_window = __shrink_ple_window(old, ple_window,
+ ple_window_shrink,
+ ple_window);
if (vmx->ple_window != old)
vmx->ple_window_dirty = true;
@@ -6927,21 +7211,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
}
/*
- * ple_window_actual_max is computed to be one grow_ple_window() below
- * ple_window_max. (See __grow_ple_window for the reason.)
- * This prevents overflows, because ple_window_max is int.
- * ple_window_max effectively rounded down to a multiple of ple_window_grow in
- * this process.
- * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
- */
-static void update_ple_window_actual_max(void)
-{
- ple_window_actual_max =
- __shrink_ple_window(max(ple_window_max, ple_window),
- ple_window_grow, INT_MIN);
-}
-
-/*
* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
*/
static void wakeup_handler(void)
@@ -6960,7 +7229,7 @@ static void wakeup_handler(void)
spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
}
-void vmx_enable_tdp(void)
+static void vmx_enable_tdp(void)
{
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
@@ -7061,8 +7330,6 @@ static __init int hardware_setup(void)
else
kvm_disable_tdp();
- update_ple_window_actual_max();
-
/*
* Only enable PML when hardware supports PML feature, and both EPT
* and EPT A/D bit features are enabled -- PML depends on them to work.
@@ -7094,6 +7361,7 @@ static __init int hardware_setup(void)
init_vmcs_shadow_fields();
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+ nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv);
kvm_mce_cap_supported |= MCG_LMCE_P;
@@ -7122,7 +7390,7 @@ static __exit void hardware_unsetup(void)
*/
static int handle_pause(struct kvm_vcpu *vcpu)
{
- if (ple_gap)
+ if (!kvm_pause_in_guest(vcpu->kvm))
grow_ple_window(vcpu);
/*
@@ -7954,9 +8222,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
u64 eptp, gpa;
} operand;
- if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+ if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_EPT) ||
- !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+ !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -7967,7 +8235,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
- types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
+ types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
@@ -8018,9 +8286,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
u64 gla;
} operand;
- if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+ if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
- !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) {
+ !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -8031,7 +8299,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
- types = (vmx->nested.nested_vmx_vpid_caps &
+ types = (vmx->nested.msrs.vpid_caps &
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
if (type >= 32 || !(types & (1 << type))) {
@@ -8125,11 +8393,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
/* Check for memory type validity */
switch (address & VMX_EPTP_MT_MASK) {
case VMX_EPTP_MT_UC:
- if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
+ if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
return false;
break;
case VMX_EPTP_MT_WB:
- if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
+ if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
return false;
break;
default:
@@ -8146,7 +8414,7 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
/* AD, if set, should be supported */
if (address & VMX_EPTP_AD_ENABLE_BIT) {
- if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
+ if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
return false;
}
@@ -8790,7 +9058,8 @@ static void dump_vmcs(void)
pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
vmcs_read64(GUEST_IA32_DEBUGCTL),
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
- if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if (cpu_has_load_perf_global_ctrl &&
+ vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016llx\n",
vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
@@ -8826,7 +9095,8 @@ static void dump_vmcs(void)
pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
vmcs_read64(HOST_IA32_EFER),
vmcs_read64(HOST_IA32_PAT));
- if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if (cpu_has_load_perf_global_ctrl &&
+ vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016llx\n",
vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
@@ -9178,9 +9448,9 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
/* We need to handle NMIs before interrupts are enabled */
if (is_nmi(exit_intr_info)) {
- kvm_before_handle_nmi(&vmx->vcpu);
+ kvm_before_interrupt(&vmx->vcpu);
asm("int $2");
- kvm_after_handle_nmi(&vmx->vcpu);
+ kvm_after_interrupt(&vmx->vcpu);
}
}
@@ -9403,7 +9673,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr3, cr4;
+ unsigned long cr3, cr4, evmcs_rsp;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -9469,6 +9739,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
vmx->__launched = vmx->loaded_vmcs->launched;
+
+ evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
+ (unsigned long)&current_evmcs->host_rsp : 0;
+
asm(
/* Store host registers */
"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -9477,15 +9751,21 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
"je 1f \n\t"
"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
+ /* Avoid VMWRITE when Enlightened VMCS is in use */
+ "test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+ "jz 2f \n\t"
+ "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
+ "jmp 1f \n\t"
+ "2: \n\t"
__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
"1: \n\t"
/* Reload cr2 if changed */
"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
"mov %%cr2, %%" _ASM_DX " \n\t"
"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
- "je 2f \n\t"
+ "je 3f \n\t"
"mov %%" _ASM_AX", %%cr2 \n\t"
- "2: \n\t"
+ "3: \n\t"
/* Check if vmlaunch of vmresume is needed */
"cmpl $0, %c[launched](%0) \n\t"
/* Load guest registers. Don't clobber flags. */
@@ -9554,7 +9834,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
".popsection"
- : : "c"(vmx), "d"((unsigned long)HOST_RSP),
+ : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
[fail]"i"(offsetof(struct vcpu_vmx, fail)),
[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
@@ -9579,10 +9859,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
[wordsize]"i"(sizeof(ulong))
: "cc", "memory"
#ifdef CONFIG_X86_64
- , "rax", "rbx", "rdi", "rsi"
+ , "rax", "rbx", "rdi"
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#else
- , "eax", "ebx", "edi", "esi"
+ , "eax", "ebx", "edi"
#endif
);
@@ -9610,6 +9890,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
+ /* All fields are clean at this point */
+ if (static_branch_unlikely(&enable_evmcs))
+ current_evmcs->hv_clean_fields |=
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (vmx->host_debugctlmsr)
update_debugctlmsr(vmx->host_debugctlmsr);
@@ -9646,14 +9931,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vmx->host_pkru);
}
- /*
- * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
- * we did not inject a still-pending event to L1 now because of
- * nested_run_pending, we need to re-enable this bit.
- */
- if (vmx->nested.nested_run_pending)
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
@@ -9670,6 +9947,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
}
STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
+static struct kvm *vmx_vm_alloc(void)
+{
+ struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);
+ return &kvm_vmx->kvm;
+}
+
+static void vmx_vm_free(struct kvm *kvm)
+{
+ kfree(to_kvm_vmx(kvm));
+}
+
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -9777,14 +10065,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto free_vmcs;
}
- if (enable_ept) {
+ if (enable_ept && !enable_unrestricted_guest) {
err = init_rmode_identity_map(kvm);
if (err)
goto free_vmcs;
}
if (nested) {
- nested_vmx_setup_ctls_msrs(vmx);
+ nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
+ kvm_vcpu_apicv_active(&vmx->vcpu));
vmx->nested.vpid02 = allocate_vpid();
}
@@ -9817,6 +10106,13 @@ free_vcpu:
return ERR_PTR(err);
}
+static int vmx_vm_init(struct kvm *kvm)
+{
+ if (!ple_gap)
+ kvm->arch.pause_in_guest = true;
+ return 0;
+}
+
static void __init vmx_check_processor_compat(void *rtn)
{
struct vmcs_config vmcs_conf;
@@ -9824,6 +10120,7 @@ static void __init vmx_check_processor_compat(void *rtn)
*(int *)rtn = 0;
if (setup_vmcs_config(&vmcs_conf) < 0)
*(int *)rtn = -EIO;
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -9911,12 +10208,12 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_cpuid_entry2 *entry;
- vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff;
- vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
+ vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
+ vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
if (entry && (entry->_reg & (_cpuid_mask))) \
- vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask); \
+ vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
} while (0)
entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
@@ -10013,7 +10310,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
kvm_init_shadow_ept_mmu(vcpu,
- to_vmx(vcpu)->nested.nested_vmx_ept_caps &
+ to_vmx(vcpu)->nested.msrs.ept_caps &
VMX_EPT_EXECUTE_ONLY_BIT,
nested_ept_ad_enabled(vcpu));
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
@@ -10952,6 +11249,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
vmx_set_efer(vcpu, vcpu->arch.efer);
+ /*
+ * Guest state is invalid and unrestricted guest is disabled,
+ * which means L1 attempted VMEntry to L2 with invalid state.
+ * Fail the VMEntry.
+ */
+ if (vmx->emulation_required) {
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return 1;
+ }
+
/* Shadow page tables on either EPT or shadow page tables. */
if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
@@ -10965,6 +11272,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 0;
}
+static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
+{
+ if (!nested_cpu_has_nmi_exiting(vmcs12) &&
+ nested_cpu_has_virtual_nmis(vmcs12))
+ return -EINVAL;
+
+ if (!nested_cpu_has_virtual_nmis(vmcs12) &&
+ nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
+ return -EINVAL;
+
+ return 0;
+}
+
static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -10992,26 +11312,29 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
- vmx->nested.nested_vmx_procbased_ctls_low,
- vmx->nested.nested_vmx_procbased_ctls_high) ||
+ vmx->nested.msrs.procbased_ctls_low,
+ vmx->nested.msrs.procbased_ctls_high) ||
(nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
!vmx_control_verify(vmcs12->secondary_vm_exec_control,
- vmx->nested.nested_vmx_secondary_ctls_low,
- vmx->nested.nested_vmx_secondary_ctls_high)) ||
+ vmx->nested.msrs.secondary_ctls_low,
+ vmx->nested.msrs.secondary_ctls_high)) ||
!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
- vmx->nested.nested_vmx_pinbased_ctls_low,
- vmx->nested.nested_vmx_pinbased_ctls_high) ||
+ vmx->nested.msrs.pinbased_ctls_low,
+ vmx->nested.msrs.pinbased_ctls_high) ||
!vmx_control_verify(vmcs12->vm_exit_controls,
- vmx->nested.nested_vmx_exit_ctls_low,
- vmx->nested.nested_vmx_exit_ctls_high) ||
+ vmx->nested.msrs.exit_ctls_low,
+ vmx->nested.msrs.exit_ctls_high) ||
!vmx_control_verify(vmcs12->vm_entry_controls,
- vmx->nested.nested_vmx_entry_ctls_low,
- vmx->nested.nested_vmx_entry_ctls_high))
+ vmx->nested.msrs.entry_ctls_low,
+ vmx->nested.msrs.entry_ctls_high))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ if (nested_vmx_check_nmi_controls(vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (nested_cpu_has_vmfunc(vmcs12)) {
if (vmcs12->vm_function_control &
- ~vmx->nested.nested_vmx_vmfunc_controls)
+ ~vmx->nested.msrs.vmfunc_controls)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (nested_cpu_has_eptp_switching(vmcs12)) {
@@ -11293,7 +11616,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
} else if (vcpu->arch.nmi_injected) {
vmcs12->idt_vectoring_info_field =
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
- } else if (vcpu->arch.interrupt.pending) {
+ } else if (vcpu->arch.interrupt.injected) {
nr = vcpu->arch.interrupt.nr;
idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
@@ -11941,7 +12264,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
- if (ple_gap)
+ if (!kvm_pause_in_guest(vcpu->kvm))
shrink_ple_window(vcpu);
}
@@ -12259,6 +12582,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
vmx->nested.smm.vmxon = vmx->nested.vmxon;
vmx->nested.vmxon = false;
+ vmx_clear_hlt(vcpu);
return 0;
}
@@ -12300,6 +12624,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.cpu_has_accelerated_tpr = report_flexpriority,
.cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
+ .vm_init = vmx_vm_init,
+ .vm_alloc = vmx_vm_alloc,
+ .vm_free = vmx_vm_free,
+
.vcpu_create = vmx_create_vcpu,
.vcpu_free = vmx_free_vcpu,
.vcpu_reset = vmx_vcpu_reset,
@@ -12367,6 +12695,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
.set_tss_addr = vmx_set_tss_addr,
+ .set_identity_map_addr = vmx_set_identity_map_addr,
.get_tdp_level = get_ept_level,
.get_mt_mask = vmx_get_mt_mask,
@@ -12425,7 +12754,38 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
static int __init vmx_init(void)
{
- int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+ int r;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ /*
+ * Enlightened VMCS usage should be recommended and the host needs
+ * to support eVMCS v1 or above. We can also disable eVMCS support
+ * with module parameter.
+ */
+ if (enlightened_vmcs &&
+ ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
+ (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
+ KVM_EVMCS_VERSION) {
+ int cpu;
+
+ /* Check that we have assist pages on all online CPUs */
+ for_each_online_cpu(cpu) {
+ if (!hv_get_vp_assist_page(cpu)) {
+ enlightened_vmcs = false;
+ break;
+ }
+ }
+
+ if (enlightened_vmcs) {
+ pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
+ static_branch_enable(&enable_evmcs);
+ }
+ } else {
+ enlightened_vmcs = false;
+ }
+#endif
+
+ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
return r;
@@ -12446,6 +12806,29 @@ static void __exit vmx_exit(void)
#endif
kvm_exit();
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (static_branch_unlikely(&enable_evmcs)) {
+ int cpu;
+ struct hv_vp_assist_page *vp_ap;
+ /*
+ * Reset everything to support using non-enlightened VMCS
+ * access later (e.g. when we reload the module with
+ * enlightened_vmcs=0)
+ */
+ for_each_online_cpu(cpu) {
+ vp_ap = hv_get_vp_assist_page(cpu);
+
+ if (!vp_ap)
+ continue;
+
+ vp_ap->current_nested_vmcs = 0;
+ vp_ap->enlighten_vmentry = 0;
+ }
+
+ static_branch_disable(&enable_evmcs);
+ }
+#endif
}
module_init(vmx_init)
diff --git a/arch/x86/kvm/vmx_evmcs.h b/arch/x86/kvm/vmx_evmcs.h
new file mode 100644
index 0000000..210a884
--- /dev/null
+++ b/arch/x86/kvm/vmx_evmcs.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_EVMCS_H
+#define __KVM_X86_VMX_EVMCS_H
+
+#include <asm/hyperv-tlfs.h>
+
+#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
+#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
+#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
+ {EVMCS1_OFFSET(name), clean_field}
+
+struct evmcs_field {
+ u16 offset;
+ u16 clean_field;
+};
+
+static const struct evmcs_field vmcs_field_to_evmcs_1[] = {
+ /* 64 bit rw */
+ EVMCS1_FIELD(GUEST_RIP, guest_rip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(GUEST_RSP, guest_rsp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(GUEST_RFLAGS, guest_rflags,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(HOST_IA32_PAT, host_ia32_pat,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR0, host_cr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR3, host_cr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR4, host_cr4,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_RIP, host_rip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(IO_BITMAP_A, io_bitmap_a,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
+ EVMCS1_FIELD(IO_BITMAP_B, io_bitmap_b,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
+ EVMCS1_FIELD(MSR_BITMAP, msr_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP),
+ EVMCS1_FIELD(GUEST_ES_BASE, guest_es_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_BASE, guest_cs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_BASE, guest_ss_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_BASE, guest_ds_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_BASE, guest_fs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_BASE, guest_gs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_BASE, guest_ldtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_BASE, guest_tr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GDTR_BASE, guest_gdtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_IDTR_BASE, guest_idtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(TSC_OFFSET, tsc_offset,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(VMCS_LINK_POINTER, vmcs_link_pointer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_PAT, guest_ia32_pat,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR2, guest_pdptr2,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR3, guest_pdptr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR0_READ_SHADOW, cr0_read_shadow,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR4_READ_SHADOW, cr4_read_shadow,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR0, guest_cr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR3, guest_cr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR4, guest_cr4,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_DR7, guest_dr7,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(HOST_FS_BASE, host_fs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_GS_BASE, host_gs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_TR_BASE, host_tr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_GDTR_BASE, host_gdtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_IDTR_BASE, host_idtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_RSP, host_rsp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(EPT_POINTER, ept_pointer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
+ EVMCS1_FIELD(GUEST_BNDCFGS, guest_bndcfgs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+
+ /* 64 bit read only */
+ EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(EXIT_QUALIFICATION, exit_qualification,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ /*
+ * Not defined in KVM:
+ *
+ * EVMCS1_FIELD(0x00006402, exit_io_instruction_ecx,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ * EVMCS1_FIELD(0x00006404, exit_io_instruction_esi,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ * EVMCS1_FIELD(0x00006406, exit_io_instruction_esi,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ * EVMCS1_FIELD(0x00006408, exit_io_instruction_eip,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ */
+ EVMCS1_FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+
+ /*
+ * No mask defined in the spec as Hyper-V doesn't currently support
+ * these. Future proof by resetting the whole clean field mask on
+ * access.
+ */
+ EVMCS1_FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+
+ /* 32 bit rw */
+ EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC),
+ EVMCS1_FIELD(EXCEPTION_BITMAP, exception_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN),
+ EVMCS1_FIELD(VM_ENTRY_CONTROLS, vm_entry_controls,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY),
+ EVMCS1_FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ vm_entry_exception_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(VM_EXIT_CONTROLS, vm_exit_controls,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(GUEST_ES_LIMIT, guest_es_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_LIMIT, guest_cs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_LIMIT, guest_ss_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_LIMIT, guest_ds_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_LIMIT, guest_fs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_LIMIT, guest_gs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_LIMIT, guest_tr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_ACTIVITY_STATE, guest_activity_state,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+
+ /* 32 bit read only */
+ EVMCS1_FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_REASON, vm_exit_reason,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+
+ /* No mask defined in the spec (not used) */
+ EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_COUNT, cr3_target_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+
+ /* 16 bit rw */
+ EVMCS1_FIELD(HOST_ES_SELECTOR, host_es_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CS_SELECTOR, host_cs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_SS_SELECTOR, host_ss_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_DS_SELECTOR, host_ds_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_FS_SELECTOR, host_fs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_GS_SELECTOR, host_gs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_TR_SELECTOR, host_tr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(GUEST_ES_SELECTOR, guest_es_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_SELECTOR, guest_cs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_SELECTOR, guest_ss_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_SELECTOR, guest_ds_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_SELECTOR, guest_fs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_SELECTOR, guest_gs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_SELECTOR, guest_tr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
+};
+
+static __always_inline int get_evmcs_offset(unsigned long field,
+ u16 *clean_field)
+{
+ unsigned int index = ROL16(field, 6);
+ const struct evmcs_field *evmcs_field;
+
+ if (unlikely(index >= ARRAY_SIZE(vmcs_field_to_evmcs_1))) {
+ WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
+ field);
+ return -ENOENT;
+ }
+
+ evmcs_field = &vmcs_field_to_evmcs_1[index];
+
+ if (clean_field)
+ *clean_field = evmcs_field->clean_field;
+
+ return evmcs_field->offset;
+}
+
+#undef ROL16
+
+#endif /* __KVM_X86_VMX_EVMCS_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 18b5ca7..b2ff74b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -102,6 +102,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+static void store_regs(struct kvm_vcpu *vcpu);
+static int sync_regs(struct kvm_vcpu *vcpu);
struct kvm_x86_ops *kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -140,6 +142,13 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
+bool __read_mostly enable_vmware_backdoor = false;
+module_param(enable_vmware_backdoor, bool, S_IRUGO);
+EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
+
+static bool __read_mostly force_emulation_prefix = false;
+module_param(force_emulation_prefix, bool, S_IRUGO);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -1032,7 +1041,11 @@ static u32 emulated_msrs[] = {
HV_X64_MSR_VP_RUNTIME,
HV_X64_MSR_SCONTROL,
HV_X64_MSR_STIMER0_CONFIG,
- HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+ HV_X64_MSR_VP_ASSIST_PAGE,
+ HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
+ HV_X64_MSR_TSC_EMULATION_STATUS,
+
+ MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
MSR_IA32_TSC_ADJUST,
@@ -1054,6 +1067,25 @@ static unsigned num_emulated_msrs;
* can be used by a hypervisor to validate requested CPU features.
*/
static u32 msr_based_features[] = {
+ MSR_IA32_VMX_BASIC,
+ MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+ MSR_IA32_VMX_PINBASED_CTLS,
+ MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+ MSR_IA32_VMX_PROCBASED_CTLS,
+ MSR_IA32_VMX_TRUE_EXIT_CTLS,
+ MSR_IA32_VMX_EXIT_CTLS,
+ MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+ MSR_IA32_VMX_ENTRY_CTLS,
+ MSR_IA32_VMX_MISC,
+ MSR_IA32_VMX_CR0_FIXED0,
+ MSR_IA32_VMX_CR0_FIXED1,
+ MSR_IA32_VMX_CR4_FIXED0,
+ MSR_IA32_VMX_CR4_FIXED1,
+ MSR_IA32_VMX_VMCS_ENUM,
+ MSR_IA32_VMX_PROCBASED_CTLS2,
+ MSR_IA32_VMX_EPT_VPID_CAP,
+ MSR_IA32_VMX_VMFUNC,
+
MSR_F10H_DECFG,
MSR_IA32_UCODE_REV,
};
@@ -2432,6 +2464,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_set_msr_common(vcpu, msr, data,
msr_info->host_initiated);
case MSR_IA32_BBL_CR_CTL3:
@@ -2558,6 +2593,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_DC_CFG:
msr_info->data = 0;
break;
+ case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -2661,6 +2697,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_get_msr_common(vcpu,
msr_info->index, &msr_info->data);
break;
@@ -2777,9 +2816,15 @@ out:
return r;
}
+static inline bool kvm_can_mwait_in_guest(void)
+{
+ return boot_cpu_has(X86_FEATURE_MWAIT) &&
+ !boot_cpu_has_bug(X86_BUG_MONITOR);
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
- int r;
+ int r = 0;
switch (ext) {
case KVM_CAP_IRQCHIP:
@@ -2809,6 +2854,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_SYNIC:
case KVM_CAP_HYPERV_SYNIC2:
case KVM_CAP_HYPERV_VP_INDEX:
+ case KVM_CAP_HYPERV_EVENTFD:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -2828,11 +2874,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
r = 1;
break;
+ case KVM_CAP_SYNC_REGS:
+ r = KVM_SYNC_X86_VALID_FIELDS;
+ break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
- case KVM_CAP_X86_GUEST_MWAIT:
- r = kvm_mwait_in_guest();
+ case KVM_CAP_X86_DISABLE_EXITS:
+ r |= KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE;
+ if(kvm_can_mwait_in_guest())
+ r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
@@ -2873,7 +2924,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_X2APIC_API_VALID_FLAGS;
break;
default:
- r = 0;
break;
}
return r;
@@ -3265,7 +3315,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->exception.error_code = vcpu->arch.exception.error_code;
events->interrupt.injected =
- vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
+ vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
events->interrupt.nr = vcpu->arch.interrupt.nr;
events->interrupt.soft = 0;
events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
@@ -3318,7 +3368,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
vcpu->arch.exception.error_code = events->exception.error_code;
- vcpu->arch.interrupt.pending = events->interrupt.injected;
+ vcpu->arch.interrupt.injected = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
vcpu->arch.interrupt.soft = events->interrupt.soft;
if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
@@ -3917,8 +3967,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
u64 ident_addr)
{
- kvm->arch.ept_identity_map_addr = ident_addr;
- return 0;
+ return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr);
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
@@ -4178,6 +4227,20 @@ split_irqchip_unlock:
r = 0;
break;
+ case KVM_CAP_X86_DISABLE_EXITS:
+ r = -EINVAL;
+ if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
+ break;
+
+ if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
+ kvm_can_mwait_in_guest())
+ kvm->arch.mwait_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
+ kvm->arch.hlt_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
+ kvm->arch.pause_in_guest = true;
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
@@ -4482,6 +4545,15 @@ set_identity_unlock:
r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
break;
}
+ case KVM_HYPERV_EVENTFD: {
+ struct kvm_hyperv_eventfd hvevfd;
+
+ r = -EFAULT;
+ if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
+ goto out;
+ r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -4771,6 +4843,30 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+int handle_ud(struct kvm_vcpu *vcpu)
+{
+ int emul_type = EMULTYPE_TRAP_UD;
+ enum emulation_result er;
+ char sig[5]; /* ud2; .ascii "kvm" */
+ struct x86_exception e;
+
+ if (force_emulation_prefix &&
+ kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
+ kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 &&
+ memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+ kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
+ emul_type = 0;
+ }
+
+ er = emulate_instruction(vcpu, emul_type);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
+ if (er != EMULATE_DONE)
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(handle_ud);
+
static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t gpa, bool write)
{
@@ -5612,27 +5708,27 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
- if (irq == NMI_VECTOR)
- vcpu->arch.nmi_pending = 0;
- else
- vcpu->arch.interrupt.pending = false;
-
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
-static int handle_emulation_failure(struct kvm_vcpu *vcpu)
+static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
{
int r = EMULATE_DONE;
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
+
+ if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
+ return EMULATE_FAIL;
+
if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
r = EMULATE_USER_EXIT;
}
+
kvm_queue_exception(vcpu, UD_VECTOR);
return r;
@@ -5876,6 +5972,37 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
return false;
}
+static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
+{
+ switch (ctxt->opcode_len) {
+ case 1:
+ switch (ctxt->b) {
+ case 0xe4: /* IN */
+ case 0xe5:
+ case 0xec:
+ case 0xed:
+ case 0xe6: /* OUT */
+ case 0xe7:
+ case 0xee:
+ case 0xef:
+ case 0x6c: /* INS */
+ case 0x6d:
+ case 0x6e: /* OUTS */
+ case 0x6f:
+ return true;
+ }
+ break;
+ case 2:
+ switch (ctxt->b) {
+ case 0x33: /* RDPMC */
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2,
int emulation_type,
@@ -5928,10 +6055,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
- return handle_emulation_failure(vcpu);
+ return handle_emulation_failure(vcpu, emulation_type);
}
}
+ if ((emulation_type & EMULTYPE_VMWARE) &&
+ !is_vmware_backdoor_opcode(ctxt))
+ return EMULATE_FAIL;
+
if (emulation_type & EMULTYPE_SKIP) {
kvm_rip_write(vcpu, ctxt->_eip);
if (ctxt->eflags & X86_EFLAGS_RF)
@@ -5963,7 +6094,7 @@ restart:
emulation_type))
return EMULATE_DONE;
- return handle_emulation_failure(vcpu);
+ return handle_emulation_failure(vcpu, emulation_type);
}
if (ctxt->have_exception) {
@@ -6016,7 +6147,8 @@ restart:
}
EXPORT_SYMBOL_GPL(x86_emulate_instruction);
-int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
+static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
+ unsigned short port)
{
unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
@@ -6025,7 +6157,6 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
vcpu->arch.pio.count = 0;
return ret;
}
-EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
{
@@ -6049,7 +6180,8 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
return 1;
}
-int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
+static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port)
{
unsigned long val;
int ret;
@@ -6068,7 +6200,21 @@ int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
+
+int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
+{
+ int ret = kvm_skip_emulated_instruction(vcpu);
+
+ /*
+ * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ if (in)
+ return kvm_fast_pio_in(vcpu, size, port) && ret;
+ else
+ return kvm_fast_pio_out(vcpu, size, port) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_pio);
static int kvmclock_cpu_down_prep(unsigned int cpu)
{
@@ -6246,7 +6392,8 @@ static void kvm_timer_init(void)
kvmclock_cpu_online, kvmclock_cpu_down_prep);
}
-static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
int kvm_is_in_guest(void)
{
@@ -6279,18 +6426,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
.get_guest_ip = kvm_get_guest_ip,
};
-void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
-{
- __this_cpu_write(current_vcpu, vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
-
-void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
-{
- __this_cpu_write(current_vcpu, NULL);
-}
-EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
-
static void kvm_set_mmio_spte_mask(void)
{
u64 mask;
@@ -6644,27 +6779,36 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
int r;
/* try to reinject previous events if any */
- if (vcpu->arch.exception.injected) {
- kvm_x86_ops->queue_exception(vcpu);
- return 0;
- }
+ if (vcpu->arch.exception.injected)
+ kvm_x86_ops->queue_exception(vcpu);
/*
- * Exceptions must be injected immediately, or the exception
- * frame will have the address of the NMI or interrupt handler.
+ * Do not inject an NMI or interrupt if there is a pending
+ * exception. Exceptions and interrupts are recognized at
+ * instruction boundaries, i.e. the start of an instruction.
+ * Trap-like exceptions, e.g. #DB, have higher priority than
+ * NMIs and interrupts, i.e. traps are recognized before an
+ * NMI/interrupt that's pending on the same instruction.
+ * Fault-like exceptions, e.g. #GP and #PF, are the lowest
+ * priority, but are only generated (pended) during instruction
+ * execution, i.e. a pending fault-like exception means the
+ * fault occurred on the *previous* instruction and must be
+ * serviced prior to recognizing any new events in order to
+ * fully complete the previous instruction.
*/
- if (!vcpu->arch.exception.pending) {
- if (vcpu->arch.nmi_injected) {
+ else if (!vcpu->arch.exception.pending) {
+ if (vcpu->arch.nmi_injected)
kvm_x86_ops->set_nmi(vcpu);
- return 0;
- }
-
- if (vcpu->arch.interrupt.pending) {
+ else if (vcpu->arch.interrupt.injected)
kvm_x86_ops->set_irq(vcpu);
- return 0;
- }
}
+ /*
+ * Call check_nested_events() even if we reinjected a previous event
+ * in order for caller to determine if it should require immediate-exit
+ * from L2 to L1 due to pending L1 events which require exit
+ * from L2 to L1.
+ */
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
if (r != 0)
@@ -6677,6 +6821,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code);
+ WARN_ON_ONCE(vcpu->arch.exception.injected);
vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = true;
@@ -6691,7 +6836,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
}
kvm_x86_ops->queue_exception(vcpu);
- } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
+ }
+
+ /* Don't consider new event if we re-injected an event */
+ if (kvm_event_needs_reinjection(vcpu))
+ return 0;
+
+ if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
+ kvm_x86_ops->smi_allowed(vcpu)) {
vcpu->arch.smi_pending = false;
++vcpu->arch.smi_count;
enter_smm(vcpu);
@@ -6985,8 +7137,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
- u64 eoi_exit_bitmap[4];
-
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
@@ -6999,6 +7149,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
+
+ if (is_guest_mode(vcpu))
+ vcpu->arch.load_eoi_exitmap_pending = true;
+ else
+ kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+}
+
+static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+ u64 eoi_exit_bitmap[4];
+
+ if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+ return;
+
bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
vcpu_to_synic(vcpu)->vec_bitmap, 256);
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
@@ -7113,6 +7277,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
+ if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
+ vcpu_load_eoi_exitmap(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
kvm_vcpu_reload_apic_access_page(vcpu);
if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
@@ -7291,7 +7457,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_put_guest_xcr0(vcpu);
+ kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
+ kvm_after_interrupt(vcpu);
++vcpu->stat.exits;
@@ -7500,7 +7668,6 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}
-
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -7526,6 +7693,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
+ if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (vcpu->run->kvm_dirty_regs) {
+ r = sync_regs(vcpu);
+ if (r != 0)
+ goto out;
+ }
+
/* re-sync apic's tpr */
if (!lapic_in_kernel(vcpu)) {
if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
@@ -7550,6 +7728,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
out:
kvm_put_guest_fpu(vcpu);
+ if (vcpu->run->kvm_valid_regs)
+ store_regs(vcpu);
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -7557,10 +7737,8 @@ out:
return r;
}
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- vcpu_load(vcpu);
-
if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
/*
* We are here if userspace calls get_regs() in the middle of
@@ -7593,15 +7771,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_get_rflags(vcpu);
+}
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_load(vcpu);
+ __get_regs(vcpu, regs);
vcpu_put(vcpu);
return 0;
}
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- vcpu_load(vcpu);
-
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
@@ -7630,7 +7811,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.exception.pending = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_load(vcpu);
+ __set_regs(vcpu, regs);
vcpu_put(vcpu);
return 0;
}
@@ -7645,13 +7831,10 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct desc_ptr dt;
- vcpu_load(vcpu);
-
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -7679,10 +7862,16 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
- if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
+ if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
set_bit(vcpu->arch.interrupt.nr,
(unsigned long *)sregs->interrupt_bitmap);
+}
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ vcpu_load(vcpu);
+ __get_sregs(vcpu, sregs);
vcpu_put(vcpu);
return 0;
}
@@ -7754,7 +7943,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
-int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
/*
@@ -7777,8 +7966,7 @@ int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return 0;
}
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
@@ -7786,8 +7974,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct desc_ptr dt;
int ret = -EINVAL;
- vcpu_load(vcpu);
-
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
goto out;
@@ -7866,6 +8052,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
ret = 0;
out:
+ return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = __set_sregs(vcpu, sregs);
vcpu_put(vcpu);
return ret;
}
@@ -7992,6 +8188,45 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
+static void store_regs(struct kvm_vcpu *vcpu)
+{
+ BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
+
+ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
+ __get_regs(vcpu, &vcpu->run->s.regs.regs);
+
+ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
+ __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
+
+ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
+ kvm_vcpu_ioctl_x86_get_vcpu_events(
+ vcpu, &vcpu->run->s.regs.events);
+}
+
+static int sync_regs(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
+ return -EINVAL;
+
+ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
+ __set_regs(vcpu, &vcpu->run->s.regs.regs);
+ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
+ }
+ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
+ if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+ return -EINVAL;
+ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
+ }
+ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
+ if (kvm_vcpu_ioctl_x86_set_vcpu_events(
+ vcpu, &vcpu->run->s.regs.events))
+ return -EINVAL;
+ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
+ }
+
+ return 0;
+}
+
static void fx_init(struct kvm_vcpu *vcpu)
{
fpstate_init(&vcpu->arch.guest_fpu.state);
@@ -8447,7 +8682,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
- mutex_init(&kvm->arch.hyperv.hv_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
@@ -8456,6 +8690,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
+ kvm_hv_init_vm(kvm);
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
@@ -8586,6 +8821,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
+ kvm_hv_destroy_vm(kvm);
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b91215d..7d35ce6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,12 +2,48 @@
#ifndef ARCH_X86_KVM_X86_H
#define ARCH_X86_KVM_X86_H
-#include <asm/processor.h>
-#include <asm/mwait.h>
#include <linux/kvm_host.h>
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
+#define KVM_DEFAULT_PLE_GAP 128
+#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_DEFAULT_PLE_WINDOW_GROW 2
+#define KVM_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX UINT_MAX
+#define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX
+#define KVM_SVM_DEFAULT_PLE_WINDOW 3000
+
+static inline unsigned int __grow_ple_window(unsigned int val,
+ unsigned int base, unsigned int modifier, unsigned int max)
+{
+ u64 ret = val;
+
+ if (modifier < 1)
+ return base;
+
+ if (modifier < base)
+ ret *= modifier;
+ else
+ ret += modifier;
+
+ return min(ret, (u64)max);
+}
+
+static inline unsigned int __shrink_ple_window(unsigned int val,
+ unsigned int base, unsigned int modifier, unsigned int min)
+{
+ if (modifier < 1)
+ return base;
+
+ if (modifier < base)
+ val /= modifier;
+ else
+ val -= modifier;
+
+ return max(val, min);
+}
+
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@@ -19,19 +55,19 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
bool soft)
{
- vcpu->arch.interrupt.pending = true;
+ vcpu->arch.interrupt.injected = true;
vcpu->arch.interrupt.soft = soft;
vcpu->arch.interrupt.nr = vector;
}
static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
{
- vcpu->arch.interrupt.pending = false;
+ vcpu->arch.interrupt.injected = false;
}
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
+ return vcpu->arch.exception.injected || vcpu->arch.interrupt.injected ||
vcpu->arch.nmi_injected;
}
@@ -205,8 +241,6 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
return !(kvm->arch.disabled_quirks & quirk);
}
-void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
-void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
@@ -221,6 +255,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
+int handle_ud(struct kvm_vcpu *vcpu);
+
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
@@ -242,6 +278,8 @@ extern unsigned int min_timer_period_us;
extern unsigned int lapic_timer_advance_ns;
+extern bool enable_vmware_backdoor;
+
extern struct static_key kvm_no_apic_vcpu;
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
@@ -264,10 +302,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
__rem; \
})
-static inline bool kvm_mwait_in_guest(void)
+#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HTL (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
+#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
+ KVM_X86_DISABLE_EXITS_HTL | \
+ KVM_X86_DISABLE_EXITS_PAUSE)
+
+static inline bool kvm_mwait_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.mwait_in_guest;
+}
+
+static inline bool kvm_hlt_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.hlt_in_guest;
+}
+
+static inline bool kvm_pause_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.pause_in_guest;
+}
+
+DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+
+static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
+{
+ __this_cpu_write(current_vcpu, vcpu);
+}
+
+static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
{
- return boot_cpu_has(X86_FEATURE_MWAIT) &&
- !boot_cpu_has_bug(X86_BUG_MONITOR);
+ __this_cpu_write(current_vcpu, NULL);
}
#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 396e1f0..8008db2 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -778,6 +778,7 @@ void __init mem_init(void)
free_all_bootmem();
after_bootmem = 1;
+ x86_init.hyper.init_after_bootmem();
mem_init_print_info(NULL);
printk(KERN_INFO "virtual kernel memory layout:\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 45241de..66de40e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1185,6 +1185,7 @@ void __init mem_init(void)
/* this will put all memory onto the freelists */
free_all_bootmem();
after_bootmem = 1;
+ x86_init.hyper.init_after_bootmem();
/*
* Must be done after boot memory is put on freelist, because here we
@@ -1328,14 +1329,39 @@ int kern_addr_valid(unsigned long addr)
return pfn_valid(pte_pfn(*pte));
}
+/*
+ * Block size is the minimum amount of memory which can be hotplugged or
+ * hotremoved. It must be power of two and must be equal or larger than
+ * MIN_MEMORY_BLOCK_SIZE.
+ */
+#define MAX_BLOCK_SIZE (2UL << 30)
+
+/* Amount of ram needed to start using large blocks */
+#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)
+
static unsigned long probe_memory_block_size(void)
{
- unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
+ unsigned long boot_mem_end = max_pfn << PAGE_SHIFT;
+ unsigned long bz;
- /* if system is UV or has 64GB of RAM or more, use large blocks */
- if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
- bz = 2UL << 30; /* 2GB */
+ /* If this is UV system, always set 2G block size */
+ if (is_uv_system()) {
+ bz = MAX_BLOCK_SIZE;
+ goto done;
+ }
+ /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */
+ if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) {
+ bz = MIN_MEMORY_BLOCK_SIZE;
+ goto done;
+ }
+
+ /* Find the largest allowed block size that aligns to memory end */
+ for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
+ if (IS_ALIGNED(boot_mem_end, bz))
+ break;
+ }
+done:
pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
return bz;
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 155ecba..48c5912 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void)
return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}
-static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
+ struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
unsigned long gap_min, gap_max;
@@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd,
* process VM image, sets up which VM layout function to use:
*/
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
- unsigned long random_factor, unsigned long task_size)
+ unsigned long random_factor, unsigned long task_size,
+ struct rlimit *rlim_stack)
{
*legacy_base = mmap_legacy_base(random_factor, task_size);
if (mmap_is_legacy())
*base = *legacy_base;
else
- *base = mmap_base(random_factor, task_size);
+ *base = mmap_base(random_factor, task_size, rlim_stack);
}
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
if (mmap_is_legacy())
mm->get_unmapped_area = arch_get_unmapped_area;
@@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
- arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
+ arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
+ rlim_stack);
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
/*
@@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* mmap_base, the compat syscall uses mmap_compat_base.
*/
arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
- arch_rnd(mmap32_rnd_bits), task_size_32bit());
+ arch_rnd(mmap32_rnd_bits), task_size_32bit(),
+ rlim_stack);
#endif
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index d207634..486c0a3 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
static phys_addr_t xen_pt_base, xen_pt_size __initdata;
+static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);
+
/*
* Just beyond the highest usermode address. STACK_TOP_MAX has a
* redzone above it, so round it up to a PGD boundary.
@@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr)
}
+/*
+ * During early boot all page table pages are pinned, but we do not have struct
+ * pages, so return true until struct pages are ready.
+ */
static bool xen_page_pinned(void *ptr)
{
- struct page *page = virt_to_page(ptr);
+ if (static_branch_likely(&xen_struct_pages_ready)) {
+ struct page *page = virt_to_page(ptr);
- return PagePinned(page);
+ return PagePinned(page);
+ }
+ return true;
}
static void xen_extend_mmu_update(const struct mmu_update *update)
@@ -836,11 +845,6 @@ void xen_mm_pin_all(void)
spin_unlock(&pgd_lock);
}
-/*
- * The init_mm pagetable is really pinned as soon as its created, but
- * that's before we have page structures to store the bits. So do all
- * the book-keeping now.
- */
static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
@@ -848,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
return 0;
}
-static void __init xen_mark_init_mm_pinned(void)
+/*
+ * The init_mm pagetable is really pinned as soon as its created, but
+ * that's before we have page structures to store the bits. So do all
+ * the book-keeping now once struct pages for allocated pages are
+ * initialized. This happens only after free_all_bootmem() is called.
+ */
+static void __init xen_after_bootmem(void)
{
+ static_branch_enable(&xen_struct_pages_ready);
+#ifdef CONFIG_X86_64
+ SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
@@ -1623,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
unsigned level)
{
- bool pinned = PagePinned(virt_to_page(mm->pgd));
+ bool pinned = xen_page_pinned(mm->pgd);
trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
if (pinned) {
struct page *page = pfn_to_page(pfn);
- SetPagePinned(page);
+ if (static_branch_likely(&xen_struct_pages_ready))
+ SetPagePinned(page);
if (!PageHighMem(page)) {
xen_mc_batch();
@@ -2364,9 +2379,7 @@ static void __init xen_post_allocator_init(void)
#ifdef CONFIG_X86_64
pv_mmu_ops.write_cr3 = &xen_write_cr3;
- SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
- xen_mark_init_mm_pinned();
}
static void xen_leave_lazy_mmu(void)
@@ -2450,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
void __init xen_init_mmu_ops(void)
{
x86_init.paging.pagetable_init = xen_pagetable_init;
+ x86_init.hyper.init_after_bootmem = xen_after_bootmem;
pv_mmu_ops = xen_mmu_ops;
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index 5c83798..d5a8215 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -44,9 +44,10 @@ extern struct pci_controller* pcibios_alloc_controller(void);
#define PCI_DMA_BUS_IS_PHYS (1)
-/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
-#define HAVE_PCI_MMAP 1
-#define arch_can_pci_mmap_io() 1
+/* Tell PCI code what kind of PCI resource mappings we support */
+#define HAVE_PCI_MMAP 1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+#define arch_can_pci_mmap_io() 1
#endif /* __KERNEL__ */
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 3e9d01a..58f29a9 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -57,6 +57,7 @@
#define MAP_NONBLOCK 0x20000 /* do not block on IO */
#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x80000 /* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be
* uninitialized */
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index d981f01..b7c7a60 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -39,7 +39,6 @@
* pcibios_align_resource
* pcibios_fixup_bus
* pci_bus_add_device
- * pci_mmap_page_range
*/
struct pci_controller* pci_ctrl_head;
@@ -258,98 +257,21 @@ pci_controller_num(struct pci_dev *dev)
#endif /* CONFIG_PROC_FS */
/*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
* -- paulus.
*/
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap. They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static __inline__ int
-__pci_mmap_make_offset(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state)
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
{
- struct pci_controller *pci_ctrl = (struct pci_controller*) dev->sysdata;
- unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
- unsigned long io_offset = 0;
- int i, res_bit;
+ struct pci_controller *pci_ctrl = (struct pci_controller*) pdev->sysdata;
+ resource_size_t ioaddr = pci_resource_start(pdev, bar);
if (pci_ctrl == 0)
return -EINVAL; /* should never happen */
- /* If memory, add on the PCI bridge address offset */
- if (mmap_state == pci_mmap_mem) {
- res_bit = IORESOURCE_MEM;
- } else {
- io_offset = (unsigned long)pci_ctrl->io_space.base;
- offset += io_offset;
- res_bit = IORESOURCE_IO;
- }
-
- /*
- * Check that the offset requested corresponds to one of the
- * resources of the device.
- */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &dev->resource[i];
- int flags = rp->flags;
-
- /* treat ROM as memory (should be already) */
- if (i == PCI_ROM_RESOURCE)
- flags |= IORESOURCE_MEM;
-
- /* Active and same type? */
- if ((flags & res_bit) == 0)
- continue;
-
- /* In the range of this resource? */
- if (offset < (rp->start & PAGE_MASK) || offset > rp->end)
- continue;
-
- /* found it! construct the final physical address */
- if (mmap_state == pci_mmap_io)
- offset += pci_ctrl->io_space.start - io_offset;
- vma->vm_pgoff = offset >> PAGE_SHIFT;
- return 0;
- }
-
- return -EINVAL;
-}
+ /* Convert to an offset within this PCI controller */
+ ioaddr -= (unsigned long)pci_ctrl->io_space.base;
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture. The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, int bar,
- struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state,
- int write_combine)
-{
- int ret;
-
- ret = __pci_mmap_make_offset(dev, vma, mmap_state);
- if (ret < 0)
- return ret;
-
- vma->vm_page_prot = pgprot_device(vma->vm_page_prot);
-
- ret = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start,vma->vm_page_prot);
-
- return ret;
+ vma->vm_pgoff += (ioaddr + pci_ctrl->io_space.start) >> PAGE_SHIFT;
+ return 0;
}
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 57dc231..9220dcd 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(copy_user_highpage);
void flush_dcache_page(struct page *page)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = page_mapping_file(page);
/*
* If we have a mapping but the page is not mapped to user-space
OpenPOWER on IntegriCloud