summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-04-07 12:08:19 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-04-07 12:08:19 -0700
commit 49a695ba723224875df50e327bd7b0b65dd9a56b (patch)
tree 02372931e3e751106ca16bae14567d990bf22ad8
parent 299f89d53e61c0b17479cc7d6f3b5382d5e83f28 (diff)
parent c1b25a17d24925b0961c319cfc3fd7e1dc778914 (diff)
download op-kernel-dev-49a695ba723224875df50e327bd7b0b65dd9a56b.zip
op-kernel-dev-49a695ba723224875df50e327bd7b0b65dd9a56b.tar.gz
Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
 "Notable changes:

   - Support for 4PB user address space on 64-bit, opt-in via mmap().

   - Removal of POWER4 support, which was accidentally broken in 2016
     and no one noticed, and blocked use of some modern instructions.

   - Workarounds so that the hypervisor can enable Transactional Memory
     on Power9.

   - A series to disable the DAWR (Data Address Watchpoint Register) on
     Power9.

   - More information displayed in the meltdown/spectre_v1/v2 sysfs
     files.

   - A vpermxor (Power8 Altivec) implementation for the raid6 Q
     Syndrome.

   - A big series to make the allocation of our pacas (per cpu area),
     kernel page tables, and per-cpu stacks NUMA aware when using the
     Radix MMU on Power9.

  And as usual many fixes, reworks and cleanups.

  Thanks to: Aaro Koskinen, Alexandre Belloni, Alexey Kardashevskiy,
  Alistair Popple, Andy Shevchenko, Aneesh Kumar K.V, Anshuman Khandual,
  Balbir Singh, Benjamin Herrenschmidt, Christophe Leroy, Christophe
  Lombard, Cyril Bur, Daniel Axtens, Dave Young, Finn Thain, Frederic
  Barrat, Gustavo Romero, Horia Geantă, Jonathan Neuschäfer, Kees Cook,
  Larry Finger, Laurent Dufour, Laurent Vivier, Logan Gunthorpe,
  Madhavan Srinivasan, Mark Greer, Mark Hairgrove, Markus Elfring,
  Mathieu Malaterre, Matt Brown, Matt Evans, Mauricio Faria de Oliveira,
  Michael Neuling, Naveen N. Rao, Nicholas Piggin, Paul Mackerras,
  Philippe Bergheaud, Ram Pai, Rob Herring, Sam Bobroff, Segher
  Boessenkool, Simon Guo, Simon Horman, Stewart Smith, Sukadev
  Bhattiprolu, Suraj Jitindar Singh, Thiago Jung Bauermann, Vaibhav
  Jain, Vaidyanathan Srinivasan, Vasant Hegde, Wei Yongjun"

* tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (207 commits)
  powerpc/64s/idle: Fix restore of AMOR on POWER9 after deep sleep
  powerpc/64s: Fix POWER9 DD2.2 and above in cputable features
  powerpc/64s: Fix pkey support in dt_cpu_ftrs, add CPU_FTR_PKEY bit
  powerpc/64s: Fix dt_cpu_ftrs to have restore_cpu clear unwanted LPCR bits
  Revert "powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead"
  powerpc: iomap.c: introduce io{read|write}64_{lo_hi|hi_lo}
  powerpc: io.h: move iomap.h include so that it can use readq/writeq defs
  cxl: Fix possible deadlock when processing page faults from cxllib
  powerpc/hw_breakpoint: Only disable hw breakpoint if cpu supports it
  powerpc/mm/radix: Update command line parsing for disable_radix
  powerpc/mm/radix: Parse disable_radix commandline correctly.
  powerpc/mm/hugetlb: initialize the pagetable cache correctly for hugetlb
  powerpc/mm/radix: Update pte fragment count from 16 to 256 on radix
  powerpc/mm/keys: Update documentation and remove unnecessary check
  powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead
  powerpc/64s/idle: Consolidate power9_offline_stop()/power9_idle_stop()
  powerpc/powernv: Always stop secondaries before reboot/shutdown
  powerpc: hard disable irqs in smp_send_stop loop
  powerpc: use NMI IPI for smp_send_stop
  powerpc/powernv: Fix SMT4 forcing idle code
  ...
-rw-r--r--arch/powerpc/Makefile14
-rw-r--r--arch/powerpc/boot/dts/acadia.dts2
-rw-r--r--arch/powerpc/boot/dts/adder875-redboot.dts2
-rw-r--r--arch/powerpc/boot/dts/adder875-uboot.dts2
-rw-r--r--arch/powerpc/boot/dts/akebono.dts2
-rw-r--r--arch/powerpc/boot/dts/amigaone.dts2
-rw-r--r--arch/powerpc/boot/dts/asp834x-redboot.dts2
-rw-r--r--arch/powerpc/boot/dts/bamboo.dts2
-rw-r--r--arch/powerpc/boot/dts/c2k.dts2
-rw-r--r--arch/powerpc/boot/dts/currituck.dts2
-rw-r--r--arch/powerpc/boot/dts/digsy_mtc.dts2
-rw-r--r--arch/powerpc/boot/dts/ebony.dts2
-rw-r--r--arch/powerpc/boot/dts/eiger.dts2
-rw-r--r--arch/powerpc/boot/dts/ep405.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/mvme7100.dts2
-rw-r--r--arch/powerpc/boot/dts/fsp2.dts2
-rw-r--r--arch/powerpc/boot/dts/holly.dts2
-rw-r--r--arch/powerpc/boot/dts/hotfoot.dts2
-rw-r--r--arch/powerpc/boot/dts/icon.dts2
-rw-r--r--arch/powerpc/boot/dts/iss4xx-mpic.dts2
-rw-r--r--arch/powerpc/boot/dts/iss4xx.dts2
-rw-r--r--arch/powerpc/boot/dts/katmai.dts2
-rw-r--r--arch/powerpc/boot/dts/klondike.dts2
-rw-r--r--arch/powerpc/boot/dts/ksi8560.dts2
-rw-r--r--arch/powerpc/boot/dts/media5200.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc8272ads.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc866ads.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc885ads.dts2
-rw-r--r--arch/powerpc/boot/dts/mvme5100.dts2
-rw-r--r--arch/powerpc/boot/dts/obs600.dts2
-rw-r--r--arch/powerpc/boot/dts/pq2fads.dts2
-rw-r--r--arch/powerpc/boot/dts/rainier.dts2
-rw-r--r--arch/powerpc/boot/dts/redwood.dts2
-rw-r--r--arch/powerpc/boot/dts/sam440ep.dts2
-rw-r--r--arch/powerpc/boot/dts/sequoia.dts2
-rw-r--r--arch/powerpc/boot/dts/storcenter.dts2
-rw-r--r--arch/powerpc/boot/dts/taishan.dts2
-rw-r--r--arch/powerpc/boot/dts/virtex440-ml507.dts2
-rw-r--r--arch/powerpc/boot/dts/virtex440-ml510.dts2
-rw-r--r--arch/powerpc/boot/dts/walnut.dts2
-rw-r--r--arch/powerpc/boot/dts/warp.dts2
-rw-r--r--arch/powerpc/boot/dts/wii.dts21
-rw-r--r--arch/powerpc/boot/dts/xpedite5200_xmon.dts2
-rw-r--r--arch/powerpc/boot/dts/yosemite.dts2
-rw-r--r--arch/powerpc/boot/libfdt_env.h2
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h15
-rw-r--r--arch/powerpc/include/asm/barrier.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h14
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h25
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h54
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h12
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h19
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-4k.h5
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-64k.h6
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/slice.h27
-rw-r--r--arch/powerpc/include/asm/cacheflush.h1
-rw-r--r--arch/powerpc/include/asm/cputable.h263
-rw-r--r--arch/powerpc/include/asm/debug.h1
-rw-r--r--arch/powerpc/include/asm/eeh.h6
-rw-r--r--arch/powerpc/include/asm/eeh_event.h3
-rw-r--r--arch/powerpc/include/asm/epapr_hcalls.h22
-rw-r--r--arch/powerpc/include/asm/hugetlb.h8
-rw-r--r--arch/powerpc/include/asm/hvcall.h4
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h5
-rw-r--r--arch/powerpc/include/asm/io.h4
-rw-r--r--arch/powerpc/include/asm/irq.h1
-rw-r--r--arch/powerpc/include/asm/irq_work.h1
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h43
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h1
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h8
-rw-r--r--arch/powerpc/include/asm/lppaca.h29
-rw-r--r--arch/powerpc/include/asm/mmu-8xx.h21
-rw-r--r--arch/powerpc/include/asm/mmu.h6
-rw-r--r--arch/powerpc/include/asm/mmu_context.h39
-rw-r--r--arch/powerpc/include/asm/nohash/32/slice.h18
-rw-r--r--arch/powerpc/include/asm/nohash/64/slice.h12
-rw-r--r--arch/powerpc/include/asm/opal-api.h4
-rw-r--r--arch/powerpc/include/asm/opal.h4
-rw-r--r--arch/powerpc/include/asm/paca.h27
-rw-r--r--arch/powerpc/include/asm/page.h11
-rw-r--r--arch/powerpc/include/asm/page_64.h59
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h2
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h24
-rw-r--r--arch/powerpc/include/asm/pmc.h13
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h6
-rw-r--r--arch/powerpc/include/asm/powernv.h1
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h10
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h11
-rw-r--r--arch/powerpc/include/asm/processor.h16
-rw-r--r--arch/powerpc/include/asm/reg.h7
-rw-r--r--arch/powerpc/include/asm/security_features.h74
-rw-r--r--arch/powerpc/include/asm/setup.h3
-rw-r--r--arch/powerpc/include/asm/slice.h40
-rw-r--r--arch/powerpc/include/asm/smp.h5
-rw-r--r--arch/powerpc/include/asm/sparsemem.h2
-rw-r--r--arch/powerpc/include/asm/spinlock.h2
-rw-r--r--arch/powerpc/include/asm/switch_to.h1
-rw-r--r--arch/powerpc/include/asm/synch.h4
-rw-r--r--arch/powerpc/include/asm/thread_info.h1
-rw-r--r--arch/powerpc/include/asm/time.h4
-rw-r--r--arch/powerpc/include/asm/uaccess.h10
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c8
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S2
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S2
-rw-r--r--arch/powerpc/kernel/cputable.c59
-rw-r--r--arch/powerpc/kernel/crash.c2
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c36
-rw-r--r--arch/powerpc/kernel/eeh.c19
-rw-r--r--arch/powerpc/kernel/eeh_cache.c3
-rw-r--r--arch/powerpc/kernel/eeh_driver.c205
-rw-r--r--arch/powerpc/kernel/eeh_event.c6
-rw-r--r--arch/powerpc/kernel/entry_64.S2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S86
-rw-r--r--arch/powerpc/kernel/head_64.S19
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c3
-rw-r--r--arch/powerpc/kernel/idle_book3s.S50
-rw-r--r--arch/powerpc/kernel/iomap.c40
-rw-r--r--arch/powerpc/kernel/kprobes.c30
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c37
-rw-r--r--arch/powerpc/kernel/machine_kexec_file_64.c2
-rw-r--r--arch/powerpc/kernel/misc_64.S38
-rw-r--r--arch/powerpc/kernel/nvram_64.c9
-rw-r--r--arch/powerpc/kernel/paca.c242
-rw-r--r--arch/powerpc/kernel/process.c26
-rw-r--r--arch/powerpc/kernel/prom.c19
-rw-r--r--arch/powerpc/kernel/prom_init.c29
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh2
-rw-r--r--arch/powerpc/kernel/ptrace.c16
-rw-r--r--arch/powerpc/kernel/security.c88
-rw-r--r--arch/powerpc/kernel/setup-common.c37
-rw-r--r--arch/powerpc/kernel/setup.h9
-rw-r--r--arch/powerpc/kernel/setup_32.c8
-rw-r--r--arch/powerpc/kernel/setup_64.c113
-rw-r--r--arch/powerpc/kernel/signal.h5
-rw-r--r--arch/powerpc/kernel/signal_32.c4
-rw-r--r--arch/powerpc/kernel/smp.c23
-rw-r--r--arch/powerpc/kernel/sysfs.c20
-rw-r--r--arch/powerpc/kernel/time.c5
-rw-r--r--arch/powerpc/kernel/traps.c31
-rw-r--r--arch/powerpc/kernel/vdso.c12
-rw-r--r--arch/powerpc/kvm/Makefile7
-rw-r--r--arch/powerpc/kvm/book3s_hv.c55
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S187
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm.c216
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm_builtin.c109
-rw-r--r--arch/powerpc/kvm/emulate.c6
-rw-r--r--arch/powerpc/kvm/powerpc.c5
-rw-r--r--arch/powerpc/lib/Makefile6
-rw-r--r--arch/powerpc/lib/copypage_64.S2
-rw-r--r--arch/powerpc/lib/copypage_power7.S3
-rw-r--r--arch/powerpc/lib/copyuser_64.S2
-rw-r--r--arch/powerpc/lib/copyuser_power7.S3
-rw-r--r--arch/powerpc/lib/feature-fixups.c9
-rw-r--r--arch/powerpc/lib/memcpy_64.S2
-rw-r--r--arch/powerpc/lib/memcpy_power7.S3
-rw-r--r--arch/powerpc/lib/sstep.c4
-rw-r--r--arch/powerpc/mm/8xx_mmu.c2
-rw-r--r--arch/powerpc/mm/copro_fault.c2
-rw-r--r--arch/powerpc/mm/fault.c28
-rw-r--r--arch/powerpc/mm/hash_native_64.c15
-rw-r--r--arch/powerpc/mm/hash_utils_64.c34
-rw-r--r--arch/powerpc/mm/hugetlbpage.c26
-rw-r--r--arch/powerpc/mm/init_32.c7
-rw-r--r--arch/powerpc/mm/init_64.c8
-rw-r--r--arch/powerpc/mm/mem.c25
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c24
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c15
-rw-r--r--arch/powerpc/mm/mmu_decl.h1
-rw-r--r--arch/powerpc/mm/numa.c36
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c8
-rw-r--r--arch/powerpc/mm/pgtable-hash64.c6
-rw-r--r--arch/powerpc/mm/pgtable-radix.c218
-rw-r--r--arch/powerpc/mm/pgtable_32.c2
-rw-r--r--arch/powerpc/mm/pgtable_64.c5
-rw-r--r--arch/powerpc/mm/pkeys.c17
-rw-r--r--arch/powerpc/mm/slb.c108
-rw-r--r--arch/powerpc/mm/slb_low.S19
-rw-r--r--arch/powerpc/mm/slice.c485
-rw-r--r--arch/powerpc/mm/tlb-radix.c14
-rw-r--r--arch/powerpc/mm/tlb_hash64.c2
-rw-r--r--arch/powerpc/oprofile/cell/spu_task_sync.c2
-rw-r--r--arch/powerpc/oprofile/cell/vma_map.c4
-rw-r--r--arch/powerpc/perf/Makefile2
-rw-r--r--arch/powerpc/perf/core-book3s.c50
-rw-r--r--arch/powerpc/perf/power4-pmu.c622
-rw-r--r--arch/powerpc/perf/power9-events-list.h28
-rw-r--r--arch/powerpc/perf/power9-pmu.c48
-rw-r--r--arch/powerpc/platforms/4xx/msi.c5
-rw-r--r--arch/powerpc/platforms/4xx/ocm.c2
-rw-r--r--arch/powerpc/platforms/85xx/smp.c8
-rw-r--r--arch/powerpc/platforms/8xx/m8xx_setup.c8
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype20
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c2
-rw-r--r--arch/powerpc/platforms/cell/smp.c4
-rw-r--r--arch/powerpc/platforms/cell/spider-pci.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/lscsa_alloc.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/wii.c23
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c2
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_core.c4
-rw-r--r--arch/powerpc/platforms/powernv/Makefile1
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c9
-rw-r--r--arch/powerpc/platforms/powernv/idle.c88
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c261
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c32
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c10
-rw-r--r--arch/powerpc/platforms/powernv/opal-memory-errors.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-nvram.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-psr.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S2
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c5
-rw-r--r--arch/powerpc/platforms/powernv/pci-cxl.c8
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c29
-rw-r--r--arch/powerpc/platforms/powernv/pci.c135
-rw-r--r--arch/powerpc/platforms/powernv/setup.c114
-rw-r--r--arch/powerpc/platforms/powernv/smp.c2
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c2
-rw-r--r--arch/powerpc/platforms/powernv/vas-debug.c19
-rw-r--r--arch/powerpc/platforms/powernv/vas-trace.h113
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c9
-rw-r--r--arch/powerpc/platforms/powernv/vas.c6
-rw-r--r--arch/powerpc/platforms/ps3/mm.c6
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c2
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c7
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c18
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c3
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h10
-rw-r--r--arch/powerpc/platforms/pseries/setup.c85
-rw-r--r--arch/powerpc/platforms/pseries/smp.c6
-rw-r--r--arch/powerpc/sysdev/mpic.c2
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c2
-rw-r--r--arch/powerpc/sysdev/xive/common.c2
-rw-r--r--arch/powerpc/xmon/xmon.c56
-rw-r--r--drivers/macintosh/adb-iop.c14
-rw-r--r--drivers/macintosh/ans-lcd.c1
-rw-r--r--drivers/macintosh/macio-adb.c15
-rw-r--r--drivers/macintosh/rack-meter.c6
-rw-r--r--drivers/macintosh/via-macii.c14
-rw-r--r--drivers/macintosh/via-pmu.c16
-rw-r--r--drivers/macintosh/via-pmu68k.c14
-rw-r--r--drivers/misc/cxl/cxl.h6
-rw-r--r--drivers/misc/cxl/cxllib.c87
-rw-r--r--drivers/misc/cxl/native.c11
-rw-r--r--drivers/misc/cxl/pci.c102
-rw-r--r--drivers/misc/cxl/sysfs.c12
-rw-r--r--drivers/pci/hotplug/pnv_php.c2
-rw-r--r--include/linux/memblock.h3
-rw-r--r--include/linux/raid/pq.h4
-rw-r--r--lib/raid6/.gitignore1
-rw-r--r--lib/raid6/Makefile27
-rw-r--r--lib/raid6/algos.c4
-rw-r--r--lib/raid6/altivec.uc3
-rw-r--r--lib/raid6/test/Makefile22
-rw-r--r--lib/raid6/vpermxor.uc105
-rw-r--r--mm/memblock.c2
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/exec_target.c13
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/fork.c325
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c92
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c24
275 files changed, 4643 insertions, 2428 deletions
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ccd2556..95813df 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -141,11 +141,18 @@ AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
endif
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
+
CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
+CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)
ifeq ($(CONFIG_PPC_BOOK3S_64),y)
-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4)
-CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8)
+else
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5))
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4)
+endif
else
CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
endif
@@ -166,11 +173,11 @@ ifdef CONFIG_MPROFILE_KERNEL
endif
CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
-CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)
CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8)
+CFLAGS-$(CONFIG_POWER9_CPU) += $(call cc-option,-mcpu=power9)
# Altivec option not allowed with e500mc64 in GCC.
ifeq ($(CONFIG_ALTIVEC),y)
@@ -243,6 +250,7 @@ endif
cpu-as-$(CONFIG_4xx) += -Wa,-m405
cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec)
cpu-as-$(CONFIG_E200) += -Wa,-me200
+cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4
KBUILD_AFLAGS += $(cpu-as-y)
KBUILD_CFLAGS += $(cpu-as-y)
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
index 8626615..deb52e4 100644
--- a/arch/powerpc/boot/dts/acadia.dts
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -219,6 +219,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/adder875-redboot.dts b/arch/powerpc/boot/dts/adder875-redboot.dts
index 0839847..7f5ff41 100644
--- a/arch/powerpc/boot/dts/adder875-redboot.dts
+++ b/arch/powerpc/boot/dts/adder875-redboot.dts
@@ -178,6 +178,6 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
};
diff --git a/arch/powerpc/boot/dts/adder875-uboot.dts b/arch/powerpc/boot/dts/adder875-uboot.dts
index e4554ca..bd9f33c 100644
--- a/arch/powerpc/boot/dts/adder875-uboot.dts
+++ b/arch/powerpc/boot/dts/adder875-uboot.dts
@@ -177,6 +177,6 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
};
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index 7467792..8a7a101 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -410,6 +410,6 @@
};
chosen {
- linux,stdout-path = &UART0;
+ stdout-path = &UART0;
};
};
diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts
index 49ac36b..7124301 100644
--- a/arch/powerpc/boot/dts/amigaone.dts
+++ b/arch/powerpc/boot/dts/amigaone.dts
@@ -168,6 +168,6 @@
};
chosen {
- linux,stdout-path = "/pci@80000000/isa@7/serial@3f8";
+ stdout-path = "/pci@80000000/isa@7/serial@3f8";
};
};
diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts
index 9198745..e987b5a 100644
--- a/arch/powerpc/boot/dts/asp834x-redboot.dts
+++ b/arch/powerpc/boot/dts/asp834x-redboot.dts
@@ -304,7 +304,7 @@
chosen {
bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2";
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts
index aa68911..538e42b 100644
--- a/arch/powerpc/boot/dts/bamboo.dts
+++ b/arch/powerpc/boot/dts/bamboo.dts
@@ -295,6 +295,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts
index 27f169e..c5beb72 100644
--- a/arch/powerpc/boot/dts/c2k.dts
+++ b/arch/powerpc/boot/dts/c2k.dts
@@ -361,6 +361,6 @@
};
};
chosen {
- linux,stdout-path = &MPSC0;
+ stdout-path = &MPSC0;
};
};
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index f2ad581..a04a4fc 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -237,6 +237,6 @@
};
chosen {
- linux,stdout-path = &UART0;
+ stdout-path = &UART0;
};
};
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
index c280e75..c3922fc 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -78,7 +78,7 @@
};
rtc@56 {
- compatible = "mc,rv3029c2";
+ compatible = "microcrystal,rv3029";
reg = <0x56>;
};
diff --git a/arch/powerpc/boot/dts/ebony.dts b/arch/powerpc/boot/dts/ebony.dts
index ec2d142..5d11e6e 100644
--- a/arch/powerpc/boot/dts/ebony.dts
+++ b/arch/powerpc/boot/dts/ebony.dts
@@ -332,6 +332,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/eiger.dts b/arch/powerpc/boot/dts/eiger.dts
index 48bcf71..7a1231d 100644
--- a/arch/powerpc/boot/dts/eiger.dts
+++ b/arch/powerpc/boot/dts/eiger.dts
@@ -421,7 +421,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
+ stdout-path = "/plb/opb/serial@ef600200";
};
};
diff --git a/arch/powerpc/boot/dts/ep405.dts b/arch/powerpc/boot/dts/ep405.dts
index 53ef06c..4ac9c5a 100644
--- a/arch/powerpc/boot/dts/ep405.dts
+++ b/arch/powerpc/boot/dts/ep405.dts
@@ -225,6 +225,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts b/arch/powerpc/boot/dts/fsl/mvme7100.dts
index e2d306a..721cb53 100644
--- a/arch/powerpc/boot/dts/fsl/mvme7100.dts
+++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts
@@ -146,7 +146,7 @@
};
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
index 6560283..9311b86 100644
--- a/arch/powerpc/boot/dts/fsp2.dts
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -607,7 +607,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@b0020000";
+ stdout-path = "/plb/opb/serial@b0020000";
bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
};
};
diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts
index 43e6f0c..02bd304 100644
--- a/arch/powerpc/boot/dts/holly.dts
+++ b/arch/powerpc/boot/dts/holly.dts
@@ -191,6 +191,6 @@
};
chosen {
- linux,stdout-path = "/tsi109@c0000000/serial@7808";
+ stdout-path = "/tsi109@c0000000/serial@7808";
};
};
diff --git a/arch/powerpc/boot/dts/hotfoot.dts b/arch/powerpc/boot/dts/hotfoot.dts
index 71d3bb4..b93bf2d 100644
--- a/arch/powerpc/boot/dts/hotfoot.dts
+++ b/arch/powerpc/boot/dts/hotfoot.dts
@@ -291,6 +291,6 @@
};
chosen {
- linux,stdout-path = &UART0;
+ stdout-path = &UART0;
};
};
diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts
index 9c94fd7..2e6e3a7 100644
--- a/arch/powerpc/boot/dts/icon.dts
+++ b/arch/powerpc/boot/dts/icon.dts
@@ -442,6 +442,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@f0000200";
+ stdout-path = "/plb/opb/serial@f0000200";
};
};
diff --git a/arch/powerpc/boot/dts/iss4xx-mpic.dts b/arch/powerpc/boot/dts/iss4xx-mpic.dts
index 23e9d9b..f706319 100644
--- a/arch/powerpc/boot/dts/iss4xx-mpic.dts
+++ b/arch/powerpc/boot/dts/iss4xx-mpic.dts
@@ -150,6 +150,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/iss4xx.dts b/arch/powerpc/boot/dts/iss4xx.dts
index 4ff6555..5533aff 100644
--- a/arch/powerpc/boot/dts/iss4xx.dts
+++ b/arch/powerpc/boot/dts/iss4xx.dts
@@ -111,6 +111,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts
index f913dbe..02629e1 100644
--- a/arch/powerpc/boot/dts/katmai.dts
+++ b/arch/powerpc/boot/dts/katmai.dts
@@ -505,6 +505,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@f0000200";
+ stdout-path = "/plb/opb/serial@f0000200";
};
};
diff --git a/arch/powerpc/boot/dts/klondike.dts b/arch/powerpc/boot/dts/klondike.dts
index 8c94290..d9613b7 100644
--- a/arch/powerpc/boot/dts/klondike.dts
+++ b/arch/powerpc/boot/dts/klondike.dts
@@ -222,6 +222,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@50001000";
+ stdout-path = "/plb/opb/serial@50001000";
};
};
diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts
index 5d68236..fe6c17c 100644
--- a/arch/powerpc/boot/dts/ksi8560.dts
+++ b/arch/powerpc/boot/dts/ksi8560.dts
@@ -339,6 +339,6 @@
chosen {
- linux,stdout-path = "/soc/cpm/serial@91a00";
+ stdout-path = "/soc/cpm/serial@91a00";
};
};
diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts
index b5413cb..843f156 100644
--- a/arch/powerpc/boot/dts/media5200.dts
+++ b/arch/powerpc/boot/dts/media5200.dts
@@ -25,7 +25,7 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
cpus {
diff --git a/arch/powerpc/boot/dts/mpc8272ads.dts b/arch/powerpc/boot/dts/mpc8272ads.dts
index 6d2cddf..98282c1 100644
--- a/arch/powerpc/boot/dts/mpc8272ads.dts
+++ b/arch/powerpc/boot/dts/mpc8272ads.dts
@@ -262,6 +262,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@11a00";
+ stdout-path = "/soc/cpm/serial@11a00";
};
};
diff --git a/arch/powerpc/boot/dts/mpc866ads.dts b/arch/powerpc/boot/dts/mpc866ads.dts
index 34c1f48..4443fac 100644
--- a/arch/powerpc/boot/dts/mpc866ads.dts
+++ b/arch/powerpc/boot/dts/mpc866ads.dts
@@ -185,6 +185,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@a80";
+ stdout-path = "/soc/cpm/serial@a80";
};
};
diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts
index 4e93bd9..5b037f5 100644
--- a/arch/powerpc/boot/dts/mpc885ads.dts
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -227,6 +227,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@a80";
+ stdout-path = "/soc/cpm/serial@a80";
};
};
diff --git a/arch/powerpc/boot/dts/mvme5100.dts b/arch/powerpc/boot/dts/mvme5100.dts
index 1ecb341..a7eb6d2 100644
--- a/arch/powerpc/boot/dts/mvme5100.dts
+++ b/arch/powerpc/boot/dts/mvme5100.dts
@@ -179,7 +179,7 @@
};
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/obs600.dts b/arch/powerpc/boot/dts/obs600.dts
index 18e7d79..d10b041 100644
--- a/arch/powerpc/boot/dts/obs600.dts
+++ b/arch/powerpc/boot/dts/obs600.dts
@@ -309,6 +309,6 @@
};
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
+ stdout-path = "/plb/opb/serial@ef600200";
};
};
diff --git a/arch/powerpc/boot/dts/pq2fads.dts b/arch/powerpc/boot/dts/pq2fads.dts
index 0c525ff..a477615 100644
--- a/arch/powerpc/boot/dts/pq2fads.dts
+++ b/arch/powerpc/boot/dts/pq2fads.dts
@@ -242,6 +242,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@11a00";
+ stdout-path = "/soc/cpm/serial@11a00";
};
};
diff --git a/arch/powerpc/boot/dts/rainier.dts b/arch/powerpc/boot/dts/rainier.dts
index 9684c80..e59829c 100644
--- a/arch/powerpc/boot/dts/rainier.dts
+++ b/arch/powerpc/boot/dts/rainier.dts
@@ -344,7 +344,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
bootargs = "console=ttyS0,115200";
};
};
diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts
index d86a3a4..f3e046f 100644
--- a/arch/powerpc/boot/dts/redwood.dts
+++ b/arch/powerpc/boot/dts/redwood.dts
@@ -381,7 +381,7 @@
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
+ stdout-path = "/plb/opb/serial@ef600200";
};
};
diff --git a/arch/powerpc/boot/dts/sam440ep.dts b/arch/powerpc/boot/dts/sam440ep.dts
index 088361c..7d15f18 100644
--- a/arch/powerpc/boot/dts/sam440ep.dts
+++ b/arch/powerpc/boot/dts/sam440ep.dts
@@ -288,6 +288,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index e41b88a..60d211d 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -406,7 +406,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
bootargs = "console=ttyS0,115200";
};
};
diff --git a/arch/powerpc/boot/dts/storcenter.dts b/arch/powerpc/boot/dts/storcenter.dts
index 2a55573..99f6f54 100644
--- a/arch/powerpc/boot/dts/storcenter.dts
+++ b/arch/powerpc/boot/dts/storcenter.dts
@@ -137,6 +137,6 @@
};
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/taishan.dts b/arch/powerpc/boot/dts/taishan.dts
index 1657ad0..803f1bf 100644
--- a/arch/powerpc/boot/dts/taishan.dts
+++ b/arch/powerpc/boot/dts/taishan.dts
@@ -422,6 +422,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000300";
+ stdout-path = "/plb/opb/serial@40000300";
};
};
diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts
index 391a4e2..66f1c63 100644
--- a/arch/powerpc/boot/dts/virtex440-ml507.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml507.dts
@@ -32,7 +32,7 @@
} ;
chosen {
bootargs = "console=ttyS0 root=/dev/ram";
- linux,stdout-path = &RS232_Uart_1;
+ stdout-path = &RS232_Uart_1;
} ;
cpus {
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/virtex440-ml510.dts b/arch/powerpc/boot/dts/virtex440-ml510.dts
index 81201d3..3b736ca 100644
--- a/arch/powerpc/boot/dts/virtex440-ml510.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml510.dts
@@ -26,7 +26,7 @@
} ;
chosen {
bootargs = "console=ttyS0 root=/dev/ram";
- linux,stdout-path = "/plb@0/serial@83e00000";
+ stdout-path = "/plb@0/serial@83e00000";
} ;
cpus {
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts
index 4a9f726..0872862 100644
--- a/arch/powerpc/boot/dts/walnut.dts
+++ b/arch/powerpc/boot/dts/walnut.dts
@@ -241,6 +241,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index ea9053e..b4f3274 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -304,6 +304,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index 17a5bab..104b1d6 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -13,6 +13,7 @@
*/
/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
/*
* This is commented-out for now.
@@ -176,6 +177,15 @@
compatible = "nintendo,hollywood-gpio";
reg = <0x0d8000c0 0x40>;
gpio-controller;
+ ngpios = <24>;
+
+ gpio-line-names =
+ "POWER", "SHUTDOWN", "FAN", "DC_DC",
+ "DI_SPIN", "SLOT_LED", "EJECT_BTN", "SLOT_IN",
+ "SENSOR_BAR", "DO_EJECT", "EEP_CS", "EEP_CLK",
+ "EEP_MOSI", "EEP_MISO", "AVE_SCL", "AVE_SDA",
+ "DEBUG0", "DEBUG1", "DEBUG2", "DEBUG3",
+ "DEBUG4", "DEBUG5", "DEBUG6", "DEBUG7";
/*
* This is commented out while a standard binding
@@ -214,5 +224,16 @@
interrupts = <2>;
};
};
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ /* This is the blue LED in the disk drive slot */
+ drive-slot {
+ label = "wii:blue:drive_slot";
+ gpios = <&GPIO 5 GPIO_ACTIVE_HIGH>;
+ panic-indicator;
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
index 646acfb..d5e1442 100644
--- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts
+++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
@@ -503,6 +503,6 @@
/* Needed for dtbImage boot wrapper compatibility */
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/yosemite.dts b/arch/powerpc/boot/dts/yosemite.dts
index 30bb475..5650878 100644
--- a/arch/powerpc/boot/dts/yosemite.dts
+++ b/arch/powerpc/boot/dts/yosemite.dts
@@ -327,6 +327,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index f52c31b..2a0c8b1 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -7,8 +7,6 @@
#include "of.h"
-typedef u32 uint32_t;
-typedef u64 uint64_t;
typedef unsigned long uintptr_t;
typedef __be16 fdt16_t;
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 7330150..d9713ad 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -62,6 +62,7 @@ void RunModeException(struct pt_regs *regs);
void single_step_exception(struct pt_regs *regs);
void program_check_exception(struct pt_regs *regs);
void alignment_exception(struct pt_regs *regs);
+void slb_miss_bad_addr(struct pt_regs *regs);
void StackOverflow(struct pt_regs *regs);
void nonrecoverable_exception(struct pt_regs *regs);
void kernel_fp_unavailable_exception(struct pt_regs *regs);
@@ -88,7 +89,18 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
long sys_swapcontext(struct ucontext __user *old_ctx,
struct ucontext __user *new_ctx,
int ctx_size, int r6, int r7, int r8, struct pt_regs *regs);
+int sys_debug_setcontext(struct ucontext __user *ctx,
+ int ndbg, struct sig_dbg_op __user *dbg,
+ int r6, int r7, int r8,
+ struct pt_regs *regs);
+int
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp);
+unsigned long __init early_init(unsigned long dt_ptr);
+void __init machine_init(u64 dt_ptr);
#endif
+
+long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
+ u32 len_high, u32 len_low);
long sys_switch_endian(void);
notrace unsigned int __check_irq_replay(void);
void notrace restore_interrupts(void);
@@ -126,4 +138,7 @@ extern int __ucmpdi2(u64, u64);
void _mcount(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+void pnv_power9_force_smt4_catch(void);
+void pnv_power9_force_smt4_release(void);
+
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 10daa1d..c7c6395 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -35,7 +35,8 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#ifdef __SUBARCH_HAS_LWSYNC
+/* The sub-arch has lwsync */
+#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
# define SMPWMB LWSYNC
#else
# define SMPWMB eieio
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 67c5475..4b54230 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -11,6 +11,12 @@
#define H_PUD_INDEX_SIZE 9
#define H_PGD_INDEX_SIZE 9
+/*
+ * Each context is 512TB, but on 4K we restrict our max TASK size to 64TB.
+ * Hence we also limit the EA bits per context to 46 (64TB).
+ */
+#define MAX_EA_BITS_PER_CONTEXT 46
+
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
@@ -34,6 +40,14 @@
#define H_PAGE_COMBO 0x0
#define H_PTE_FRAG_NR 0
#define H_PTE_FRAG_SIZE_SHIFT 0
+
+/* memory key bits, only 8 keys supported */
+#define H_PTE_PKEY_BIT0 0
+#define H_PTE_PKEY_BIT1 0
+#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
+
/*
* On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
*/
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 3bcf269..cc82745 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -4,10 +4,16 @@
#define H_PTE_INDEX_SIZE 8
#define H_PMD_INDEX_SIZE 10
-#define H_PUD_INDEX_SIZE 7
+#define H_PUD_INDEX_SIZE 10
#define H_PGD_INDEX_SIZE 8
/*
+ * Each context is 512TB size. SLB miss for first context/default context
+ * is handled in the hotpath.
+ */
+#define MAX_EA_BITS_PER_CONTEXT 49
+
+/*
* 64k aligned address free up few of the lower bits of RPN for us
* We steal that here. For more deatils look at pte_pfn/pfn_pte()
*/
@@ -16,6 +22,13 @@
#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */
#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */
+/* memory key bits. */
+#define H_PTE_PKEY_BIT0 _RPAGE_RSV1
+#define H_PTE_PKEY_BIT1 _RPAGE_RSV2
+#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
+
/*
* We need to differentiate between explicit huge page and THP huge
* page, since THP huge page also need to track real subpage details
@@ -25,15 +38,13 @@
/* PTE flags to conserve for HPTE identification */
#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
/*
- * we support 16 fragments per PTE page of 64K size.
- */
-#define H_PTE_FRAG_NR 16
-/*
* We use a 2K PTE page fragment and another 2K for storing
* real_pte_t hash index
+ * 8 bytes per each pte entry and another 8 bytes for storing
+ * slot details.
*/
-#define H_PTE_FRAG_SIZE_SHIFT 12
-#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+#define H_PTE_FRAG_SIZE_SHIFT (H_PTE_INDEX_SIZE + 3 + 1)
+#define H_PTE_FRAG_NR (PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
#ifndef __ASSEMBLY__
#include <asm/errno.h>
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 935adcd..cc8cd65 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -212,7 +212,7 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
extern void hash__vmemmap_remove_mapping(unsigned long start,
unsigned long page_size);
-int hash__create_section_mapping(unsigned long start, unsigned long end);
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid);
int hash__remove_section_mapping(unsigned long start, unsigned long end);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 37671feb..5094696 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -80,8 +80,29 @@ struct spinlock;
/* Maximum possible number of NPUs in a system. */
#define NV_MAX_NPUS 8
+/*
+ * One bit per slice. Low slices cover 256MB segments up to the 4G
+ * boundary, which gives 16 low slices. Above that, slices are tracked
+ * in 1TB units (SLICE_NUM_HIGH bits).
+ */
+struct slice_mask {
+ u64 low_slices; /* one bit per low (256MB) slice */
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); /* one bit per high (1TB) slice */
+};
+
typedef struct {
- mm_context_id_t id;
+ union {
+ /*
+ * We use id as the PIDR content for radix. On hash we can use
+ * more than one id. The extended ids are used when we start
+ * having addresses above 512TB. We allocate one extended id
+ * for each 512TB. The new id is then used with the 49 bit
+ * EA to build a new VA. We always use ESID_BITS_1T_MASK bits
+ * from EA and new context ids to build the new VAs.
+ */
+ mm_context_id_t id;
+ mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+ };
u16 user_psize; /* page size index */
/* Number of bits in the mm_cpumask */
@@ -94,9 +115,18 @@ typedef struct {
struct npu_context *npu_context;
#ifdef CONFIG_PPC_MM_SLICES
- u64 low_slices_psize; /* SLB page size encodings */
+ /* SLB page size encodings */
+ unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long slb_addr_limit;
+# ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+# endif
+ struct slice_mask mask_4k;
+# ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+# endif
#else
u16 sllp; /* SLB page size encoding */
#endif
@@ -177,5 +207,25 @@ extern void radix_init_pseries(void);
static inline void radix_init_pseries(void) { };
#endif
+static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
+{
+ int index = ea >> MAX_EA_BITS_PER_CONTEXT; /* slot in ctx->extended_id[] selected by the upper bits of ea */
+
+ if (likely(index < ARRAY_SIZE(ctx->extended_id)))
+ return ctx->extended_id[index];
+
+ /* should never happen: index is outside extended_id[]; warn and return 0 */
+ WARN_ON(1);
+ return 0;
+}
+
+static inline unsigned long get_user_vsid(mm_context_t *ctx,
+ unsigned long ea, int ssize)
+{
+ unsigned long context = get_ea_context(ctx, ea); /* context id looked up for the region containing ea */
+
+ return get_vsid(context, ea, ssize);
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 4746bc6..558a159 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -80,8 +80,18 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
+ /*
+ * With hugetlb, we don't clear the second half of the page table.
+ * If we share the same slab cache with the pmd or pud level table,
+ * we need to make sure we zero out the full table on alloc.
+ * With 4K we don't store slot in the second half. Hence we don't
+ * need to do this for 4k.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \
+ ((H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX) || \
+ (H_PGD_INDEX_SIZE == H_PMD_CACHE_INDEX))
memset(pgd, 0, PGD_TABLE_SIZE);
-
+#endif
return pgd;
}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a6b9f1d..47b5ffc 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -60,25 +60,6 @@
/* Max physical address bit as per radix table */
#define _RPAGE_PA_MAX 57
-#ifdef CONFIG_PPC_MEM_KEYS
-#ifdef CONFIG_PPC_64K_PAGES
-#define H_PTE_PKEY_BIT0 _RPAGE_RSV1
-#define H_PTE_PKEY_BIT1 _RPAGE_RSV2
-#else /* CONFIG_PPC_64K_PAGES */
-#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */
-#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */
-#endif /* CONFIG_PPC_64K_PAGES */
-#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
-#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
-#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
-#else /* CONFIG_PPC_MEM_KEYS */
-#define H_PTE_PKEY_BIT0 0
-#define H_PTE_PKEY_BIT1 0
-#define H_PTE_PKEY_BIT2 0
-#define H_PTE_PKEY_BIT3 0
-#define H_PTE_PKEY_BIT4 0
-#endif /* CONFIG_PPC_MEM_KEYS */
-
/*
* Max physical address bit we will use for now.
*
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index a61aa9c..ca366ec 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -9,5 +9,10 @@
#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
#define RADIX_PUD_INDEX_SIZE 9
#define RADIX_PGD_INDEX_SIZE 13
+/*
+ * One fragment per page
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
#endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index c7e71ba..8300824 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -10,4 +10,10 @@
#define RADIX_PUD_INDEX_SIZE 9
#define RADIX_PGD_INDEX_SIZE 13
+/*
+ * We use a 256 byte PTE page fragment in radix
+ * 8 bytes per each PTE entry.
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
#endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 365010f..705193e 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -313,7 +313,7 @@ static inline unsigned long radix__get_tree_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int radix__create_section_mapping(unsigned long start, unsigned long end);
+int radix__create_section_mapping(unsigned long start, unsigned long end, int nid);
int radix__remove_section_mapping(unsigned long start, unsigned long end);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
new file mode 100644
index 0000000..db0deda
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
+#define _ASM_POWERPC_BOOK3S_64_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT 28 /* low slice size: 1 << 28 = 256MB */
+#define SLICE_LOW_TOP (0x100000000ul) /* 4GB: end of the range divided into low slices */
+#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) /* = 16 */
+#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT 40 /* high slice size: 1 << 40 = 1TB */
+#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
+
+#else /* !CONFIG_PPC_MM_SLICES: page size is read from context.user_psize, addr is ignored */
+
+#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
+#define slice_set_user_psize(mm, psize) \
+do { \
+ (mm)->context.user_psize = (psize); \
+ (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
+} while (0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index b77f036..11843e3 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -99,7 +99,6 @@ static inline void invalidate_dcache_range(unsigned long start,
#ifdef CONFIG_PPC64
extern void flush_dcache_range(unsigned long start, unsigned long stop);
extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
#endif
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 2e2bacb..931dda8 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -131,41 +131,48 @@ static inline void cpu_feature_keys_init(void) { }
/* CPU kernel features */
-/* Retain the 32b definitions all use bottom half of word */
+/* Definitions for features that we have on both 32-bit and 64-bit chips */
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x00000001)
-#define CPU_FTR_L2CR ASM_CONST(0x00000002)
-#define CPU_FTR_SPEC7450 ASM_CONST(0x00000004)
-#define CPU_FTR_ALTIVEC ASM_CONST(0x00000008)
-#define CPU_FTR_TAU ASM_CONST(0x00000010)
-#define CPU_FTR_CAN_DOZE ASM_CONST(0x00000020)
-#define CPU_FTR_USE_TB ASM_CONST(0x00000040)
-#define CPU_FTR_L2CSR ASM_CONST(0x00000080)
-#define CPU_FTR_601 ASM_CONST(0x00000100)
-#define CPU_FTR_DBELL ASM_CONST(0x00000200)
-#define CPU_FTR_CAN_NAP ASM_CONST(0x00000400)
-#define CPU_FTR_L3CR ASM_CONST(0x00000800)
-#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00001000)
-#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00002000)
-#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00004000)
-#define CPU_FTR_NO_DPM ASM_CONST(0x00008000)
-#define CPU_FTR_476_DD2 ASM_CONST(0x00010000)
-#define CPU_FTR_NEED_COHERENT ASM_CONST(0x00020000)
-#define CPU_FTR_NO_BTIC ASM_CONST(0x00040000)
-#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00080000)
-#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00100000)
-#define CPU_FTR_PPC_LE ASM_CONST(0x00200000)
-#define CPU_FTR_REAL_LE ASM_CONST(0x00400000)
-#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00800000)
-#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x01000000)
-#define CPU_FTR_SPE ASM_CONST(0x02000000)
-#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x04000000)
-#define CPU_FTR_LWSYNC ASM_CONST(0x08000000)
-#define CPU_FTR_NOEXECUTE ASM_CONST(0x10000000)
-#define CPU_FTR_INDEXED_DCR ASM_CONST(0x20000000)
-#define CPU_FTR_EMB_HV ASM_CONST(0x40000000)
+#define CPU_FTR_ALTIVEC ASM_CONST(0x00000002)
+#define CPU_FTR_DBELL ASM_CONST(0x00000004)
+#define CPU_FTR_CAN_NAP ASM_CONST(0x00000008)
+#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00000010)
+#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00000020)
+#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00000040)
+#define CPU_FTR_LWSYNC ASM_CONST(0x00000080)
+#define CPU_FTR_NOEXECUTE ASM_CONST(0x00000100)
+#define CPU_FTR_EMB_HV ASM_CONST(0x00000200)
+
+/* Definitions for features that only exist on 32-bit chips */
+#ifdef CONFIG_PPC32
+#define CPU_FTR_601 ASM_CONST(0x00001000)
+#define CPU_FTR_L2CR ASM_CONST(0x00002000)
+#define CPU_FTR_SPEC7450 ASM_CONST(0x00004000)
+#define CPU_FTR_TAU ASM_CONST(0x00008000)
+#define CPU_FTR_CAN_DOZE ASM_CONST(0x00010000)
+#define CPU_FTR_USE_RTC ASM_CONST(0x00020000)
+#define CPU_FTR_L3CR ASM_CONST(0x00040000)
+#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00080000)
+#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00100000)
+#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00200000)
+#define CPU_FTR_NO_DPM ASM_CONST(0x00400000)
+#define CPU_FTR_476_DD2 ASM_CONST(0x00800000)
+#define CPU_FTR_NEED_COHERENT ASM_CONST(0x01000000)
+#define CPU_FTR_NO_BTIC ASM_CONST(0x02000000)
+#define CPU_FTR_PPC_LE ASM_CONST(0x04000000)
+#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x08000000)
+#define CPU_FTR_SPE ASM_CONST(0x10000000)
+#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x20000000)
+#define CPU_FTR_INDEXED_DCR ASM_CONST(0x40000000)
+
+#else /* CONFIG_PPC32 */
+/* Define these to 0 for the sake of tests in common code */
+#define CPU_FTR_601 (0)
+#define CPU_FTR_PPC_LE (0)
+#endif
/*
- * Add the 64-bit processor unique features in the top half of the word;
+ * Definitions for the 64-bit processor unique features;
* on 32-bit, make the names available but defined to be 0.
*/
#ifdef __powerpc64__
@@ -174,38 +181,40 @@ static inline void cpu_feature_keys_init(void) { }
#define LONG_ASM_CONST(x) 0
#endif
-#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000)
-#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000)
-#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000001000000000)
-#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000)
-#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000)
-#define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000)
-#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_PURR LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000080000000000)
-#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000100000000000)
-#define CPU_FTR_VSX LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_SAO LONG_ASM_CONST(0x0000400000000000)
-#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000800000000000)
-#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0001000000000000)
-#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000)
-#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000)
-#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000)
-#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000)
-#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000)
-#define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0200000000000000)
-#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000)
-#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
-#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000)
-#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
-#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000)
+#define CPU_FTR_REAL_LE LONG_ASM_CONST(0x0000000000001000)
+#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000000002000)
+#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000000008000)
+#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000000010000)
+#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000000000020000)
+#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000000000040000)
+#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000000000080000)
+#define CPU_FTR_SMT LONG_ASM_CONST(0x0000000000100000)
+#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000000000200000)
+#define CPU_FTR_PURR LONG_ASM_CONST(0x0000000000400000)
+#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000000000800000)
+#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000)
+#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000)
+#define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000)
+#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000)
+#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000)
+#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000)
+#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000)
+#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0000000080000000)
+#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0000000100000000)
+#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0000000200000000)
+#define CPU_FTR_PKEY LONG_ASM_CONST(0x0000000400000000)
+#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_TM LONG_ASM_CONST(0x0000001000000000)
+#define CPU_FTR_CFAR LONG_ASM_CONST(0x0000002000000000)
+#define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0000004000000000)
+#define CPU_FTR_DAWR LONG_ASM_CONST(0x0000008000000000)
+#define CPU_FTR_DABRX LONG_ASM_CONST(0x0000010000000000)
+#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x0000020000000000)
+#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x0000040000000000)
+#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
+#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
+#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
+#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000)
#ifndef __ASSEMBLY__
@@ -286,21 +295,19 @@ static inline void cpu_feature_keys_init(void) { }
#endif
#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
-#define CPU_FTRS_603 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC)
+#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
-#define CPU_FTRS_604 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_PPC_LE)
+#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_740 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_PPC_LE)
#define CPU_FTRS_750 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_PPC_LE)
#define CPU_FTRS_750CL (CPU_FTRS_750)
@@ -309,125 +316,114 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTRS_750FX (CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX)
#define CPU_FTRS_750GX (CPU_FTRS_750FX)
#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_7400 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447A (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7448 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
-#define CPU_FTRS_82XX (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
+#define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE)
#define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP)
+ CPU_FTR_MAYBE_CAN_NAP)
#define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_COMMON)
#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
-#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB)
-#define CPU_FTRS_8XX (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON)
+#define CPU_FTRS_8XX (CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_40X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_44X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_440x6 (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
CPU_FTR_INDEXED_DCR)
#define CPU_FTRS_47X (CPU_FTRS_440x6)
-#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
+#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
CPU_FTR_DEBUG_LVL_EXC)
-#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E500MC (CPU_FTR_NODSISRALIGN | \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
/*
* e5500/e6500 erratum A-006958 is a timebase bug that can use the
* same workaround as CPU_FTR_CELL_TB_BUG.
*/
-#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E5500 (CPU_FTR_NODSISRALIGN | \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG)
-#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E6500 (CPU_FTR_NODSISRALIGN | \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT)
#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
/* 64-bit CPUs */
-#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
- CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
- CPU_FTR_STCX_CHECKS_ADDRESS)
-#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \
CPU_FTR_HVMODE | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER5 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -435,7 +431,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \
CPU_FTR_DABRX)
-#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -444,7 +440,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | \
CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
-#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -456,7 +452,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
-#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -464,33 +460,45 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_DSCR | CPU_FTR_SAO | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
- CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
- CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG)
+ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+ CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+ CPU_FTR_P9_TLBIE_BUG)
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
(~CPU_FTR_SAO))
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
-#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+ CPU_FTR_P9_TM_HV_ASSIST | \
+ CPU_FTR_P9_TM_XER_SO_BUG)
+#define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX)
-#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX)
-#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
+#define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2)
#ifdef __powerpc64__
#ifdef CONFIG_PPC_BOOK3E
#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500)
#else
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define CPU_FTRS_POSSIBLE \
- (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
+ (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
+ CPU_FTRS_POWER8_DD1 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | \
+ CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+ CPU_FTRS_POWER9_DD2_2)
+#else
+#define CPU_FTRS_POSSIBLE \
+ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
- CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1)
+ CPU_FTRS_PA6T | CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+ CPU_FTRS_POWER9_DD2_2)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
enum {
@@ -537,12 +545,19 @@ enum {
#ifdef CONFIG_PPC_BOOK3E
#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500)
#else
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define CPU_FTRS_ALWAYS \
+ (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
+ CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1)
+#else
#define CPU_FTRS_ALWAYS \
- (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
+ (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
- CPU_FTRS_POWER9)
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index fc97404..ce5da21 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -47,6 +47,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
void set_breakpoint(struct arch_hw_breakpoint *brk);
void __set_breakpoint(struct arch_hw_breakpoint *brk);
+bool ppc_breakpoint_available(void);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
extern void do_send_trap(struct pt_regs *regs, unsigned long address,
unsigned long error_code, int brkpt);
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index fd37cc1..c2266ca 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -256,6 +256,12 @@ static inline void eeh_serialize_unlock(unsigned long flags)
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}
+static inline bool eeh_state_active(int state)
+{
+ return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
+ == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+}
+
typedef void *(*eeh_traverse_func)(void *data, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 1e551a2..9884e87 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -34,7 +34,8 @@ struct eeh_event {
int eeh_event_init(void);
int eeh_send_failure_event(struct eeh_pe *pe);
void eeh_remove_event(struct eeh_pe *pe, bool force);
-void eeh_handle_event(struct eeh_pe *pe);
+void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_special_event(void);
#endif /* __KERNEL__ */
#endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 9086324..d3a7e36 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -466,17 +466,17 @@ static inline unsigned long epapr_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr)
{
- unsigned long register r0 asm("r0");
- unsigned long register r3 asm("r3") = in[0];
- unsigned long register r4 asm("r4") = in[1];
- unsigned long register r5 asm("r5") = in[2];
- unsigned long register r6 asm("r6") = in[3];
- unsigned long register r7 asm("r7") = in[4];
- unsigned long register r8 asm("r8") = in[5];
- unsigned long register r9 asm("r9") = in[6];
- unsigned long register r10 asm("r10") = in[7];
- unsigned long register r11 asm("r11") = nr;
- unsigned long register r12 asm("r12");
+ register unsigned long r0 asm("r0");
+ register unsigned long r3 asm("r3") = in[0];
+ register unsigned long r4 asm("r4") = in[1];
+ register unsigned long r5 asm("r5") = in[2];
+ register unsigned long r6 asm("r6") = in[3];
+ register unsigned long r7 asm("r7") = in[4];
+ register unsigned long r8 asm("r8") = in[5];
+ register unsigned long r9 asm("r9") = in[6];
+ register unsigned long r10 asm("r10") = in[7];
+ register unsigned long r11 asm("r11") = nr;
+ register unsigned long r12 asm("r12");
asm volatile("bl epapr_hypercall_start"
: "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 6f6751d..78540c0 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -89,17 +89,17 @@ pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
void flush_dcache_icache_hugepage(struct page *page);
-#if defined(CONFIG_PPC_MM_SLICES)
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len);
-#else
+
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
unsigned long len)
{
+ if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled())
+ return slice_is_hugepage_only_range(mm, addr, len);
return 0;
}
-#endif
void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
pte_t pte);
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index eca3f9c..2e2ddda 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -88,6 +88,7 @@
#define H_P8 -61
#define H_P9 -62
#define H_TOO_BIG -64
+#define H_UNSUPPORTED -67
#define H_OVERLAP -68
#define H_INTERRUPT -69
#define H_BAD_DATA -70
@@ -337,6 +338,9 @@
#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
+#define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5
+#define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6
+#define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7
#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ac6432d..8e7b097 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -66,6 +66,7 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
int arch_install_hw_breakpoint(struct perf_event *bp);
void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void arch_unregister_hw_breakpoint(struct perf_event *bp);
void hw_breakpoint_pmu_read(struct perf_event *bp);
extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
@@ -79,9 +80,11 @@ static inline void hw_breakpoint_disable(void)
brk.address = 0;
brk.type = 0;
brk.len = 0;
- __set_breakpoint(&brk);
+ if (ppc_breakpoint_available())
+ __set_breakpoint(&brk);
}
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
+int hw_breakpoint_handler(struct die_args *args);
#else /* CONFIG_HAVE_HW_BREAKPOINT */
static inline void hw_breakpoint_disable(void) { }
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 422f99c..af07492 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -33,8 +33,6 @@ extern struct pci_dev *isa_bridge_pcidev;
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
-#include <asm-generic/iomap.h>
-
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
@@ -663,6 +661,8 @@ static inline void name at \
#define writel_relaxed(v, addr) writel(v, addr)
#define writeq_relaxed(v, addr) writeq(v, addr)
+#include <asm-generic/iomap.h>
+
#ifdef CONFIG_PPC32
#define mmiowb()
#else
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index e8e3a0a..ee39ce5 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -66,6 +66,7 @@ extern void irq_ctx_init(void);
extern void call_do_softirq(struct thread_info *tp);
extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
extern void do_IRQ(struct pt_regs *regs);
+extern void __init init_IRQ(void);
extern void __do_irq(struct pt_regs *regs);
int irq_choose_cpu(const struct cpumask *mask);
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
index c6d3078..b8b0be8 100644
--- a/arch/powerpc/include/asm/irq_work.h
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -6,5 +6,6 @@ static inline bool arch_irq_work_has_interrupt(void)
{
return true;
}
+extern void arch_irq_work_raise(void);
#endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 09a802b..a790d5c 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -108,6 +108,8 @@
/* book3s_hv */
+#define BOOK3S_INTERRUPT_HV_SOFTPATCH 0x1500
+
/*
* Special trap used to indicate to host that this is a
* passthrough interrupt that could not be handled
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 376ae80..4c02a73 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -241,6 +241,10 @@ extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
unsigned long mask);
extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
+extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu);
+extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu);
+extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu);
+
extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);
extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 998f7b7..c424e44 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -472,6 +472,49 @@ static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
set_bit_le(i, map);
}
+static inline u64 sanitize_msr(u64 msr)
+{
+ msr &= ~MSR_HV;
+ msr |= MSR_ME;
+ return msr;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.cr = vcpu->arch.cr_tm;
+ vcpu->arch.xer = vcpu->arch.xer_tm;
+ vcpu->arch.lr = vcpu->arch.lr_tm;
+ vcpu->arch.ctr = vcpu->arch.ctr_tm;
+ vcpu->arch.amr = vcpu->arch.amr_tm;
+ vcpu->arch.ppr = vcpu->arch.ppr_tm;
+ vcpu->arch.dscr = vcpu->arch.dscr_tm;
+ vcpu->arch.tar = vcpu->arch.tar_tm;
+ memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm,
+ sizeof(vcpu->arch.gpr));
+ vcpu->arch.fp = vcpu->arch.fp_tm;
+ vcpu->arch.vr = vcpu->arch.vr_tm;
+ vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.cr_tm = vcpu->arch.cr;
+ vcpu->arch.xer_tm = vcpu->arch.xer;
+ vcpu->arch.lr_tm = vcpu->arch.lr;
+ vcpu->arch.ctr_tm = vcpu->arch.ctr;
+ vcpu->arch.amr_tm = vcpu->arch.amr;
+ vcpu->arch.ppr_tm = vcpu->arch.ppr;
+ vcpu->arch.dscr_tm = vcpu->arch.dscr;
+ vcpu->arch.tar_tm = vcpu->arch.tar;
+ memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr,
+ sizeof(vcpu->arch.gpr));
+ vcpu->arch.fp_tm = vcpu->arch.fp;
+ vcpu->arch.vr_tm = vcpu->arch.vr;
+ vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index ab386af..d978fdf 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -119,6 +119,7 @@ struct kvmppc_host_state {
u8 host_ipi;
u8 ptid; /* thread number within subcore when split */
u8 tid; /* thread number within whole core */
+ u8 fake_suspend;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
void __iomem *xics_phys;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1f53b56..deb5429 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -610,6 +610,7 @@ struct kvm_vcpu_arch {
u64 tfhar;
u64 texasr;
u64 tfiar;
+ u64 orig_texasr;
u32 cr_tm;
u64 xer_tm;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 7765a80..b7d066b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -436,15 +436,15 @@ struct openpic;
extern void kvm_cma_reserve(void) __init;
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{
- paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
+ paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr;
}
static inline void kvmppc_set_xive_tima(int cpu,
unsigned long phys_addr,
void __iomem *virt_addr)
{
- paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
- paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
+ paca_ptrs[cpu]->kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
+ paca_ptrs[cpu]->kvm_hstate.xive_tima_virt = virt_addr;
}
static inline u32 kvmppc_get_xics_latch(void)
@@ -458,7 +458,7 @@ static inline u32 kvmppc_get_xics_latch(void)
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
{
- paca[cpu].kvm_hstate.host_ipi = host_ipi;
+ paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index d0a2a2f..7c23ce8 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -34,16 +34,19 @@
#include <linux/threads.h>
#include <asm/types.h>
#include <asm/mmu.h>
+#include <asm/firmware.h>
/*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#define NR_LPPACAS 1
-
-/*
- * The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
- * alignment is sufficient to prevent this
+ * The lppaca is the "virtual processor area" registered with the hypervisor,
+ * H_REGISTER_VPA etc.
+ *
+ * According to PAPR, the structure is 640 bytes long, must be L1 cache line
+ * aligned, and must not cross a 4kB boundary. Its size field must be at
+ * least 640 bytes (but may be more).
+ *
+ * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than
+ * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep
+ * this structure as the canonical 640 byte size.
*/
struct lppaca {
/* cacheline 1 contains read-only data */
@@ -97,13 +100,11 @@ struct lppaca {
__be32 page_ins; /* CMO Hint - # page ins by OS */
u8 reserved11[148];
- volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
+ volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
u8 reserved12[96];
-} __attribute__((__aligned__(0x400)));
-
-extern struct lppaca lppaca[];
+} ____cacheline_aligned;
-#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr)
+#define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
/*
* We are using a non architected field to determine if a partition is
@@ -114,6 +115,8 @@ extern struct lppaca lppaca[];
static inline bool lppaca_shared_proc(struct lppaca *l)
{
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return false;
return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
}
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 2f806e3..4f54775 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -186,11 +186,32 @@
#define M_APG2 0x00000040
#define M_APG3 0x00000060
+#ifdef CONFIG_PPC_MM_SLICES
+#include <asm/nohash/32/slice.h>
+#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
+#endif
+
#ifndef __ASSEMBLY__
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, 0);
+};
+
typedef struct {
unsigned int id;
unsigned int active;
unsigned long vdso_base;
+#ifdef CONFIG_PPC_MM_SLICES
+ u16 user_psize; /* page size index */
+ unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned char high_slices_psize[0];
+ unsigned long slb_addr_limit;
+ struct slice_mask mask_base_psize; /* 4k or 16k */
+# ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_512k;
+ struct slice_mask mask_8m;
+# endif
+#endif
} mm_context_t;
#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index bb38312..61d15ce 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -111,9 +111,9 @@
/* MMU feature bit sets for various CPUs */
#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
-#define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2
-#define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
-#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970 MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA
+#define MMU_FTRS_POWER5 MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER6 MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
#define MMU_FTRS_POWER7 MMU_FTRS_POWER6
#define MMU_FTRS_POWER8 MMU_FTRS_POWER6
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 3a15b6d..1835ca1 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -60,12 +60,51 @@ extern int hash__alloc_context_id(void);
extern void hash__reserve_context_id(int id);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }
+
+static inline int alloc_extended_context(struct mm_struct *mm,
+ unsigned long ea)
+{
+ int context_id;
+
+ int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+ context_id = hash__alloc_context_id();
+ if (context_id < 0)
+ return context_id;
+
+ VM_WARN_ON(mm->context.extended_id[index]);
+ mm->context.extended_id[index] = context_id;
+ return context_id;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+ int context_id;
+
+ context_id = get_ea_context(&mm->context, ea);
+ if (!context_id)
+ return true;
+ return false;
+}
+
#else
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
extern unsigned long __init_new_context(void);
extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
+static inline int alloc_extended_context(struct mm_struct *mm,
+ unsigned long ea)
+{
+ /* non book3s_64 should never find this called */
+ WARN_ON(1);
+ return -ENOMEM;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+ return false;
+}
#endif
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
new file mode 100644
index 0000000..777d62e
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/slice.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H
+#define _ASM_POWERPC_NOHASH_32_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT 26 /* 64 slices */
+#define SLICE_LOW_TOP (0x100000000ull)
+#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT 0
+#define SLICE_NUM_HIGH 0ul
+#define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h
new file mode 100644
index 0000000..ad0d6e3
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/slice.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H
+#define _ASM_POWERPC_NOHASH_64_SLICE_H
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define get_slice_psize(mm, addr) MMU_PAGE_64K
+#else /* CONFIG_PPC_64K_PAGES */
+#define get_slice_psize(mm, addr) MMU_PAGE_4K
+#endif /* !CONFIG_PPC_64K_PAGES */
+#define slice_set_user_psize(mm, psize) do { BUG(); } while (0)
+
+#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 94bd1bf..d886a5b 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -204,7 +204,9 @@
#define OPAL_NPU_SPA_SETUP 159
#define OPAL_NPU_SPA_CLEAR_CACHE 160
#define OPAL_NPU_TL_SET 161
-#define OPAL_LAST 161
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
+#define OPAL_LAST 165
/* Device tree flags */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 12e70fb..7159e1a 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -204,6 +204,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
@@ -323,7 +325,7 @@ struct rtc_time;
extern unsigned long opal_get_boot_time(void);
extern void opal_nvram_init(void);
extern void opal_flash_update_init(void);
-extern void opal_flash_term_callback(void);
+extern void opal_flash_update_print_message(void);
extern int opal_elog_init(void);
extern void opal_platform_dump_init(void);
extern void opal_sys_param_init(void);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b62c310..4185f1c 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -32,6 +32,7 @@
#include <asm/accounting.h>
#include <asm/hmi.h>
#include <asm/cpuidle.h>
+#include <asm/atomic.h>
register struct paca_struct *local_paca asm("r13");
@@ -46,7 +47,10 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
#define get_paca() local_paca
#endif
+#ifdef CONFIG_PPC_PSERIES
#define get_lppaca() (get_paca()->lppaca_ptr)
+#endif
+
#define get_slb_shadow() (get_paca()->slb_shadow_ptr)
struct task_struct;
@@ -58,7 +62,7 @@ struct task_struct;
* processor.
*/
struct paca_struct {
-#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_PSERIES
/*
* Because hw_cpu_id, unlike other paca fields, is accessed
* routinely from other CPUs (from the IRQ code), we stick to
@@ -67,7 +71,8 @@ struct paca_struct {
*/
struct lppaca *lppaca_ptr; /* Pointer to LpPaca for PLIC */
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
+
/*
* MAGIC: the spinlock functions in arch/powerpc/lib/locks.c
* load lock_token and paca_index with a single lwz
@@ -141,7 +146,7 @@ struct paca_struct {
#ifdef CONFIG_PPC_BOOK3S
mm_context_id_t mm_ctx_id;
#ifdef CONFIG_PPC_MM_SLICES
- u64 mm_ctx_low_slices_psize;
+ unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long mm_ctx_slb_addr_limit;
#else
@@ -160,10 +165,14 @@ struct paca_struct {
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
u8 irq_soft_mask; /* mask for irq soft masking */
+ u8 soft_enabled; /* irq soft-enable flag */
u8 irq_happened; /* irq happened while soft-disabled */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
u8 nap_state_lost; /* NV GPR values lost in power7_idle */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ u8 pmcregs_in_use; /* pseries puts this in lppaca */
+#endif
u64 sprg_vdso; /* Saved user-visible sprg */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
u64 tm_scratch; /* TM scratch area for reclaim */
@@ -177,6 +186,8 @@ struct paca_struct {
u8 thread_mask;
/* Mask to denote subcore sibling threads */
u8 subcore_sibling_mask;
+ /* Flag to request this thread not to stop */
+ atomic_t dont_stop;
/*
* Pointer to an array which contains pointer
* to the sibling threads' paca.
@@ -241,18 +252,20 @@ struct paca_struct {
void *rfi_flush_fallback_area;
u64 l1d_flush_size;
#endif
-};
+} ____cacheline_aligned;
extern void copy_mm_to_paca(struct mm_struct *mm);
-extern struct paca_struct *paca;
+extern struct paca_struct **paca_ptrs;
extern void initialise_paca(struct paca_struct *new_paca, int cpu);
extern void setup_paca(struct paca_struct *new_paca);
-extern void allocate_pacas(void);
+extern void allocate_paca_ptrs(void);
+extern void allocate_paca(int cpu);
extern void free_unused_pacas(void);
#else /* CONFIG_PPC64 */
-static inline void allocate_pacas(void) { };
+static inline void allocate_paca_ptrs(void) { };
+static inline void allocate_paca(int cpu) { };
static inline void free_unused_pacas(void) { };
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 8da5d4c..dec9ce5 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -126,7 +126,15 @@ extern long long virt_phys_offset;
#ifdef CONFIG_FLATMEM
#define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT))
-#define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
+#ifndef __ASSEMBLY__
+extern unsigned long max_mapnr;
+static inline bool pfn_valid(unsigned long pfn)
+{
+ unsigned long min_pfn = ARCH_PFN_OFFSET;
+
+ return pfn >= min_pfn && pfn < max_mapnr;
+}
+#endif
#endif
#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
@@ -344,5 +352,6 @@ typedef struct page *pgtable_t;
#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */
+#include <asm/slice.h>
#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 56234c6..af04acd 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -86,65 +86,6 @@ extern u64 ppc64_pft_size;
#endif /* __ASSEMBLY__ */
-#ifdef CONFIG_PPC_MM_SLICES
-
-#define SLICE_LOW_SHIFT 28
-#define SLICE_HIGH_SHIFT 40
-
-#define SLICE_LOW_TOP (0x100000000ul)
-#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
-
-#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
-#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-
-#ifndef __ASSEMBLY__
-struct mm_struct;
-
-extern unsigned long slice_get_unmapped_area(unsigned long addr,
- unsigned long len,
- unsigned long flags,
- unsigned int psize,
- int topdown);
-
-extern unsigned int get_slice_psize(struct mm_struct *mm,
- unsigned long addr);
-
-extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
-extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long len, unsigned int psize);
-
-#endif /* __ASSEMBLY__ */
-#else
-#define slice_init()
-#ifdef CONFIG_PPC_BOOK3S_64
-#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize) \
-do { \
- (mm)->context.user_psize = (psize); \
- (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-#else /* !CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr) MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr) MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#define slice_set_range_psize(mm, start, len, psize) \
- slice_set_user_psize((mm), (psize))
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
-#endif /* !CONFIG_HUGETLB_PAGE */
-
#define VM_DATA_DEFAULT_FLAGS \
(is_32bit_task() ? \
VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 723bf48..67a8a95 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -53,6 +53,8 @@ struct power_pmu {
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+ int n_blacklist_ev;
+ int *blacklist_ev;
/* BHRB entries in the PMU */
int bhrb_nr;
};
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 55eddf5..96c1a46 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H
#define _ASM_POWERPC_PLPAR_WRAPPERS_H
+#ifdef CONFIG_PPC_PSERIES
+
#include <linux/string.h>
#include <linux/irqflags.h>
@@ -9,14 +11,6 @@
#include <asm/paca.h>
#include <asm/page.h>
-/* Get state of physical CPU from query_cpu_stopped */
-int smp_query_cpu_stopped(unsigned int pcpu);
-#define QCSS_STOPPED 0
-#define QCSS_STOPPING 1
-#define QCSS_NOT_STOPPED 2
-#define QCSS_HARDWARE_ERROR -1
-#define QCSS_HARDWARE_BUSY -2
-
static inline long poll_pending(void)
{
return plpar_hcall_norets(H_POLL_PENDING);
@@ -311,17 +305,17 @@ static inline long enable_little_endian_exceptions(void)
return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0);
}
-static inline long plapr_set_ciabr(unsigned long ciabr)
+static inline long plpar_set_ciabr(unsigned long ciabr)
{
return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0);
}
-static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
+static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
{
return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
}
-static inline long plapr_signal_sys_reset(long cpu)
+static inline long plpar_signal_sys_reset(long cpu)
{
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
}
@@ -340,4 +334,12 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
return rc;
}
+#else /* !CONFIG_PPC_PSERIES */
+
+static inline long plpar_set_ciabr(unsigned long ciabr)
+{
+ return 0;
+}
+#endif /* CONFIG_PPC_PSERIES */
+
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index 5a9ede4..7ac3586 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -31,10 +31,21 @@ void ppc_enable_pmcs(void);
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/lppaca.h>
+#include <asm/firmware.h>
static inline void ppc_set_pmu_inuse(int inuse)
{
- get_lppaca()->pmcregs_in_use = inuse;
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+#ifdef CONFIG_PPC_PSERIES
+ get_lppaca()->pmcregs_in_use = inuse;
+#endif
+ } else {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ get_paca()->pmcregs_in_use = inuse;
+#endif
+ }
+#endif
}
extern void power4_enable_pmcs(void);
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf25..d2d8c28 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,12 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
u64 desc);
+extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind);
+extern int pnv_pci_disable_tunnel(struct pci_dev *dev);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
unsigned int virq);
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index dc5f6a5..d1c2d2e6 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -40,6 +40,7 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context,
}
static inline void pnv_tm_init(void) { }
+static inline void pnv_power9_force_smt4(void) { }
#endif
#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index f1083bc..18883b8 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -232,6 +232,7 @@
#define PPC_INST_MSGSYNC 0x7c0006ec
#define PPC_INST_MSGSNDP 0x7c00011c
#define PPC_INST_MSGCLRP 0x7c00015c
+#define PPC_INST_MTMSRD 0x7c000164
#define PPC_INST_MTTMR 0x7c0003dc
#define PPC_INST_NOP 0x60000000
#define PPC_INST_PASTE 0x7c20070d
@@ -239,8 +240,10 @@
#define PPC_INST_POPCNTB_MASK 0xfc0007fe
#define PPC_INST_POPCNTD 0x7c0003f4
#define PPC_INST_POPCNTW 0x7c0002f4
+#define PPC_INST_RFEBB 0x4c000124
#define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e
+#define PPC_INST_RFID 0x4c000024
#define PPC_INST_RFMCI 0x4c00004c
#define PPC_INST_MFSPR 0x7c0002a6
#define PPC_INST_MFSPR_DSCR 0x7c1102a6
@@ -271,12 +274,14 @@
#define PPC_INST_TLBSRX_DOT 0x7c0006a5
#define PPC_INST_VPMSUMW 0x10000488
#define PPC_INST_VPMSUMD 0x100004c8
+#define PPC_INST_VPERMXOR 0x1000002d
#define PPC_INST_XXLOR 0xf0000490
#define PPC_INST_XXSWAPD 0xf0000250
#define PPC_INST_XVCPSGNDP 0xf0000780
#define PPC_INST_TRECHKPT 0x7c0007dd
#define PPC_INST_TRECLAIM 0x7c00075d
#define PPC_INST_TABORT 0x7c00071d
+#define PPC_INST_TSR 0x7c0005dd
#define PPC_INST_NAP 0x4c000364
#define PPC_INST_SLEEP 0x4c0003a4
@@ -517,6 +522,11 @@
#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
VSX_XX3((t), (a), (b))))
+#define VPERMXOR(vrt, vra, vrb, vrc) \
+ stringify_in_c(.long (PPC_INST_VPERMXOR | \
+ ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+ ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))
+
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE)
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index ae94b36..13f7f4c 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -439,14 +439,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
/* The following stops all load and store data streams associated with stream
* ID (ie. streams created explicitly). The embedded and server mnemonics for
- * dcbt are different so we use machine "power4" here explicitly.
+ * dcbt are different so this must only be used for server.
*/
-#define DCBT_STOP_ALL_STREAM_IDS(scratch) \
-.machine push ; \
-.machine "power4" ; \
- lis scratch,0x60000000@h; \
- dcbt 0,scratch,0b01010; \
-.machine pop
+#define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch) \
+ lis scratch,0x60000000@h; \
+ dcbt 0,scratch,0b01010
/*
* toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 01299cdc..c4b36a4 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -109,6 +109,13 @@ void release_thread(struct task_struct *);
#define TASK_SIZE_64TB (0x0000400000000000UL)
#define TASK_SIZE_128TB (0x0000800000000000UL)
#define TASK_SIZE_512TB (0x0002000000000000UL)
+#define TASK_SIZE_1PB (0x0004000000000000UL)
+#define TASK_SIZE_2PB (0x0008000000000000UL)
+/*
+ * With 52 bits in the address we can support
+ * up to 4PB of range.
+ */
+#define TASK_SIZE_4PB (0x0010000000000000UL)
/*
* For now 512TB is only supported with book3s and 64K linux page size.
@@ -117,11 +124,17 @@ void release_thread(struct task_struct *);
/*
* Max value currently used:
*/
-#define TASK_SIZE_USER64 TASK_SIZE_512TB
+#define TASK_SIZE_USER64 TASK_SIZE_4PB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE TASK_SIZE_512TB
#else
#define TASK_SIZE_USER64 TASK_SIZE_64TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
+/*
+ * We don't need to allocate extended context ids for 4K page size, because
+ * we limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
#endif
/*
@@ -505,6 +518,7 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */
extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
extern void power7_idle_type(unsigned long type);
extern unsigned long power9_idle_stop(unsigned long psscr_val);
+extern unsigned long power9_offline_stop(unsigned long psscr_val);
extern void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e6c7ead..cb0f272 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -156,6 +156,8 @@
#define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
+#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
/* Floating Point Status and Control Register (FPSCR) Fields */
#define FPSCR_FX 0x80000000 /* FPU exception summary */
@@ -237,7 +239,12 @@
#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
+#define TEXASR_ABORT __MASK(63-31) /* terminated by tabort or treclaim */
+#define TEXASR_SUSP __MASK(63-32) /* tx failed in suspended state */
+#define TEXASR_HV __MASK(63-34) /* MSR[HV] when failure occurred */
+#define TEXASR_PR __MASK(63-35) /* MSR[PR] when failure occurred */
#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
+#define TEXASR_EXACT __MASK(63-37) /* TFIAR value is exact */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
#define SPRN_TIDR 144 /* Thread ID register */
#define SPRN_CTRLF 0x088
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
new file mode 100644
index 0000000..fa4d2e1
--- /dev/null
+++ b/arch/powerpc/include/asm/security_features.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Security related feature bit definitions.
+ *
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SECURITY_FEATURES_H
+#define _ASM_POWERPC_SECURITY_FEATURES_H
+
+
+extern unsigned long powerpc_security_features;
+extern bool rfi_flush;
+
+static inline void security_ftr_set(unsigned long feature)
+{
+ powerpc_security_features |= feature;
+}
+
+static inline void security_ftr_clear(unsigned long feature)
+{
+ powerpc_security_features &= ~feature;
+}
+
+static inline bool security_ftr_enabled(unsigned long feature)
+{
+ return !!(powerpc_security_features & feature);
+}
+
+
+// Features indicating support for Spectre/Meltdown mitigations
+
+// The L1-D cache can be flushed with ori r30,r30,0
+#define SEC_FTR_L1D_FLUSH_ORI30 0x0000000000000001ull
+
+// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2)
+#define SEC_FTR_L1D_FLUSH_TRIG2 0x0000000000000002ull
+
+// ori r31,r31,0 acts as a speculation barrier
+#define SEC_FTR_SPEC_BAR_ORI31 0x0000000000000004ull
+
+// Speculation past bctr is disabled
+#define SEC_FTR_BCCTRL_SERIALISED 0x0000000000000008ull
+
+// Entries in L1-D are private to a SMT thread
+#define SEC_FTR_L1D_THREAD_PRIV 0x0000000000000010ull
+
+// Indirect branch prediction cache disabled
+#define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull
+
+
+// Features indicating need for Spectre/Meltdown mitigations
+
+// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest)
+#define SEC_FTR_L1D_FLUSH_HV 0x0000000000000040ull
+
+// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace)
+#define SEC_FTR_L1D_FLUSH_PR 0x0000000000000080ull
+
+// A speculation barrier should be used for bounds checks (Spectre variant 1)
+#define SEC_FTR_BNDS_CHK_SPEC_BAR 0x0000000000000100ull
+
+// Firmware configuration indicates user favours security over performance
+#define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull
+
+
+// Features enabled by default
+#define SEC_FTR_DEFAULT \
+ (SEC_FTR_L1D_FLUSH_HV | \
+ SEC_FTR_L1D_FLUSH_PR | \
+ SEC_FTR_BNDS_CHK_SPEC_BAR | \
+ SEC_FTR_FAVOUR_SECURITY)
+
+#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 469b7fd..27fa52e 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -23,6 +23,7 @@ extern void reloc_got2(unsigned long);
#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
void check_for_initrd(void);
+void mem_topology_setup(void);
void initmem_init(void);
void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180
@@ -49,7 +50,7 @@ enum l1d_flush_type {
L1D_FLUSH_MTTRIG = 0x8,
};
-void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_rfi_flush(enum l1d_flush_type, bool enable);
void do_rfi_flush_fixups(enum l1d_flush_type types);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
new file mode 100644
index 0000000..e40406cf5
--- /dev/null
+++ b/arch/powerpc/include/asm/slice.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SLICE_H
+#define _ASM_POWERPC_SLICE_H
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/slice.h>
+#elif defined(CONFIG_PPC64)
+#include <asm/nohash/64/slice.h>
+#elif defined(CONFIG_PPC_MMU_NOHASH)
+#include <asm/nohash/32/slice.h>
+#endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_SLICE_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index fac963e..cfecfee 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -31,6 +31,7 @@
extern int boot_cpuid;
extern int spinning_secondaries;
+extern u32 *cpu_to_phys_id;
extern void cpu_die(void);
extern int cpu_to_chip_id(int cpu);
@@ -170,12 +171,12 @@ static inline const struct cpumask *cpu_sibling_mask(int cpu)
#ifdef CONFIG_PPC64
static inline int get_hard_smp_processor_id(int cpu)
{
- return paca[cpu].hw_cpu_id;
+ return paca_ptrs[cpu]->hw_cpu_id;
}
static inline void set_hard_smp_processor_id(int cpu, int phys)
{
- paca[cpu].hw_cpu_id = phys;
+ paca_ptrs[cpu]->hw_cpu_id = phys;
}
#else
/* 32-bit */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index a7916ee..bc66712 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,7 +17,7 @@
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end);
+extern int create_section_mapping(unsigned long start, unsigned long end, int nid);
extern int remove_section_mapping(unsigned long start, unsigned long end);
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index b9ebc30..72dc4dd 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -56,6 +56,8 @@
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return false;
return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
}
#endif
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index c3ca42c..be8c9fa 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -35,7 +35,6 @@ static inline void disable_kernel_fp(void)
msr_check_and_clear(MSR_FP);
}
#else
-static inline void __giveup_fpu(struct task_struct *t) { }
static inline void save_fpu(struct task_struct *t) { }
static inline void flush_fp_to_thread(struct task_struct *t) { }
#endif
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index 63e7f5a..6ec5460 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -6,10 +6,6 @@
#include <linux/stringify.h>
#include <asm/feature-fixups.h>
-#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
-#define __SUBARCH_HAS_LWSYNC
-#endif
-
#ifndef __ASSEMBLY__
extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 4a12c00..5964145db 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -70,6 +70,7 @@ static inline struct thread_info *current_thread_info(void)
return (struct thread_info *)val;
}
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index b240666..db546c0 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -31,6 +31,7 @@ extern void to_tm(int tim, struct rtc_time * tm);
extern void tick_broadcast_ipi_handler(void);
extern void generic_calibrate_decr(void);
+extern void hdec_interrupt(struct pt_regs *regs);
/* Some sane defaults: 125 MHz timebase, 1GHz processor */
extern unsigned long ppc_proc_freq;
@@ -46,7 +47,7 @@ struct div_result {
/* Accessor functions for the timebase (RTC on 601) registers. */
/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
#ifdef CONFIG_6xx
-#define __USE_RTC() (!cpu_has_feature(CPU_FTR_USE_TB))
+#define __USE_RTC() (cpu_has_feature(CPU_FTR_USE_RTC))
#else
#define __USE_RTC() 0
#endif
@@ -204,6 +205,7 @@ struct cpu_usage {
DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
extern void secondary_cpu_time_init(void);
+extern void __init time_init(void);
DECLARE_PER_CPU(u64, decrementers_next_tb);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 51bfeb8..a62ee66 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -47,9 +47,13 @@
#else
-#define __access_ok(addr, size, segment) \
- (((addr) <= (segment).seg) && \
- (((size) == 0) || (((size) - 1) <= ((segment).seg - (addr)))))
+static inline int __access_ok(unsigned long addr, unsigned long size,
+ mm_segment_t seg)
+{
+ if (addr > seg.seg)
+ return 0;
+ return (size == 0 || size - 1 <= seg.seg - addr);
+}
#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2358f97..2b4c40b2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC64) += vdso64/
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ea5eb91..6bee65f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -221,12 +221,17 @@ int main(void)
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXSLB, paca_struct, exslb);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_PSERIES
OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
+#endif
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
+#endif
OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
@@ -568,6 +573,7 @@ int main(void)
OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar);
OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar);
OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr);
+ OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr);
OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm);
OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr);
OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr);
@@ -650,6 +656,7 @@ int main(void)
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_TID, tid);
+ HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
@@ -759,6 +766,7 @@ int main(void)
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+ OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
STOP_SPR(STOP_PID, pid);
STOP_SPR(STOP_LDBAR, ldbar);
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index c5e5a94..a9f3970 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -226,7 +226,7 @@ BEGIN_FTR_SECTION
beq 1f
END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
lwz r6,CPU_SPEC_FEATURES(r4)
- andi. r0,r6,CPU_FTR_L3_DISABLE_NAP
+ andis. r0,r6,CPU_FTR_L3_DISABLE_NAP@h
beq 1f
li r7,CPU_FTR_CAN_NAP
andc r6,r6,r7
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 462aed9..8d142e5 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -162,7 +162,7 @@ _GLOBAL(__setup_cpu_e5500)
* the feature on the primary core, avoid doing it on the
* secondary core.
*/
- andis. r6, r3, CPU_FTR_EMB_HV@h
+ andi. r6, r3, CPU_FTR_EMB_HV
beq 2f
rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
stw r3, CPU_SPEC_FEATURES(r4)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c40a9fc..c8fc969 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -133,36 +133,6 @@ extern void __restore_cpu_e6500(void);
static struct cpu_spec __initdata cpu_specs[] = {
#ifdef CONFIG_PPC_BOOK3S_64
- { /* Power4 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00350000,
- .cpu_name = "POWER4 (gp)",
- .cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power4",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power4",
- },
- { /* Power4+ */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00380000,
- .cpu_name = "POWER4+ (gq)",
- .cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power4",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power4",
- },
{ /* PPC970 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00390000,
@@ -553,11 +523,30 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
- { /* Power9 DD 2.1 or later (see DD2.0 above) */
+ { /* Power9 DD 2.1 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0201,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_1,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD2.2 or later */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
.cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9_DD2_1,
+ .cpu_features = CPU_FTRS_POWER9_DD2_2,
.cpu_user_features = COMMON_USER_POWER9,
.cpu_user_features2 = COMMON_USER2_POWER9,
.mmu_features = MMU_FTRS_POWER9,
@@ -609,15 +598,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
- .cpu_name = "POWER4 (compatible)",
+ .cpu_name = "POWER5 (compatible)",
.cpu_features = CPU_FTRS_COMPATIBLE,
.cpu_user_features = COMMON_USER_PPC64,
- .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
+ .mmu_features = MMU_FTRS_POWER,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
- .platform = "power4",
+ .platform = "power5",
}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 00b2151..17c8b99 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -238,7 +238,7 @@ static void __maybe_unused crash_kexec_wait_realmode(int cpu)
if (i == cpu)
continue;
- while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+ while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
barrier();
if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
break;
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8ca5d5b7..e88fbb1 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -54,8 +54,7 @@ struct dt_cpu_feature {
};
#define CPU_FTRS_BASE \
- (CPU_FTR_USE_TB | \
- CPU_FTR_LWSYNC | \
+ (CPU_FTR_LWSYNC | \
CPU_FTR_FPU_UNAVAILABLE |\
CPU_FTR_NODSISRALIGN |\
CPU_FTR_NOEXECUTE |\
@@ -84,6 +83,7 @@ static int hv_mode;
static struct {
u64 lpcr;
+ u64 lpcr_clear;
u64 hfscr;
u64 fscr;
} system_registers;
@@ -92,6 +92,8 @@ static void (*init_pmu_registers)(void);
static void __restore_cpu_cpufeatures(void)
{
+ u64 lpcr;
+
/*
* LPCR is restored by the power on engine already. It can be changed
* after early init e.g., by radix enable, and we have no unified API
@@ -104,8 +106,10 @@ static void __restore_cpu_cpufeatures(void)
* The best we can do to accommodate secondary boot and idle restore
* for now is "or" LPCR with existing.
*/
-
- mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= system_registers.lpcr;
+ lpcr &= ~system_registers.lpcr_clear;
+ mtspr(SPRN_LPCR, lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
@@ -325,8 +329,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
+ system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR);
lpcr = mfspr(SPRN_LPCR);
- lpcr &= ~LPCR_ISL;
+ lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
@@ -590,6 +595,8 @@ static struct dt_cpu_feature_match __initdata
{"virtual-page-class-key-protection", feat_enable, 0},
{"transactional-memory", feat_enable_tm, CPU_FTR_TM},
{"transactional-memory-v3", feat_enable_tm, 0},
+ {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
+ {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
{"idle-nap", feat_enable_idle_nap, 0},
{"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
{"idle-stop", feat_enable_idle_stop, 0},
@@ -707,11 +714,28 @@ static __init void cpufeatures_cpu_quirks(void)
*/
if ((version & 0xffffff00) == 0x004e0100)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+ else if ((version & 0xffffefff) == 0x004e0200)
+ ; /* DD2.0 has no feature flag */
else if ((version & 0xffffefff) == 0x004e0201)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ else if ((version & 0xffffefff) == 0x004e0202) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ } else /* DD2.1 and up have DD2_1 */
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- if ((version & 0xffff0000) == 0x004e0000)
+ if ((version & 0xffff0000) == 0x004e0000) {
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
+ }
+
+ /*
+ * PKEY was not in the initial base or feature node
+ * specification, but it should become optional in the next
+ * cpu feature version sequence.
+ */
+ cur_cpu_spec->cpu_features |= CPU_FTR_PKEY;
}
static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 2b9df00..bc640e4 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* Check PHB state */
ret = eeh_ops->get_state(phb_pe, NULL);
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) ||
- (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
- (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
ret = 0;
goto out;
}
@@ -433,7 +431,6 @@ out:
int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
@@ -525,8 +522,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* state, PE is in good state.
*/
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) ||
- ((ret & active_flags) == active_flags)) {
+ (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
eeh_stats.false_positives++;
pe->false_positives++;
rc = 0;
@@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Frozen parent PE ? */
ret = eeh_ops->get_state(parent_pe, NULL);
- if (ret > 0 &&
- (ret & active_flags) != active_flags)
+ if (ret > 0 && !eeh_state_active(ret))
pe = parent_pe;
/* Next parent level */
@@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag)
*/
int eeh_pe_reset_full(struct eeh_pe *pe)
{
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
int type = EEH_RESET_HOT;
unsigned int freset = 0;
@@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
/* Wait until the PE is in a functioning state */
state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if ((state & active_flags) == active_flags)
+ if (eeh_state_active(state))
break;
if (state < 0) {
@@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe)
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
struct pci_device_id *id;
- int flags, ret;
+ int ret;
/* Check PE state */
- flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
ret = eeh_ops->get_state(pe, NULL);
if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
return 0;
/* Unfrozen PE, nothing to do */
- if ((ret & flags) == flags)
+ if (eeh_state_active(ret))
return 0;
/* Frozen PE, check if it needs PE level reset */
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index d4cc266..201943d 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -84,8 +84,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
* @addr: mmio (PIO) phys address or i/o port number
*
* Given an mmio phys address, or a port number, find a pci device
- * that implements this address. Be sure to pci_dev_put the device
- * when finished. I/O port numbers are assumed to be offset
+ * that implements this address. I/O port numbers are assumed to be offset
* from zero (that is, they do *not* have pci_io_addr added in).
* It is safe to call this function within an interrupt.
*/
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 0c0b66f..b8a329f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_frozen;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ !driver->err_handler->error_detected)
+ goto out;
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
@@ -227,8 +227,12 @@ static void *eeh_report_error(void *data, void *userdata)
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
edev->in_error = true;
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+
+out:
+ eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -251,15 +255,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+ device_lock(&dev->dev);
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
if (!driver->err_handler ||
!driver->err_handler->mmio_enabled ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ (edev->mode & EEH_DEV_NO_HANDLER))
+ goto out;
rc = driver->err_handler->mmio_enabled(dev);
@@ -267,7 +270,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+out:
eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -290,20 +296,20 @@ static void *eeh_report_reset(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_enable_irq(dev);
if (!driver->err_handler ||
!driver->err_handler->slot_reset ||
(edev->mode & EEH_DEV_NO_HANDLER) ||
- (!edev->in_error)) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ (!edev->in_error))
+ goto out;
rc = driver->err_handler->slot_reset(dev);
if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -311,7 +317,10 @@ static void *eeh_report_reset(void *data, void *userdata)
if (*res == PCI_ERS_RESULT_DISCONNECT &&
rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+out:
eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -362,10 +371,12 @@ static void *eeh_report_resume(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
was_in_error = edev->in_error;
edev->in_error = false;
@@ -375,18 +386,20 @@ static void *eeh_report_resume(void *data, void *userdata)
!driver->err_handler->resume ||
(edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
edev->mode &= ~EEH_DEV_NO_HANDLER;
- eeh_pcid_put(dev);
- return NULL;
+ goto out;
}
driver->err_handler->resume(dev);
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+out:
+ eeh_pcid_put(dev);
#ifdef CONFIG_PCI_IOV
if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
#endif
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -406,23 +419,26 @@ static void *eeh_report_failure(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_perm_failure;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ !driver->err_handler->error_detected)
+ goto out;
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+out:
+ eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -619,17 +635,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
/**
* eeh_reset_device - Perform actual reset of a pci slot
+ * @driver_eeh_aware: Does the device's driver provide EEH support?
* @pe: EEH PE
* @bus: PCI bus corresponding to the isolcated slot
+ * @rmv_data: Optional, list to record removed devices
*
* This routine must be called to do reset on the indicated PE.
* During the reset, udev might be invoked because those affected
* PCI devices will be removed and then added.
*/
static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
- struct eeh_rmv_data *rmv_data)
+ struct eeh_rmv_data *rmv_data,
+ bool driver_eeh_aware)
{
- struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
@@ -645,16 +663,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* into pci_hp_add_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
- if (bus) {
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- } else {
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- }
- } else if (frozen_bus) {
+ if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+ } else {
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
}
/*
@@ -689,8 +703,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* the device up before the scripts have taken it down,
* potentially weird things happen.
*/
- if (bus) {
- pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+ if (!driver_eeh_aware || rmv_data->removed) {
+ pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+ (driver_eeh_aware ? "partial" : "complete"));
ssleep(5);
/*
@@ -703,19 +718,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
if (pe->type & EEH_PE_VF) {
eeh_add_virt_device(edev, NULL);
} else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ if (!driver_eeh_aware)
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_hp_add_devices(bus);
}
- } else if (frozen_bus && rmv_data->removed) {
- pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
- ssleep(5);
-
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
- eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- if (pe->type & EEH_PE_VF)
- eeh_add_virt_device(edev, NULL);
- else
- pci_hp_add_devices(frozen_bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);
@@ -733,28 +739,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
- * @pe: EEH PE
+ * @pe: EEH PE - which should not be used after we return, as it may
+ * have been invalidated.
*
* Attempts to recover the given PE. If recovery fails or the PE has failed
* too many times, remove the PE.
*
- * Returns true if @pe should no longer be used, else false.
+ * When the PHB detects address or data parity errors on a particular
+ * PCI slot, the associated PE will be frozen. In addition, DMAs
+ * to wild addresses (usually caused by bugs in device drivers or in
+ * PCI adapter firmware) can cause an EEH error. #SERR, #PERR or other
+ * misc PCI-related errors can also trigger EEH errors.
+ *
+ * The recovery process consists of unplugging the device driver (which
+ * generates hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which causes a second set of hotplug events to go out to
+ * userspace).
*/
-static bool eeh_handle_normal_event(struct eeh_pe *pe)
+void eeh_handle_normal_event(struct eeh_pe *pe)
{
- struct pci_bus *frozen_bus;
+ struct pci_bus *bus;
struct eeh_dev *edev, *tmp;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
- frozen_bus = eeh_pe_bus_get(pe);
- if (!frozen_bus) {
+ bus = eeh_pe_bus_get(pe);
+ if (!bus) {
pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
- return false;
+ return;
}
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
if (pe->freeze_count > eeh_max_freezes) {
@@ -806,7 +826,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
*/
if (result == PCI_ERS_RESULT_NONE) {
pr_info("EEH: Reset with hotplug activity\n");
- rc = eeh_reset_device(pe, frozen_bus, NULL);
+ rc = eeh_reset_device(pe, bus, NULL, false);
if (rc) {
pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc);
@@ -858,7 +878,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
- rc = eeh_reset_device(pe, NULL, &rmv_data);
+ rc = eeh_reset_device(pe, bus, &rmv_data, true);
if (rc) {
pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc);
@@ -891,7 +911,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Notify device driver to resume\n");
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
- return false;
+ goto final;
hard_fail:
/*
@@ -916,23 +936,21 @@ hard_fail:
* all removed devices correctly to avoid access
* the their PCI config any more.
*/
- if (frozen_bus) {
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
- pci_lock_rescan_remove();
- pci_hp_remove_devices(frozen_bus);
- pci_unlock_rescan_remove();
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- /* The passed PE should no longer be used */
- return true;
- }
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
}
- return false;
+final:
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
}
/**
@@ -942,7 +960,7 @@ hard_fail:
* specific PE. Iterates through possible failures and handles them as
* necessary.
*/
-static void eeh_handle_special_event(void)
+void eeh_handle_special_event(void)
{
struct eeh_pe *pe, *phb_pe;
struct pci_bus *bus;
@@ -1005,15 +1023,7 @@ static void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
- /*
- * eeh_handle_normal_event() can make the PE stale if it
- * determines that the PE cannot possibly be recovered.
- * Don't modify the PE state if that's the case.
- */
- if (eeh_handle_normal_event(pe))
- continue;
-
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
list_for_each_entry(hose, &hose_list, list_node) {
@@ -1049,28 +1059,3 @@ static void eeh_handle_special_event(void)
break;
} while (rc != EEH_NEXT_ERR_NONE);
}
-
-/**
- * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE
- *
- * While PHB detects address or data parity errors on particular PCI
- * slot, the associated PE will be frozen. Besides, DMA's occurring
- * to wild addresses (which usually happen due to bugs in device
- * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
- * #PERR or other misc PCI-related errors also can trigger EEH errors.
- *
- * Recovery process consists of unplugging the device driver (which
- * generated hotplug events to userspace), then issuing a PCI #RST to
- * the device, then reconfiguring the PCI config space for all bridges
- * & devices under this slot, and then finally restarting the device
- * drivers (which cause a second set of hotplug events to go out to
- * userspace).
- */
-void eeh_handle_event(struct eeh_pe *pe)
-{
- if (pe)
- eeh_handle_normal_event(pe);
- else
- eeh_handle_special_event();
-}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index accbf8b..61c9356 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy)
/* We might have event without binding PE */
pe = event->pe;
if (pe) {
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
if (pe->type & EEH_PE_PHB)
pr_info("EEH: Detected error on PHB#%x\n",
pe->phb->global_number);
@@ -81,10 +80,9 @@ static int eeh_event_handler(void * dummy)
pr_info("EEH: Detected PCI bus error on "
"PHB#%x-PE#%x\n",
pe->phb->global_number, pe->addr);
- eeh_handle_event(pe);
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_handle_normal_event(pe);
} else {
- eeh_handle_event(NULL);
+ eeh_handle_special_event();
}
kfree(event);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2cb5109..5169560 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -545,7 +545,7 @@ _GLOBAL(_switch)
/* Cancel all explict user streams as they will have no use after context
* switch and will stop the HW from creating streams itself
*/
- DCBT_STOP_ALL_STREAM_IDS(r6)
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
#endif
addi r6,r4,-THREAD /* Convert THREAD to 'current' */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1ecfd8f..ae6a849 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -139,6 +139,21 @@ EXC_COMMON_BEGIN(system_reset_idle_common)
b pnv_powersave_wakeup
#endif
+/*
+ * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * the right thing. We do not want to reconcile because that goes
+ * through irq tracing which we don't want in NMI.
+ *
+ * Save PACAIRQHAPPENED because some code will do a hard disable
+ * (e.g., xmon). So we want to restore this back to where it was
+ * when we return. DAR is unused in the stack, so save it there.
+ */
+#define ADD_RECONCILE_NMI \
+ li r10,IRQS_ALL_DISABLED; \
+ stb r10,PACAIRQSOFTMASK(r13); \
+ lbz r10,PACAIRQHAPPENED(r13); \
+ std r10,_DAR(r1)
+
EXC_COMMON_BEGIN(system_reset_common)
/*
* Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
@@ -157,16 +172,56 @@ EXC_COMMON_BEGIN(system_reset_common)
subi r1,r1,INT_FRAME_SIZE
EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
system_reset, system_reset_exception,
- ADD_NVGPRS;ADD_RECONCILE)
+ ADD_NVGPRS;ADD_RECONCILE_NMI)
+
+ /* This (and MCE) can be simplified with mtmsrd L=1 */
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r0,MSR_RI
+ mfmsr r9
+ andc r9,r9,r0
+ mtmsrd r9,1
/*
- * The stack is no longer in use, decrement in_nmi.
+ * MSR_RI is clear, now we can decrement paca->in_nmi.
*/
lhz r10,PACA_IN_NMI(r13)
subi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- b ret_from_except
+ /*
+ * Restore soft mask settings.
+ */
+ ld r10,_DAR(r1)
+ stb r10,PACAIRQHAPPENED(r13)
+ ld r10,SOFTE(r1)
+ stb r10,PACAIRQSOFTMASK(r13)
+
+ /*
+ * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP.
+ * Should share common bits...
+ */
+
+ /* Move original SRR0 and SRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ mtspr SPRN_SRR1,r9
+ ld r3,_NIP(r1)
+ mtspr SPRN_SRR0,r3
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ REST_GPR(0, r1)
+ REST_8GPRS(2, r1)
+ REST_GPR(10, r1)
+ ld r11,_CCR(r1)
+ mtcr r11
+ REST_GPR(11, r1)
+ REST_2GPRS(12, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+ RFI_TO_USER_OR_KERNEL
#ifdef CONFIG_PPC_PSERIES
/*
@@ -621,7 +676,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10
- beq- 8f /* if bad address, make full stack frame */
+ /*
+ * Large address, check whether we have to allocate new contexts.
+ */
+ beq- 8f
bne- cr5,2f /* if unrecoverable exception, oops */
@@ -629,14 +687,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
bne cr4,1f /* returning to kernel */
-.machine push
-.machine "power4"
mtcrf 0x80,r9
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
RESTORE_CTR(r9, PACA_EXSLB)
RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -649,14 +704,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
RFI_TO_USER
b . /* prevent speculative execution */
1:
-.machine push
-.machine "power4"
mtcrf 0x80,r9
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
RESTORE_CTR(r9, PACA_EXSLB)
RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -685,7 +737,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mr r3,r12
mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10,bad_addr_slb)
+ LOAD_HANDLER(r10, large_addr_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
@@ -700,7 +752,7 @@ EXC_COMMON_BEGIN(unrecov_slb)
bl unrecoverable_exception
b 1b
-EXC_COMMON_BEGIN(bad_addr_slb)
+EXC_COMMON_BEGIN(large_addr_slb)
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
ld r3, PACA_EXSLB+EX_DAR(r13)
@@ -710,7 +762,7 @@ EXC_COMMON_BEGIN(bad_addr_slb)
std r10, _TRAP(r1)
2: bl save_nvgprs
addi r3, r1, STACK_FRAME_OVERHEAD
- bl slb_miss_bad_addr
+ bl slb_miss_large_addr
b ret_from_except
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1273,7 +1325,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
bne+ denorm_assist
#endif
- KVMTEST_PR(0x1500)
+ KVMTEST_HV(0x1500)
EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
@@ -1285,7 +1337,7 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500)
+TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
@@ -1466,7 +1518,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
- DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
@@ -1506,7 +1558,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
- DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index a61151a..6eca15f 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -392,19 +392,20 @@ generic_secondary_common_init:
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
- LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
- ld r13,0(r13) /* Get base vaddr of paca array */
#ifndef CONFIG_SMP
- addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
b kexec_wait /* wait for next kernel if !SMP */
#else
+ LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointer */
+ ld r8,0(r8) /* Get base vaddr of array */
LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
lwz r7,0(r7) /* also the max paca allocated */
li r5,0 /* logical cpu id */
-1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
+1:
+ sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */
+ ldx r13,r9,r8 /* r13 = paca_ptrs[cpu id] */
+ lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
beq 2f
- addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
addi r5,r5,1
cmpw r5,r7 /* Check if more pacas exist */
blt 1b
@@ -756,10 +757,10 @@ _GLOBAL(pmac_secondary_start)
mtmsrd r3 /* RI on */
/* Set up a paca value for this processor. */
- LOAD_REG_ADDR(r4,paca) /* Load paca pointer */
- ld r4,0(r4) /* Get base vaddr of paca array */
- mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
- add r13,r13,r4 /* for this processor. */
+ LOAD_REG_ADDR(r4,paca_ptrs) /* Load paca_ptrs pointer */
+ ld r4,0(r4) /* Get base vaddr of paca_ptrs array */
+ sldi r5,r24,3 /* get paca_ptrs[] index from cpu id */
+ ldx r13,r5,r4 /* r13 = paca_ptrs[cpu id] */
SET_PACA(r13) /* Save vaddr of paca in an SPRG*/
/* Mark interrupts soft and hard disabled (they might be enabled
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 53b9c1d..4c1012b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -33,6 +33,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
+#include <asm/debug.h>
#include <linux/uaccess.h>
/*
@@ -171,6 +172,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
* 'symbolsize' should satisfy the check below.
*/
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
length_max = 8; /* DABR */
if (cpu_has_feature(CPU_FTR_DAWR)) {
length_max = 512 ; /* 64 doublewords */
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 01e1c19..79d0054 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -325,12 +325,6 @@ enter_winkle:
* r3 - PSSCR value corresponding to the requested stop state.
*/
power_enter_stop:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
- li r4,KVM_HWTHREAD_IN_IDLE
- /* DO THIS IN REAL MODE! See comment above. */
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
@@ -339,6 +333,7 @@ power_enter_stop:
bne .Lhandle_esl_ec_set
PPC_STOP
li r3,0 /* Since we didn't lose state, return 0 */
+ std r3, PACA_REQ_PSSCR(r13)
/*
* pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
@@ -427,13 +422,49 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
/*
* Entered with MSR[EE]=0 and no soft-masked interrupts pending.
* r3 contains desired PSSCR register value.
+ *
+ * Offline (CPU unplug) case also must notify KVM that the CPU is
+ * idle.
*/
+_GLOBAL(power9_offline_stop)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * Tell KVM we're entering idle.
+ * This does not have to be done in real mode because the P9 MMU
+ * is independent per-thread. Some steppings share radix/hash mode
+ * between threads, but in that case KVM has a barrier sync in real
+ * mode before and after switching between radix and hash.
+ */
+ li r4,KVM_HWTHREAD_IN_IDLE
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+ /* fall through */
+
_GLOBAL(power9_idle_stop)
std r3, PACA_REQ_PSSCR(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+BEGIN_FTR_SECTION
+ sync
+ lwz r5, PACA_DONT_STOP(r13)
+ cmpwi r5, 0
+ bne 1f
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+#endif
mtspr SPRN_PSSCR,r3
LOAD_REG_ADDR(r4,power_enter_stop)
b pnv_powersave_common
/* No return */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+1:
+ /*
+ * We get here when TM / thread reconfiguration bug workaround
+ * code wants to get the CPU into SMT4 mode, and therefore
+ * we are being asked not to stop.
+ */
+ li r3, 0
+ std r3, PACA_REQ_PSSCR(r13)
+ blr /* return 0 for wakeup cause / SRR1 value */
+#endif
/*
* On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
@@ -520,6 +551,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ lbz r0,HSTATE_HWTHREAD_STATE(r13)
+ cmpwi r0,KVM_HWTHREAD_IN_KERNEL
+ beq 1f
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
@@ -584,6 +618,8 @@ FTR_SECTION_ELSE_NESTED(71)
mfspr r5, SPRN_PSSCR
rldicl r5,r5,4,60
ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
+ li r0, 0 /* clear requested_psscr to say we're awake */
+ std r0, PACA_REQ_PSSCR(r13)
cmpd cr4,r5,r4
bge cr4,pnv_wakeup_tb_loss /* returns to caller */
@@ -834,6 +870,8 @@ BEGIN_FTR_SECTION
mtspr SPRN_PTCR,r4
ld r4,_RPR(r1)
mtspr SPRN_RPR,r4
+ ld r4,_AMOR(r1)
+ mtspr SPRN_AMOR,r4
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r4,_TSCR(r1)
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index aab456e..5ac84ef 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -45,12 +45,32 @@ u64 ioread64(void __iomem *addr)
{
return readq(addr);
}
+u64 ioread64_lo_hi(void __iomem *addr)
+{
+ return readq(addr);
+}
+u64 ioread64_hi_lo(void __iomem *addr)
+{
+ return readq(addr);
+}
u64 ioread64be(void __iomem *addr)
{
return readq_be(addr);
}
+u64 ioread64be_lo_hi(void __iomem *addr)
+{
+ return readq_be(addr);
+}
+u64 ioread64be_hi_lo(void __iomem *addr)
+{
+ return readq_be(addr);
+}
EXPORT_SYMBOL(ioread64);
+EXPORT_SYMBOL(ioread64_lo_hi);
+EXPORT_SYMBOL(ioread64_hi_lo);
EXPORT_SYMBOL(ioread64be);
+EXPORT_SYMBOL(ioread64be_lo_hi);
+EXPORT_SYMBOL(ioread64be_hi_lo);
#endif /* __powerpc64__ */
void iowrite8(u8 val, void __iomem *addr)
@@ -83,12 +103,32 @@ void iowrite64(u64 val, void __iomem *addr)
{
writeq(val, addr);
}
+void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+ writeq(val, addr);
+}
+void iowrite64_hi_lo(u64 val, void __iomem *addr)
+{
+ writeq(val, addr);
+}
void iowrite64be(u64 val, void __iomem *addr)
{
writeq_be(val, addr);
}
+void iowrite64be_lo_hi(u64 val, void __iomem *addr)
+{
+ writeq_be(val, addr);
+}
+void iowrite64be_hi_lo(u64 val, void __iomem *addr)
+{
+ writeq_be(val, addr);
+}
EXPORT_SYMBOL(iowrite64);
+EXPORT_SYMBOL(iowrite64_lo_hi);
+EXPORT_SYMBOL(iowrite64_hi_lo);
EXPORT_SYMBOL(iowrite64be);
+EXPORT_SYMBOL(iowrite64be_lo_hi);
+EXPORT_SYMBOL(iowrite64be_hi_lo);
#endif /* __powerpc64__ */
/*
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index ca5d5a0..e4c5bf3 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -455,29 +455,33 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
}
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- regs->nip = orig_ret_address;
+
/*
- * Make LR point to the orig_ret_address.
- * When the 'nop' inside the kretprobe_trampoline
- * is optimized, we can do a 'blr' after executing the
- * detour buffer code.
+ * We get here through one of two paths:
+ * 1. by taking a trap -> kprobe_handler() -> here
+ * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here
+ *
+ * When going back through (1), we need regs->nip to be set up properly
+ * as it is used to determine the return address from the trap.
+ * For (2), since nip is not honoured with optprobes, we instead set up
+ * the link register properly so that the subsequent 'blr' in
+ * kretprobe_trampoline jumps back to the right instruction.
+ *
+ * For nip, we should set the address to the previous instruction since
+ * we end up emulating it in kprobe_handler(), which increments the nip
+ * again.
*/
+ regs->nip = orig_ret_address - 4;
regs->link = orig_ret_address;
- reset_current_kprobe();
kretprobe_hash_unlock(current, &flags);
- preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
}
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
+
+ return 0;
}
NOKPROBE_SYMBOL(trampoline_probe_handler);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 49d34d7..1044bf1 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -168,24 +168,25 @@ static void kexec_prepare_cpus_wait(int wait_state)
* are correctly onlined. If somehow we start a CPU on boot with RTAS
* start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
* time, the boot CPU will timeout. If it does eventually execute
- * stuff, the secondary will start up (paca[].cpu_start was written) and
- * get into a peculiar state. If the platform supports
- * smp_ops->take_timebase(), the secondary CPU will probably be spinning
- * in there. If not (i.e. pseries), the secondary will continue on and
- * try to online itself/idle/etc. If it survives that, we need to find
- * these possible-but-not-online-but-should-be CPUs and chaperone them
- * into kexec_smp_wait().
+ * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
+ * written) and get into a peculiar state.
+ * If the platform supports smp_ops->take_timebase(), the secondary CPU
+ * will probably be spinning in there. If not (i.e. pseries), the
+ * secondary will continue on and try to online itself/idle/etc. If it
+ * survives that, we need to find these
+ * possible-but-not-online-but-should-be CPUs and chaperone them into
+ * kexec_smp_wait().
*/
for_each_online_cpu(i) {
if (i == my_cpu)
continue;
- while (paca[i].kexec_state < wait_state) {
+ while (paca_ptrs[i]->kexec_state < wait_state) {
barrier();
if (i != notified) {
printk(KERN_INFO "kexec: waiting for cpu %d "
"(physical %d) to enter %i state\n",
- i, paca[i].hw_cpu_id, wait_state);
+ i, paca_ptrs[i]->hw_cpu_id, wait_state);
notified = i;
}
}
@@ -322,18 +323,24 @@ void default_machine_kexec(struct kimage *image)
kexec_stack.thread_info.cpu = current_thread_info()->cpu;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
- * it. Also poison per_cpu_offset to catch anyone using non-static
- * data.
+ * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+ * non-static data.
*/
memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
- paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
- kexec_paca.paca_index;
+#ifdef CONFIG_PPC_PSERIES
+ kexec_paca.lppaca_ptr = NULL;
+#endif
+ paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
setup_paca(&kexec_paca);
- /* XXX: If anyone does 'dynamic lppacas' this will also need to be
- * switched to a static version!
+ /*
+ * The lppaca should be unregistered at this point so the HV won't
+ * touch it. In the case of a crash, none of the lppacas are
+ * unregistered so there is not much we can do about it here.
*/
+
/*
* On Book3S, the copy must happen with the MMU off if we are either
* using Radix page tables or we are not in an LPAR since we can
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index e4395f9..45e0b7d 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -43,7 +43,7 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
/* We don't support crash kernels yet. */
if (image->type == KEXEC_TYPE_CRASH)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
fops = kexec_file_loaders[i];
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 3280953..fa267e9 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range)
blr
EXPORT_SYMBOL(flush_dcache_range)
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mfmsr r5 /* Disable MMU Data Relocation */
- ori r0,r5,MSR_DR
- xori r0,r0,MSR_DR
- sync
- mtmsr r0
- sync
- isync
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- mtmsr r5 /* Re-enable MMU Data Relocation */
- sync
- isync
- blr
-
_GLOBAL(flush_inval_dcache_range)
ld r10,PPC64_CACHES@toc(r2)
lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 496d639..ba681da 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -207,8 +207,7 @@ int nvram_write_os_partition(struct nvram_os_partition *part,
tmp_index = part->index;
- rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info),
- &tmp_index);
+ rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index);
if (rc <= 0) {
pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
return rc;
@@ -244,9 +243,7 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff,
tmp_index = part->index;
if (part->os_partition) {
- rc = ppc_md.nvram_read((char *)&info,
- sizeof(struct err_log_info),
- &tmp_index);
+ rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index);
if (rc <= 0) {
pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
return rc;
@@ -1173,7 +1170,7 @@ int __init nvram_scan_partitions(void)
"detected: 0-length partition\n");
goto out;
}
- tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL);
err = -ENOMEM;
if (!tmp_part) {
printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 95ffedf..0ee3e6d 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -20,116 +20,105 @@
#include "setup.h"
-#ifdef CONFIG_PPC_BOOK3S
+#ifndef CONFIG_SMP
+#define boot_cpuid 0
+#endif
+
+static void *__init alloc_paca_data(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ unsigned long pa;
+ int nid;
+
+ /*
+ * boot_cpuid paca is allocated very early before cpu_to_node is up.
+ * Set bottom-up mode, because the boot CPU should be on node-0,
+ * which will put its paca in the right place.
+ */
+ if (cpu == boot_cpuid) {
+ nid = -1;
+ memblock_set_bottom_up(true);
+ } else {
+ nid = early_cpu_to_node(cpu);
+ }
+
+ pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
+ if (!pa) {
+ pa = memblock_alloc_base(size, align, limit);
+ if (!pa)
+ panic("cannot allocate paca data");
+ }
+
+ if (cpu == boot_cpuid)
+ memblock_set_bottom_up(false);
+
+ return __va(pa);
+}
+
+#ifdef CONFIG_PPC_PSERIES
/*
- * The structure which the hypervisor knows about - this structure
- * should not cross a page boundary. The vpa_init/register_vpa call
- * is now known to fail if the lppaca structure crosses a page
- * boundary. The lppaca is also used on POWER5 pSeries boxes.
- * The lppaca is 640 bytes long, and cannot readily
- * change since the hypervisor knows its layout, so a 1kB alignment
- * will suffice to ensure that it doesn't cross a page boundary.
+ * See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must be 1kB in size, L1 cache line aligned,
+ * and not cross 4kB boundary. A 1kB size and 1kB alignment will satisfy
+ * these requirements.
*/
-struct lppaca lppaca[] = {
- [0 ... (NR_LPPACAS-1)] = {
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+ BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+ *lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(sizeof(struct lppaca)),
+ .size = cpu_to_be16(0x400),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
- .page_ins = 0,
- },
+ .page_ins = 0, };
};
-static struct lppaca *extra_lppacas;
-static long __initdata lppaca_size;
-
-static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
-{
- if (nr_cpus <= NR_LPPACAS)
- return;
-
- lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
- (nr_cpus - NR_LPPACAS));
- extra_lppacas = __va(memblock_alloc_base(lppaca_size,
- PAGE_SIZE, limit));
-}
-
-static struct lppaca * __init new_lppaca(int cpu)
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
+ size_t size = 0x400;
- if (cpu < NR_LPPACAS)
- return &lppaca[cpu];
+ BUILD_BUG_ON(size < sizeof(struct lppaca));
+
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ return NULL;
- lp = extra_lppacas + (cpu - NR_LPPACAS);
- *lp = lppaca[0];
+ lp = alloc_paca_data(size, 0x400, limit, cpu);
+ init_lppaca(lp);
return lp;
}
-
-static void __init free_lppacas(void)
-{
- long new_size = 0, nr;
-
- if (!lppaca_size)
- return;
- nr = num_possible_cpus() - NR_LPPACAS;
- if (nr > 0)
- new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
- if (new_size >= lppaca_size)
- return;
-
- memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
- lppaca_size = new_size;
-}
-
-#else
-
-static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
-static inline void free_lppacas(void) { }
-
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3S_64
/*
- * 3 persistent SLBs are registered here. The buffer will be zero
+ * 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invalid until we actually write them.
*
* If you make the number of persistent SLB entries dynamic, please also
* update PR KVM to flush and restore them accordingly.
*/
-static struct slb_shadow * __initdata slb_shadow;
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit)
-{
- int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
-
- if (early_radix_enabled())
- return;
-
- slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
- memset(slb_shadow, 0, size);
-}
-
-static struct slb_shadow * __init init_slb_shadow(int cpu)
+static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
{
struct slb_shadow *s;
- if (early_radix_enabled())
- return NULL;
-
- s = &slb_shadow[cpu];
+ if (cpu != boot_cpuid) {
+ /*
+ * Boot CPU comes here before early_radix_enabled
+ * is parsed (e.g., for disable_radix). So allocate
+ * always and this will be fixed up in free_unused_pacas.
+ */
+ if (early_radix_enabled())
+ return NULL;
+ }
- /*
- * When we come through here to initialise boot_paca, the slb_shadow
- * buffers are not allocated yet. That's OK, we'll get one later in
- * boot, but make sure we don't corrupt memory at 0.
- */
- if (!slb_shadow)
- return NULL;
+ s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
+ memset(s, 0, sizeof(*s));
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -137,10 +126,6 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
return s;
}
-#else /* !CONFIG_PPC_BOOK3S_64 */
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-
#endif /* CONFIG_PPC_BOOK3S_64 */
/* The Paca is an array with one entry per processor. Each contains an
@@ -152,14 +137,15 @@ static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
* processors. The processor VPD array needs one entry per physical
* processor (not thread).
*/
-struct paca_struct *paca;
-EXPORT_SYMBOL(paca);
+struct paca_struct **paca_ptrs __read_mostly;
+EXPORT_SYMBOL(paca_ptrs);
void __init initialise_paca(struct paca_struct *new_paca, int cpu)
{
-#ifdef CONFIG_PPC_BOOK3S
- new_paca->lppaca_ptr = new_lppaca(cpu);
-#else
+#ifdef CONFIG_PPC_PSERIES
+ new_paca->lppaca_ptr = NULL;
+#endif
+#ifdef CONFIG_PPC_BOOK3E
new_paca->kernel_pgd = swapper_pg_dir;
#endif
new_paca->lock_token = 0x8000;
@@ -173,7 +159,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
#ifdef CONFIG_PPC_BOOK3S_64
- new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
+ new_paca->slb_shadow_ptr = NULL;
#endif
#ifdef CONFIG_PPC_BOOK3E
@@ -203,12 +189,25 @@ void setup_paca(struct paca_struct *new_paca)
}
-static int __initdata paca_size;
+static int __initdata paca_nr_cpu_ids;
+static int __initdata paca_ptrs_size;
+static int __initdata paca_struct_size;
+
+void __init allocate_paca_ptrs(void)
+{
+ paca_nr_cpu_ids = nr_cpu_ids;
+
+ paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0));
+ memset(paca_ptrs, 0x88, paca_ptrs_size);
+}
-void __init allocate_pacas(void)
+void __init allocate_paca(int cpu)
{
u64 limit;
- int cpu;
+ struct paca_struct *paca;
+
+ BUG_ON(cpu >= paca_nr_cpu_ids);
#ifdef CONFIG_PPC_BOOK3S_64
/*
@@ -220,40 +219,44 @@ void __init allocate_pacas(void)
limit = ppc64_rma_size;
#endif
- paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
-
- paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
- memset(paca, 0, paca_size);
-
- printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n",
- paca_size, nr_cpu_ids, paca);
-
- allocate_lppacas(nr_cpu_ids, limit);
-
- allocate_slb_shadows(nr_cpu_ids, limit);
+ paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
+ limit, cpu);
+ paca_ptrs[cpu] = paca;
+ memset(paca, 0, sizeof(struct paca_struct));
- /* Can't use for_each_*_cpu, as they aren't functional yet */
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- initialise_paca(&paca[cpu], cpu);
+ initialise_paca(paca, cpu);
+#ifdef CONFIG_PPC_PSERIES
+ paca->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+#endif
+ paca_struct_size += sizeof(struct paca_struct);
}
void __init free_unused_pacas(void)
{
- int new_size;
-
- new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+ int new_ptrs_size;
- if (new_size >= paca_size)
- return;
+ new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ if (new_ptrs_size < paca_ptrs_size)
+ memblock_free(__pa(paca_ptrs) + new_ptrs_size,
+ paca_ptrs_size - new_ptrs_size);
- memblock_free(__pa(paca) + new_size, paca_size - new_size);
+ paca_nr_cpu_ids = nr_cpu_ids;
+ paca_ptrs_size = new_ptrs_size;
- printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
- paca_size - new_size);
-
- paca_size = new_size;
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (early_radix_enabled()) {
+ /* Ugly fixup, see new_slb_shadow() */
+ memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+ sizeof(struct slb_shadow));
+ paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
+ }
+#endif
- free_lppacas();
+ printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
+ paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
void copy_mm_to_paca(struct mm_struct *mm)
@@ -265,7 +268,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm->context.slb_addr_limit);
get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_low_slices_psize,
+ &context->low_slices_psize, sizeof(context->low_slices_psize));
memcpy(&get_paca()->mm_ctx_high_slices_psize,
&context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
#else /* CONFIG_PPC_MM_SLICES */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1738c41..1237f13 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -173,7 +173,7 @@ void __msr_check_and_clear(unsigned long bits)
EXPORT_SYMBOL(__msr_check_and_clear);
#ifdef CONFIG_PPC_FPU
-void __giveup_fpu(struct task_struct *tsk)
+static void __giveup_fpu(struct task_struct *tsk)
{
unsigned long msr;
@@ -556,7 +556,7 @@ void restore_math(struct pt_regs *regs)
regs->msr = msr;
}
-void save_all(struct task_struct *tsk)
+static void save_all(struct task_struct *tsk)
{
unsigned long usermsr;
@@ -718,7 +718,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
{
thread->hw_brk.address = 0;
thread->hw_brk.type = 0;
- set_breakpoint(&thread->hw_brk);
+ if (ppc_breakpoint_available())
+ set_breakpoint(&thread->hw_brk);
}
#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -815,9 +816,14 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
if (cpu_has_feature(CPU_FTR_DAWR))
+ // Power8 or later
set_dawr(brk);
- else
+ else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ // Power7 or earlier
set_dabr(brk);
+ else
+ // Shouldn't happen due to higher level checks
+ WARN_ON_ONCE(1);
}
void set_breakpoint(struct arch_hw_breakpoint *brk)
@@ -827,6 +833,18 @@ void set_breakpoint(struct arch_hw_breakpoint *brk)
preempt_enable();
}
+/* Check if we have DAWR or DABR hardware */
+bool ppc_breakpoint_available(void)
+{
+ if (cpu_has_feature(CPU_FTR_DAWR))
+ return true; /* POWER8 DAWR */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return false; /* POWER9 with DAWR disabled */
+ /* DABR: Everything but POWER8 and POWER9 */
+ return true;
+}
+EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
+
#ifdef CONFIG_PPC64
DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4dffef9..9dbed48 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -291,11 +291,11 @@ static inline void identical_pvr_fixup(unsigned long node)
static void __init check_cpu_feature_properties(unsigned long node)
{
- unsigned long i;
+ int i;
struct feature_property *fp = feature_properties;
const __be32 *prop;
- for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
+ for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) {
prop = of_get_flat_dt_prop(node, fp->name, NULL);
if (prop && be32_to_cpup(prop) >= fp->min_value) {
cur_cpu_spec->cpu_features |= fp->cpu_feature;
@@ -365,7 +365,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
DBG("boot cpu: logical %d physical %d\n", found,
be32_to_cpu(intserv[found_thread]));
boot_cpuid = found;
- set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
/*
* PAPR defines "logical" PVR values for cpus that
@@ -403,7 +402,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
else if (!dt_cpu_ftrs_in_use())
cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+ allocate_paca(boot_cpuid);
#endif
+ set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
return 0;
}
@@ -744,7 +745,7 @@ void __init early_init_devtree(void *params)
* FIXME .. and the initrd too? */
move_device_tree();
- allocate_pacas();
+ allocate_paca_ptrs();
DBG("Scanning CPUs ...\n");
@@ -874,5 +875,15 @@ EXPORT_SYMBOL(cpu_to_chip_id);
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
+#ifdef CONFIG_SMP
+ /*
+ * Early firmware scanning must use this rather than
+ * get_hard_smp_processor_id because we don't have pacas allocated
+ * until memory topology is discovered.
+ */
+ if (cpu_to_phys_id != NULL)
+ return (int)phys_id == cpu_to_phys_id[cpu];
+#endif
+
return (int)phys_id == get_hard_smp_processor_id(cpu);
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index acf4b2e..f9d6bef 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -171,7 +171,7 @@ static unsigned long __initdata prom_tce_alloc_start;
static unsigned long __initdata prom_tce_alloc_end;
#endif
-static bool __initdata prom_radix_disable;
+static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
struct platform_support {
bool hash_mmu;
@@ -641,9 +641,19 @@ static void __init early_cmdline_parse(void)
opt = strstr(prom_cmd_line, "disable_radix");
if (opt) {
- prom_debug("Radix disabled from cmdline\n");
- prom_radix_disable = true;
+ opt += 13;
+ if (*opt && *opt == '=') {
+ bool val;
+
+ if (kstrtobool(++opt, &val))
+ prom_radix_disable = false;
+ else
+ prom_radix_disable = val;
+ } else
+ prom_radix_disable = true;
}
+ if (prom_radix_disable)
+ prom_debug("Radix disabled from cmdline\n");
}
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
@@ -1110,7 +1120,8 @@ static void __init prom_check_platform_support(void)
}
}
- if (supported.radix_mmu && supported.radix_gtse) {
+ if (supported.radix_mmu && supported.radix_gtse &&
+ IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
/* Radix preferred - but we require GTSE for now */
prom_debug("Asking for radix with GTSE\n");
ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
@@ -1809,16 +1820,8 @@ static void __init prom_initialize_tce_table(void)
* size to 4 MB. This is enough to map 2GB of PCI DMA space.
* By doing this, we avoid the pitfalls of trying to DMA to
* MMIO space and the DMA alias hole.
- *
- * On POWER4, firmware sets the TCE region by assuming
- * each TCE table is 8MB. Using this memory for anything
- * else will impact performance, so we always allocate 8MB.
- * Anton
*/
- if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p))
- minsize = 8UL << 20;
- else
- minsize = 4UL << 20;
+ minsize = 4UL << 20;
/* Align to the greater of the align or size */
align = max(minalign, minsize);
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 12640f7..acb6b92 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -19,7 +19,7 @@
WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
_end enter_prom memcpy memset reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
+strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index ca72d73..d23cf63 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -41,6 +41,7 @@
#include <asm/switch_to.h>
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
+#include <asm/debug.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -2378,6 +2379,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
struct perf_event_attr attr;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+ bool set_bp = true;
struct arch_hw_breakpoint hw_brk;
#endif
@@ -2411,9 +2413,10 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
hw_brk.address = data & (~HW_BRK_TYPE_DABR);
hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
hw_brk.len = 8;
+ set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
bp = thread->ptrace_bps[0];
- if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
+ if (!set_bp) {
if (bp) {
unregister_hw_breakpoint(bp);
thread->ptrace_bps[0] = NULL;
@@ -2450,6 +2453,9 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
return PTR_ERR(bp);
}
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+ if (set_bp && (!ppc_breakpoint_available()))
+ return -ENODEV;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
task->thread.hw_brk = hw_brk;
#else /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -2904,6 +2910,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
if (child->thread.hw_brk.address)
return -ENOSPC;
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
+
child->thread.hw_brk = brk;
return 1;
@@ -3052,7 +3061,10 @@ long arch_ptrace(struct task_struct *child, long request,
#endif
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
dbginfo.num_instruction_bps = 0;
- dbginfo.num_data_bps = 1;
+ if (ppc_breakpoint_available())
+ dbginfo.num_data_bps = 1;
+ else
+ dbginfo.num_data_bps = 0;
dbginfo.num_condition_regs = 0;
#ifdef CONFIG_PPC64
dbginfo.data_bp_alignment = 8;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
new file mode 100644
index 0000000..bab5a27
--- /dev/null
+++ b/arch/powerpc/kernel/security.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Security related flags and so on.
+//
+// Copyright 2018, Michael Ellerman, IBM Corporation.
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/seq_buf.h>
+
+#include <asm/security_features.h>
+
+
+unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ bool thread_priv;
+
+ thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
+
+ if (rfi_flush || thread_priv) {
+ struct seq_buf s;
+ seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+ seq_buf_printf(&s, "Mitigation: ");
+
+ if (rfi_flush)
+ seq_buf_printf(&s, "RFI Flush");
+
+ if (rfi_flush && thread_priv)
+ seq_buf_printf(&s, ", ");
+
+ if (thread_priv)
+ seq_buf_printf(&s, "L1D private per thread");
+
+ seq_buf_printf(&s, "\n");
+
+ return s.len;
+ }
+
+ if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+ !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ bool bcs, ccd, ori;
+ struct seq_buf s;
+
+ seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+ bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+ ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+ ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31);
+
+ if (bcs || ccd) {
+ seq_buf_printf(&s, "Mitigation: ");
+
+ if (bcs)
+ seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+
+ if (bcs && ccd)
+ seq_buf_printf(&s, ", ");
+
+ if (ccd)
+ seq_buf_printf(&s, "Indirect branch cache disabled");
+ } else
+ seq_buf_printf(&s, "Vulnerable");
+
+ if (ori)
+ seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+
+ seq_buf_printf(&s, "\n");
+
+ return s.len;
+}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d73ec51..0af5c11 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -437,6 +437,8 @@ static void __init cpu_init_thread_core_maps(int tpc)
}
+u32 *cpu_to_phys_id = NULL;
+
/**
* smp_setup_cpu_maps - initialize the following cpu maps:
* cpu_possible_mask
@@ -463,6 +465,10 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
+ cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
+ __alignof__(u32)));
+ memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
@@ -480,6 +486,7 @@ void __init smp_setup_cpu_maps(void)
intserv = of_get_property(dn, "reg", &len);
if (!intserv) {
cpu_be = cpu_to_be32(cpu);
+ /* XXX: what is this? uninitialized?? */
intserv = &cpu_be; /* assume logical == phys */
len = 4;
}
@@ -499,8 +506,8 @@ void __init smp_setup_cpu_maps(void)
"enable-method", "spin-table");
set_cpu_present(cpu, avail);
- set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
set_cpu_possible(cpu, true);
+ cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
cpu++;
}
@@ -835,6 +842,23 @@ static __init void print_system_info(void)
pr_info("-----------------------------------------------------\n");
}
+#ifdef CONFIG_SMP
+static void smp_setup_pacas(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ allocate_paca(cpu);
+ set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
+ }
+
+ memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
+ cpu_to_phys_id = NULL;
+}
+#endif
+
/*
* Called into from start_kernel this initializes memblock, which is used
* to manage page allocation until mem_init is called.
@@ -888,8 +912,8 @@ void __init setup_arch(char **cmdline_p)
/* Check the SMT related command line arguments (ppc64). */
check_smt_enabled();
- /* On BookE, setup per-core TLB data structures. */
- setup_tlb_core_data();
+ /* Parse memory topology */
+ mem_topology_setup();
/*
* Release secondary cpus out of their spinloops at 0x60 now that
@@ -899,6 +923,11 @@ void __init setup_arch(char **cmdline_p)
* so smp_release_cpus() does nothing for them.
*/
#ifdef CONFIG_SMP
+ smp_setup_pacas();
+
+ /* On BookE, setup per-core TLB data structures. */
+ setup_tlb_core_data();
+
smp_release_cpus();
#endif
@@ -919,6 +948,8 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC64
if (!radix_enabled())
init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#elif defined(CONFIG_PPC_8xx)
+ init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
#else
#error "context.addr_limit not initialized."
#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 3fc11e3..d144df5 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -46,13 +46,10 @@ static inline void emergency_stack_init(void) { };
#endif
#ifdef CONFIG_PPC64
-void record_spr_defaults(void);
-#else
-static inline void record_spr_defaults(void) { };
-#endif
-
-#ifdef CONFIG_PPC64
u64 ppc64_bolted_size(void);
+
+/* Default SPR values from firmware/kexec */
+extern unsigned long spr_default_dscr;
#endif
/*
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 51ebc01..7445748 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -39,6 +39,7 @@
#include <asm/udbg.h>
#include <asm/code-patching.h>
#include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
#define DBG(fmt...)
@@ -121,7 +122,7 @@ notrace void __init machine_init(u64 dt_ptr)
}
/* Checks "l2cr=xxxx" command-line option */
-int __init ppc_setup_l2cr(char *str)
+static int __init ppc_setup_l2cr(char *str)
{
if (cpu_has_feature(CPU_FTR_L2CR)) {
unsigned long val = simple_strtoul(str, NULL, 0);
@@ -134,7 +135,7 @@ int __init ppc_setup_l2cr(char *str)
__setup("l2cr=", ppc_setup_l2cr);
/* Checks "l3cr=xxxx" command-line option */
-int __init ppc_setup_l3cr(char *str)
+static int __init ppc_setup_l3cr(char *str)
{
if (cpu_has_feature(CPU_FTR_L3CR)) {
unsigned long val = simple_strtoul(str, NULL, 0);
@@ -180,7 +181,7 @@ EXPORT_SYMBOL(nvram_sync);
#endif /* CONFIG_NVRAM */
-int __init ppc_init(void)
+static int __init ppc_init(void)
{
/* clear the progress line */
if (ppc_md.progress)
@@ -192,7 +193,6 @@ int __init ppc_init(void)
}
return 0;
}
-
arch_initcall(ppc_init);
void __init irqstack_early_init(void)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index c388cc3..66f2b62 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -110,7 +110,7 @@ void __init setup_tlb_core_data(void)
if (cpu_first_thread_sibling(boot_cpuid) == first)
first = boot_cpuid;
- paca[cpu].tcd_ptr = &paca[first].tcd;
+ paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
/*
* If we have threads, we need either tlbsrx.
@@ -254,6 +254,14 @@ static void cpu_ready_for_interrupts(void)
get_paca()->kernel_msr = MSR_KERNEL;
}
+unsigned long spr_default_dscr = 0;
+
+void __init record_spr_defaults(void)
+{
+ if (early_cpu_has_feature(CPU_FTR_DSCR))
+ spr_default_dscr = mfspr(SPRN_DSCR);
+}
+
/*
* Early initialization entry point. This is called by head.S
* with MMU translation disabled. We rely on the "feature" of
@@ -304,7 +312,11 @@ void __init early_setup(unsigned long dt_ptr)
early_init_devtree(__va(dt_ptr));
/* Now we know the logical id of our boot cpu, setup the paca. */
- setup_paca(&paca[boot_cpuid]);
+ if (boot_cpuid != 0) {
+ /* Poison paca_ptrs[0] again if it's not the boot cpu */
+ memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
+ }
+ setup_paca(paca_ptrs[boot_cpuid]);
fixup_boot_paca();
/*
@@ -599,6 +611,21 @@ __init u64 ppc64_bolted_size(void)
#endif
}
+static void *__init alloc_stack(unsigned long limit, int cpu)
+{
+ unsigned long pa;
+
+ pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
+ early_cpu_to_node(cpu), MEMBLOCK_NONE);
+ if (!pa) {
+ pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+ if (!pa)
+ panic("cannot allocate stacks");
+ }
+
+ return __va(pa);
+}
+
void __init irqstack_early_init(void)
{
u64 limit = ppc64_bolted_size();
@@ -610,12 +637,8 @@ void __init irqstack_early_init(void)
* accessed in realmode.
*/
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc_base(THREAD_SIZE,
- THREAD_SIZE, limit));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc_base(THREAD_SIZE,
- THREAD_SIZE, limit));
+ softirq_ctx[i] = alloc_stack(limit, i);
+ hardirq_ctx[i] = alloc_stack(limit, i);
}
}
@@ -623,20 +646,21 @@ void __init irqstack_early_init(void)
void __init exc_lvl_early_init(void)
{
unsigned int i;
- unsigned long sp;
for_each_possible_cpu(i) {
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- critirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].crit_kstack = __va(sp + THREAD_SIZE);
+ void *sp;
+
+ sp = alloc_stack(ULONG_MAX, i);
+ critirq_ctx[i] = sp;
+ paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE;
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- dbgirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
+ sp = alloc_stack(ULONG_MAX, i);
+ dbgirq_ctx[i] = sp;
+ paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE;
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].mc_kstack = __va(sp + THREAD_SIZE);
+ sp = alloc_stack(ULONG_MAX, i);
+ mcheckirq_ctx[i] = sp;
+ paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE;
}
if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
@@ -690,23 +714,24 @@ void __init emergency_stack_init(void)
for_each_possible_cpu(i) {
struct thread_info *ti;
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
}
@@ -762,7 +787,7 @@ void __init setup_per_cpu_areas(void)
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
- paca[cpu].data_offset = __per_cpu_offset[cpu];
+ paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
}
}
#endif
@@ -846,9 +871,6 @@ static void do_nothing(void *unused)
void rfi_flush_enable(bool enable)
{
- if (rfi_flush == enable)
- return;
-
if (enable) {
do_rfi_flush_fixups(enabled_flush_types);
on_each_cpu(do_nothing, NULL, 1);
@@ -863,6 +885,10 @@ static void init_fallback_flush(void)
u64 l1d_size, limit;
int cpu;
+ /* Only allocate the fallback flush area once (at boot time). */
+ if (l1d_flush_fallback_area)
+ return;
+
l1d_size = ppc64_caches.l1d.size;
limit = min(ppc64_bolted_size(), ppc64_rma_size);
@@ -875,23 +901,24 @@ static void init_fallback_flush(void)
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
for_each_possible_cpu(cpu) {
- paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
- paca[cpu].l1d_flush_size = l1d_size;
+ struct paca_struct *paca = paca_ptrs[cpu];
+ paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
+ paca->l1d_flush_size = l1d_size;
}
}
-void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
{
if (types & L1D_FLUSH_FALLBACK) {
- pr_info("rfi-flush: Using fallback displacement flush\n");
+ pr_info("rfi-flush: fallback displacement flush available\n");
init_fallback_flush();
}
if (types & L1D_FLUSH_ORI)
- pr_info("rfi-flush: Using ori type flush\n");
+ pr_info("rfi-flush: ori type flush available\n");
if (types & L1D_FLUSH_MTTRIG)
- pr_info("rfi-flush: Using mttrig type flush\n");
+ pr_info("rfi-flush: mttrig type flush available\n");
enabled_flush_types = types;
@@ -902,13 +929,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
+ bool enable;
+
if (val == 1)
- rfi_flush_enable(true);
+ enable = true;
else if (val == 0)
- rfi_flush_enable(false);
+ enable = false;
else
return -EINVAL;
+ /* Only do anything if we're changing state */
+ if (enable != rfi_flush)
+ rfi_flush_enable(enable);
+
return 0;
}
@@ -927,12 +960,4 @@ static __init int rfi_flush_debugfs_init(void)
}
device_initcall(rfi_flush_debugfs_init);
#endif
-
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
-{
- if (rfi_flush)
- return sprintf(buf, "Mitigation: RFI Flush\n");
-
- return sprintf(buf, "Vulnerable\n");
-}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 7c59d88..a6467f8 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -49,6 +49,11 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
#else /* CONFIG_PPC64 */
+extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+ struct pt_regs *regs);
+extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+ struct pt_regs *regs);
+
static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
struct task_struct *tsk)
{
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a46de00..492f034 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1045,7 +1045,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
struct ucontext __user *new_ctx,
int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
{
- unsigned char tmp;
+ unsigned char tmp __maybe_unused;
int ctx_has_vsx_region = 0;
#ifdef CONFIG_PPC64
@@ -1231,7 +1231,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
{
struct sig_dbg_op op;
int i;
- unsigned char tmp;
+ unsigned char tmp __maybe_unused;
unsigned long new_msr = regs->msr;
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
unsigned long new_dbcr0 = current->thread.debug.dbcr0;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index bbe7634..e16ec7b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -123,8 +123,8 @@ int smp_generic_kick_cpu(int nr)
* cpu_start field to become non-zero After we set cpu_start,
* the processor will continue on to secondary_start
*/
- if (!paca[nr].cpu_start) {
- paca[nr].cpu_start = 1;
+ if (!paca_ptrs[nr]->cpu_start) {
+ paca_ptrs[nr]->cpu_start = 1;
smp_mb();
return 0;
}
@@ -565,19 +565,28 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
}
#endif
+#ifdef CONFIG_NMI_IPI
+static void stop_this_cpu(struct pt_regs *regs)
+#else
static void stop_this_cpu(void *dummy)
+#endif
{
/* Remove this CPU */
set_cpu_online(smp_processor_id(), false);
- local_irq_disable();
+ hard_irq_disable();
+ spin_begin();
while (1)
- ;
+ spin_cpu_relax();
}
void smp_send_stop(void)
{
+#ifdef CONFIG_NMI_IPI
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000);
+#else
smp_call_function(stop_this_cpu, NULL, 0);
+#endif
}
struct thread_info *current_set[NR_CPUS];
@@ -657,7 +666,7 @@ void smp_prepare_boot_cpu(void)
{
BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
- paca[boot_cpuid].__current = current;
+ paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
current_set[boot_cpuid] = task_thread_info(current);
@@ -748,8 +757,8 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
struct thread_info *ti = task_thread_info(idle);
#ifdef CONFIG_PPC64
- paca[cpu].__current = idle;
- paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->__current = idle;
+ paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
ti->cpu = cpu;
secondary_ti = current_set[cpu] = ti;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 04d0bbd..755dc98 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -20,6 +20,7 @@
#include <asm/firmware.h>
#include "cacheinfo.h"
+#include "setup.h"
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -588,21 +589,18 @@ static DEVICE_ATTR(dscr_default, 0600,
static void sysfs_create_dscr_default(void)
{
- int err = 0;
- if (cpu_has_feature(CPU_FTR_DSCR))
- err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
-}
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ int err = 0;
+ int cpu;
-void __init record_spr_defaults(void)
-{
- int cpu;
+ dscr_default = spr_default_dscr;
+ for_each_possible_cpu(cpu)
+ paca_ptrs[cpu]->dscr_default = dscr_default;
- if (cpu_has_feature(CPU_FTR_DSCR)) {
- dscr_default = mfspr(SPRN_DSCR);
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- paca[cpu].dscr_default = dscr_default;
+ err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
}
}
+
#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a32823d..360e71d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -266,6 +266,9 @@ void accumulate_stolen_time(void)
static inline u64 calculate_stolen_time(u64 stop_tb)
{
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return 0;
+
if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
return scan_dispatch_log(stop_tb);
@@ -1234,7 +1237,7 @@ void calibrate_delay(void)
static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
ppc_md.get_rtc_time(tm);
- return rtc_valid_tm(tm);
+ return 0;
}
static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1e48d15..a2ef0c0 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -208,6 +208,12 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
}
raw_local_irq_restore(flags);
+ /*
+ * system_reset_exception handles debugger, crash dump, panic, for 0x100
+ */
+ if (TRAP(regs) == 0x100)
+ return;
+
crash_fadump(regs, "die oops");
if (kexec_should_crash(current))
@@ -272,8 +278,13 @@ void die(const char *str, struct pt_regs *regs, long err)
{
unsigned long flags;
- if (debugger(regs))
- return;
+ /*
+ * system_reset_exception handles debugger, crash dump, panic, for 0x100
+ */
+ if (TRAP(regs) != 0x100) {
+ if (debugger(regs))
+ return;
+ }
flags = oops_begin(regs);
if (__die(str, regs, err))
@@ -460,7 +471,7 @@ static inline int check_io_access(struct pt_regs *regs)
/* single-step stuff */
#define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC)
#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC)
-
+#define clear_br_trace(regs) do {} while(0)
#else
/* On non-4xx, the reason for the machine check or program
exception is in the MSR. */
@@ -473,6 +484,7 @@ static inline int check_io_access(struct pt_regs *regs)
#define single_stepping(regs) ((regs)->msr & MSR_SE)
#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
+#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE)
#endif
#if defined(CONFIG_E500)
@@ -988,6 +1000,7 @@ void single_step_exception(struct pt_regs *regs)
enum ctx_state prev_state = exception_enter();
clear_single_step(regs);
+ clear_br_trace(regs);
if (kprobe_post_handler(regs))
return;
@@ -1495,18 +1508,6 @@ bail:
exception_exit(prev_state);
}
-void slb_miss_bad_addr(struct pt_regs *regs)
-{
- enum ctx_state prev_state = exception_enter();
-
- if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
- else
- bad_page_fault(regs, regs->dar, SIGSEGV);
-
- exception_exit(prev_state);
-}
-
void StackOverflow(struct pt_regs *regs)
{
printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 22b01a3..b44ec10 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -99,26 +99,28 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
+#ifdef CONFIG_PPC32
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_gettimeofday", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_gettime", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_getres", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_get_tbfreq", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_time", NULL
},
+#endif
};
/*
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 85ba80d..4b19da8 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -74,9 +74,15 @@ kvm-hv-y += \
book3s_64_mmu_hv.o \
book3s_64_mmu_radix.o
+kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+ book3s_hv_tm.o
+
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
book3s_hv_rm_xics.o book3s_hv_rm_xive.o
+kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+ book3s_hv_tm_builtin.o
+
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_hmi.o \
@@ -84,6 +90,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
+ $(kvm-book3s_64-builtin-tm-objs-y) \
$(kvm-book3s_64-builtin-xics-objs-y)
endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9cb9448..81e2ea8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,6 +49,7 @@
#include <asm/reg.h>
#include <asm/ppc-opcode.h>
#include <asm/asm-prototypes.h>
+#include <asm/debug.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
@@ -170,7 +171,7 @@ static bool kvmppc_ipi_thread(int cpu)
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
if (cpu >= 0 && cpu < nr_cpu_ids) {
- if (paca[cpu].kvm_hstate.xics_phys) {
+ if (paca_ptrs[cpu]->kvm_hstate.xics_phys) {
xics_wake_cpu(cpu);
return true;
}
@@ -498,7 +499,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
* use 640 bytes of the structure though, so we should accept
* clients that set a size of 640.
*/
- if (len < 640)
+ BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+ if (len < sizeof(struct lppaca))
break;
vpap = &tvcpu->arch.vpa;
err = 0;
@@ -741,6 +743,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
case H_SET_MODE_RESOURCE_SET_DAWR:
if (!kvmppc_power8_compatible(vcpu))
return H_P2;
+ if (!ppc_breakpoint_available())
+ return H_P2;
if (mflags)
return H_UNSUPPORTED_FLAG_START;
if (value2 & DABRX_HYP)
@@ -1206,6 +1210,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
}
break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+ /*
+ * This occurs for various TM-related instructions that
+ * we need to emulate on POWER9 DD2.2. We have already
+ * handled the cases where the guest was in real-suspend
+ * mode and was transitioning to transactional state.
+ */
+ r = kvmhv_p9_tm_emulation(vcpu);
+ break;
+#endif
+
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
break;
@@ -1978,7 +1995,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
* turn off the HFSCR bit, which causes those instructions to trap.
*/
vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
- if (!cpu_has_feature(CPU_FTR_TM))
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ vcpu->arch.hfscr |= HFSCR_TM;
+ else if (!cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr &= ~HFSCR_TM;
if (cpu_has_feature(CPU_FTR_ARCH_300))
vcpu->arch.hfscr &= ~HFSCR_MSGP;
@@ -2140,7 +2159,7 @@ static int kvmppc_grab_hwthread(int cpu)
struct paca_struct *tpaca;
long timeout = 10000;
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
/* Ensure the thread won't go into the kernel if it wakes */
tpaca->kvm_hstate.kvm_vcpu = NULL;
@@ -2173,7 +2192,7 @@ static void kvmppc_release_hwthread(int cpu)
{
struct paca_struct *tpaca;
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
tpaca->kvm_hstate.kvm_vcore = NULL;
@@ -2239,9 +2258,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
vcpu->arch.thread_cpu = cpu;
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
}
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
+ tpaca->kvm_hstate.fake_suspend = 0;
/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
smp_wmb();
tpaca->kvm_hstate.kvm_vcore = vc;
@@ -2264,7 +2284,7 @@ static void kvmppc_wait_for_nap(int n_threads)
* for any threads that still have a non-NULL vcore ptr.
*/
for (i = 1; i < n_threads; ++i)
- if (paca[cpu + i].kvm_hstate.kvm_vcore)
+ if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
break;
if (i == n_threads) {
HMT_medium();
@@ -2274,7 +2294,7 @@ static void kvmppc_wait_for_nap(int n_threads)
}
HMT_medium();
for (i = 1; i < n_threads; ++i)
- if (paca[cpu + i].kvm_hstate.kvm_vcore)
+ if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
@@ -2806,9 +2826,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
for (thr = 0; thr < controlled_threads; ++thr) {
- paca[pcpu + thr].kvm_hstate.tid = thr;
- paca[pcpu + thr].kvm_hstate.napping = 0;
- paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+ struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+ paca->kvm_hstate.tid = thr;
+ paca->kvm_hstate.napping = 0;
+ paca->kvm_hstate.kvm_split_mode = sip;
}
/* Initiate micro-threading (split-core) on POWER8 if required */
@@ -2923,7 +2945,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
} else if (hpt_on_radix) {
/* Wait for all threads to have seen final sync */
for (thr = 1; thr < controlled_threads; ++thr) {
- while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
+ struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+ while (paca->kvm_hstate.kvm_split_mode) {
HMT_low();
barrier();
}
@@ -4388,7 +4412,7 @@ static int kvm_init_subcore_bitmap(void)
int node = cpu_to_node(first_cpu);
/* Ignore if it is already allocated. */
- if (paca[first_cpu].sibling_subcore_state)
+ if (paca_ptrs[first_cpu]->sibling_subcore_state)
continue;
sibling_subcore_state =
@@ -4403,7 +4427,8 @@ static int kvm_init_subcore_bitmap(void)
for (j = 0; j < threads_per_core; j++) {
int cpu = first_cpu + j;
- paca[cpu].sibling_subcore_state = sibling_subcore_state;
+ paca_ptrs[cpu]->sibling_subcore_state =
+ sibling_subcore_state;
}
}
return 0;
@@ -4430,7 +4455,7 @@ static int kvmppc_book3s_init_hv(void)
/*
* We need a way of accessing the XICS interrupt controller,
- * either directly, via paca[cpu].kvm_hstate.xics_phys, or
+ * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or
* indirectly, via OPAL.
*/
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 49a2c78..de18299 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -251,7 +251,7 @@ void kvmhv_rm_send_ipi(int cpu)
return;
/* Else poke the target with an IPI */
- xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
else
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index dc54373..0e84930 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -79,8 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r5, 0
mtspr SPRN_MMCRA, r5
isync
- ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
- lbz r5, LPPACA_PMCINUSE(r3)
+ lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f86a202..bd63fa8 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -113,8 +113,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_SPRG_VDSO_WRITE,r3
/* Reload the host's PMU registers */
- ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
- lbz r4, LPPACA_PMCINUSE(r3)
+ lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r4, 0
beq 23f /* skip if not */
BEGIN_FTR_SECTION
@@ -786,12 +785,18 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
*/
bl kvmppc_restore_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
#endif
/* Load guest PMU registers */
@@ -885,8 +890,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ld r6, VCPU_DAWRX(r4)
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
+ /*
+ * Handle broken DAWR case by not writing it. This means we
+ * can still store the DAWR register for migration.
+ */
+BEGIN_FTR_SECTION
mtspr SPRN_DAWR, r5
mtspr SPRN_DAWRX, r6
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
mtspr SPRN_CIABR, r7
mtspr SPRN_TAR, r8
ld r5, VCPU_IC(r4)
@@ -914,11 +925,14 @@ BEGIN_FTR_SECTION
mtspr SPRN_ACOP, r6
mtspr SPRN_CSIGR, r7
mtspr SPRN_TACR, r8
+ nop
FTR_SECTION_ELSE
/* POWER9-only registers */
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
+ lbz r8, HSTATE_FAKE_SUSPEND(r13)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
+ rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
ld r7, VCPU_HFSCR(r4)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
@@ -1370,6 +1384,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r3, VCPU_CTR(r9)
std r4, VCPU_XER(r9)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* For softpatch interrupt, go off and do TM instruction emulation */
+ cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+ beq kvmppc_tm_emul
+#endif
+
/* If this is a page table miss then see if it's theirs or ours */
cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
beq kvmppc_hdsi
@@ -1747,12 +1767,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
*/
bl kvmppc_save_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
#endif
/* Increment yield count if they have a VPA */
@@ -1852,6 +1878,10 @@ BEGIN_FTR_SECTION
ld r6, STACK_SLOT_DAWR(r1)
ld r7, STACK_SLOT_DAWRX(r1)
mtspr SPRN_CIABR, r5
+ /*
+ * If the DAWR doesn't work, it's ok to write these here as
+ * this value should always be zero
+ */
mtspr SPRN_DAWR, r6
mtspr SPRN_DAWRX, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
@@ -2055,6 +2085,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtlr r0
blr
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2. This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+kvmppc_tm_emul:
+ /* Save instruction image in HEIR */
+ mfspr r3, SPRN_HEIR
+ stw r3, VCPU_HEIR(r9)
+
+ /*
+ * The cases we want to handle here are those where the guest
+ * is in real suspend mode and is trying to transition to
+ * transactional mode.
+ */
+ lbz r0, HSTATE_FAKE_SUSPEND(r13)
+ cmpwi r0, 0 /* keep exiting guest if in fake suspend */
+ bne guest_exit_cont
+ rldicl r3, r11, 64 - MSR_TS_S_LG, 62 /* r3 = 2-bit field of r11 starting at bit MSR_TS_S_LG (r11 presumably holds the guest MSR -- confirm) */
+ cmpwi r3, 1 /* or if not in suspend state */
+ bne guest_exit_cont
+
+ /* Call C code to do the emulation */
+ mr r3, r9 /* r3 = r9, the base used for the VCPU_* offsets above, passed as the argument */
+ bl kvmhv_p9_tm_emulation_early
+ nop
+ ld r9, HSTATE_KVM_VCPU(r13) /* reload r9 from the paca after the call */
+ li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH /* put the trap number back in r12 (presumably clobbered by the call -- confirm) */
+ cmpwi r3, 0 /* r3 = return value of the C helper */
+ beq guest_exit_cont /* continue exiting if not handled */
+ ld r10, VCPU_PC(r9) /* NOTE(review): looks like this picks up the PC set by the emulation -- confirm */
+ ld r11, VCPU_MSR(r9) /* NOTE(review): likewise for the MSR -- confirm */
+ b fast_interrupt_c_return /* go back to guest if handled */
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
/*
* Check whether an HDSI is an HPTE not found fault or something else.
* If it is an HPTE not found fault that is due to the guest accessing
@@ -2507,8 +2573,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3,0
blr
+2:
+BEGIN_FTR_SECTION
+ /* POWER9 with disabled DAWR */
+ li r3, H_HARDWARE
+ blr
+END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
-2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
+ rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
rlwimi r5, r4, 2, DAWRX_WT
clrrdi r4, r4, 3
std r4, VCPU_DAWR(r3)
@@ -2588,13 +2660,19 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
*/
ld r9, HSTATE_KVM_VCPU(r13)
bl kvmppc_save_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
#endif
/*
@@ -2701,12 +2779,18 @@ kvm_end_cede:
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
*/
bl kvmppc_restore_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
#endif
/* load up FP state */
@@ -3033,6 +3117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
kvmppc_save_tm:
mflr r0
std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
/* Turn on TM. */
mfmsr r8
@@ -3047,6 +3132,24 @@ kvmppc_save_tm:
std r1, HSTATE_HOST_R1(r13)
li r3, TM_CAUSE_KVM_RESCHED
+BEGIN_FTR_SECTION
+ lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
+ cmpwi r0, 0
+ beq 3f
+ rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
+ beq 4f
+BEGIN_FTR_SECTION_NESTED(96)
+ bl pnv_power9_force_smt4_catch
+END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+ nop
+ b 6f
+3:
+ /* Emulation of the treclaim instruction needs TEXASR before treclaim */
+ mfspr r6, SPRN_TEXASR
+ std r6, VCPU_ORIG_TEXASR(r9)
+6:
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
/* Clear the MSR RI since r1, r13 are all going to be foobar. */
li r5, 0
mtmsrd r5, 1
@@ -3058,6 +3161,43 @@ kvmppc_save_tm:
SET_SCRATCH0(r13)
GET_PACA(r13)
std r9, PACATMSCRATCH(r13)
+
+ /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */
+BEGIN_FTR_SECTION
+ lbz r9, HSTATE_FAKE_SUSPEND(r13)
+ cmpwi r9, 0
+ beq 2f
+ /*
+ * We were in fake suspend, so we are not going to save the
+ * register state as the guest checkpointed state (since
+ * we already have it), therefore we can now use any volatile GPR.
+ */
+ /* Reload stack pointer and TOC. */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+ /* Set MSR RI now we have r1 and r13 back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+ HMT_MEDIUM
+ ld r6, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r6
+BEGIN_FTR_SECTION_NESTED(96)
+ bl pnv_power9_force_smt4_release
+END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+ nop
+
+4:
+ mfspr r3, SPRN_PSSCR
+ /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
+ li r0, PSSCR_FAKE_SUSPEND
+ andc r3, r3, r0
+ mtspr SPRN_PSSCR, r3
+ ld r9, HSTATE_KVM_VCPU(r13)
+ /* Don't save TEXASR, use value from last exit in real suspend state */
+ b 11f
+2:
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
ld r9, HSTATE_KVM_VCPU(r13)
/* Get a few more GPRs free. */
@@ -3128,13 +3268,15 @@ kvmppc_save_tm:
* change these outside of a transaction, so they must always be
* context switched.
*/
+ mfspr r7, SPRN_TEXASR
+ std r7, VCPU_TEXASR(r9)
+11:
mfspr r5, SPRN_TFHAR
mfspr r6, SPRN_TFIAR
- mfspr r7, SPRN_TEXASR
std r5, VCPU_TFHAR(r9)
std r6, VCPU_TFIAR(r9)
- std r7, VCPU_TEXASR(r9)
+ addi r1, r1, PPC_MIN_STKFRM
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
@@ -3169,6 +3311,8 @@ kvmppc_restore_tm:
mtspr SPRN_TFIAR, r6
mtspr SPRN_TEXASR, r7
+ li r0, 0
+ stb r0, HSTATE_FAKE_SUSPEND(r13)
ld r5, VCPU_MSR(r4)
rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
beqlr /* TM not active in guest */
@@ -3183,6 +3327,15 @@ kvmppc_restore_tm:
mtspr SPRN_TEXASR, r7
/*
+ * If we are doing TM emulation for the guest on a POWER9 DD2,
+ * then we don't actually do a trechkpt -- we either set up
+ * fake-suspend mode, or emulate a TM rollback.
+ */
+BEGIN_FTR_SECTION
+ b .Ldo_tm_fake_load
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
+ /*
* We need to load up the checkpointed state for the guest.
* We need to do this early as it will blow away any GPRs, VSRs and
* some SPRs.
@@ -3254,10 +3407,24 @@ kvmppc_restore_tm:
/* Set the MSR RI since we have our registers back. */
li r5, MSR_RI
mtmsrd r5, 1
-
+9:
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
+
+.Ldo_tm_fake_load:
+ cmpwi r5, 1 /* check for suspended state */
+ bgt 10f
+ stb r5, HSTATE_FAKE_SUSPEND(r13)
+ b 9b /* and return */
+10: stdu r1, -PPC_MIN_STKFRM(r1)
+ /* guest is in transactional state, so simulate rollback */
+ mr r3, r4
+ bl kvmhv_emulate_tm_rollback
+ nop
+ ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
+ addi r1, r1, PPC_MIN_STKFRM
+ b 9b
#endif
/*
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
new file mode 100644
index 0000000..bf710ad
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * Record a transaction failure in the vcpu's saved TFIAR and TEXASR.
+ *
+ * TFIAR is built from the current guest PC with its two low bits cleared;
+ * bit 0 is then set when the guest MSR has PR set.  TEXASR gets
+ * failure_cause in its top byte together with the ABORT, FS and EXACT
+ * flags, plus SUSP and PR as dictated by the guest MSR.  Only the low
+ * 26 bits of the previous TEXASR value are carried over.
+ */
+static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
+{
+	const u64 guest_msr = vcpu->arch.shregs.msr;
+	u64 fail_addr = vcpu->arch.pc & ~0x3ull;
+	u64 status = TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT;
+
+	status |= failure_cause << 56;
+	if (MSR_TM_SUSPENDED(guest_msr))
+		status |= TEXASR_SUSP;
+	if (guest_msr & MSR_PR) {
+		status |= TEXASR_PR;
+		fail_addr |= 1;
+	}
+
+	/* Preserve ROT and TL fields of existing TEXASR */
+	vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | status;
+	vcpu->arch.tfiar = fail_addr;
+}
+
+/*
+ * This gets called on a softpatch interrupt on POWER9 DD2.2 processors.
+ * We expect to find a TM-related instruction to be emulated.  The
+ * instruction image is in vcpu->arch.emul_inst.  If the guest was in
+ * TM suspended or transactional state, the checkpointed state has been
+ * reclaimed and is in the vcpu struct.  The CPU is in virtual mode in
+ * host context.
+ *
+ * The image is decoded on its primary and extended opcode fields (mask
+ * 0xfc0007ff) and matched against PPC_INST_RFID, PPC_INST_RFEBB,
+ * PPC_INST_MTMSRD, PPC_INST_TSR, PPC_INST_TRECLAIM and PPC_INST_TRECHKPT.
+ * Each case either updates the guest state held in the vcpu struct or
+ * queues the interrupt the guest should take instead (program interrupt
+ * or facility unavailable).
+ *
+ * For tsr, treclaim and trechkpt, CR0 is set to 0b0 || MSR[TS] || 0b0,
+ * where MSR[TS] is the transaction state before the instruction.
+ *
+ * Returns RESUME_GUEST in every case, including the unrecognized
+ * instruction case, which additionally triggers a one-time warning.
+ */
+int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
+{
+	u32 instr = vcpu->arch.emul_inst;
+	u64 msr = vcpu->arch.shregs.msr;
+	u64 newmsr, bescr;
+	int ra, rs;
+
+	switch (instr & 0xfc0007ff) {
+	case PPC_INST_RFID:
+		/* XXX do we need to check for PR=0 here? */
+		newmsr = vcpu->arch.shregs.srr1;
+		/* should only get here for Sx -> T1 transition */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       MSR_TM_TRANSACTIONAL(newmsr) &&
+			       (newmsr & MSR_TM)));
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		/* CFAR records the address of the emulated instruction */
+		vcpu->arch.cfar = vcpu->arch.pc - 4;
+		vcpu->arch.pc = vcpu->arch.shregs.srr0;
+		return RESUME_GUEST;
+
+	case PPC_INST_RFEBB:
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		/* check EBB facility is available */
+		if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
+			/*
+			 * generate a facility unavailable interrupt; the
+			 * facility number goes in the top byte of FSCR
+			 */
+			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+				((u64)FSCR_EBB_LG << 56);
+			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		bescr = vcpu->arch.bescr;
+		/* expect to see a S->T transition requested */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       ((bescr >> 30) & 3) == 2));
+		/* BESCR[GE] takes the value of bit 11 of the instruction image */
+		bescr &= ~BESCR_GE;
+		if (instr & (1 << 11))
+			bescr |= BESCR_GE;
+		vcpu->arch.bescr = bescr;
+		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.cfar = vcpu->arch.pc - 4;
+		vcpu->arch.pc = vcpu->arch.ebbrr;
+		return RESUME_GUEST;
+
+	case PPC_INST_MTMSRD:
+		/* XXX do we need to check for PR=0 here? */
+		rs = (instr >> 21) & 0x1f;
+		newmsr = kvmppc_get_gpr(vcpu, rs);
+		/* check this is a Sx -> T1 transition */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       MSR_TM_TRANSACTIONAL(newmsr) &&
+			       (newmsr & MSR_TM)));
+		/* mtmsrd doesn't change LE */
+		newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		return RESUME_GUEST;
+
+	case PPC_INST_TSR:
+		/* check for PR=1 and arch 2.06 bit set in PCR */
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+				((u64)FSCR_TM_LG << 56);
+			kvmppc_book3s_queue_irqprio(vcpu,
+						BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/*
+		 * Set CR0 to indicate previous transactional state:
+		 * CR0 = 0b0 || MSR[TS] || 0b0, so the two TS bits sit in
+		 * the middle of the top nibble of CR (shift by 29, not 28).
+		 */
+		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		/* L=1 => tresume, L=0 => tsuspend */
+		if (instr & (1 << 21)) {
+			if (MSR_TM_SUSPENDED(msr))
+				msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		} else {
+			if (MSR_TM_TRANSACTIONAL(msr))
+				msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
+		}
+		vcpu->arch.shregs.msr = msr;
+		return RESUME_GUEST;
+
+	case PPC_INST_TRECLAIM:
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+				((u64)FSCR_TM_LG << 56);
+			kvmppc_book3s_queue_irqprio(vcpu,
+						BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* If no transaction active, generate TM bad thing */
+		if (!MSR_TM_ACTIVE(msr)) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			return RESUME_GUEST;
+		}
+		/* If failure was not previously recorded, recompute TEXASR */
+		if (!(vcpu->arch.orig_texasr & TEXASR_FS)) {
+			/*
+			 * The failure cause is the low byte of GPR[RA],
+			 * or zero when the RA field is 0.
+			 */
+			ra = (instr >> 16) & 0x1f;
+			if (ra)
+				ra = kvmppc_get_gpr(vcpu, ra) & 0xff;
+			emulate_tx_failure(vcpu, ra);
+		}
+
+		copy_from_checkpoint(vcpu);
+
+		/*
+		 * Set CR0 to indicate previous transactional state:
+		 * CR0 = 0b0 || MSR[TS] || 0b0 (shift by 29, not 28).
+		 */
+		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+		return RESUME_GUEST;
+
+	case PPC_INST_TRECHKPT:
+		/* XXX do we need to check for PR=0 here? */
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+				((u64)FSCR_TM_LG << 56);
+			kvmppc_book3s_queue_irqprio(vcpu,
+						BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* If transaction active or TEXASR[FS] = 0, bad thing */
+		if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			return RESUME_GUEST;
+		}
+
+		copy_to_checkpoint(vcpu);
+
+		/*
+		 * Set CR0 to indicate previous transactional state:
+		 * CR0 = 0b0 || MSR[TS] || 0b0 (shift by 29, not 28).
+		 */
+		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		vcpu->arch.shregs.msr = msr | MSR_TS_S;
+		return RESUME_GUEST;
+	}
+
+	/* What should we do here? We didn't recognize the instruction */
+	WARN_ON_ONCE(1);
+	return RESUME_GUEST;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
new file mode 100644
index 0000000..d98ccfd
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * Handles PPC_INST_{RFID,RFEBB,MTMSRD,TSR} when the guest is in real suspend
+ * mode and we want to get back to it without dooming the transaction.  The
+ * caller has checked real-suspend mode (MSR[TS] = S and the fake-suspend
+ * flag is not set).  Returns 1 if emulated into the vcpu state, else 0.
+ */
+int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+{
+ u32 instr = vcpu->arch.emul_inst;
+ u64 newmsr, msr, bescr;
+ int rs;
+
+ switch (instr & 0xfc0007ff) { /* instruction bits 11-25 are ignored */
+ case PPC_INST_RFID:
+ /* XXX do we need to check for PR=0 here? */
+ newmsr = vcpu->arch.shregs.srr1;
+ /* should only get here for Sx -> T1 transition; otherwise decline */
+ if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+ return 0;
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+ vcpu->arch.cfar = vcpu->arch.pc - 4;
+ vcpu->arch.pc = vcpu->arch.shregs.srr0;
+ return 1;
+
+ case PPC_INST_RFEBB:
+ /* check for PR=1 and arch 2.06 bit set in PCR; if so, decline */
+ msr = vcpu->arch.shregs.msr;
+ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+ return 0;
+ /* check EBB facility is available (HFSCR, plus FSCR when PR=1) */
+ if (!(vcpu->arch.hfscr & HFSCR_EBB) ||
+ ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB)))
+ return 0;
+ bescr = mfspr(SPRN_BESCR);
+ /* expect to see a S->T transition requested */
+ if (((bescr >> 30) & 3) != 2)
+ return 0;
+ bescr &= ~BESCR_GE;
+ if (instr & (1 << 11)) /* instruction bit 11 gives the new BESCR_GE */
+ bescr |= BESCR_GE;
+ mtspr(SPRN_BESCR, bescr);
+ msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ vcpu->arch.shregs.msr = msr;
+ vcpu->arch.cfar = vcpu->arch.pc - 4;
+ vcpu->arch.pc = mfspr(SPRN_EBBRR);
+ return 1;
+
+ case PPC_INST_MTMSRD:
+ /* XXX do we need to check for PR=0 here? */
+ rs = (instr >> 21) & 0x1f; /* GPR number holding the new MSR value */
+ newmsr = kvmppc_get_gpr(vcpu, rs);
+ msr = vcpu->arch.shregs.msr;
+ /* check this is a Sx -> T1 transition */
+ if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+ return 0;
+ /* mtmsrd doesn't change LE */
+ newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+ return 1;
+
+ case PPC_INST_TSR:
+ /* we know the MSR has the TS field = S (0b01) here */
+ msr = vcpu->arch.shregs.msr;
+ /* check for PR=1 and arch 2.06 bit set in PCR */
+ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+ return 0;
+ /* check for TM disabled in the HFSCR or MSR */
+ if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM))
+ return 0;
+ /* L=1 => tresume => set TS to T (0b10); L=0 leaves the MSR as is */
+ if (instr & (1 << 21))
+ vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ /* Set CR0 to 0b0010 */
+ vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000;
+ return 1;
+ }
+
+ return 0; /* not one of the instructions handled here */
+}
+
+/*
+ * Called when returning to a guest in TM transactional state.  Rolls it back
+ * to the checkpointed state: MSR[TS] cleared, PC = tfhar, CR0 = 0b1010.
+ */
+void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
+ vcpu->arch.pc = vcpu->arch.tfhar; /* continue at the address in tfhar */
+ copy_from_checkpoint(vcpu);
+ vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; /* CR0 = 0b1010 */
+}
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 4d8b4d6..fa888bf 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -45,12 +45,6 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_BOOK3S
/* mtdec lowers the interrupt line when positive. */
kvmppc_core_dequeue_dec(vcpu);
-
- /* POWER4+ triggers a dec interrupt if the value is < 0 */
- if (vcpu->arch.dec & 0x80000000) {
- kvmppc_core_queue_dec(vcpu);
- return;
- }
#endif
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 52c2053..4e38764 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -646,10 +646,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = hv_enabled;
break;
#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case KVM_CAP_PPC_HTM:
r = hv_enabled &&
- (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
+ (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
break;
+#endif
default:
r = 0;
break;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 3c29c90..6539010 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -22,9 +22,11 @@ ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
extra-$(CONFIG_PPC64) += crtsavres.o
endif
+obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
+ memcpy_power7.o
+
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \
- memcpy_64.o memcmp_64.o pmem.o
+ string_64.o memcpy_64.o memcmp_64.o pmem.o
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 4bcc9e7..8d5034f 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -21,7 +21,9 @@ _GLOBAL_TOC(copy_page)
BEGIN_FTR_SECTION
lis r5,PAGE_SIZE@h
FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
b copypage_power7
+#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
ori r5,r5,PAGE_SIZE@l
BEGIN_FTR_SECTION
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index ca5fc8f..8fa73b7 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -42,8 +42,6 @@ _GLOBAL(copypage_power7)
lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32
-.machine push
-.machine "power4"
/* setup read stream 0 */
dcbt 0,r4,0b01000 /* addr from */
dcbt 0,r7,0b01010 /* length and depth from */
@@ -52,7 +50,6 @@ _GLOBAL(copypage_power7)
dcbtst 0,r10,0b01010 /* length and depth to */
eieio
dcbt 0,r8,0b01010 /* all streams GO */
-.machine pop
#ifdef CONFIG_ALTIVEC
mflr r0
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 08da06e..5066773 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -20,11 +20,13 @@
.align 7
_GLOBAL_TOC(__copy_tofrom_user)
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
b __copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#endif
_GLOBAL(__copy_tofrom_user_base)
/* first check for a whole page copy on a page boundary */
cmpldi cr1,r5,16
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index d416a4a..215e476 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -312,8 +312,6 @@ err1; stb r0,0(r3)
lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32
-.machine push
-.machine "power4"
/* setup read stream 0 */
dcbt 0,r6,0b01000 /* addr from */
dcbt 0,r7,0b01010 /* length and depth from */
@@ -322,7 +320,6 @@ err1; stb r0,0(r3)
dcbtst 0,r10,0b01010 /* length and depth to */
eieio
dcbt 0,r8,0b01010 /* all streams GO */
-.machine pop
beq cr1,.Lunwind_stack_nonvmx_copy
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 73697c4..35f80ab 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -153,7 +153,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
patch_instruction(dest + 2, instrs[2]);
}
- printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+ printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index f4d6088..8d8265b 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -19,9 +19,11 @@ BEGIN_FTR_SECTION
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
#ifndef SELFTEST
b memcpy_power7
#endif
+#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
/* dumb little-endian memcpy that will get replaced at runtime */
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 193909a..df7de9d 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -259,15 +259,12 @@ _GLOBAL(memcpy_power7)
lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32
-.machine push
-.machine "power4"
dcbt 0,r6,0b01000
dcbt 0,r7,0b01010
dcbtst 0,r9,0b01000
dcbtst 0,r10,0b01010
eieio
dcbt 0,r8,0b01010 /* GO */
-.machine pop
beq cr1,.Lunwind_stack_nonvmx_copy
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 70274b7..34d68f1 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -280,7 +280,7 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest,
* Copy from userspace to a buffer, using the largest possible
* aligned accesses, up to sizeof(long).
*/
-static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb,
+static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb,
struct pt_regs *regs)
{
int err = 0;
@@ -385,7 +385,7 @@ static nokprobe_inline int write_mem_aligned(unsigned long val,
* Copy from a buffer to userspace, using the largest possible
* aligned accesses, up to sizeof(long).
*/
-static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb,
+static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb,
struct pt_regs *regs)
{
int err = 0;
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 849f50c..cf77d75 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
mtspr(SPRN_M_TW, __pa(pgd) - offset);
/* Update context */
- mtspr(SPRN_M_CASID, id);
+ mtspr(SPRN_M_CASID, id - 1);
/* sync */
mb();
}
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 697b70a..7d0945b 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
return 1;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
vsidkey = SLB_VSID_USER;
break;
case VMALLOC_REGION_ID:
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 866446c..c01d627 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -297,7 +297,12 @@ static bool access_error(bool is_write, bool is_exec,
if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
return true;
-
+ /*
+ * We should ideally do the vma pkey access check here. But in the
+ * fault path, handle_mm_fault() also does the same check. To avoid
+ * these multiple checks, we skip it here and handle access error due
+ * to pkeys later.
+ */
return false;
}
@@ -518,25 +523,16 @@ good_area:
#ifdef CONFIG_PPC_MEM_KEYS
/*
- * if the HPTE is not hashed, hardware will not detect
- * a key fault. Lets check if we failed because of a
- * software detected key fault.
+ * We skipped the access check for keys earlier (see access_error()).
+ * Detect a key fault here from the handle_mm_fault() error return.
*/
if (unlikely(fault & VM_FAULT_SIGSEGV) &&
- !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
- is_exec, 0)) {
- /*
- * The PGD-PDT...PMD-PTE tree may not have been fully setup.
- * Hence we cannot walk the tree to locate the PTE, to locate
- * the key. Hence let's use vma_pkey() to get the key; instead
- * of get_mm_addr_key().
- */
+ !arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
+
int pkey = vma_pkey(vma);
- if (likely(pkey)) {
- up_read(&mm->mmap_sem);
- return bad_key_fault_exception(regs, address, pkey);
- }
+ up_read(&mm->mmap_sem);
+ return bad_key_fault_exception(regs, address, pkey);
}
#endif /* CONFIG_PPC_MEM_KEYS */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 656933c..1d049c7 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -866,18 +866,6 @@ static void native_flush_hash_range(unsigned long number, int local)
local_irq_restore(flags);
}
-static int native_register_proc_table(unsigned long base, unsigned long page_size,
- unsigned long table_size)
-{
- unsigned long patb1 = base << 25; /* VSID */
-
- patb1 |= (page_size << 5); /* sllp */
- patb1 |= table_size;
-
- partition_tb->patb1 = cpu_to_be64(patb1);
- return 0;
-}
-
void __init hpte_init_native(void)
{
mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
@@ -889,7 +877,4 @@ void __init hpte_init_native(void)
mmu_hash_ops.hpte_clear_all = native_hpte_clear;
mmu_hash_ops.flush_hash_range = native_flush_hash_range;
mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- register_process_table = native_register_proc_table;
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index cf290d41..0bd3790 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -132,9 +132,10 @@ EXPORT_SYMBOL(mmu_hash_ops);
* is provided by the firmware.
*/
-/* Pre-POWER4 CPUs (4k pages only)
+/*
+ * Fallback (4k pages only)
*/
-static struct mmu_psize_def mmu_psize_defaults_old[] = {
+static struct mmu_psize_def mmu_psize_defaults[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
@@ -554,8 +555,8 @@ static void __init htab_scan_page_sizes(void)
mmu_psize_set_default_penc();
/* Default to 4K pages only */
- memcpy(mmu_psize_defs, mmu_psize_defaults_old,
- sizeof(mmu_psize_defaults_old));
+ memcpy(mmu_psize_defs, mmu_psize_defaults,
+ sizeof(mmu_psize_defaults));
/*
* Try to find the available page sizes in the device-tree
@@ -781,7 +782,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
}
}
-int hash__create_section_mapping(unsigned long start, unsigned long end)
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
int rc = htab_bolt_mapping(start, end, __pa(start),
pgprot_val(PAGE_KERNEL), mmu_linear_psize,
@@ -875,6 +876,12 @@ static void __init htab_initialize(void)
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
+ /*
+ * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
+ * to inform the hypervisor that we wish to use the HPT.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ register_process_table(0, 0, 0);
#ifdef CONFIG_FA_DUMP
/*
* If firmware assisted dump is active firmware preserves
@@ -1110,19 +1117,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
#ifdef CONFIG_PPC_MM_SLICES
static unsigned int get_paca_psize(unsigned long addr)
{
- u64 lpsizes;
- unsigned char *hpsizes;
+ unsigned char *psizes;
unsigned long index, mask_index;
if (addr < SLICE_LOW_TOP) {
- lpsizes = get_paca()->mm_ctx_low_slices_psize;
+ psizes = get_paca()->mm_ctx_low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xF;
+ } else {
+ psizes = get_paca()->mm_ctx_high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = get_paca()->mm_ctx_high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
}
#else
@@ -1262,7 +1268,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
}
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
break;
case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
@@ -1527,7 +1533,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Get VSID */
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
if (!vsid)
return;
/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 3a08d21..f1153f8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -122,9 +122,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
-#else
-#define HUGEPD_PGD_SHIFT PUD_SHIFT
-#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
/*
@@ -553,9 +550,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
+#ifdef CONFIG_PPC_RADIX_MMU
if (radix_enabled())
return radix__hugetlb_get_unmapped_area(file, addr, len,
pgoff, flags);
+#endif
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
@@ -563,10 +562,12 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
#ifdef CONFIG_PPC_MM_SLICES
- unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
/* With radix we don't use slices, so derive the page size from the vma */
- if (!radix_enabled())
+ if (!radix_enabled()) {
+ unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
return 1UL << mmu_psize_to_shift(psize);
+ }
#endif
return vma_kernel_pagesize(vma);
}
@@ -663,15 +664,26 @@ static int __init hugetlbpage_init(void)
shift = mmu_psize_to_shift(psize);
- if (add_huge_page_size(1ULL << shift) < 0)
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (shift > PGDIR_SHIFT)
continue;
-
+ else if (shift > PUD_SHIFT)
+ pdshift = PGDIR_SHIFT;
+ else if (shift > PMD_SHIFT)
+ pdshift = PUD_SHIFT;
+ else
+ pdshift = PMD_SHIFT;
+#else
if (shift < HUGEPD_PUD_SHIFT)
pdshift = PMD_SHIFT;
else if (shift < HUGEPD_PGD_SHIFT)
pdshift = PUD_SHIFT;
else
pdshift = PGDIR_SHIFT;
+#endif
+
+ if (add_huge_page_size(1ULL << shift) < 0)
+ continue;
/*
* if we have pdshift and shift value same, we don't
* use pgt cache for hugepd.
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 6419b33..3e59e5d 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -88,18 +88,13 @@ void MMU_init(void);
int __map_without_bats;
int __map_without_ltlbs;
-/*
- * This tells the system to allow ioremapping memory marked as reserved.
- */
-int __allow_ioremap_reserved;
-
/* max amount of low RAM to map in */
unsigned long __max_low_memory = MAX_LOW_MEM;
/*
* Check for command-line options that affect what MMU_init will do.
*/
-void __init MMU_setup(void)
+static void __init MMU_setup(void)
{
/* Check for nobats option (used in mapin_ram). */
if (strstr(boot_command_line, "nobats")) {
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fdb424a..51ce091 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -68,12 +68,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_BOOK3S_64
-#if H_PGTABLE_RANGE > USER_VSID_RANGE
-#warning Limited user VSID range means pagetable space is wasted
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
phys_addr_t kernstart_addr;
@@ -372,7 +366,7 @@ static int __init parse_disable_radix(char *p)
{
bool val;
- if (strlen(p) == 0)
+ if (!p)
val = true;
else if (kstrtobool(p, &val))
return -EINVAL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index fe8c611..737f8a4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -82,17 +82,7 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
int page_is_ram(unsigned long pfn)
{
-#ifndef CONFIG_PPC64 /* XXX for now */
- return pfn < max_pfn;
-#else
- unsigned long paddr = (pfn << PAGE_SHIFT);
- struct memblock_region *reg;
-
- for_each_memblock(memory, reg)
- if (paddr >= reg->base && paddr < (reg->base + reg->size))
- return 1;
- return 0;
-#endif
+ return memblock_is_memory(__pfn_to_phys(pfn));
}
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -117,7 +107,7 @@ int memory_add_physaddr_to_nid(u64 start)
}
#endif
-int __weak create_section_mapping(unsigned long start, unsigned long end)
+int __weak create_section_mapping(unsigned long start, unsigned long end, int nid)
{
return -ENODEV;
}
@@ -127,7 +117,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
@@ -137,7 +127,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
resize_hpt_for_hotplug(memblock_phys_mem_size());
start = (unsigned long)__va(start);
- rc = create_section_mapping(start, start + size);
+ rc = create_section_mapping(start, start + size, nid);
if (rc) {
pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
start, start + size, rc);
@@ -148,7 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -212,7 +202,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
EXPORT_SYMBOL_GPL(walk_system_ram_range);
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
{
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
min_low_pfn = MEMORY_START >> PAGE_SHIFT;
@@ -224,7 +214,10 @@ void __init initmem_init(void)
* memblock_regions
*/
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+}
+void __init initmem_init(void)
+{
/* XXX need to clip this if using highmem? */
sparse_memory_present_with_active_regions(0);
sparse_init();
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 3f980ba..b75194d 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -94,13 +94,6 @@ static int hash__init_new_context(struct mm_struct *mm)
return index;
/*
- * In the case of exec, use the default limit,
- * otherwise inherit it from the mm we are duplicating.
- */
- if (!mm->context.slb_addr_limit)
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-
- /*
* The old code would re-promote on fork, we don't do that when using
* slices as it could cause problem promoting slices that have been
* forced down to 4K.
@@ -115,7 +108,7 @@ static int hash__init_new_context(struct mm_struct *mm)
* check against 0 is OK.
*/
if (mm->context.id == 0)
- slice_set_user_psize(mm, mmu_virtual_psize);
+ slice_init_new_context_exec(mm);
subpage_prot_init_new_context(mm);
@@ -186,6 +179,19 @@ void __destroy_context(int context_id)
}
EXPORT_SYMBOL_GPL(__destroy_context);
+static void destroy_contexts(mm_context_t *ctx)
+{
+ int index, context_id;
+ /* Remove every non-zero id in ctx->extended_id[] from mmu_context_ida. */
+ spin_lock(&mmu_context_lock); /* held across all the ida_remove() calls */
+ for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+ context_id = ctx->extended_id[index];
+ if (context_id) /* zero entries are skipped */
+ ida_remove(&mmu_context_ida, context_id);
+ }
+ spin_unlock(&mmu_context_lock);
+}
+
#ifdef CONFIG_PPC_64K_PAGES
static void destroy_pagetable_page(struct mm_struct *mm)
{
@@ -224,7 +230,7 @@ void destroy_context(struct mm_struct *mm)
else
subpage_prot_free(mm);
destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
+ destroy_contexts(&mm->context);
mm->context.id = MMU_NO_CONTEXT;
}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4554d65..be8f5c9 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -331,6 +331,17 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
pr_hard("initing context for mm @%p\n", mm);
+#ifdef CONFIG_PPC_MM_SLICES
+ /*
+ * We have MMU_NO_CONTEXT set to be ~0. Hence check
+ * explicitly against context.id == 0. This ensures that we properly
+ * initialize context slice details for newly allocated mm's (which will
+ * have id == 0) and don't alter context slice inherited via fork (which
+ * will have id != 0).
+ */
+ if (mm->context.id == 0)
+ slice_init_new_context_exec(mm);
+#endif
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
return 0;
@@ -428,8 +439,8 @@ void __init mmu_context_init(void)
* -- BenH
*/
if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
- first_context = 0;
- last_context = 15;
+ first_context = 1;
+ last_context = 16;
no_selective_tlbil = true;
} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
first_context = 1;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 57fbc55..c4c0a09 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -98,7 +98,6 @@ extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
extern int __map_without_bats;
-extern int __allow_ioremap_reserved;
extern unsigned int rtas_data, rtas_size;
struct hash_pte;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index edd8d0b..57a5029 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -831,18 +831,13 @@ out:
of_node_put(rtas);
}
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
{
- int nid, cpu;
-
- max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
- max_pfn = max_low_pfn;
+ int cpu;
if (parse_numa_properties())
setup_nonnuma();
- memblock_dump_all();
-
/*
* Modify the set of possible NUMA nodes to reflect information
* available about the set of online nodes, and the set of nodes
@@ -853,6 +848,23 @@ void __init initmem_init(void)
find_possible_nodes();
+ setup_node_to_cpumask_map();
+
+ reset_numa_cpu_lookup_table();
+
+ for_each_present_cpu(cpu)
+ numa_setup_cpu(cpu);
+}
+
+void __init initmem_init(void)
+{
+ int nid;
+
+ max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ max_pfn = max_low_pfn;
+
+ memblock_dump_all();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
@@ -863,10 +875,6 @@ void __init initmem_init(void)
sparse_init();
- setup_node_to_cpumask_map();
-
- reset_numa_cpu_lookup_table();
-
/*
* We need the numa_cpu_lookup_table to be accurate for all CPUs,
* even before we online them, so that we can use cpu_to_{node,mem}
@@ -876,8 +884,6 @@ void __init initmem_init(void)
*/
cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
- for_each_present_cpu(cpu)
- numa_setup_cpu(cpu);
}
static int __init early_numa(char *p)
@@ -1105,7 +1111,7 @@ static void setup_cpu_associativity_change_counters(void)
for_each_possible_cpu(cpu) {
int i;
u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+ volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
for (i = 0; i < distance_ref_points_depth; i++)
counts[i] = hypervisor_counts[i];
@@ -1131,7 +1137,7 @@ static int update_cpu_associativity_changes_mask(void)
for_each_possible_cpu(cpu) {
int i, changed = 0;
u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+ volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
for (i = 0; i < distance_ref_points_depth; i++) {
if (hypervisor_counts[i] != counts[i]) {
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 422e802..518518f 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -155,15 +155,15 @@ void mmu_cleanup_all(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int create_section_mapping(unsigned long start, unsigned long end)
+int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid)
{
if (radix_enabled())
- return radix__create_section_mapping(start, end);
+ return radix__create_section_mapping(start, end, nid);
- return hash__create_section_mapping(start, end);
+ return hash__create_section_mapping(start, end, nid);
}
-int remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit remove_section_mapping(unsigned long start, unsigned long end)
{
if (radix_enabled())
return radix__remove_section_mapping(start, end);
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 469808e..199bfda 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -24,6 +24,10 @@
#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
+#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* vmemmap is the starting address of the virtual address space where
@@ -320,7 +324,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
WARN_ON(vsid == 0);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 2e10a96..f1891e2 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -48,20 +48,88 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
return 0;
}
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+ unsigned long region_start, unsigned long region_end)
{
+ unsigned long pa = 0;
void *pt;
- pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+ if (region_start || region_end) /* has region hint */
+ pa = memblock_alloc_range(size, size, region_start, region_end,
+ MEMBLOCK_NONE);
+ else if (nid != -1) /* has node hint */
+ pa = memblock_alloc_base_nid(size, size,
+ MEMBLOCK_ALLOC_ANYWHERE,
+ nid, MEMBLOCK_NONE);
+
+ if (!pa)
+ pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+ BUG_ON(!pa);
+
+ pt = __va(pa);
memset(pt, 0, size);
return pt;
}
-int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
pgprot_t flags,
- unsigned int map_page_size)
+ unsigned int map_page_size,
+ int nid,
+ unsigned long region_start, unsigned long region_end)
{
+ unsigned long pfn = pa >> PAGE_SHIFT;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(ea);
+ if (pgd_none(*pgdp)) {
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
+ region_start, region_end);
+ pgd_populate(&init_mm, pgdp, pudp);
+ }
+ pudp = pud_offset(pgdp, ea);
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
+ region_start, region_end);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
+ }
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+ region_start, region_end);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+ smp_wmb();
+ return 0;
+}
+
+/*
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size,
+ int nid,
+ unsigned long region_start, unsigned long region_end)
+{
+ unsigned long pfn = pa >> PAGE_SHIFT;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
@@ -70,61 +138,48 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
* Make sure task size is correct as per the max adddr
*/
BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
- if (slab_is_available()) {
- pgdp = pgd_offset_k(ea);
- pudp = pud_alloc(&init_mm, pgdp, ea);
- if (!pudp)
- return -ENOMEM;
- if (map_page_size == PUD_SIZE) {
- ptep = (pte_t *)pudp;
- goto set_the_pte;
- }
- pmdp = pmd_alloc(&init_mm, pudp, ea);
- if (!pmdp)
- return -ENOMEM;
- if (map_page_size == PMD_SIZE) {
- ptep = pmdp_ptep(pmdp);
- goto set_the_pte;
- }
- ptep = pte_alloc_kernel(pmdp, ea);
- if (!ptep)
- return -ENOMEM;
- } else {
- pgdp = pgd_offset_k(ea);
- if (pgd_none(*pgdp)) {
- pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
- BUG_ON(pudp == NULL);
- pgd_populate(&init_mm, pgdp, pudp);
- }
- pudp = pud_offset(pgdp, ea);
- if (map_page_size == PUD_SIZE) {
- ptep = (pte_t *)pudp;
- goto set_the_pte;
- }
- if (pud_none(*pudp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- BUG_ON(pmdp == NULL);
- pud_populate(&init_mm, pudp, pmdp);
- }
- pmdp = pmd_offset(pudp, ea);
- if (map_page_size == PMD_SIZE) {
- ptep = pmdp_ptep(pmdp);
- goto set_the_pte;
- }
- if (!pmd_present(*pmdp)) {
- ptep = early_alloc_pgtable(PAGE_SIZE);
- BUG_ON(ptep == NULL);
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
- ptep = pte_offset_kernel(pmdp, ea);
+
+ if (unlikely(!slab_is_available()))
+ return early_map_kernel_page(ea, pa, flags, map_page_size,
+ nid, region_start, region_end);
+
+ /*
+ * Should make page table allocation functions be able to take a
+ * node, so we can place kernel page tables on the right nodes after
+ * boot.
+ */
+ pgdp = pgd_offset_k(ea);
+ pudp = pud_alloc(&init_mm, pgdp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
}
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
set_the_pte:
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
smp_wmb();
return 0;
}
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size)
+{
+ return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
#ifdef CONFIG_STRICT_KERNEL_RWX
void radix__change_memory_range(unsigned long start, unsigned long end,
unsigned long clear)
@@ -211,7 +266,8 @@ static inline void __meminit print_mapping(unsigned long start,
}
static int __meminit create_physical_mapping(unsigned long start,
- unsigned long end)
+ unsigned long end,
+ int nid)
{
unsigned long vaddr, addr, mapping_size = 0;
pgprot_t prot;
@@ -267,7 +323,7 @@ retry:
else
prot = PAGE_KERNEL;
- rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
+ rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
if (rc)
return rc;
}
@@ -276,7 +332,7 @@ retry:
return 0;
}
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
{
unsigned long rts_field;
struct memblock_region *reg;
@@ -286,9 +342,16 @@ static void __init radix_init_pgtable(void)
/*
* Create the linear mapping, using standard page size for now
*/
- for_each_memblock(memory, reg)
+ for_each_memblock(memory, reg) {
+ /*
+ * The memblock allocator is up at this point, so the
+ * page tables will be allocated within the range. No
+ * need for a node (which we don't have yet).
+ */
WARN_ON(create_physical_mapping(reg->base,
- reg->base + reg->size));
+ reg->base + reg->size,
+ -1));
+ }
/* Find out how many PID bits are supported */
if (cpu_has_feature(CPU_FTR_HVMODE)) {
@@ -317,7 +380,7 @@ static void __init radix_init_pgtable(void)
* host.
*/
BUG_ON(PRTB_SIZE_SHIFT > 36);
- process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+ process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
/*
* Fill in the process table.
*/
@@ -575,12 +638,8 @@ void __init radix__early_init_mmu(void)
#ifdef CONFIG_PCI
pci_io_base = ISA_IO_BASE;
#endif
-
- /*
- * For now radix also use the same frag size
- */
- __pte_frag_nr = H_PTE_FRAG_NR;
- __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+ __pte_frag_nr = RADIX_PTE_FRAG_NR;
+ __pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
radix_init_native();
@@ -695,7 +754,7 @@ struct change_mapping_params {
unsigned long aligned_end;
};
-static int stop_machine_change_mapping(void *data)
+static int __meminit stop_machine_change_mapping(void *data)
{
struct change_mapping_params *params =
(struct change_mapping_params *)data;
@@ -705,8 +764,8 @@ static int stop_machine_change_mapping(void *data)
spin_unlock(&init_mm.page_table_lock);
pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(params->aligned_start, params->start);
- create_physical_mapping(params->end, params->aligned_end);
+ create_physical_mapping(params->aligned_start, params->start, -1);
+ create_physical_mapping(params->end, params->aligned_end, -1);
spin_lock(&init_mm.page_table_lock);
return 0;
}
@@ -742,7 +801,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
/*
* clear the pte and potentially split the mapping helper
*/
-static void split_kernel_mapping(unsigned long addr, unsigned long end,
+static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
unsigned long size, pte_t *pte)
{
unsigned long mask = ~(size - 1);
@@ -835,7 +894,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
}
}
-static void remove_pagetable(unsigned long start, unsigned long end)
+static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
unsigned long addr, next;
pud_t *pud_base;
@@ -863,12 +922,12 @@ static void remove_pagetable(unsigned long start, unsigned long end)
radix__flush_tlb_kernel_range(start, end);
}
-int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
- return create_physical_mapping(start, end);
+ return create_physical_mapping(start, end, nid);
}
-int radix__remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
remove_pagetable(start, end);
return 0;
@@ -876,19 +935,30 @@ int radix__remove_section_mapping(unsigned long start, unsigned long end)
#endif /* CONFIG_MEMORY_HOTPLUG */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+ pgprot_t flags, unsigned int map_page_size,
+ int nid)
+{
+ return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
int __meminit radix__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
{
/* Create a PTE encoding */
unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+ int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+ int ret;
+
+ ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
+ BUG_ON(ret);
- BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
-void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
remove_pagetable(start, start + page_size);
}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index d35d9ad..120a49b 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -148,7 +148,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
* mem_init() sets high_memory so only do the check after that.
*/
if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
- !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
+ page_is_ram(__phys_to_pfn(p))) {
printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
(unsigned long long)p, __builtin_return_address(0));
return NULL;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index adf469f..9bf659d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -57,11 +57,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_BOOK3S_64
-#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
-#error TASK_SIZE_USER64 exceeds user VSID range
-#endif
-#endif
#ifdef CONFIG_PPC_BOOK3S_64
/*
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index ba71c54..0eafdf01 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -119,18 +119,15 @@ int pkey_initialize(void)
#else
os_reserved = 0;
#endif
+ initial_allocation_mask = ~0x0;
+ pkey_amr_uamor_mask = ~0x0ul;
+ pkey_iamr_mask = ~0x0ul;
/*
- * Bits are in LE format. NOTE: 1, 0 are reserved.
+ * key 0, 1 are reserved.
* key 0 is the default key, which allows read/write/execute.
* key 1 is recommended not to be used. PowerISA(3.0) page 1015,
* programming note.
*/
- initial_allocation_mask = ~0x0;
-
- /* register mask is in BE format */
- pkey_amr_uamor_mask = ~0x0ul;
- pkey_iamr_mask = ~0x0ul;
-
for (i = 2; i < (pkeys_total - os_reserved); i++) {
initial_allocation_mask &= ~(0x1 << i);
pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
@@ -308,9 +305,9 @@ void thread_pkey_regs_init(struct thread_struct *thread)
if (static_branch_likely(&pkey_disabled))
return;
- write_amr(read_amr() & pkey_amr_uamor_mask);
- write_iamr(read_iamr() & pkey_iamr_mask);
- write_uamor(read_uamor() & pkey_amr_uamor_mask);
+ thread->amr = read_amr() & pkey_amr_uamor_mask;
+ thread->iamr = read_iamr() & pkey_iamr_mask;
+ thread->uamor = read_uamor() & pkey_amr_uamor_mask;
}
static inline bool pkey_allows_readwrite(int pkey)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 13cfe41..66577cc 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -22,6 +22,7 @@
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
+#include <linux/context_tracking.h>
#include <linux/mm_types.h>
#include <asm/udbg.h>
@@ -340,3 +341,110 @@ void slb_initialize(void)
asm volatile("isync":::"memory");
}
+
+static void insert_slb_entry(unsigned long vsid, unsigned long ea,
+ int bpsize, int ssize)
+{
+ unsigned long flags, vsid_data, esid_data;
+ enum slb_index index;
+ int slb_cache_index;
+
+ /*
+ * We are irq disabled, hence should be safe to access PACA.
+ */
+ index = get_paca()->stab_rr;
+
+ /*
+ * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
+ */
+ if (index < (mmu_slb_size - 1))
+ index++;
+ else
+ index = SLB_NUM_BOLTED;
+
+ get_paca()->stab_rr = index;
+
+ flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+ vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
+ ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
+ esid_data = mk_esid_data(ea, ssize, index);
+
+ asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
+ : "memory");
+
+ /*
+ * Now update slb cache entries
+ */
+ slb_cache_index = get_paca()->slb_cache_ptr;
+ if (slb_cache_index < SLB_CACHE_ENTRIES) {
+ /*
+ * We have space in slb cache for optimized switch_slb().
+ * Top 36 bits from esid_data as per ISA
+ */
+ get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
+ get_paca()->slb_cache_ptr++;
+ } else {
+ /*
+ * Our cache is full and the current cache content strictly
+ * doesn't indicate the active SLB contents. Bump the ptr
+ * so that switch_slb() will ignore the cache.
+ */
+ get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+ }
+}
+
+static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long vsid;
+ int bpsize;
+
+ /*
+ * We are always above 1TB, hence use high user segment size.
+ */
+ vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
+ bpsize = get_slice_psize(mm, ea);
+ insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
+}
+
+void slb_miss_large_addr(struct pt_regs *regs)
+{
+ enum ctx_state prev_state = exception_enter();
+ unsigned long ea = regs->dar;
+ int context;
+
+ if (REGION_ID(ea) != USER_REGION_ID)
+ goto slb_bad_addr;
+
+ /*
+ * Are we beyond what the page table layout supports ?
+ */
+ if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+ goto slb_bad_addr;
+
+ /* Lower address should have been handled by asm code */
+ if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
+ goto slb_bad_addr;
+
+ /*
+ * consider this as bad access if we take a SLB miss
+ * on an address above addr limit.
+ */
+ if (ea >= current->mm->context.slb_addr_limit)
+ goto slb_bad_addr;
+
+ context = get_ea_context(&current->mm->context, ea);
+ if (!context)
+ goto slb_bad_addr;
+
+ handle_multi_context_slb_miss(context, ea);
+ exception_exit(prev_state);
+ return;
+
+slb_bad_addr:
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+ else
+ bad_page_fault(regs, ea, SIGSEGV);
+ exception_exit(prev_state);
+}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 2cf5ef3..a83fbd2 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
*/
_GLOBAL(slb_allocate)
/*
- * check for bad kernel/user address
- * (ea & ~REGION_MASK) >= PGTABLE_RANGE
+ * Check if the address falls within the range of the first context, or
+ * if we may need to handle multi context. For the first context we
+ * allocate the slb entry via the fast path below. For large address we
+ * branch out to C-code and see if additional contexts have been
+ * allocated.
+ * The test here is:
+ * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
*/
- rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4)
+ rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
bne- 8f
srdi r9,r3,60 /* get region */
@@ -200,10 +205,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
5:
/*
* Handle lpsizes
- * r9 is get_paca()->context.low_slices_psize, r11 is index
+ * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
*/
- ld r9,PACALOWSLICESPSIZE(r13)
- mr r11,r10
+ srdi r11,r10,1 /* index */
+ addi r9,r11,PACALOWSLICESPSIZE
+ lbzx r9,r13,r9 /* r9 is lpsizes[r11] */
+ rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */
6:
sldi r11,r11,2 /* index * 4 */
/* Extract the psize and multiply to get an array offset */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 23ec2c5..9cd87d1 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -37,32 +37,25 @@
#include <asm/hugetlb.h>
static DEFINE_SPINLOCK(slice_convert_lock);
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
#ifdef DEBUG
int _slice_debug = 1;
-static void slice_print_mask(const char *label, struct slice_mask mask)
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
{
if (!_slice_debug)
return;
- pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
- pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
+ pr_devel("%s low_slice: %*pbl\n", label,
+ (int)SLICE_NUM_LOW, &mask->low_slices);
+ pr_devel("%s high_slice: %*pbl\n", label,
+ (int)SLICE_NUM_HIGH, mask->high_slices);
}
#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
#else
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
#define slice_dbg(fmt...)
#endif
@@ -73,10 +66,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
unsigned long end = start + len - 1;
ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ if (SLICE_NUM_HIGH)
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
if (start < SLICE_LOW_TOP) {
- unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
+ unsigned long mend = min(end,
+ (unsigned long)(SLICE_LOW_TOP - 1));
ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
- (1u << GET_LOW_SLICE_INDEX(start));
@@ -113,11 +108,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
unsigned long start = slice << SLICE_HIGH_SHIFT;
unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
+#ifdef CONFIG_PPC64
/* Hack, so that each addresses is controlled by exactly one
* of the high or low area bitmaps, the first high area starts
* at 4GB, not 0 */
if (start == 0)
start = SLICE_LOW_TOP;
+#endif
return !slice_area_is_free(mm, start, end - start);
}
@@ -128,7 +125,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
unsigned long i;
ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ if (SLICE_NUM_HIGH)
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
for (i = 0; i < SLICE_NUM_LOW; i++)
if (!slice_low_has_vma(mm, i))
@@ -142,53 +140,75 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
__set_bit(i, ret->high_slices);
}
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
- unsigned long high_limit)
+#ifdef CONFIG_PPC_BOOK3S_64
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
{
- unsigned char *hpsizes;
- int index, mask_index;
- unsigned long i;
- u64 lpsizes;
-
- ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+#ifdef CONFIG_PPC_64K_PAGES
+ if (psize == MMU_PAGE_64K)
+ return &mm->context.mask_64k;
+#endif
+ if (psize == MMU_PAGE_4K)
+ return &mm->context.mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_16M)
+ return &mm->context.mask_16m;
+ if (psize == MMU_PAGE_16G)
+ return &mm->context.mask_16g;
+#endif
+ BUG();
+}
+#elif defined(CONFIG_PPC_8xx)
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+ if (psize == mmu_virtual_psize)
+ return &mm->context.mask_base_psize;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_512K)
+ return &mm->context.mask_512k;
+ if (psize == MMU_PAGE_8M)
+ return &mm->context.mask_8m;
+#endif
+ BUG();
+}
+#else
+#error "Must define the slice masks for page sizes supported by the platform"
+#endif
- lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == psize)
- ret->low_slices |= 1u << i;
+static bool slice_check_range_fits(struct mm_struct *mm,
+ const struct slice_mask *available,
+ unsigned long start, unsigned long len)
+{
+ unsigned long end = start + len - 1;
+ u64 low_slices = 0;
- if (high_limit <= SLICE_LOW_TOP)
- return;
+ if (start < SLICE_LOW_TOP) {
+ unsigned long mend = min(end,
+ (unsigned long)(SLICE_LOW_TOP - 1));
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- __set_bit(i, ret->high_slices);
+ low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+ - (1u << GET_LOW_SLICE_INDEX(start));
}
-}
+ if ((low_slices & available->low_slices) != low_slices)
+ return false;
-static int slice_check_fit(struct mm_struct *mm,
- struct slice_mask mask, struct slice_mask available)
-{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
- /*
- * Make sure we just do bit compare only to the max
- * addr limit and not the full bit map size.
- */
- unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
+ if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) {
+ unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
+ unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
+ unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
+ unsigned long i;
- bitmap_and(result, mask.high_slices,
- available.high_slices, slice_count);
+ for (i = start_index; i < start_index + count; i++) {
+ if (!test_bit(i, available->high_slices))
+ return false;
+ }
+ }
- return (mask.low_slices & available.low_slices) == mask.low_slices &&
- bitmap_equal(result, mask.high_slices, slice_count);
+ return true;
}
static void slice_flush_segments(void *parm)
{
+#ifdef CONFIG_PPC64
struct mm_struct *mm = parm;
unsigned long flags;
@@ -200,40 +220,64 @@ static void slice_flush_segments(void *parm)
local_irq_save(flags);
slb_flush_and_rebolt();
local_irq_restore(flags);
+#endif
}
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+static void slice_convert(struct mm_struct *mm,
+ const struct slice_mask *mask, int psize)
{
int index, mask_index;
/* Write the new slice psize bits */
- unsigned char *hpsizes;
- u64 lpsizes;
+ unsigned char *hpsizes, *lpsizes;
+ struct slice_mask *psize_mask, *old_mask;
unsigned long i, flags;
+ int old_psize;
slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
slice_print_mask(" mask", mask);
+ psize_mask = slice_mask_for_size(mm, psize);
+
/* We need to use a spinlock here to protect against
* concurrent 64k -> 4k demotion ...
*/
spin_lock_irqsave(&slice_convert_lock, flags);
lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (mask.low_slices & (1u << i))
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ if (!(mask->low_slices & (1u << i)))
+ continue;
+
+ mask_index = i & 0x1;
+ index = i >> 1;
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ /* Update the slice_mask */
+ old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
+ old_mask = slice_mask_for_size(mm, old_psize);
+ old_mask->low_slices &= ~(1u << i);
+ psize_mask->low_slices |= 1u << i;
+
+ /* Update the sizes array */
+ lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }
hpsizes = mm->context.high_slices_psize;
for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+ if (!test_bit(i, mask->high_slices))
+ continue;
+
mask_index = i & 0x1;
index = i >> 1;
- if (test_bit(i, mask.high_slices))
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
+
+ /* Update the slice_mask */
+ old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
+ old_mask = slice_mask_for_size(mm, old_psize);
+ __clear_bit(i, old_mask->high_slices);
+ __set_bit(i, psize_mask->high_slices);
+
+ /* Update the sizes array */
+ hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
(((unsigned long)psize) << (mask_index * 4));
}
@@ -254,26 +298,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
* 'available' slice_mark.
*/
static bool slice_scan_available(unsigned long addr,
- struct slice_mask available,
- int end,
- unsigned long *boundary_addr)
+ const struct slice_mask *available,
+ int end, unsigned long *boundary_addr)
{
unsigned long slice;
if (addr < SLICE_LOW_TOP) {
slice = GET_LOW_SLICE_INDEX(addr);
*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
- return !!(available.low_slices & (1u << slice));
+ return !!(available->low_slices & (1u << slice));
} else {
slice = GET_HIGH_SLICE_INDEX(addr);
*boundary_addr = (slice + end) ?
((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
- return !!test_bit(slice, available.high_slices);
+ return !!test_bit(slice, available->high_slices);
}
}
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
unsigned long len,
- struct slice_mask available,
+ const struct slice_mask *available,
int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -319,7 +362,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
unsigned long len,
- struct slice_mask available,
+ const struct slice_mask *available,
int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -377,7 +420,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
- struct slice_mask mask, int psize,
+ const struct slice_mask *mask, int psize,
int topdown, unsigned long high_limit)
{
if (topdown)
@@ -386,23 +429,33 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
}
-static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_copy_mask(struct slice_mask *dst,
+ const struct slice_mask *src)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
- dst->low_slices |= src->low_slices;
- bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+ dst->low_slices = src->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
}
-static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_or_mask(struct slice_mask *dst,
+ const struct slice_mask *src1,
+ const struct slice_mask *src2)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
- dst->low_slices &= ~src->low_slices;
+ dst->low_slices = src1->low_slices | src2->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
+}
- bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+static inline void slice_andnot_mask(struct slice_mask *dst,
+ const struct slice_mask *src1,
+ const struct slice_mask *src2)
+{
+ dst->low_slices = src1->low_slices & ~src2->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
}
#ifdef CONFIG_PPC_64K_PAGES
@@ -415,10 +468,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
unsigned long flags, unsigned int psize,
int topdown)
{
- struct slice_mask mask;
struct slice_mask good_mask;
struct slice_mask potential_mask;
- struct slice_mask compat_mask;
+ const struct slice_mask *maskp;
+ const struct slice_mask *compat_maskp = NULL;
int fixed = (flags & MAP_FIXED);
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long page_size = 1UL << pshift;
@@ -442,23 +495,16 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
}
if (high_limit > mm->context.slb_addr_limit) {
+ /*
+ * Increasing the slb_addr_limit does not require
+ * slice mask cache to be recalculated because it should
+ * be already initialised beyond the old address limit.
+ */
mm->context.slb_addr_limit = high_limit;
+
on_each_cpu(slice_flush_segments, mm, 1);
}
- /*
- * init different masks
- */
- mask.low_slices = 0;
- bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
-
- /* silence stupid warning */;
- potential_mask.low_slices = 0;
- bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
-
- compat_mask.low_slices = 0;
- bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
-
/* Sanity checks */
BUG_ON(mm->task_size == 0);
BUG_ON(mm->context.slb_addr_limit == 0);
@@ -481,8 +527,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* First make up a "good" mask of slices that have the right size
* already
*/
- slice_mask_for_size(mm, psize, &good_mask, high_limit);
- slice_print_mask(" good_mask", good_mask);
+ maskp = slice_mask_for_size(mm, psize);
/*
* Here "good" means slices that are already the right page size,
@@ -503,40 +548,47 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* search in good | compat | free, found => convert free.
*/
-#ifdef CONFIG_PPC_64K_PAGES
- /* If we support combo pages, we can allow 64k pages in 4k slices */
- if (psize == MMU_PAGE_64K) {
- slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
+ /*
+ * If we support combo pages, we can allow 64k pages in 4k slices
+ * The mask copies could be avoided in most cases here if we had
+ * a pointer to good mask for the next code to use.
+ */
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
+ compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
if (fixed)
- slice_or_mask(&good_mask, &compat_mask);
+ slice_or_mask(&good_mask, maskp, compat_maskp);
+ else
+ slice_copy_mask(&good_mask, maskp);
+ } else {
+ slice_copy_mask(&good_mask, maskp);
}
-#endif
+
+ slice_print_mask(" good_mask", &good_mask);
+ if (compat_maskp)
+ slice_print_mask(" compat_mask", compat_maskp);
/* First check hint if it's valid or if we have MAP_FIXED */
if (addr != 0 || fixed) {
- /* Build a mask for the requested range */
- slice_range_to_mask(addr, len, &mask);
- slice_print_mask(" mask", mask);
-
/* Check if we fit in the good mask. If we do, we just return,
* nothing else to do
*/
- if (slice_check_fit(mm, mask, good_mask)) {
+ if (slice_check_range_fits(mm, &good_mask, addr, len)) {
slice_dbg(" fits good !\n");
- return addr;
+ newaddr = addr;
+ goto return_addr;
}
} else {
/* Now let's see if we can find something in the existing
* slices for that size
*/
- newaddr = slice_find_area(mm, len, good_mask,
+ newaddr = slice_find_area(mm, len, &good_mask,
psize, topdown, high_limit);
if (newaddr != -ENOMEM) {
/* Found within the good mask, we don't have to setup,
* we thus return directly
*/
slice_dbg(" found area at 0x%lx\n", newaddr);
- return newaddr;
+ goto return_addr;
}
}
/*
@@ -544,12 +596,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* empty and thus can be converted
*/
slice_mask_for_free(mm, &potential_mask, high_limit);
- slice_or_mask(&potential_mask, &good_mask);
- slice_print_mask(" potential", potential_mask);
+ slice_or_mask(&potential_mask, &potential_mask, &good_mask);
+ slice_print_mask(" potential", &potential_mask);
- if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
- slice_dbg(" fits potential !\n");
- goto convert;
+ if (addr != 0 || fixed) {
+ if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
+ slice_dbg(" fits potential !\n");
+ newaddr = addr;
+ goto convert;
+ }
}
/* If we have MAP_FIXED and failed the above steps, then error out */
@@ -562,46 +617,64 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* anywhere in the good area.
*/
if (addr) {
- addr = slice_find_area(mm, len, good_mask,
- psize, topdown, high_limit);
- if (addr != -ENOMEM) {
- slice_dbg(" found area at 0x%lx\n", addr);
- return addr;
+ newaddr = slice_find_area(mm, len, &good_mask,
+ psize, topdown, high_limit);
+ if (newaddr != -ENOMEM) {
+ slice_dbg(" found area at 0x%lx\n", newaddr);
+ goto return_addr;
}
}
/* Now let's see if we can find something in the existing slices
* for that size plus free slices
*/
- addr = slice_find_area(mm, len, potential_mask,
- psize, topdown, high_limit);
+ newaddr = slice_find_area(mm, len, &potential_mask,
+ psize, topdown, high_limit);
#ifdef CONFIG_PPC_64K_PAGES
- if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+ if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) {
/* retry the search with 4k-page slices included */
- slice_or_mask(&potential_mask, &compat_mask);
- addr = slice_find_area(mm, len, potential_mask,
- psize, topdown, high_limit);
+ slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
+ newaddr = slice_find_area(mm, len, &potential_mask,
+ psize, topdown, high_limit);
}
#endif
- if (addr == -ENOMEM)
+ if (newaddr == -ENOMEM)
return -ENOMEM;
- slice_range_to_mask(addr, len, &mask);
- slice_dbg(" found potential area at 0x%lx\n", addr);
- slice_print_mask(" mask", mask);
+ slice_range_to_mask(newaddr, len, &potential_mask);
+ slice_dbg(" found potential area at 0x%lx\n", newaddr);
+ slice_print_mask(" mask", &potential_mask);
convert:
- slice_andnot_mask(&mask, &good_mask);
- slice_andnot_mask(&mask, &compat_mask);
- if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
- slice_convert(mm, mask, psize);
+ /*
+ * Try to allocate the context before we do slice convert
+ * so that we handle the context allocation failure gracefully.
+ */
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+
+ slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+ if (compat_maskp && !fixed)
+ slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+ if (potential_mask.low_slices ||
+ (SLICE_NUM_HIGH &&
+ !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+ slice_convert(mm, &potential_mask, psize);
if (psize > MMU_PAGE_BASE)
on_each_cpu(slice_flush_segments, mm, 1);
}
- return addr;
+ return newaddr;
+return_addr:
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+ return newaddr;
}
EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
@@ -627,94 +700,60 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
- unsigned char *hpsizes;
+ unsigned char *psizes;
int index, mask_index;
- /*
- * Radix doesn't use slice, but can get enabled along with MMU_SLICE
- */
- if (radix_enabled()) {
-#ifdef CONFIG_PPC_64K_PAGES
- return MMU_PAGE_64K;
-#else
- return MMU_PAGE_4K;
-#endif
- }
+ VM_BUG_ON(radix_enabled());
+
if (addr < SLICE_LOW_TOP) {
- u64 lpsizes;
- lpsizes = mm->context.low_slices_psize;
+ psizes = mm->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xf;
+ } else {
+ psizes = mm->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = mm->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
}
EXPORT_SYMBOL_GPL(get_slice_psize);
-/*
- * This is called by hash_page when it needs to do a lazy conversion of
- * an address space from real 64K pages to combo 4K pages (typically
- * when hitting a non cacheable mapping on a processor or hypervisor
- * that won't allow them for 64K pages).
- *
- * This is also called in init_new_context() to change back the user
- * psize from whatever the parent context had it set to
- * N.B. This may be called before mm->context.id has been set.
- *
- * This function will only change the content of the {low,high)_slice_psize
- * masks, it will not flush SLBs as this shall be handled lazily by the
- * caller.
- */
-void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+void slice_init_new_context_exec(struct mm_struct *mm)
{
- int index, mask_index;
- unsigned char *hpsizes;
- unsigned long flags, lpsizes;
- unsigned int old_psize;
- int i;
+ unsigned char *hpsizes, *lpsizes;
+ struct slice_mask *mask;
+ unsigned int psize = mmu_virtual_psize;
- slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+ slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
- VM_BUG_ON(radix_enabled());
- spin_lock_irqsave(&slice_convert_lock, flags);
-
- old_psize = mm->context.user_psize;
- slice_dbg(" old_psize=%d\n", old_psize);
- if (old_psize == psize)
- goto bail;
+ /*
+ * In the case of exec, use the default limit. In the
+ * case of fork it is just inherited from the mm being
+ * duplicated.
+ */
+#ifdef CONFIG_PPC64
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#else
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
mm->context.user_psize = psize;
- wmb();
+ /*
+ * Set all slice psizes to the default.
+ */
lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
- (((unsigned long)psize) << (mask_index * 4));
- }
-
-
-
-
- slice_dbg(" lsps=%lx, hsps=%lx\n",
- (unsigned long)mm->context.low_slices_psize,
- (unsigned long)mm->context.high_slices_psize);
+ memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
- bail:
- spin_unlock_irqrestore(&slice_convert_lock, flags);
+ /*
+ * Slice mask cache starts zeroed, fill the default size cache.
+ */
+ mask = slice_mask_for_size(mm, psize);
+ mask->low_slices = ~0UL;
+ if (SLICE_NUM_HIGH)
+ bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
}
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
@@ -725,7 +764,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
VM_BUG_ON(radix_enabled());
slice_range_to_mask(start, len, &mask);
- slice_convert(mm, mask, psize);
+ slice_convert(mm, &mask, psize);
}
#ifdef CONFIG_HUGETLB_PAGE
@@ -748,33 +787,27 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
* for now as we only use slices with hugetlbfs enabled. This should
* be fixed as the generic code gets fixed.
*/
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
- struct slice_mask mask, available;
+ const struct slice_mask *maskp;
unsigned int psize = mm->context.user_psize;
- unsigned long high_limit = mm->context.slb_addr_limit;
- if (radix_enabled())
- return 0;
+ VM_BUG_ON(radix_enabled());
- slice_range_to_mask(addr, len, &mask);
- slice_mask_for_size(mm, psize, &available, high_limit);
+ maskp = slice_mask_for_size(mm, psize);
#ifdef CONFIG_PPC_64K_PAGES
/* We need to account for 4k slices too */
if (psize == MMU_PAGE_64K) {
- struct slice_mask compat_mask;
- slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
- slice_or_mask(&available, &compat_mask);
+ const struct slice_mask *compat_maskp;
+ struct slice_mask available;
+
+ compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+ slice_or_mask(&available, maskp, compat_maskp);
+ return !slice_check_range_fits(mm, &available, addr, len);
}
#endif
-#if 0 /* too verbose */
- slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
- mm, addr, len);
- slice_print_mask(" mask", mask);
- slice_print_mask(" available", available);
-#endif
- return !slice_check_fit(mm, mask, available);
+ return !slice_check_range_fits(mm, maskp, addr, len);
}
#endif
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a07f537..2fba617 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -98,7 +98,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
rb |= set << PPC_BITLSHIFT(51);
rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -112,7 +112,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
rb = PPC_BIT(53); /* IS = 1 */
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -128,7 +128,7 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -144,7 +144,7 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -668,7 +668,7 @@ void radix__flush_tlb_all(void)
rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
prs = 0; /* partition scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
asm volatile("ptesync": : :"memory");
@@ -706,7 +706,7 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
- unsigned int pid = mm->context.id;
+ unsigned long pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
@@ -734,7 +734,7 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
- if (paca[sib].kvm_hstate.kvm_vcpu)
+ if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
flush = true;
}
if (flush)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 9b23f12..87d71dd 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -89,7 +89,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
/* Build full vaddr */
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 44d67b1..2668cc4 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -208,7 +208,7 @@ prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
/* Create cached_info and set spu_info[spu->number] to point to it.
* spu->number is a system-wide value, not a per-node value.
*/
- info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: create vma_map failed\n",
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
index c579b16..f40e373 100644
--- a/arch/powerpc/oprofile/cell/vma_map.c
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -69,8 +69,8 @@ vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
unsigned int size, unsigned int offset, unsigned int guard_ptr,
unsigned int guard_val)
{
- struct vma_to_fileoffset_map *new =
- kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
+ struct vma_to_fileoffset_map *new = kzalloc(sizeof(*new), GFP_KERNEL);
+
if (!new) {
printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
__func__, __LINE__);
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 57ebc65..82986d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -4,7 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
-obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
+obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
isa207-common.o power8-pmu.o power9-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index f8908ea..3f66fcf 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -198,6 +198,10 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
+
+ if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+ is_kernel_addr(mfspr(SPRN_SDAR)))
+ *addrp = 0;
}
static bool regs_sihv(struct pt_regs *regs)
@@ -457,6 +461,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
/* invalid entry */
continue;
+ /*
+ * BHRB rolling buffer could very much contain the kernel
+ * addresses at this point. Check the privileges before
+ * exporting it to userspace (avoid exposure of regions
+ * where we could have speculative execution)
+ */
+ if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+ is_kernel_addr(addr))
+ continue;
+
/* Branches are read most recent first (ie. mfbhrb 0 is
* the most recent branch).
* There are two types of valid entries:
@@ -1226,6 +1240,7 @@ static void power_pmu_disable(struct pmu *pmu)
*/
write_mmcr0(cpuhw, val);
mb();
+ isync();
/*
* Disable instruction sampling if it was enabled
@@ -1234,12 +1249,26 @@ static void power_pmu_disable(struct pmu *pmu)
mtspr(SPRN_MMCRA,
cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
mb();
+ isync();
}
cpuhw->disabled = 1;
cpuhw->n_added = 0;
ebb_switch_out(mmcr0);
+
+#ifdef CONFIG_PPC64
+ /*
+ * These are readable by userspace, may contain kernel
+ * addresses and are not switched by context switch, so clear
+ * them now to avoid leaking anything to userspace in general
+ * including to another process.
+ */
+ if (ppmu->flags & PPMU_ARCH_207S) {
+ mtspr(SPRN_SDAR, 0);
+ mtspr(SPRN_SIAR, 0);
+ }
+#endif
}
local_irq_restore(flags);
@@ -1810,6 +1839,18 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
return 0;
}
+static bool is_event_blacklisted(u64 ev)
+{
+ int i;
+
+ for (i=0; i < ppmu->n_blacklist_ev; i++) {
+ if (ppmu->blacklist_ev[i] == ev)
+ return true;
+ }
+
+ return false;
+}
+
static int power_pmu_event_init(struct perf_event *event)
{
u64 ev;
@@ -1835,15 +1876,24 @@ static int power_pmu_event_init(struct perf_event *event)
ev = event->attr.config;
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return -EOPNOTSUPP;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
ev = ppmu->generic_events[ev];
break;
case PERF_TYPE_HW_CACHE:
err = hw_perf_cache_event(event->attr.config, &ev);
if (err)
return err;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
break;
case PERF_TYPE_RAW:
ev = event->attr.config;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
break;
default:
return -ENOENT;
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
deleted file mode 100644
index ce6072f..0000000
--- a/arch/powerpc/perf/power4-pmu.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
- *
- * Copyright 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/perf_event.h>
-#include <linux/string.h>
-#include <asm/reg.h>
-#include <asm/cputable.h>
-
-/*
- * Bits in event code for POWER4
- */
-#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
-#define PM_PMC_MSK 0xf
-#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
-#define PM_UNIT_MSK 0xf
-#define PM_LOWER_SH 6
-#define PM_LOWER_MSK 1
-#define PM_LOWER_MSKS 0x40
-#define PM_BYTE_SH 4 /* Byte number of event bus to use */
-#define PM_BYTE_MSK 3
-#define PM_PMCSEL_MSK 7
-
-/*
- * Unit code values
- */
-#define PM_FPU 1
-#define PM_ISU1 2
-#define PM_IFU 3
-#define PM_IDU0 4
-#define PM_ISU1_ALT 6
-#define PM_ISU2 7
-#define PM_IFU_ALT 8
-#define PM_LSU0 9
-#define PM_LSU1 0xc
-#define PM_GPS 0xf
-
-/*
- * Bits in MMCR0 for POWER4
- */
-#define MMCR0_PMC1SEL_SH 8
-#define MMCR0_PMC2SEL_SH 1
-#define MMCR_PMCSEL_MSK 0x1f
-
-/*
- * Bits in MMCR1 for POWER4
- */
-#define MMCR1_TTM0SEL_SH 62
-#define MMCR1_TTC0SEL_SH 61
-#define MMCR1_TTM1SEL_SH 59
-#define MMCR1_TTC1SEL_SH 58
-#define MMCR1_TTM2SEL_SH 56
-#define MMCR1_TTC2SEL_SH 55
-#define MMCR1_TTM3SEL_SH 53
-#define MMCR1_TTC3SEL_SH 52
-#define MMCR1_TTMSEL_MSK 3
-#define MMCR1_TD_CP_DBG0SEL_SH 50
-#define MMCR1_TD_CP_DBG1SEL_SH 48
-#define MMCR1_TD_CP_DBG2SEL_SH 46
-#define MMCR1_TD_CP_DBG3SEL_SH 44
-#define MMCR1_DEBUG0SEL_SH 43
-#define MMCR1_DEBUG1SEL_SH 42
-#define MMCR1_DEBUG2SEL_SH 41
-#define MMCR1_DEBUG3SEL_SH 40
-#define MMCR1_PMC1_ADDER_SEL_SH 39
-#define MMCR1_PMC2_ADDER_SEL_SH 38
-#define MMCR1_PMC6_ADDER_SEL_SH 37
-#define MMCR1_PMC5_ADDER_SEL_SH 36
-#define MMCR1_PMC8_ADDER_SEL_SH 35
-#define MMCR1_PMC7_ADDER_SEL_SH 34
-#define MMCR1_PMC3_ADDER_SEL_SH 33
-#define MMCR1_PMC4_ADDER_SEL_SH 32
-#define MMCR1_PMC3SEL_SH 27
-#define MMCR1_PMC4SEL_SH 22
-#define MMCR1_PMC5SEL_SH 17
-#define MMCR1_PMC6SEL_SH 12
-#define MMCR1_PMC7SEL_SH 7
-#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
-
-static short mmcr1_adder_bits[8] = {
- MMCR1_PMC1_ADDER_SEL_SH,
- MMCR1_PMC2_ADDER_SEL_SH,
- MMCR1_PMC3_ADDER_SEL_SH,
- MMCR1_PMC4_ADDER_SEL_SH,
- MMCR1_PMC5_ADDER_SEL_SH,
- MMCR1_PMC6_ADDER_SEL_SH,
- MMCR1_PMC7_ADDER_SEL_SH,
- MMCR1_PMC8_ADDER_SEL_SH
-};
-
-/*
- * Bits in MMCRA
- */
-#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
-
-/*
- * Layout of constraint bits:
- * 6666555555555544444444443333333333222222222211111111110000000000
- * 3210987654321098765432109876543210987654321098765432109876543210
- * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
- * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
- * \SMPL ||\TTC3SEL
- * |\TTC_IFU_SEL
- * \TTM2SEL0
- *
- * SMPL - SAMPLE_ENABLE constraint
- * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
- *
- * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
- * 55: UC1 error 0x0080_0000_0000_0000
- * 54: FPU events needed 0x0040_0000_0000_0000
- * 53: ISU1 events needed 0x0020_0000_0000_0000
- * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
- *
- * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
- * 51: UC2 error 0x0008_0000_0000_0000
- * 50: FPU events needed 0x0004_0000_0000_0000
- * 49: IFU events needed 0x0002_0000_0000_0000
- * 48: LSU0 events needed 0x0001_0000_0000_0000
- *
- * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
- * 47: UC3 error 0x8000_0000_0000
- * 46: LSU0 events needed 0x4000_0000_0000
- * 45: IFU events needed 0x2000_0000_0000
- * 44: IDU0|ISU2 events needed 0x1000_0000_0000
- * 43: ISU1 events needed 0x0800_0000_0000
- *
- * TTM2SEL0
- * 42: 0 = IDU0 events needed
- * 1 = ISU2 events needed 0x0400_0000_0000
- *
- * TTC_IFU_SEL
- * 41: 0 = IFU.U events needed
- * 1 = IFU.L events needed 0x0200_0000_0000
- *
- * TTC3SEL
- * 40: 0 = LSU1.U events needed
- * 1 = LSU1.L events needed 0x0100_0000_0000
- *
- * PS1
- * 39: PS1 error 0x0080_0000_0000
- * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
- *
- * PS2
- * 35: PS2 error 0x0008_0000_0000
- * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
- *
- * B0
- * 28-31: Byte 0 event source 0xf000_0000
- * 1 = FPU
- * 2 = ISU1
- * 3 = IFU
- * 4 = IDU0
- * 7 = ISU2
- * 9 = LSU0
- * c = LSU1
- * f = GPS
- *
- * B1, B2, B3
- * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
- *
- * P8
- * 15: P8 error 0x8000
- * 14-15: Count of events needing PMC8
- *
- * P1..P7
- * 0-13: Count of events needing PMC1..PMC7
- *
- * Note: this doesn't allow events using IFU.U to be combined with events
- * using IFU.L, though that is feasible (using TTM0 and TTM2). However
- * there are no listed events for IFU.L (they are debug events not
- * verified for performance monitoring) so this shouldn't cause a
- * problem.
- */
-
-static struct unitinfo {
- unsigned long value, mask;
- int unit;
- int lowerbit;
-} p4_unitinfo[16] = {
- [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
- [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
- [PM_ISU1_ALT] =
- { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
- [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
- [PM_IFU_ALT] =
- { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
- [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
- [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
- [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
- [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
- [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
-};
-
-static unsigned char direct_marked_event[8] = {
- (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
- (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
- (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
- (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
- (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
- (1<<3) | (1<<4) | (1<<5),
- /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
- (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
- (1<<4), /* PMC8: PM_MRK_LSU_FIN */
-};
-
-/*
- * Returns 1 if event counts things relating to marked instructions
- * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
- */
-static int p4_marked_instr_event(u64 event)
-{
- int pmc, psel, unit, byte, bit;
- unsigned int mask;
-
- pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
- psel = event & PM_PMCSEL_MSK;
- if (pmc) {
- if (direct_marked_event[pmc - 1] & (1 << psel))
- return 1;
- if (psel == 0) /* add events */
- bit = (pmc <= 4)? pmc - 1: 8 - pmc;
- else if (psel == 6) /* decode events */
- bit = 4;
- else
- return 0;
- } else
- bit = psel;
-
- byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
- unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
- mask = 0;
- switch (unit) {
- case PM_LSU1:
- if (event & PM_LOWER_MSKS)
- mask = 1 << 28; /* byte 7 bit 4 */
- else
- mask = 6 << 24; /* byte 3 bits 1 and 2 */
- break;
- case PM_LSU0:
- /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
- mask = 0x083dff00;
- }
- return (mask >> (byte * 8 + bit)) & 1;
-}
-
-static int p4_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
-{
- int pmc, byte, unit, lower, sh;
- unsigned long mask = 0, value = 0;
- int grp = -1;
-
- pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
- if (pmc) {
- if (pmc > 8)
- return -1;
- sh = (pmc - 1) * 2;
- mask |= 2 << sh;
- value |= 1 << sh;
- grp = ((pmc - 1) >> 1) & 1;
- }
- unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
- if (unit) {
- lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
-
- /*
- * Bus events on bytes 0 and 2 can be counted
- * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
- */
- if (!pmc)
- grp = byte & 1;
-
- if (!p4_unitinfo[unit].unit)
- return -1;
- mask |= p4_unitinfo[unit].mask;
- value |= p4_unitinfo[unit].value;
- sh = p4_unitinfo[unit].lowerbit;
- if (sh > 1)
- value |= (unsigned long)lower << sh;
- else if (lower != sh)
- return -1;
- unit = p4_unitinfo[unit].unit;
-
- /* Set byte lane select field */
- mask |= 0xfULL << (28 - 4 * byte);
- value |= (unsigned long)unit << (28 - 4 * byte);
- }
- if (grp == 0) {
- /* increment PMC1/2/5/6 field */
- mask |= 0x8000000000ull;
- value |= 0x1000000000ull;
- } else {
- /* increment PMC3/4/7/8 field */
- mask |= 0x800000000ull;
- value |= 0x100000000ull;
- }
-
- /* Marked instruction events need sample_enable set */
- if (p4_marked_instr_event(event)) {
- mask |= 1ull << 56;
- value |= 1ull << 56;
- }
-
- /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
- if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
- mask |= 1ull << 56;
-
- *maskp = mask;
- *valp = value;
- return 0;
-}
-
-static unsigned int ppc_inst_cmpl[] = {
- 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
-};
-
-static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
-{
- int i, j, na;
-
- alt[0] = event;
- na = 1;
-
- /* 2 possibilities for PM_GRP_DISP_REJECT */
- if (event == 0x8003 || event == 0x0224) {
- alt[1] = event ^ (0x8003 ^ 0x0224);
- return 2;
- }
-
- /* 2 possibilities for PM_ST_MISS_L1 */
- if (event == 0x0c13 || event == 0x0c23) {
- alt[1] = event ^ (0x0c13 ^ 0x0c23);
- return 2;
- }
-
- /* several possibilities for PM_INST_CMPL */
- for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
- if (event == ppc_inst_cmpl[i]) {
- for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
- if (j != i)
- alt[na++] = ppc_inst_cmpl[j];
- break;
- }
- }
-
- return na;
-}
-
-static int p4_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
-{
- unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
- unsigned int pmc, unit, byte, psel, lower;
- unsigned int ttm, grp;
- unsigned int pmc_inuse = 0;
- unsigned int pmc_grp_use[2];
- unsigned char busbyte[4];
- unsigned char unituse[16];
- unsigned int unitlower = 0;
- int i;
-
- if (n_ev > 8)
- return -1;
-
- /* First pass to count resource use */
- pmc_grp_use[0] = pmc_grp_use[1] = 0;
- memset(busbyte, 0, sizeof(busbyte));
- memset(unituse, 0, sizeof(unituse));
- for (i = 0; i < n_ev; ++i) {
- pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
- if (pmc) {
- if (pmc_inuse & (1 << (pmc - 1)))
- return -1;
- pmc_inuse |= 1 << (pmc - 1);
- /* count 1/2/5/6 vs 3/4/7/8 use */
- ++pmc_grp_use[((pmc - 1) >> 1) & 1];
- }
- unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
- lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
- if (unit) {
- if (!pmc)
- ++pmc_grp_use[byte & 1];
- if (unit == 6 || unit == 8)
- /* map alt ISU1/IFU codes: 6->2, 8->3 */
- unit = (unit >> 1) - 1;
- if (busbyte[byte] && busbyte[byte] != unit)
- return -1;
- busbyte[byte] = unit;
- lower <<= unit;
- if (unituse[unit] && lower != (unitlower & lower))
- return -1;
- unituse[unit] = 1;
- unitlower |= lower;
- }
- }
- if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
- return -1;
-
- /*
- * Assign resources and set multiplexer selects.
- *
- * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
- * Each TTMx can only select one unit, but since
- * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
- * we have some choices.
- */
- if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
- unituse[6] = 1; /* Move 2 to 6 */
- unituse[2] = 0;
- }
- if (unituse[3] & (unituse[1] | unituse[2])) {
- unituse[8] = 1; /* Move 3 to 8 */
- unituse[3] = 0;
- unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
- }
- /* Check only one unit per TTMx */
- if (unituse[1] + unituse[2] + unituse[3] > 1 ||
- unituse[4] + unituse[6] + unituse[7] > 1 ||
- unituse[8] + unituse[9] > 1 ||
- (unituse[5] | unituse[10] | unituse[11] |
- unituse[13] | unituse[14]))
- return -1;
-
- /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
- mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
- << MMCR1_TTM0SEL_SH;
- mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
- << MMCR1_TTM1SEL_SH;
- mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
-
- /* Set TTCxSEL fields. */
- if (unitlower & 0xe)
- mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
- if (unitlower & 0xf0)
- mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
- if (unitlower & 0xf00)
- mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
- if (unitlower & 0x7000)
- mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
-
- /* Set byte lane select fields. */
- for (byte = 0; byte < 4; ++byte) {
- unit = busbyte[byte];
- if (!unit)
- continue;
- if (unit == 0xf) {
- /* special case for GPS */
- mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
- } else {
- if (!unituse[unit])
- ttm = unit - 1; /* 2->1, 3->2 */
- else
- ttm = unit >> 2;
- mmcr1 |= (unsigned long)ttm
- << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
- }
- }
-
- /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
- for (i = 0; i < n_ev; ++i) {
- pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
- unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
- psel = event[i] & PM_PMCSEL_MSK;
- if (!pmc) {
- /* Bus event or 00xxx direct event (off or cycles) */
- if (unit)
- psel |= 0x10 | ((byte & 2) << 2);
- for (pmc = 0; pmc < 8; ++pmc) {
- if (pmc_inuse & (1 << pmc))
- continue;
- grp = (pmc >> 1) & 1;
- if (unit) {
- if (grp == (byte & 1))
- break;
- } else if (pmc_grp_use[grp] < 4) {
- ++pmc_grp_use[grp];
- break;
- }
- }
- pmc_inuse |= 1 << pmc;
- } else {
- /* Direct event */
- --pmc;
- if (psel == 0 && (byte & 2))
- /* add events on higher-numbered bus */
- mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
- else if (psel == 6 && byte == 3)
- /* seem to need to set sample_enable here */
- mmcra |= MMCRA_SAMPLE_ENABLE;
- psel |= 8;
- }
- if (pmc <= 1)
- mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
- else
- mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
- if (pmc == 7) /* PMC8 */
- mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
- hwc[i] = pmc;
- if (p4_marked_instr_event(event[i]))
- mmcra |= MMCRA_SAMPLE_ENABLE;
- }
-
- if (pmc_inuse & 1)
- mmcr0 |= MMCR0_PMC1CE;
- if (pmc_inuse & 0xfe)
- mmcr0 |= MMCR0_PMCjCE;
-
- mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
-
- /* Return MMCRx values */
- mmcr[0] = mmcr0;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
- return 0;
-}
-
-static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
-{
- /*
- * Setting the PMCxSEL field to 0 disables PMC x.
- * (Note that pmc is 0-based here, not 1-based.)
- */
- if (pmc <= 1) {
- mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
- } else {
- mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
- if (pmc == 7)
- mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
- }
-}
-
-static int p4_generic_events[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = 7,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
- [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
-};
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-/*
- * Table of generalized cache-related events.
- * 0 means not supported, -1 means nonsensical, other values
- * are event codes.
- */
-static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
- [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0x8c10, 0x3c10 },
- [C(OP_WRITE)] = { 0x7c10, 0xc13 },
- [C(OP_PREFETCH)] = { 0xc35, 0 },
- },
- [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { 0, 0 },
- },
- [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0 },
- [C(OP_WRITE)] = { 0, 0 },
- [C(OP_PREFETCH)] = { 0xc34, 0 },
- },
- [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0x904 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0x900 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0x330, 0x331 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { -1, -1 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
-};
-
-static struct power_pmu power4_pmu = {
- .name = "POWER4/4+",
- .n_counter = 8,
- .max_alternatives = 5,
- .add_fields = 0x0000001100005555ul,
- .test_adder = 0x0011083300000000ul,
- .compute_mmcr = p4_compute_mmcr,
- .get_constraint = p4_get_constraint,
- .get_alternatives = p4_get_alternatives,
- .disable_pmc = p4_disable_pmc,
- .n_generic = ARRAY_SIZE(p4_generic_events),
- .generic_events = p4_generic_events,
- .cache_events = &power4_cache_events,
- .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
-};
-
-static int __init init_power4_pmu(void)
-{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
- return -ENODEV;
-
- return register_power_pmu(&power4_pmu);
-}
-
-early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index e99c6bf..7de344b 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -69,3 +69,31 @@ EVENT(PM_BR_CMPL_ALT, 0x10012)
EVENT(PM_BR_2PATH, 0x20036)
/* ALternate branch event that are not strongly biased */
EVENT(PM_BR_2PATH_ALT, 0x40036)
+
+/* Blacklisted events */
+EVENT(PM_MRK_ST_DONE_L2, 0x10134)
+EVENT(PM_RADIX_PWC_L1_HIT, 0x1f056)
+EVENT(PM_FLOP_CMPL, 0x100f4)
+EVENT(PM_MRK_NTF_FIN, 0x20112)
+EVENT(PM_RADIX_PWC_L2_HIT, 0x2d024)
+EVENT(PM_IFETCH_THROTTLE, 0x3405e)
+EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER, 0x3e15c)
+EVENT(PM_RADIX_PWC_L3_HIT, 0x3f056)
+EVENT(PM_RUN_CYC_SMT2_MODE, 0x3006c)
+EVENT(PM_TM_TX_PASS_RUN_INST, 0x4e014)
+EVENT(PM_DISP_HELD_SYNC_HOLD, 0x4003c)
+EVENT(PM_DTLB_MISS_16G, 0x1c058)
+EVENT(PM_DERAT_MISS_2M, 0x1c05a)
+EVENT(PM_DTLB_MISS_2M, 0x1c05c)
+EVENT(PM_MRK_DTLB_MISS_1G, 0x1d15c)
+EVENT(PM_DTLB_MISS_4K, 0x2c056)
+EVENT(PM_DERAT_MISS_1G, 0x2c05a)
+EVENT(PM_MRK_DERAT_MISS_2M, 0x2d152)
+EVENT(PM_MRK_DTLB_MISS_4K, 0x2d156)
+EVENT(PM_MRK_DTLB_MISS_16G, 0x2d15e)
+EVENT(PM_DTLB_MISS_64K, 0x3c056)
+EVENT(PM_MRK_DERAT_MISS_1G, 0x3d152)
+EVENT(PM_MRK_DTLB_MISS_64K, 0x3d156)
+EVENT(PM_DTLB_MISS_16M, 0x4c056)
+EVENT(PM_DTLB_MISS_1G, 0x4c05a)
+EVENT(PM_MRK_DTLB_MISS_16M, 0x4c15e)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 24b5b5b..2ca0b33 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -101,9 +101,45 @@ enum {
#define POWER9_MMCRA_IFM2 0x0000000080000000UL
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
+/* Nasty Power9 specific hack */
+#define PVR_POWER9_CUMULUS 0x00002000
+
/* PowerISA v2.07 format attribute structure*/
extern struct attribute_group isa207_pmu_format_group;
+int p9_dd21_bl_ev[] = {
+ PM_MRK_ST_DONE_L2,
+ PM_RADIX_PWC_L1_HIT,
+ PM_FLOP_CMPL,
+ PM_MRK_NTF_FIN,
+ PM_RADIX_PWC_L2_HIT,
+ PM_IFETCH_THROTTLE,
+ PM_MRK_L2_TM_ST_ABORT_SISTER,
+ PM_RADIX_PWC_L3_HIT,
+ PM_RUN_CYC_SMT2_MODE,
+ PM_TM_TX_PASS_RUN_INST,
+ PM_DISP_HELD_SYNC_HOLD,
+};
+
+int p9_dd22_bl_ev[] = {
+ PM_DTLB_MISS_16G,
+ PM_DERAT_MISS_2M,
+ PM_DTLB_MISS_2M,
+ PM_MRK_DTLB_MISS_1G,
+ PM_DTLB_MISS_4K,
+ PM_DERAT_MISS_1G,
+ PM_MRK_DERAT_MISS_2M,
+ PM_MRK_DTLB_MISS_4K,
+ PM_MRK_DTLB_MISS_16G,
+ PM_DTLB_MISS_64K,
+ PM_MRK_DERAT_MISS_1G,
+ PM_MRK_DTLB_MISS_64K,
+ PM_DISP_HELD_SYNC_HOLD,
+ PM_DTLB_MISS_16M,
+ PM_DTLB_MISS_1G,
+ PM_MRK_DTLB_MISS_16M,
+};
+
/* Table of alternatives, sorted by column 0 */
static const unsigned int power9_event_alternatives[][MAX_ALT] = {
{ PM_INST_DISP, PM_INST_DISP_ALT },
@@ -446,12 +482,24 @@ static struct power_pmu power9_pmu = {
static int __init init_power9_pmu(void)
{
int rc = 0;
+ unsigned int pvr = mfspr(SPRN_PVR);
/* Comes from cpu_specs[] */
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9"))
return -ENODEV;
+ /* Blacklist events */
+ if (!(pvr & PVR_POWER9_CUMULUS)) {
+ if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 1)) {
+ power9_pmu.blacklist_ev = p9_dd21_bl_ev;
+ power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev);
+ } else if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 2)) {
+ power9_pmu.blacklist_ev = p9_dd22_bl_ev;
+ power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev);
+ }
+ }
+
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
/*
* Since PM_INST_CMPL may not provide right counts in all
diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c
index d50417e..96aaae6 100644
--- a/arch/powerpc/platforms/4xx/msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -223,7 +223,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
- msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
+ msi = kzalloc(sizeof(*msi), GFP_KERNEL);
if (!msi) {
dev_err(&dev->dev, "No memory for MSI structure\n");
return -ENOMEM;
@@ -241,7 +241,8 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
if (!msi_irqs)
return -ENODEV;
- if (ppc4xx_setup_pcieh_hw(dev, res, msi))
+ err = ppc4xx_setup_pcieh_hw(dev, res, msi);
+ if (err)
goto error_out;
err = ppc4xx_msi_init_allocator(dev, msi);
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 85d9e37..69d9f60 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -339,7 +339,7 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
if (IS_ERR_VALUE(offset))
continue;
- ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL);
+ ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL);
if (!ocm_blk) {
printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block");
rh_free(ocm_reg->rh, offset);
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index f51fd35..7e966f4 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -147,7 +147,7 @@ static void qoriq_cpu_kill(unsigned int cpu)
for (i = 0; i < 500; i++) {
if (is_cpu_dead(cpu)) {
#ifdef CONFIG_PPC64
- paca[cpu].cpu_start = 0;
+ paca_ptrs[cpu]->cpu_start = 0;
#endif
return;
}
@@ -328,7 +328,7 @@ static int smp_85xx_kick_cpu(int nr)
return ret;
done:
- paca[nr].cpu_start = 1;
+ paca_ptrs[nr]->cpu_start = 1;
generic_set_cpu_up(nr);
return ret;
@@ -409,14 +409,14 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
}
if (disable_threadbit) {
- while (paca[disable_cpu].kexec_state < KEXEC_STATE_REAL_MODE) {
+ while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) {
barrier();
now = mftb();
if (!notified && now - start > 1000000) {
pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n",
__func__, smp_processor_id(),
disable_cpu,
- paca[disable_cpu].kexec_state);
+ paca_ptrs[disable_cpu]->kexec_state);
notified = true;
}
}
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index e1274db..2188d69 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -217,13 +217,7 @@ void __noreturn mpc8xx_restart(char *cmd)
static void cpm_cascade(struct irq_desc *desc)
{
- struct irq_chip *chip = irq_desc_get_chip(desc);
- int cascade_irq = cpm_get_irq();
-
- if (cascade_irq >= 0)
- generic_handle_irq(cascade_irq);
-
- chip->irq_eoi(&desc->irq_data);
+ generic_handle_irq(cpm_get_irq());
}
/* Initialize the internal interrupt controllers. The number of
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a429d85..67d3125 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -61,7 +61,7 @@ choice
help
There are two families of 64 bit PowerPC chips supported.
The most common ones are the desktop and server CPUs
- (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...)
+ (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...)
The other are the "embedded" processors compliant with the
"Book 3E" variant of the architecture
@@ -87,7 +87,6 @@ endchoice
choice
prompt "CPU selection"
depends on PPC64
- default POWER8_CPU if CPU_LITTLE_ENDIAN
default GENERIC_CPU
help
This will create a kernel which is optimised for a particular CPU.
@@ -96,17 +95,18 @@ choice
If unsure, select Generic.
config GENERIC_CPU
- bool "Generic"
+ bool "Generic (POWER4 and above)"
depends on !CPU_LITTLE_ENDIAN
+config GENERIC_CPU
+ bool "Generic (POWER8 and above)"
+ depends on CPU_LITTLE_ENDIAN
+ select ARCH_HAS_FAST_MULTIPLIER
+
config CELL_CPU
bool "Cell Broadband Engine"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
-config POWER4_CPU
- bool "POWER4"
- depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
-
config POWER5_CPU
bool "POWER5"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
@@ -125,6 +125,11 @@ config POWER8_CPU
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+config POWER9_CPU
+ bool "POWER9"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+
config E5500_CPU
bool "Freescale e5500"
depends on E500
@@ -326,6 +331,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
default y if PPC_BOOK3S_64
+ default y if PPC_8xx && HUGETLB_PAGE
default n
config PPC_HAVE_PMU_SUPPORT
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 6ea3f24..326d34e 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -342,7 +342,7 @@ static int axon_msi_probe(struct platform_device *device)
pr_devel("axon_msi: setting up dn %pOF\n", dn);
- msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
+ msic = kzalloc(sizeof(*msic), GFP_KERNEL);
if (!msic) {
printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
dn);
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index f84d52a..1aeac57 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -83,7 +83,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
pcpu = get_hard_smp_processor_id(lcpu);
/* Fixup atomic count: it exited inside IRQ handler. */
- task_thread_info(paca[lcpu].__current)->preempt_count = 0;
+ task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0;
/*
* If the RTAS start-cpu token does not exist then presume the
@@ -126,7 +126,7 @@ static int smp_cell_kick_cpu(int nr)
* cpu_start field to become non-zero After we set cpu_start,
* the processor will continue on to secondary_start
*/
- paca[nr].cpu_start = 1;
+ paca_ptrs[nr]->cpu_start = 1;
return 0;
}
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index d1e61e2..1200d0d 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -133,7 +133,7 @@ int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n",
np);
- priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv) {
pr_err("SPIDERPCI-IOWA:"
"Can't allocate struct spiderpci_iowa_private");
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
index b847e94..d9de848 100644
--- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -36,7 +36,7 @@ int spu_alloc_lscsa(struct spu_state *csa)
struct spu_lscsa *lscsa;
unsigned char *p;
- lscsa = vzalloc(sizeof(struct spu_lscsa));
+ lscsa = vzalloc(sizeof(*lscsa));
if (!lscsa)
return -ENOMEM;
csa->lscsa = lscsa;
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index ade8382..7206f3f 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -132,7 +132,7 @@ static void __flipper_quiesce(void __iomem *io_base)
out_be32(io_base + FLIPPER_ICR, 0xffffffff);
}
-struct irq_domain * __init flipper_pic_init(struct device_node *np)
+static struct irq_domain * __init flipper_pic_init(struct device_node *np)
{
struct device_node *pi;
struct irq_domain *irq_domain = NULL;
diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
index 7feb325..5c7e7ce 100644
--- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
+++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
@@ -169,7 +169,7 @@ static int ug_getc(void)
/*
* Transmits a character.
*/
-void ug_udbg_putc(char ch)
+static void ug_udbg_putc(char ch)
{
ug_putc(ch);
}
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 3fd683e..8bb46dc 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -44,6 +44,7 @@
#define HW_GPIO_BASE(idx) (idx * 0x20)
#define HW_GPIO_OUT(idx) (HW_GPIO_BASE(idx) + 0)
#define HW_GPIO_DIR(idx) (HW_GPIO_BASE(idx) + 4)
+#define HW_GPIO_OWNER (HW_GPIO_BASE(1) + 0x1c)
#define HW_GPIO_SHUTDOWN (1<<1)
#define HW_GPIO_SLOT_LED (1<<5)
@@ -79,21 +80,9 @@ void __init wii_memory_fixups(void)
BUG_ON(memblock.memory.cnt != 2);
BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
- /* trim unaligned tail */
- memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
- (phys_addr_t)ULLONG_MAX);
-
- /* determine hole, add & reserve them */
+ /* determine hole */
wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
wii_hole_size = p[1].base - wii_hole_start;
- memblock_add(wii_hole_start, wii_hole_size);
- memblock_reserve(wii_hole_start, wii_hole_size);
-
- BUG_ON(memblock.memory.cnt != 1);
- __memblock_dump_all();
-
- /* allow ioremapping the address space in the hole */
- __allow_ioremap_reserved = 1;
}
unsigned long __init wii_mmu_mapin_mem2(unsigned long top)
@@ -176,6 +165,12 @@ static void wii_power_off(void)
local_irq_disable();
if (hw_gpio) {
+ /*
+ * set the owner of the shutdown pin to ARM, because it is
+ * accessed through the registers for the ARM, below
+ */
+ clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN);
+
/* make sure that the poweroff GPIO is configured as output */
setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN);
@@ -239,7 +234,7 @@ static int __init wii_device_probe(void)
if (!machine_is(wii))
return 0;
- of_platform_bus_probe(NULL, wii_of_bus, NULL);
+ of_platform_populate(NULL, wii_of_bus, NULL, NULL);
return 0;
}
device_initcall(wii_device_probe);
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 3408f31..fa89f30 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -492,7 +492,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
const u32 *psteps, *prate, *addrp;
u32 steps;
- host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL);
+ host = kzalloc(sizeof(*host), GFP_KERNEL);
if (host == NULL) {
printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n",
np);
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index df3c93b..e0462fe 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -643,7 +643,7 @@ static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata,
while (length >= 12) {
/* Allocate a structure */
- func = kzalloc(sizeof(struct pmf_function), GFP_KERNEL);
+ func = kzalloc(sizeof(*func), GFP_KERNEL);
if (func == NULL)
goto bail;
kref_init(&func->ref);
@@ -719,7 +719,7 @@ int pmf_register_driver(struct device_node *np,
return -EBUSY;
}
- dev = kzalloc(sizeof(struct pmf_device), GFP_KERNEL);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (dev == NULL) {
DBG("pmf: no memory !\n");
return -ENOMEM;
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 6c9d519..703a350 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -16,5 +16,4 @@ obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
-obj-$(CONFIG_PPC_FTW) += nx-ftw.o
obj-$(CONFIG_OCXL_BASE) += ocxl.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 33c86c1..ddfc354 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1425,11 +1425,8 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
dev_pe = dev_pe->parent;
while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
int ret;
- int active_flags = (EEH_STATE_MMIO_ACTIVE |
- EEH_STATE_DMA_ACTIVE);
-
ret = eeh_ops->get_state(dev_pe, NULL);
- if (ret <= 0 || (ret & active_flags) == active_flags) {
+ if (ret <= 0 || eeh_state_active(ret)) {
dev_pe = dev_pe->parent;
continue;
}
@@ -1463,7 +1460,6 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
struct eeh_pe *phb_pe, *parent_pe;
__be64 frozen_pe_no;
__be16 err_type, severity;
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
long rc;
int state, ret = EEH_NEXT_ERR_NONE;
@@ -1626,8 +1622,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
/* Frozen parent PE ? */
state = eeh_ops->get_state(parent_pe, NULL);
- if (state > 0 &&
- (state & active_flags) != active_flags)
+ if (state > 0 && !eeh_state_active(state))
*pe = parent_pe;
/* Next parent level */
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 443d5ca..1f12ab1 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -24,6 +24,7 @@
#include <asm/code-patching.h>
#include <asm/smp.h>
#include <asm/runlatch.h>
+#include <asm/dbell.h>
#include "powernv.h"
#include "subcore.h"
@@ -80,7 +81,7 @@ static int pnv_save_sprs_for_deep_states(void)
for_each_possible_cpu(cpu) {
uint64_t pir = get_hard_smp_processor_id(cpu);
- uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+ uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
if (rc != 0)
@@ -173,12 +174,12 @@ static void pnv_alloc_idle_core_states(void)
for (j = 0; j < threads_per_core; j++) {
int cpu = first_cpu + j;
- paca[cpu].core_idle_state_ptr = core_idle_state;
- paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
- paca[cpu].thread_mask = 1 << j;
+ paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
+ paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
+ paca_ptrs[cpu]->thread_mask = 1 << j;
if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
continue;
- paca[cpu].thread_sibling_pacas =
+ paca_ptrs[cpu]->thread_sibling_pacas =
kmalloc_node(paca_ptr_array_size,
GFP_KERNEL, node);
}
@@ -387,6 +388,78 @@ void power9_idle(void)
power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * This is used in working around bugs in thread reconfiguration
+ * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
+ * memory and the way that XER[SO] is checkpointed.
+ * This function forces the core into SMT4 by asking all other
+ * threads not to stop, and sending a message to any that are
+ * in a stop state.
+ * Must be called with preemption disabled.
+ */
+void pnv_power9_force_smt4_catch(void)
+{
+ int cpu, cpu0, thr;
+ int awake_threads = 1; /* this thread is awake */
+ int poke_threads = 0;
+ int need_awake = threads_per_core;
+
+ cpu = smp_processor_id();
+ cpu0 = cpu & ~(threads_per_core - 1);
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (cpu != cpu0 + thr)
+ atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
+ }
+ /* order setting dont_stop vs testing requested_psscr */
+ mb();
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (!paca_ptrs[cpu0+thr]->requested_psscr)
+ ++awake_threads;
+ else
+ poke_threads |= (1 << thr);
+ }
+
+ /* If at least 3 threads are awake, the core is in SMT4 already */
+ if (awake_threads < need_awake) {
+ /* We have to wake some threads; we'll use msgsnd */
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (poke_threads & (1 << thr)) {
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
+ paca_ptrs[cpu0+thr]->hw_cpu_id);
+ }
+ }
+ /* now spin until at least 3 threads are awake */
+ do {
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if ((poke_threads & (1 << thr)) &&
+ !paca_ptrs[cpu0+thr]->requested_psscr) {
+ ++awake_threads;
+ poke_threads &= ~(1 << thr);
+ }
+ }
+ } while (awake_threads < need_awake);
+ }
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
+
+void pnv_power9_force_smt4_release(void)
+{
+ int cpu, cpu0, thr;
+
+ cpu = smp_processor_id();
+ cpu0 = cpu & ~(threads_per_core - 1);
+
+ /* clear all the dont_stop flags */
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (cpu != cpu0 + thr)
+ atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
+ }
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
#ifdef CONFIG_HOTPLUG_CPU
static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
@@ -434,7 +507,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
psscr = mfspr(SPRN_PSSCR);
psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
pnv_deepest_stop_psscr_val;
- srr1 = power9_idle_stop(psscr);
+ srr1 = power9_offline_stop(psscr);
} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
(idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
@@ -749,7 +822,8 @@ static int __init pnv_init_idle_states(void)
for (i = 0; i < threads_per_core; i++) {
int j = base_cpu + i;
- paca[j].thread_sibling_pacas[idx] = &paca[cpu];
+ paca_ptrs[j]->thread_sibling_pacas[idx] =
+ paca_ptrs[cpu];
}
}
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 0a253b6..69a4f9e 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -410,6 +410,11 @@ struct npu_context {
void *priv;
};
+struct mmio_atsd_reg {
+ struct npu *npu;
+ int reg;
+};
+
/*
* Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
* if none are available.
@@ -419,7 +424,7 @@ static int get_mmio_atsd_reg(struct npu *npu)
int i;
for (i = 0; i < npu->mmio_atsd_count; i++) {
- if (!test_and_set_bit(i, &npu->mmio_atsd_usage))
+ if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
return i;
}
@@ -428,86 +433,90 @@ static int get_mmio_atsd_reg(struct npu *npu)
static void put_mmio_atsd_reg(struct npu *npu, int reg)
{
- clear_bit(reg, &npu->mmio_atsd_usage);
+ clear_bit_unlock(reg, &npu->mmio_atsd_usage);
}
/* MMIO ATSD register offsets */
#define XTS_ATSD_AVA 1
#define XTS_ATSD_STAT 2
-static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
- unsigned long va)
+static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
+ unsigned long launch, unsigned long va)
{
- int mmio_atsd_reg;
-
- do {
- mmio_atsd_reg = get_mmio_atsd_reg(npu);
- cpu_relax();
- } while (mmio_atsd_reg < 0);
+ struct npu *npu = mmio_atsd_reg->npu;
+ int reg = mmio_atsd_reg->reg;
__raw_writeq(cpu_to_be64(va),
- npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA);
+ npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
eieio();
- __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]);
-
- return mmio_atsd_reg;
+ __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]);
}
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+ unsigned long pid, bool flush)
{
+ int i;
unsigned long launch;
- /* IS set to invalidate matching PID */
- launch = PPC_BIT(12);
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ /* IS set to invalidate matching PID */
+ launch = PPC_BIT(12);
- /* PRS set to process-scoped */
- launch |= PPC_BIT(13);
+ /* PRS set to process-scoped */
+ launch |= PPC_BIT(13);
- /* AP */
- launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+ /* AP */
+ launch |= (u64)
+ mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
+ /* PID */
+ launch |= pid << PPC_BITLSHIFT(38);
- /* No flush */
- launch |= !flush << PPC_BITLSHIFT(39);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
- /* Invalidating the entire process doesn't use a va */
- return mmio_launch_invalidate(npu, launch, 0);
+ /* Invalidating the entire process doesn't use a va */
+ mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
+ }
}
-static int mmio_invalidate_va(struct npu *npu, unsigned long va,
- unsigned long pid, bool flush)
+static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+ unsigned long va, unsigned long pid, bool flush)
{
+ int i;
unsigned long launch;
- /* IS set to invalidate target VA */
- launch = 0;
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
- /* PRS set to process scoped */
- launch |= PPC_BIT(13);
+ /* IS set to invalidate target VA */
+ launch = 0;
- /* AP */
- launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+ /* PRS set to process scoped */
+ launch |= PPC_BIT(13);
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
+ /* AP */
+ launch |= (u64)
+ mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
- /* No flush */
- launch |= !flush << PPC_BITLSHIFT(39);
+ /* PID */
+ launch |= pid << PPC_BITLSHIFT(38);
- return mmio_launch_invalidate(npu, launch, va);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
+ mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
+ }
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
-struct mmio_atsd_reg {
- struct npu *npu;
- int reg;
-};
-
static void mmio_invalidate_wait(
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
{
struct npu *npu;
int i, reg;
@@ -522,16 +531,67 @@ static void mmio_invalidate_wait(
reg = mmio_atsd_reg[i].reg;
while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
cpu_relax();
+ }
+}
- put_mmio_atsd_reg(npu, reg);
+/*
+ * Acquires all the address translation shootdown (ATSD) registers required to
+ * launch an ATSD on all links this npu_context is active on.
+ */
+static void acquire_atsd_reg(struct npu_context *npu_context,
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+ int i, j;
+ struct npu *npu;
+ struct pci_dev *npdev;
+ struct pnv_phb *nphb;
+ for (i = 0; i <= max_npu2_index; i++) {
+ mmio_atsd_reg[i].reg = -1;
+ for (j = 0; j < NV_MAX_LINKS; j++) {
+ /*
+ * There are no ordering requirements with respect to
+ * the setup of struct npu_context, but to ensure
+ * consistent behaviour we need to ensure npdev[][] is
+ * only read once.
+ */
+ npdev = READ_ONCE(npu_context->npdev[i][j]);
+ if (!npdev)
+ continue;
+
+ nphb = pci_bus_to_host(npdev->bus)->private_data;
+ npu = &nphb->npu;
+ mmio_atsd_reg[i].npu = npu;
+ mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+ while (mmio_atsd_reg[i].reg < 0) {
+ mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+ cpu_relax();
+ }
+ break;
+ }
+ }
+}
+
+/*
+ * Release previously acquired ATSD registers. To avoid deadlocks the registers
+ * must be released in the same order they were acquired above in
+ * acquire_atsd_reg.
+ */
+static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+ int i;
+
+ for (i = 0; i <= max_npu2_index; i++) {
/*
- * The GPU requires two flush ATSDs to ensure all entries have
- * been flushed. We use PID 0 as it will never be used for a
- * process on the GPU.
+ * We can't rely on npu_context->npdev[][] being the same here
+ * as when acquire_atsd_reg() was called, hence we use the
+ * values stored in mmio_atsd_reg during the acquire phase
+ * rather than re-reading npdev[][].
*/
- if (flush)
- mmio_invalidate_pid(npu, 0, true);
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
}
}
@@ -542,10 +602,6 @@ static void mmio_invalidate_wait(
static void mmio_invalidate(struct npu_context *npu_context, int va,
unsigned long address, bool flush)
{
- int i, j;
- struct npu *npu;
- struct pnv_phb *nphb;
- struct pci_dev *npdev;
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
@@ -561,37 +617,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
* Loop over all the NPUs this process is active on and launch
* an invalidate.
*/
- for (i = 0; i <= max_npu2_index; i++) {
- mmio_atsd_reg[i].reg = -1;
- for (j = 0; j < NV_MAX_LINKS; j++) {
- npdev = npu_context->npdev[i][j];
- if (!npdev)
- continue;
-
- nphb = pci_bus_to_host(npdev->bus)->private_data;
- npu = &nphb->npu;
- mmio_atsd_reg[i].npu = npu;
-
- if (va)
- mmio_atsd_reg[i].reg =
- mmio_invalidate_va(npu, address, pid,
- flush);
- else
- mmio_atsd_reg[i].reg =
- mmio_invalidate_pid(npu, pid, flush);
-
- /*
- * The NPU hardware forwards the shootdown to all GPUs
- * so we only have to launch one shootdown per NPU.
- */
- break;
- }
+ acquire_atsd_reg(npu_context, mmio_atsd_reg);
+ if (va)
+ mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+ else
+ mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+
+ mmio_invalidate_wait(mmio_atsd_reg);
+ if (flush) {
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have
+ * been flushed. We use PID 0 as it will never be used for a
+ * process on the GPU.
+ */
+ mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+ mmio_invalidate_wait(mmio_atsd_reg);
+ mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+ mmio_invalidate_wait(mmio_atsd_reg);
}
-
- mmio_invalidate_wait(mmio_atsd_reg, flush);
- if (flush)
- /* Wait for the flush to complete */
- mmio_invalidate_wait(mmio_atsd_reg, false);
+ release_atsd_reg(mmio_atsd_reg);
}
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -680,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
+ nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
+ if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
+ &nvlink_index)))
+ return ERR_PTR(-ENODEV);
+
if (!mm || mm->context.id == 0) {
/*
* Kernel thread contexts are not supported and context id 0 is
@@ -707,26 +756,40 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
*/
npu_context = mm->context.npu_context;
if (!npu_context) {
+ rc = -ENOMEM;
npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
- if (!npu_context)
- return ERR_PTR(-ENOMEM);
+ if (npu_context) {
+ kref_init(&npu_context->kref);
+ npu_context->mm = mm;
+ npu_context->mn.ops = &nv_nmmu_notifier_ops;
+ rc = __mmu_notifier_register(&npu_context->mn, mm);
+ }
+
+ if (rc) {
+ kfree(npu_context);
+ opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+ PCI_DEVID(gpdev->bus->number,
+ gpdev->devfn));
+ return ERR_PTR(rc);
+ }
mm->context.npu_context = npu_context;
- npu_context->mm = mm;
- npu_context->mn.ops = &nv_nmmu_notifier_ops;
- __mmu_notifier_register(&npu_context->mn, mm);
- kref_init(&npu_context->kref);
} else {
- kref_get(&npu_context->kref);
+ WARN_ON(!kref_get_unless_zero(&npu_context->kref));
}
npu_context->release_cb = cb;
npu_context->priv = priv;
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
- &nvlink_index)))
- return ERR_PTR(-ENODEV);
- npu_context->npdev[npu->index][nvlink_index] = npdev;
+
+ /*
+ * npdev is a pci_dev pointer set up by the PCI code. We assign it to
+ * npdev[][] to indicate to the mmu notifiers that an invalidation
+ * should also be sent over this nvlink. The notifiers don't use any
+ * other fields in npu_context, so we just need to ensure that when they
+ * dereference npu_context->npdev[][] it is either a valid pointer or
+ * NULL.
+ */
+ WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
if (!nphb->npu.nmmu_flush) {
/*
@@ -778,7 +841,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
return;
- npu_context->npdev[npu->index][nvlink_index] = NULL;
+ WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
kref_put(&npu_context->kref, pnv_npu2_release_context);
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 2fa3ac8..b370151 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -303,26 +303,9 @@ invalid_img:
return rc;
}
-/* Return CPUs to OPAL before starting FW update */
-static void flash_return_cpu(void *info)
-{
- int cpu = smp_processor_id();
-
- if (!cpu_online(cpu))
- return;
-
- /* Disable IRQ */
- hard_irq_disable();
-
- /* Return the CPU to OPAL */
- opal_return_cpu();
-}
-
/* This gets called just before system reboots */
-void opal_flash_term_callback(void)
+void opal_flash_update_print_message(void)
{
- struct cpumask mask;
-
if (update_flash_data.status != FLASH_IMG_READY)
return;
@@ -333,15 +316,6 @@ void opal_flash_term_callback(void)
/* Small delay to help getting the above message out */
msleep(500);
-
- /* Return secondary CPUs to firmware */
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
- if (!cpumask_empty(&mask))
- smp_call_function_many(&mask,
- flash_return_cpu, NULL, false);
- /* Hard disable interrupts */
- hard_irq_disable();
}
/*
@@ -418,12 +392,12 @@ static int alloc_image_buf(char *buffer, size_t count)
void *addr;
int size;
- if (count < sizeof(struct image_header_t)) {
+ if (count < sizeof(image_header)) {
pr_warn("FLASH: Invalid candidate image\n");
return -EINVAL;
}
- memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t));
+ memcpy(&image_header, (void *)buffer, sizeof(image_header));
image_data.size = be32_to_cpu(image_header.size);
pr_debug("FLASH: Candidate image size = %u\n", image_data.size);
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index c9e1a4f..4efc95b 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -314,7 +314,7 @@ static int opal_handle_hmi_event(struct notifier_block *nb,
pr_err("HMI: out of memory, Opal message event not handled\n");
return -ENOMEM;
}
- memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));
+ memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt));
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
list_add(&msg_node->list, &opal_hmi_evt_list);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index f6f55ab..2a14fda 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -110,11 +110,11 @@ static int imc_get_mem_addr_nest(struct device_node *node,
if (nr_chips <= 0)
return -ENODEV;
- base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL);
+ base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL);
if (!base_addr_arr)
return -ENOMEM;
- chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL);
+ chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL);
if (!chipid_arr)
return -ENOMEM;
@@ -125,8 +125,8 @@ static int imc_get_mem_addr_nest(struct device_node *node,
nr_chips))
goto error;
- pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info),
- GFP_KERNEL);
+ pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info),
+ GFP_KERNEL);
if (!pmu_ptr->mem_info)
goto error;
@@ -161,7 +161,7 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
u32 offset;
/* memory for pmu */
- pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);
+ pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
if (!pmu_ptr)
return -ENOMEM;
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 8ddc1ac..dcb42bc 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -112,7 +112,7 @@ static int opal_memory_err_event(struct notifier_block *nb,
"handled\n");
return -ENOMEM;
}
- memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+ memcpy(&msg_node->msg, msg, sizeof(msg_node->msg));
spin_lock_irqsave(&opal_mem_err_lock, flags);
list_add(&msg_node->list, &opal_memory_err_list);
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index 9db4398..ba2ff06 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -59,6 +59,10 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
}
+
+ if (rc)
+ return -EIO;
+
*index += count;
return count;
}
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
index 7313b7f..74986b3 100644
--- a/arch/powerpc/platforms/powernv/opal-psr.c
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -136,7 +136,7 @@ void __init opal_psr_init(void)
return;
}
- psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr),
+ psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs),
GFP_KERNEL);
if (!psr_attrs)
return;
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index 7e5a235..541c9ea 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -166,13 +166,13 @@ void __init opal_sensor_groups_init(void)
if (!nr_attrs)
continue;
- sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr),
+ sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs),
GFP_KERNEL);
if (!sgs[i].sgattrs)
goto out_sgs_sgattrs;
sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
- sizeof(struct attribute *),
+ sizeof(*sgs[i].sg.attrs),
GFP_KERNEL);
if (!sgs[i].sg.attrs) {
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 1b2936b..3da30c2 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -323,3 +323,5 @@ OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 81c0a94..22d5e11 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -46,7 +46,7 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
__func__, dev);
return SCOM_MAP_INVALID;
}
- m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
+ m = kmalloc(sizeof(*m), GFP_KERNEL);
if (!m)
return NULL;
m->chip = be32_to_cpup(gcid);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index c151827..516e23d 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -490,9 +490,12 @@ void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
* opal to trigger checkstop explicitly for error analysis.
* The FSP PRD component would have already got notified
* about this error through other channels.
+ * 4. We are running on a newer skiboot that by default does
+ * not cause a checkstop but instead drops us back to the kernel
+ * to extract context and state at the time of the error.
*/
- ppc_md.restart(NULL);
+ panic(msg);
}
int opal_machine_check(struct pt_regs *regs)
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c
index 94498a0..cee003d 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -16,14 +16,6 @@
#include "pci.h"
-struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
- return of_node_get(hose->dn);
-}
-EXPORT_SYMBOL(pnv_pci_get_phb_node);
-
int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index a6c92c7..3f9c69d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2681,14 +2681,23 @@ static struct pnv_ioda_pe *gpe_table_group_to_npe(
static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
int num, struct iommu_table *tbl)
{
+ struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
+ int num2 = (num == 0) ? 1 : 0;
long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
if (ret)
return ret;
- ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl);
- if (ret)
+ if (table_group->tables[num2])
+ pnv_npu_unset_window(npe, num2);
+
+ ret = pnv_npu_set_window(npe, num, tbl);
+ if (ret) {
pnv_pci_ioda2_unset_window(table_group, num);
+ if (table_group->tables[num2])
+ pnv_npu_set_window(npe, num2,
+ table_group->tables[num2]);
+ }
return ret;
}
@@ -2697,12 +2706,24 @@ static long pnv_pci_ioda2_npu_unset_window(
struct iommu_table_group *table_group,
int num)
{
+ struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
+ int num2 = (num == 0) ? 1 : 0;
long ret = pnv_pci_ioda2_unset_window(table_group, num);
if (ret)
return ret;
- return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num);
+ if (!npe->table_group.tables[num])
+ return 0;
+
+ ret = pnv_npu_unset_window(npe, num);
+ if (ret)
+ return ret;
+
+ if (table_group->tables[num2])
+ ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]);
+
+ return ret;
}
static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
@@ -3843,7 +3864,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb_id = be64_to_cpup(prop64);
pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
- phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
+ phb = memblock_virt_alloc(sizeof(*phb), 0);
/* Allocate PCI controller */
phb->hose = hose = pcibios_alloc_controller(np);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 69d102c..b265ecc 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/iommu.h>
+#include <linux/sched/mm.h>
#include <asm/sections.h>
#include <asm/io.h>
@@ -38,6 +39,7 @@
#include "pci.h"
static DEFINE_MUTEX(p2p_mutex);
+static DEFINE_MUTEX(tunnel_mutex);
int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
{
@@ -1092,6 +1094,139 @@ out:
}
EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
+struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ return of_node_get(hose->dn);
+}
+EXPORT_SYMBOL(pnv_pci_get_phb_node);
+
+int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind)
+{
+ struct device_node *np;
+ const __be32 *prop;
+ struct pnv_ioda_pe *pe;
+ uint16_t window_id;
+ int rc;
+
+ if (!radix_enabled())
+ return -ENXIO;
+
+ if (!(np = pnv_pci_get_phb_node(dev)))
+ return -ENXIO;
+
+ prop = of_get_property(np, "ibm,phb-indications", NULL);
+ of_node_put(np);
+
+ if (!prop || !prop[1])
+ return -ENXIO;
+
+ *asnind = (u64)be32_to_cpu(prop[1]);
+ pe = pnv_ioda_get_pe(dev);
+ if (!pe)
+ return -ENODEV;
+
+ /* Increase real window size to accept as_notify messages. */
+ window_id = (pe->pe_number << 1 ) + 1;
+ rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number,
+ window_id, pe->tce_bypass_base,
+ (uint64_t)1 << 48);
+ return opal_error_code(rc);
+}
+EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel);
+
+int pnv_pci_disable_tunnel(struct pci_dev *dev)
+{
+ struct pnv_ioda_pe *pe;
+
+ pe = pnv_ioda_get_pe(dev);
+ if (!pe)
+ return -ENODEV;
+
+ /* Restore default real window size. */
+ pnv_pci_ioda2_set_bypass(pe, true);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel);
+
+int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
+{
+ __be64 val;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ u64 tunnel_bar;
+ int rc;
+
+ if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR))
+ return -ENXIO;
+ if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
+ return -ENXIO;
+
+ hose = pci_bus_to_host(dev->bus);
+ phb = hose->private_data;
+
+ mutex_lock(&tunnel_mutex);
+ rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val);
+ if (rc != OPAL_SUCCESS) {
+ rc = -EIO;
+ goto out;
+ }
+ tunnel_bar = be64_to_cpu(val);
+ if (enable) {
+ /*
+ * Only one device per PHB can use atomics.
+ * Our policy is first-come, first-served.
+ */
+ if (tunnel_bar) {
+ if (tunnel_bar != addr)
+ rc = -EBUSY;
+ else
+ rc = 0; /* Setting same address twice is ok */
+ goto out;
+ }
+ } else {
+ /*
+ * The device that owns atomics and wants to release
+ * them must pass the same address with enable == 0.
+ */
+ if (tunnel_bar != addr) {
+ rc = -EPERM;
+ goto out;
+ }
+ addr = 0x0ULL;
+ }
+ rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr);
+ rc = opal_error_code(rc);
+out:
+ mutex_unlock(&tunnel_mutex);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar);
+
+#ifdef CONFIG_PPC64 /* for thread.tidr */
+int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid,
+ u32 *tid)
+{
+ struct mm_struct *mm = NULL;
+
+ if (task == NULL)
+ return -EINVAL;
+
+ mm = get_task_mm(task);
+ if (mm == NULL)
+ return -EINVAL;
+
+ *pid = mm->context.id;
+ mmput(mm);
+
+ *tid = task->thread.tidr;
+ *lpid = mfspr(SPRN_LPID);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info);
+#endif
+
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 092715b..ef8c9ce 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -38,57 +38,92 @@
#include <asm/smp.h>
#include <asm/tm.h>
#include <asm/setup.h>
+#include <asm/security_features.h>
#include "powernv.h"
+
+static bool fw_feature_is(const char *state, const char *name,
+ struct device_node *fw_features)
+{
+ struct device_node *np;
+ bool rc = false;
+
+ np = of_get_child_by_name(fw_features, name);
+ if (np) {
+ rc = of_property_read_bool(np, state);
+ of_node_put(np);
+ }
+
+ return rc;
+}
+
+static void init_fw_feat_flags(struct device_node *np)
+{
+ if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
+ security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+ if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
+ security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+ if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
+ security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+ if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
+ security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+ if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
+ security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+ if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
+ security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+ /*
+ * The features below are enabled by default, so we instead look to see
+ * if firmware has *disabled* them, and clear them if so.
+ */
+ if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
+ security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+ if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+ if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
+ if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
+ security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
static void pnv_setup_rfi_flush(void)
{
struct device_node *np, *fw_features;
enum l1d_flush_type type;
- int enable;
+ bool enable;
/* Default to fallback in case fw-features are not available */
type = L1D_FLUSH_FALLBACK;
- enable = 1;
np = of_find_node_by_name(NULL, "ibm,opal");
fw_features = of_get_child_by_name(np, "fw-features");
of_node_put(np);
if (fw_features) {
- np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
- if (np && of_property_read_bool(np, "enabled"))
- type = L1D_FLUSH_MTTRIG;
+ init_fw_feat_flags(fw_features);
+ of_node_put(fw_features);
- of_node_put(np);
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+ type = L1D_FLUSH_MTTRIG;
- np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
- if (np && of_property_read_bool(np, "enabled"))
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
type = L1D_FLUSH_ORI;
-
- of_node_put(np);
-
- /* Enable unless firmware says NOT to */
- enable = 2;
- np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
- if (np && of_property_read_bool(np, "disabled"))
- enable--;
-
- of_node_put(np);
-
- np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
- if (np && of_property_read_bool(np, "disabled"))
- enable--;
-
- np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
- if (np && of_property_read_bool(np, "disabled"))
- enable = 0;
-
- of_node_put(np);
- of_node_put(fw_features);
}
- setup_rfi_flush(type, enable > 0);
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
+
+ setup_rfi_flush(type, enable);
}
static void __init pnv_setup_arch(void)
@@ -166,17 +201,12 @@ static void pnv_prepare_going_down(void)
*/
opal_event_shutdown();
- /* Soft disable interrupts */
- local_irq_disable();
+ /* Print flash update message if one is scheduled. */
+ opal_flash_update_print_message();
- /*
- * Return secondary CPUs to firwmare if a flash update
- * is pending otherwise we will get all sort of error
- * messages about CPU being stuck etc.. This will also
- * have the side effect of hard disabling interrupts so
- * past this point, the kernel is effectively dead.
- */
- opal_flash_term_callback();
+ smp_send_stop();
+
+ hard_irq_disable();
}
static void __noreturn pnv_restart(char *cmd)
@@ -258,7 +288,7 @@ static void pnv_kexec_wait_secondaries_down(void)
if (i != notified) {
printk(KERN_INFO "kexec: waiting for cpu %d "
"(physical %d) to enter OPAL\n",
- i, paca[i].hw_cpu_id);
+ i, paca_ptrs[i]->hw_cpu_id);
notified = i;
}
@@ -270,7 +300,7 @@ static void pnv_kexec_wait_secondaries_down(void)
if (timeout-- == 0) {
printk(KERN_ERR "kexec: timed out waiting for "
"cpu %d (physical %d) to enter OPAL\n",
- i, paca[i].hw_cpu_id);
+ i, paca_ptrs[i]->hw_cpu_id);
break;
}
}
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 9664c84..19af6de 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -80,7 +80,7 @@ static int pnv_smp_kick_cpu(int nr)
* If we already started or OPAL is not supported, we just
* kick the CPU via the PACA
*/
- if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
+ if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
goto kick;
/*
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 596ae2e..4556300 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -280,7 +280,7 @@ void update_subcore_sibling_mask(void)
int offset = (tid / threads_per_subcore) * threads_per_subcore;
int mask = sibling_mask_first_cpu << offset;
- paca[cpu].subcore_sibling_mask = mask;
+ paca_ptrs[cpu]->subcore_sibling_mask = mask;
}
}
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
index ca22f1e..4f7276e 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -166,19 +166,20 @@ void vas_window_init_dbgdir(struct vas_window *window)
return;
-free_name:
- kfree(window->dbgname);
- window->dbgname = NULL;
-
remove_dir:
debugfs_remove_recursive(window->dbgdir);
window->dbgdir = NULL;
+
+free_name:
+ kfree(window->dbgname);
+ window->dbgname = NULL;
}
void vas_instance_init_dbgdir(struct vas_instance *vinst)
{
struct dentry *d;
+ vas_init_dbgdir();
if (!vas_debugfs)
return;
@@ -201,8 +202,18 @@ free_name:
vinst->dbgdir = NULL;
}
+/*
+ * Set up the "root" VAS debugfs dir. Return if we already set it up
+ * (or failed to) in an earlier instance of VAS.
+ */
void vas_init_dbgdir(void)
{
+ static bool first_time = true;
+
+ if (!first_time)
+ return;
+
+ first_time = false;
vas_debugfs = debugfs_create_dir("vas", NULL);
if (IS_ERR(vas_debugfs))
vas_debugfs = NULL;
diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h
new file mode 100644
index 0000000..a449b9f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-trace.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vas
+
+#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _VAS_TRACE_H
+#include <linux/tracepoint.h>
+#include <linux/sched.h>
+#include <asm/vas.h>
+
+TRACE_EVENT( vas_rx_win_open,
+
+ TP_PROTO(struct task_struct *tsk,
+ int vasid,
+ int cop,
+ struct vas_rx_win_attr *rxattr),
+
+ TP_ARGS(tsk, vasid, cop, rxattr),
+
+ TP_STRUCT__entry(
+ __field(struct task_struct *, tsk)
+ __field(int, pid)
+ __field(int, cop)
+ __field(int, vasid)
+ __field(struct vas_rx_win_attr *, rxattr)
+ __field(int, lnotify_lpid)
+ __field(int, lnotify_pid)
+ __field(int, lnotify_tid)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = tsk->pid;
+ __entry->vasid = vasid;
+ __entry->cop = cop;
+ __entry->lnotify_lpid = rxattr->lnotify_lpid;
+ __entry->lnotify_pid = rxattr->lnotify_pid;
+ __entry->lnotify_tid = rxattr->lnotify_tid;
+ ),
+
+ TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d",
+ __entry->pid, __entry->vasid, __entry->cop,
+ __entry->lnotify_lpid, __entry->lnotify_pid,
+ __entry->lnotify_tid)
+);
+
+TRACE_EVENT( vas_tx_win_open,
+
+ TP_PROTO(struct task_struct *tsk,
+ int vasid,
+ int cop,
+ struct vas_tx_win_attr *txattr),
+
+ TP_ARGS(tsk, vasid, cop, txattr),
+
+ TP_STRUCT__entry(
+ __field(struct task_struct *, tsk)
+ __field(int, pid)
+ __field(int, cop)
+ __field(int, vasid)
+ __field(struct vas_tx_win_attr *, txattr)
+ __field(int, lpid)
+ __field(int, pidr)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = tsk->pid;
+ __entry->vasid = vasid;
+ __entry->cop = cop;
+ __entry->lpid = txattr->lpid;
+ __entry->pidr = txattr->pidr;
+ ),
+
+ TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d",
+ __entry->pid, __entry->vasid, __entry->cop,
+ __entry->lpid, __entry->pidr)
+);
+
+TRACE_EVENT( vas_paste_crb,
+
+ TP_PROTO(struct task_struct *tsk,
+ struct vas_window *win),
+
+ TP_ARGS(tsk, win),
+
+ TP_STRUCT__entry(
+ __field(struct task_struct *, tsk)
+ __field(struct vas_window *, win)
+ __field(int, pid)
+ __field(int, vasid)
+ __field(int, winid)
+ __field(unsigned long, paste_kaddr)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = tsk->pid;
+ __entry->vasid = win->vinst->vas_id;
+ __entry->winid = win->winid;
+ __entry->paste_kaddr = (unsigned long)win->paste_kaddr
+ ),
+
+ TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx\n",
+ __entry->pid, __entry->vasid, __entry->winid,
+ __entry->paste_kaddr)
+);
+
+#endif /* _VAS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv
+#define TRACE_INCLUDE_FILE vas-trace
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index b7c53a5..ff9f488 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -21,6 +21,9 @@
#include "vas.h"
#include "copy-paste.h"
+#define CREATE_TRACE_POINTS
+#include "vas-trace.h"
+
/*
* Compute the paste address region for the window @window using the
* ->paste_base_addr and ->paste_win_id_shift we got from device tree.
@@ -880,6 +883,8 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
struct vas_winctx winctx;
struct vas_instance *vinst;
+ trace_vas_rx_win_open(current, vasid, cop, rxattr);
+
if (!rx_win_args_valid(cop, rxattr))
return ERR_PTR(-EINVAL);
@@ -1008,6 +1013,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
struct vas_winctx winctx;
struct vas_instance *vinst;
+ trace_vas_tx_win_open(current, vasid, cop, attr);
+
if (!tx_win_args_valid(cop, attr))
return ERR_PTR(-EINVAL);
@@ -1100,6 +1107,8 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
void *addr;
uint64_t val;
+ trace_vas_paste_crb(current, txwin);
+
/*
* Only NX windows are supported for now and hardware assumes
* report-enable flag is set for NX windows. Ensure software
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index aebbe95..5a2b24c 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -160,8 +160,6 @@ static int __init vas_init(void)
int found = 0;
struct device_node *dn;
- vas_init_dbgdir();
-
platform_driver_register(&vas_driver);
for_each_compatible_node(dn, NULL, "ibm,vas") {
@@ -169,8 +167,10 @@ static int __init vas_init(void)
found++;
}
- if (!found)
+ if (!found) {
+ platform_driver_unregister(&vas_driver);
return -ENODEV;
+ }
pr_devel("Found %d instances\n", found);
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 7f870ec..8c7009d 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -524,8 +524,7 @@ static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
int result;
struct dma_chunk *c;
- c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
-
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c) {
result = -ENOMEM;
goto fail_alloc;
@@ -570,8 +569,7 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__,
phys_addr, ps3_mm_phys_to_lpar(phys_addr), len);
- c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
-
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c) {
result = -ENOMEM;
goto fail_alloc;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 652d3e96..6ef77ca 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -234,7 +234,7 @@ static void pseries_cpu_die(unsigned int cpu)
* done here. Change isolate state to Isolate and
* change allocation-state to Unusable.
*/
- paca[cpu].cpu_start = 0;
+ paca_ptrs[cpu]->cpu_start = 0;
}
/*
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index eeb1342..3fe1267 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -23,7 +23,12 @@
void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
{
- /* Don't risk a hypervisor call if we're crashing */
+ /*
+ * Don't risk a hypervisor call if we're crashing
+ * XXX: Why? The hypervisor is not crashing. It might be better
+ * to at least attempt unregister to avoid the hypervisor stepping
+ * on our memory.
+ */
if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
int ret;
int cpu = smp_processor_id();
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 0ee4a46..adb996e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -99,7 +99,7 @@ void vpa_init(int cpu)
* reports that. All SPLPAR support SLB shadow buffer.
*/
if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
- addr = __pa(paca[cpu].slb_shadow_ptr);
+ addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr);
ret = register_slb_shadow(hwcpu, addr);
if (ret)
pr_err("WARNING: SLB shadow buffer registration for "
@@ -111,7 +111,7 @@ void vpa_init(int cpu)
/*
* Register dispatch trace log, if one has been allocated.
*/
- pp = &paca[cpu];
+ pp = paca_ptrs[cpu];
dtl = pp->dispatch_log;
if (dtl) {
pp->dtl_ridx = 0;
@@ -306,14 +306,14 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
want_v = hpte_encode_avpn(vpn, psize, ssize);
- pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
- want_v, slot, flags, psize);
-
flags = (newpp & 7) | H_AVPN;
if (mmu_has_feature(MMU_FTR_KERNEL_RO))
/* Move pp0 into bit 8 (IBM 55) */
flags |= (newpp & HPTE_R_PP0) >> 55;
+ pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
+ want_v, slot, flags, psize);
+
lpar_rc = plpar_pte_protect(flags, slot, want_v);
if (lpar_rc == H_NOT_FOUND) {
@@ -726,15 +726,18 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
return 0;
}
-/* Actually only used for radix, so far */
static int pseries_lpar_register_process_table(unsigned long base,
unsigned long page_size, unsigned long table_size)
{
long rc;
- unsigned long flags = PROC_TABLE_NEW;
+ unsigned long flags = 0;
+ if (table_size)
+ flags |= PROC_TABLE_NEW;
if (radix_enabled())
flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
+ else
+ flags |= PROC_TABLE_HPT_SLB;
for (;;) {
rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
page_size, table_size);
@@ -760,6 +763,7 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+ register_process_table = pseries_lpar_register_process_table;
if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 0f7fb71..8a8033a 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -348,6 +348,9 @@ void post_mobility_fixup(void)
printk(KERN_ERR "Post-mobility device tree update "
"failed: %d\n", rc);
+ /* Possibly switch to a new RFI flush type */
+ pseries_setup_rfi_flush();
+
return;
}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1ae1d9f..60db2ee 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -27,6 +27,14 @@ extern int pSeries_machine_check_exception(struct pt_regs *regs);
#ifdef CONFIG_SMP
extern void smp_init_pseries(void);
+
+/* Get state of physical CPU from query_cpu_stopped */
+int smp_query_cpu_stopped(unsigned int pcpu);
+#define QCSS_STOPPED 0
+#define QCSS_STOPPING 1
+#define QCSS_NOT_STOPPED 2
+#define QCSS_HARDWARE_ERROR -1
+#define QCSS_HARDWARE_BUSY -2
#else
static inline void smp_init_pseries(void) { };
#endif
@@ -100,4 +108,6 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
+void pseries_setup_rfi_flush(void);
+
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1a52762..b55ad42 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -68,6 +68,7 @@
#include <asm/plpar_wrappers.h>
#include <asm/kexec.h>
#include <asm/isa-bridge.h>
+#include <asm/security_features.h>
#include "pseries.h"
@@ -246,7 +247,7 @@ static int alloc_dispatch_logs(void)
return 0;
for_each_possible_cpu(cpu) {
- pp = &paca[cpu];
+ pp = paca_ptrs[cpu];
dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
if (!dtl) {
pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
@@ -459,36 +460,78 @@ static void __init find_and_init_phbs(void)
of_pci_check_probe_only();
}
-static void pseries_setup_rfi_flush(void)
+static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
+{
+ /*
+ * The features below are disabled by default, so we instead look to see
+ * if firmware has *enabled* them, and set them if so.
+ */
+ if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
+ security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+ if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
+ security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+ if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+ security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+ if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+ security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+ if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
+ security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+ if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
+ security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+ /*
+ * The features below are enabled by default, so we instead look to see
+ * if firmware has *disabled* them, and clear them if so.
+ */
+ if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
+ security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+ if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+ if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+ security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
+void pseries_setup_rfi_flush(void)
{
struct h_cpu_char_result result;
enum l1d_flush_type types;
bool enable;
long rc;
- /* Enable by default */
- enable = true;
+ /*
+ * Set features to the defaults assumed by init_cpu_char_feature_flags()
+ * so it can set/clear again any features that might have changed after
+ * migration, and in case the hypercall fails and it is not even called.
+ */
+ powerpc_security_features = SEC_FTR_DEFAULT;
rc = plpar_get_cpu_characteristics(&result);
- if (rc == H_SUCCESS) {
- types = L1D_FLUSH_NONE;
+ if (rc == H_SUCCESS)
+ init_cpu_char_feature_flags(&result);
- if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
- types |= L1D_FLUSH_MTTRIG;
- if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
- types |= L1D_FLUSH_ORI;
+ /*
+ * We're the guest so this doesn't apply to us, clear it to simplify
+ * handling of it elsewhere.
+ */
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
- /* Use fallback if nothing set in hcall */
- if (types == L1D_FLUSH_NONE)
- types = L1D_FLUSH_FALLBACK;
+ types = L1D_FLUSH_FALLBACK;
- if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
- (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
- enable = false;
- } else {
- /* Default to fallback if case hcall is not available */
- types = L1D_FLUSH_FALLBACK;
- }
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+ types |= L1D_FLUSH_MTTRIG;
+
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+ types |= L1D_FLUSH_ORI;
+
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
setup_rfi_flush(types, enable);
}
@@ -739,7 +782,7 @@ static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
/* PAPR says we can't set HYP */
dawrx &= ~DAWRX_HYP;
- return plapr_set_watchpoint0(dawr, dawrx);
+ return plpar_set_watchpoint0(dawr, dawrx);
}
#define CMO_CHARACTERISTICS_TOKEN 44
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 2e18482..3df4612 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -110,7 +110,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
}
/* Fixup atomic count: it exited inside IRQ handler. */
- task_thread_info(paca[lcpu].__current)->preempt_count = 0;
+ task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0;
#ifdef CONFIG_HOTPLUG_CPU
if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
goto out;
@@ -165,7 +165,7 @@ static int smp_pSeries_kick_cpu(int nr)
* cpu_start field to become non-zero After we set cpu_start,
* the processor will continue on to secondary_start
*/
- paca[nr].cpu_start = 1;
+ paca_ptrs[nr]->cpu_start = 1;
#ifdef CONFIG_HOTPLUG_CPU
set_preferred_offline_state(nr, CPU_STATE_ONLINE);
@@ -215,7 +215,7 @@ static int pseries_cause_nmi_ipi(int cpu)
hwcpu = get_hard_smp_processor_id(cpu);
}
- if (plapr_signal_sys_reset(hwcpu) == H_SUCCESS)
+ if (plpar_signal_sys_reset(hwcpu) == H_SUCCESS)
return 1;
return 0;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 7306780..1d4e0ef6 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -626,7 +626,7 @@ static inline u32 mpic_physmask(u32 cpumask)
int i;
u32 mask = 0;
- for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
+ for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1)
mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
return mask;
}
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 1459f4e..37bfbc5 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -164,7 +164,7 @@ void icp_native_cause_ipi_rm(int cpu)
* Just like the cause_ipi functions, it is required to
* include a full barrier before causing the IPI.
*/
- xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
mb();
__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 40c0611..34590150 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -246,7 +246,7 @@ notrace void xmon_xive_do_dump(int cpu)
u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi,
val & XIVE_ESB_VAL_P ? 'P' : 'p',
- val & XIVE_ESB_VAL_P ? 'Q' : 'q');
+ val & XIVE_ESB_VAL_Q ? 'Q' : 'q');
}
#endif
}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 82e1a3e..a0842f1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -41,6 +41,7 @@
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
+#include <asm/plpar_wrappers.h>
#include <asm/cputable.h>
#include <asm/rtas.h>
#include <asm/sstep.h>
@@ -61,12 +62,6 @@
#include <asm/paca.h>
#endif
-#if defined(CONFIG_PPC_SPLPAR)
-#include <asm/plpar_wrappers.h>
-#else
-static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; };
-#endif
-
#include "nonstdio.h"
#include "dis-asm.h"
@@ -328,7 +323,7 @@ static void write_ciabr(unsigned long ciabr)
mtspr(SPRN_CIABR, ciabr);
return;
}
- plapr_set_ciabr(ciabr);
+ plpar_set_ciabr(ciabr);
}
/**
@@ -1273,6 +1268,16 @@ static long check_bp_loc(unsigned long addr)
return 1;
}
+/* Force enable xmon if not already enabled */
+static inline void force_enable_xmon(void)
+{
+ /* Enable xmon hooks if needed */
+ if (!xmon_on) {
+ printf("xmon: Enabling debugger hooks\n");
+ xmon_on = 1;
+ }
+}
+
static char *breakpoint_help_string =
"Breakpoint command usage:\n"
"b show breakpoints\n"
@@ -1297,6 +1302,10 @@ bpt_cmds(void)
static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
int mode;
case 'd': /* bd - hardware data breakpoint */
+ if (!ppc_breakpoint_available()) {
+ printf("Hardware data breakpoint not supported on this cpu\n");
+ break;
+ }
mode = 7;
cmd = inchar();
if (cmd == 'r')
@@ -1315,6 +1324,8 @@ bpt_cmds(void)
dabr.address &= ~HW_BRK_TYPE_DABR;
dabr.enabled = mode | BP_DABR;
}
+
+ force_enable_xmon();
break;
case 'i': /* bi - hardware instr breakpoint */
@@ -1335,6 +1346,7 @@ bpt_cmds(void)
if (bp != NULL) {
bp->enabled |= BP_CIABR;
iabr = bp;
+ force_enable_xmon();
}
break;
#endif
@@ -1399,8 +1411,10 @@ bpt_cmds(void)
if (!check_bp_loc(a))
break;
bp = new_breakpoint(a);
- if (bp != NULL)
+ if (bp != NULL) {
bp->enabled |= BP_TRAP;
+ force_enable_xmon();
+ }
break;
}
}
@@ -2327,7 +2341,7 @@ static void dump_one_paca(int cpu)
catch_memory_errors = 1;
sync();
- p = &paca[cpu];
+ p = paca_ptrs[cpu];
printf("paca for cpu 0x%x @ %px:\n", cpu, p);
@@ -3649,11 +3663,35 @@ device_initcall(setup_xmon_sysrq);
#endif /* CONFIG_MAGIC_SYSRQ */
#ifdef CONFIG_DEBUG_FS
+static void clear_all_bpt(void)
+{
+ int i;
+
+ /* clear/unpatch all breakpoints */
+ remove_bpts();
+ remove_cpu_bpts();
+
+ /* Disable all breakpoints */
+ for (i = 0; i < NBPTS; ++i)
+ bpts[i].enabled = 0;
+
+ /* Clear any data or iabr breakpoints */
+ if (iabr || dabr.enabled) {
+ iabr = NULL;
+ dabr.enabled = 0;
+ }
+
+ printf("xmon: All breakpoints cleared\n");
+}
+
static int xmon_dbgfs_set(void *data, u64 val)
{
xmon_on = !!val;
xmon_init(xmon_on);
+ /* make sure all breakpoints removed when disabling */
+ if (!xmon_on)
+ clear_all_bpt();
return 0;
}
diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c
index 15db69d..ca623e6 100644
--- a/drivers/macintosh/adb-iop.c
+++ b/drivers/macintosh/adb-iop.c
@@ -53,13 +53,13 @@ static void adb_iop_poll(void);
static int adb_iop_reset_bus(void);
struct adb_driver adb_iop_driver = {
- "ISM IOP",
- adb_iop_probe,
- adb_iop_init,
- adb_iop_send_request,
- adb_iop_autopoll,
- adb_iop_poll,
- adb_iop_reset_bus
+ .name = "ISM IOP",
+ .probe = adb_iop_probe,
+ .init = adb_iop_init,
+ .send_request = adb_iop_send_request,
+ .autopoll = adb_iop_autopoll,
+ .poll = adb_iop_poll,
+ .reset_bus = adb_iop_reset_bus
};
static void adb_iop_end_req(struct adb_request *req, int state)
diff --git a/drivers/macintosh/ans-lcd.c b/drivers/macintosh/ans-lcd.c
index 1de81d9..c8e078b 100644
--- a/drivers/macintosh/ans-lcd.c
+++ b/drivers/macintosh/ans-lcd.c
@@ -201,3 +201,4 @@ anslcd_exit(void)
module_init(anslcd_init);
module_exit(anslcd_exit);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
index 9a6223a..eb3adfb 100644
--- a/drivers/macintosh/macio-adb.c
+++ b/drivers/macintosh/macio-adb.c
@@ -70,14 +70,13 @@ static void macio_adb_poll(void);
static int macio_adb_reset_bus(void);
struct adb_driver macio_adb_driver = {
- "MACIO",
- macio_probe,
- macio_init,
- macio_send_request,
- /*macio_write,*/
- macio_adb_autopoll,
- macio_adb_poll,
- macio_adb_reset_bus
+ .name = "MACIO",
+ .probe = macio_probe,
+ .init = macio_init,
+ .send_request = macio_send_request,
+ .autopoll = macio_adb_autopoll,
+ .poll = macio_adb_poll,
+ .reset_bus = macio_adb_reset_bus,
};
int macio_probe(void)
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index 910b5b6..1f29d24 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -154,8 +154,8 @@ static void rackmeter_do_pause(struct rackmeter *rm, int pause)
DBDMA_DO_STOP(rm->dma_regs);
return;
}
- memset(rdma->buf1, 0, ARRAY_SIZE(rdma->buf1));
- memset(rdma->buf2, 0, ARRAY_SIZE(rdma->buf2));
+ memset(rdma->buf1, 0, sizeof(rdma->buf1));
+ memset(rdma->buf2, 0, sizeof(rdma->buf2));
rm->dma_buf_v->mark = 0;
@@ -397,7 +397,7 @@ static int rackmeter_probe(struct macio_dev* mdev,
}
/* Create and initialize our instance data */
- rm = kzalloc(sizeof(struct rackmeter), GFP_KERNEL);
+ rm = kzalloc(sizeof(*rm), GFP_KERNEL);
if (rm == NULL) {
printk(KERN_ERR "rackmeter: failed to allocate memory !\n");
rc = -ENOMEM;
diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 4ba06a1..cf6f7d5 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -91,13 +91,13 @@ static void macii_poll(void);
static int macii_reset_bus(void);
struct adb_driver via_macii_driver = {
- "Mac II",
- macii_probe,
- macii_init,
- macii_send_request,
- macii_autopoll,
- macii_poll,
- macii_reset_bus
+ .name = "Mac II",
+ .probe = macii_probe,
+ .init = macii_init,
+ .send_request = macii_send_request,
+ .autopoll = macii_autopoll,
+ .poll = macii_poll,
+ .reset_bus = macii_reset_bus,
};
static enum macii_state {
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 94c0f3f..433dbed 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -198,14 +198,14 @@ static const struct file_operations pmu_battery_proc_fops;
static const struct file_operations pmu_options_proc_fops;
#ifdef CONFIG_ADB
-struct adb_driver via_pmu_driver = {
- "PMU",
- pmu_probe,
- pmu_init,
- pmu_send_request,
- pmu_adb_autopoll,
- pmu_poll_adb,
- pmu_adb_reset_bus
+const struct adb_driver via_pmu_driver = {
+ .name = "PMU",
+ .probe = pmu_probe,
+ .init = pmu_init,
+ .send_request = pmu_send_request,
+ .autopoll = pmu_adb_autopoll,
+ .poll = pmu_poll_adb,
+ .reset_bus = pmu_adb_reset_bus,
};
#endif /* CONFIG_ADB */
diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c
index 7d9c4ba..d545ed4 100644
--- a/drivers/macintosh/via-pmu68k.c
+++ b/drivers/macintosh/via-pmu68k.c
@@ -120,13 +120,13 @@ static void pmu_enable_backlight(int on);
static void pmu_set_brightness(int level);
struct adb_driver via_pmu_driver = {
- "68K PMU",
- pmu_probe,
- pmu_init,
- pmu_send_request,
- pmu_autopoll,
- pmu_poll,
- pmu_reset_bus
+ .name = "68K PMU",
+ .probe = pmu_probe,
+ .init = pmu_init,
+ .send_request = pmu_send_request,
+ .autopoll = pmu_autopoll,
+ .poll = pmu_poll,
+ .reset_bus = pmu_reset_bus,
};
/*
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 4f015da..a4c9c82 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -369,6 +369,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
#define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */
#define CXL_PSL_TFC_An_R (1ull << (63-31)) /* Restart PSL transaction */
+/****** CXL_PSL_DEBUG *****************************************************/
+#define CXL_PSL_DEBUG_CDC (1ull << (63-27)) /* Coherent Data cache support */
+
/****** CXL_XSL9_IERAT_ERAT - CAIA 2 **********************************/
#define CXL_XSL9_IERAT_MLPID (1ull << (63-0)) /* Match LPID */
#define CXL_XSL9_IERAT_MPID (1ull << (63-1)) /* Match PID */
@@ -669,6 +672,7 @@ struct cxl_native {
irq_hw_number_t err_hwirq;
unsigned int err_virq;
u64 ps_off;
+ bool no_data_cache; /* set if no data cache on the card */
const struct cxl_service_layer_ops *sl_ops;
};
@@ -1065,7 +1069,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
u32 *phb_index, u64 *capp_unit_id);
int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index 30ccba4..0bc7c31 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
if (rc)
return rc;
- rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+ rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
@@ -208,49 +208,74 @@ int cxllib_get_PE_attributes(struct task_struct *task,
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
-int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+static int get_vma_info(struct mm_struct *mm, u64 addr,
+ u64 *vma_start, u64 *vma_end,
+ unsigned long *page_size)
{
- int rc;
- u64 dar;
struct vm_area_struct *vma = NULL;
- unsigned long page_size;
-
- if (mm == NULL)
- return -EFAULT;
+ int rc = 0;
down_read(&mm->mmap_sem);
vma = find_vma(mm, addr);
if (!vma) {
- pr_err("Can't find vma for addr %016llx\n", addr);
rc = -EFAULT;
goto out;
}
- /* get the size of the pages allocated */
- page_size = vma_kernel_pagesize(vma);
-
- for (dar = (addr & ~(page_size - 1)); dar < (addr + size); dar += page_size) {
- if (dar < vma->vm_start || dar >= vma->vm_end) {
- vma = find_vma(mm, addr);
- if (!vma) {
- pr_err("Can't find vma for addr %016llx\n", addr);
- rc = -EFAULT;
- goto out;
- }
- /* get the size of the pages allocated */
- page_size = vma_kernel_pagesize(vma);
+ *page_size = vma_kernel_pagesize(vma);
+ *vma_start = vma->vm_start;
+ *vma_end = vma->vm_end;
+out:
+ up_read(&mm->mmap_sem);
+ return rc;
+}
+
+int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+{
+ int rc;
+ u64 dar, vma_start, vma_end;
+ unsigned long page_size;
+
+ if (mm == NULL)
+ return -EFAULT;
+
+ /*
+ * The buffer we have to process can extend over several pages
+ * and may also cover several VMAs.
+ * We iterate over all the pages. The page size could vary
+ * between VMAs.
+ */
+ rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
+ if (rc)
+ return rc;
+
+ for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
+ dar += page_size) {
+ if (dar < vma_start || dar >= vma_end) {
+ /*
+ * We don't hold the mm->mmap_sem semaphore
+ * while iterating, since the semaphore is
+ * required by one of the lower-level page
+ * fault processing functions and it could
+ * create a deadlock.
+ *
+ * It means the VMAs can be altered between 2
+ * loop iterations and we could theoretically
+ * miss a page (however unlikely). But that's
+ * not really a problem, as the driver will
+ * retry access, get another page fault on the
+ * missing page and call us again.
+ */
+ rc = get_vma_info(mm, dar, &vma_start, &vma_end,
+ &page_size);
+ if (rc)
+ return rc;
}
rc = cxl_handle_mm_fault(mm, flags, dar);
- if (rc) {
- pr_err("cxl_handle_mm_fault failed %d", rc);
- rc = -EFAULT;
- goto out;
- }
+ if (rc)
+ return -EFAULT;
}
- rc = 0;
-out:
- up_read(&mm->mmap_sem);
- return rc;
+ return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 1b3d7c6..98f867f 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -353,8 +353,17 @@ int cxl_data_cache_flush(struct cxl *adapter)
u64 reg;
unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
- pr_devel("Flushing data cache\n");
+ /*
+ * Do a datacache flush only if datacache is available.
+ * On PSL9D the datacache is absent, hence a flush operation
+ * would time out.
+ */
+ if (adapter->native->no_data_cache) {
+ pr_devel("No PSL data cache. Ignoring cache flush req.\n");
+ return 0;
+ }
+ pr_devel("Flushing data cache\n");
reg = cxl_p1_read(adapter, CXL_PSL_Control);
reg |= CXL_PSL_Control_Fr;
cxl_p1_write(adapter, CXL_PSL_Control, reg);
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 758842f..83f1d08 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -407,21 +407,59 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
return 0;
}
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind,
+ u64 *nbwind)
+{
+ static u64 nbw, asn, capi = 0;
+ struct device_node *np;
+ const __be32 *prop;
+
+ mutex_lock(&indications_mutex);
+ if (!capi) {
+ if (!(np = pnv_pci_get_phb_node(dev))) {
+ mutex_unlock(&indications_mutex);
+ return -ENODEV;
+ }
+
+ prop = of_get_property(np, "ibm,phb-indications", NULL);
+ if (!prop) {
+ nbw = 0x0300UL; /* legacy values */
+ asn = 0x0400UL;
+ capi = 0x0200UL;
+ } else {
+ nbw = (u64)be32_to_cpu(prop[2]);
+ asn = (u64)be32_to_cpu(prop[1]);
+ capi = (u64)be32_to_cpu(prop[0]);
+ }
+ of_node_put(np);
+ }
+ *capiind = capi;
+ *asnind = asn;
+ *nbwind = nbw;
+ mutex_unlock(&indications_mutex);
+ return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
{
u64 xsl_dsnctl;
+ u64 capiind, asnind, nbwind;
/*
* CAPI Identifier bits [0:7]
* bit 61:60 MSI bits --> 0
* bit 59 TVT selector --> 0
*/
+ if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+ return -ENODEV;
/*
* Tell XSL where to route data to.
* The field chipid should match the PHB CAPI_CMPM register
*/
- xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+ xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
/* nMMU_ID Defaults to: b’000001001’*/
@@ -435,14 +473,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
* nbwind=0x03, bits [57:58], must include capi indicator.
* Not supported on P9 DD1.
*/
- xsl_dsnctl |= ((u64)0x03 << (63-47));
+ xsl_dsnctl |= (nbwind << (63-55));
/*
* Upper 16b address bits of ASB_Notify messages sent to the
* system. Need to match the PHB’s ASN Compare/Mask Register.
* Not supported on P9 DD1.
*/
- xsl_dsnctl |= ((u64)0x04 << (63-55));
+ xsl_dsnctl |= asnind;
}
*reg = xsl_dsnctl;
@@ -456,13 +494,14 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
u64 chipid;
u32 phb_index;
u64 capp_unit_id;
+ u64 psl_debug;
int rc;
rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
if (rc)
return rc;
- rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl);
+ rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl);
if (rc)
return rc;
@@ -503,8 +542,22 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
if (cxl_is_power9_dd1()) {
/* Disabling deadlock counter CAR */
cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL);
- } else
- cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x4000000000000000ULL);
+ /* Enable NORST */
+ cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL);
+ } else {
+ /* Enable NORST and DD2 features */
+ cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL);
+ }
+
+ /*
+ * Check if PSL has data-cache. We need to flush the adapter datacache
+ * when the adapter is about to be removed.
+ */
+ psl_debug = cxl_p1_read(adapter, CXL_PSL9_DEBUG);
+ if (psl_debug & CXL_PSL_DEBUG_CDC) {
+ dev_dbg(&dev->dev, "No data-cache present\n");
+ adapter->native->no_data_cache = true;
+ }
return 0;
}
@@ -568,12 +621,6 @@ static int init_implementation_adapter_regs_xsl(struct cxl *adapter, struct pci_
/* For the PSL this is a multiple for 0 < n <= 7: */
#define PSL_2048_250MHZ_CYCLES 1
-static void write_timebase_ctrl_psl9(struct cxl *adapter)
-{
- cxl_p1_write(adapter, CXL_PSL9_TB_CTLSTAT,
- TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES));
-}
-
static void write_timebase_ctrl_psl8(struct cxl *adapter)
{
cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
@@ -612,9 +659,6 @@ static u64 timebase_read_xsl(struct cxl *adapter)
static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
{
- u64 psl_tb;
- int delta;
- unsigned int retry = 0;
struct device_node *np;
adapter->psl_timebase_synced = false;
@@ -635,26 +679,13 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
* Setup PSL Timebase Control and Status register
* with the recommended Timebase Sync Count value
*/
- adapter->native->sl_ops->write_timebase_ctrl(adapter);
+ if (adapter->native->sl_ops->write_timebase_ctrl)
+ adapter->native->sl_ops->write_timebase_ctrl(adapter);
/* Enable PSL Timebase */
cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000);
cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
- /* Wait until CORE TB and PSL TB difference <= 16usecs */
- do {
- msleep(1);
- if (retry++ > 5) {
- dev_info(&dev->dev, "PSL timebase can't synchronize\n");
- return;
- }
- psl_tb = adapter->native->sl_ops->timebase_read(adapter);
- delta = mftb() - psl_tb;
- if (delta < 0)
- delta = -delta;
- } while (tb_to_ns(delta) > 16000);
-
- adapter->psl_timebase_synced = true;
return;
}
@@ -1449,10 +1480,8 @@ int cxl_pci_reset(struct cxl *adapter)
/*
* The adapter is about to be reset, so ignore errors.
- * Not supported on P9 DD1
*/
- if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
- cxl_data_cache_flush(adapter);
+ cxl_data_cache_flush(adapter);
/* pcie_warm_reset requests a fundamental pci reset which includes a
* PERST assert/deassert. PERST triggers a loading of the image
@@ -1801,7 +1830,6 @@ static const struct cxl_service_layer_ops psl9_ops = {
.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
.err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9,
.debugfs_stop_trace = cxl_stop_trace_psl9,
- .write_timebase_ctrl = write_timebase_ctrl_psl9,
.timebase_read = timebase_read_psl9,
.capi_mode = OPAL_PHB_CAPI_MODE_CAPI,
.needs_reset_before_disable = true,
@@ -1936,10 +1964,8 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
/*
* Flush adapter datacache as its about to be removed.
- * Not supported on P9 DD1.
*/
- if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
- cxl_data_cache_flush(adapter);
+ cxl_data_cache_flush(adapter);
cxl_deconfigure_adapter(adapter);
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index a8b6d6a..95285b7 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -62,7 +62,19 @@ static ssize_t psl_timebase_synced_show(struct device *device,
char *buf)
{
struct cxl *adapter = to_cxl_adapter(device);
+ u64 psl_tb, delta;
+ /* Recompute the status only in native mode */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ psl_tb = adapter->native->sl_ops->timebase_read(adapter);
+ delta = abs(mftb() - psl_tb);
+
+ /* CORE TB and PSL TB difference < 16usecs ? */
+ adapter->psl_timebase_synced = (tb_to_ns(delta) < 16000) ? true : false;
+ pr_devel("PSL timebase %s - delta: 0x%016llx\n",
+ (tb_to_ns(delta) < 16000) ? "synchronized" :
+ "not synchronized", tb_to_ns(delta));
+ }
return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
}
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 23da304..d441006 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -919,8 +919,8 @@ static void pnv_php_unregister_one(struct device_node *dn)
return;
php_slot->state = PNV_PHP_STATE_OFFLINE;
- pnv_php_put_slot(php_slot);
pci_hp_deregister(&php_slot->slot);
+ pnv_php_put_slot(php_slot);
}
static void pnv_php_unregister(struct device_node *dn)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 0257aee..ca59883 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -318,6 +318,9 @@ static inline bool memblock_bottom_up(void)
phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
phys_addr_t start, phys_addr_t end,
ulong flags);
+phys_addr_t memblock_alloc_base_nid(phys_addr_t size,
+ phys_addr_t align, phys_addr_t max_addr,
+ int nid, ulong flags);
phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
phys_addr_t max_addr);
phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index a366cc3..ea85052 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -106,6 +106,10 @@ extern const struct raid6_calls raid6_avx512x1;
extern const struct raid6_calls raid6_avx512x2;
extern const struct raid6_calls raid6_avx512x4;
extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index f01b1cb..3de0d89 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -4,3 +4,4 @@ int*.c
tables.c
neon?.c
s390vx?.c
+vpermxor*.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 44d6b46..2f8b61d 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
int8.o int16.o int32.o
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -90,6 +91,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c: UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index c65aa80..5065b1e 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+ &raid6_vpermxor1,
+ &raid6_vpermxor2,
+ &raid6_vpermxor4,
+ &raid6_vpermxor8,
#endif
#if defined(CONFIG_S390)
&raid6_s390vx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
+#endif /* __KERNEL__ */
/*
* This is the C data type to use. We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index fabc477..5d73f5c 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes)
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
else
HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
ifeq ($(HAS_ALTIVEC),yes)
- OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+ CFLAGS += -I../../../arch/powerpc/include
+ CFLAGS += -DCONFIG_ALTIVEC
+ OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
endif
endif
@@ -95,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
altivec8.c: altivec.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
+vpermxor1.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
int1.c: int.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=1 < int.uc > $@
@@ -117,7 +131,7 @@ tables.c: mktables
./mktables > tables.c
clean:
- rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+ rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
spotless: clean
rm -f *~
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 0000000..10475dc
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled Altivec/vpermxor RAID-6 syndrome generation
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which have both Altivec and the vpermxor instruction.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/ppc-opcode.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+ 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+ 0x06, 0x04, 0x02,0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+ 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+ 0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+ void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+ unative_t wp$$, wq$$, wd$$;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ for (d = 0; d < bytes; d += NSIZE*$#) {
+ wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+ for (z = z0-1; z>=0; z--) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ /* P syndrome */
+ wp$$ = vec_xor(wp$$, wd$$);
+
+ /* Q syndrome */
+ asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+ wq$$ = vec_xor(wq$$, wd$$);
+ }
+ *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ }
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+
+ raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+ /* Check if arch has both altivec and the vpermxor instructions */
+# ifdef __KERNEL__
+ return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+ return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+ raid6_vpermxor$#_gen_syndrome,
+ NULL,
+ raid6_have_altivec_vpermxor,
+ "vpermxor$#",
+ 0
+};
+#endif
diff --git a/mm/memblock.c b/mm/memblock.c
index 9b04568..5108356 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1163,7 +1163,7 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
flags);
}
-static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
+phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t max_addr,
int nid, ulong flags)
{
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
index 04dc1e6..9161679 100644
--- a/tools/testing/selftests/powerpc/benchmarks/.gitignore
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -1,5 +1,7 @@
gettimeofday
context_switch
+fork
+exec_target
mmap_bench
futex_bench
null_syscall
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index a35058e..b4d7432 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
+TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
+TEST_GEN_FILES := exec_target
CFLAGS += -O2
@@ -10,3 +11,7 @@ $(TEST_GEN_PROGS): ../harness.c
$(OUTPUT)/context_switch: ../utils.c
$(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
$(OUTPUT)/context_switch: LDLIBS += -lpthread
+
+$(OUTPUT)/fork: LDLIBS += -lpthread
+
+$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
new file mode 100644
index 0000000..3c9c144
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Part of fork context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+/* Declared by hand because this file includes no headers. */
+void _exit(int);
+/*
+ * Program entry point: terminate immediately with status 0.
+ * NOTE(review): defining _start directly presumably relies on the benchmark
+ * Makefile building this file with -static -nostartfiles -- confirm.
+ */
+void _start(void)
+{
+ _exit(0);
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c
new file mode 100644
index 0000000..d312e63
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/fork.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Process and thread creation (fork/vfork/exec/pthread) microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static unsigned int timeout = 30;
+
+/*
+ * Pin the calling process to a single CPU.
+ *
+ * A cpu of -1 means "no pinning" and leaves the affinity untouched.
+ * Prints an error and exits with status 1 if the affinity cannot be set.
+ */
+static void set_cpu(int cpu)
+{
+	cpu_set_t mask;
+
+	if (cpu != -1) {
+		CPU_ZERO(&mask);
+		CPU_SET(cpu, &mask);
+
+		if (sched_setaffinity(0, sizeof(mask), &mask) != 0) {
+			perror("sched_setaffinity");
+			exit(1);
+		}
+	}
+}
+
+/*
+ * Fork a child that pins itself to 'cpu', runs fn(arg) and then exits with
+ * status 0. The parent returns right after a successful fork; a failed fork
+ * prints an error and terminates the program with status 1.
+ */
+static void start_process_on(void *(*fn)(void *), void *arg, int cpu)
+{
+	int child = fork();
+
+	switch (child) {
+	case -1:
+		perror("fork");
+		exit(1);
+	case 0:
+		/* Child: never returns to the caller. */
+		set_cpu(cpu);
+		fn(arg);
+		exit(0);
+	default:
+		/* Parent: nothing more to do. */
+		return;
+	}
+}
+
+/* CPU to pin to, or -1 for no pinning; set from the command line in main(). */
+static int cpu;
+/* Mode flags, each set to 1 by its matching long option in options[]. */
+static int do_fork = 0;
+static int do_vfork = 0;
+static int do_exec = 0;
+/* Set to argv[0] in main() when --exec is given; not read elsewhere in this file. */
+static char *exec_file;
+/* Set by --exec-target; main() exits with status 0 when it is non-zero. */
+static int exec_target = 0;
+/* Number of completed create-and-reap cycles, bumped by the bench_*() loops. */
+static unsigned long iterations;
+/* Value of 'iterations' at the previous SIGALRM, used to print a delta. */
+static unsigned long iterations_prev;
+
+/*
+ * Replace the current process image with ./exec_target (resolved relative to
+ * the current working directory), passing an empty environment.
+ * If the exec fails, print an error and exit with status 1.
+ */
+static void run_exec(void)
+{
+	char *const child_argv[] = { "./exec_target", NULL };
+	int rc;
+
+	rc = execve("./exec_target", child_argv, NULL);
+	if (rc != -1)
+		return;
+
+	perror("execve");
+	exit(1);
+}
+
+/*
+ * fork() workload: forever fork a child, wait for it, and count one
+ * iteration. The child optionally execs the target (when do_exec is set) and
+ * otherwise exits at once. Any fork/waitpid failure exits with status 1.
+ * Never returns.
+ */
+static void bench_fork(void)
+{
+	for (;;) {
+		pid_t child = fork();
+
+		switch (child) {
+		case -1:
+			perror("fork");
+			exit(1);
+		case 0:
+			if (do_exec)
+				run_exec();
+			_exit(0);
+		default:
+			break;
+		}
+
+		if (waitpid(child, NULL, 0) == -1) {
+			perror("waitpid");
+			exit(1);
+		}
+
+		iterations++;
+	}
+}
+
+/*
+ * vfork() workload: forever vfork a child, wait for it, and count one
+ * iteration. The child optionally execs the target (when do_exec is set) and
+ * otherwise exits at once. Any vfork/waitpid failure exits with status 1.
+ * Never returns.
+ */
+static void bench_vfork(void)
+{
+	while (1) {
+		pid_t pid = vfork();
+		if (pid == -1) {
+			/* Name the call that actually failed. */
+			perror("vfork");
+			exit(1);
+		}
+		if (pid == 0) {
+			if (do_exec)
+				run_exec();
+			_exit(0);
+		}
+		pid = waitpid(pid, NULL, 0);
+		if (pid == -1) {
+			perror("waitpid");
+			exit(1);
+		}
+		iterations++;
+	}
+}
+
+/* Thread body for the thread workload: ignores 'arg' and exits immediately. */
+static void *null_fn(void *arg)
+{
+ pthread_exit(NULL);
+}
+
+static void bench_thread(void)
+{
+ pthread_t tid;
+ cpu_set_t cpuset;
+ pthread_attr_t attr;
+ int rc;
+
+ rc = pthread_attr_init(&attr);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_init");
+ exit(1);
+ }
+
+ if (cpu != -1) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_setaffinity_np");
+ exit(1);
+ }
+ }
+
+ while (1) {
+ rc = pthread_create(&tid, &attr, null_fn, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_create");
+ exit(1);
+ }
+ rc = pthread_join(tid, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_join");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+/*
+ * SIGALRM handler, fired once per second: print how many iterations
+ * completed since the previous tick, count down the timeout, and re-arm the
+ * alarm. When the timeout reaches zero, SIGUSR1 is sent to the whole process
+ * group (kill(0, ...)).
+ */
+static void sigalrm_handler(int junk)
+{
+	unsigned long i = iterations;
+
+	/* The counters are unsigned long, so the matching conversion is %lu. */
+	printf("%lu\n", i - iterations_prev);
+	iterations_prev = i;
+
+	if (--timeout == 0)
+		kill(0, SIGUSR1);
+
+	alarm(1);
+}
+
+/* SIGUSR1 handler: terminate the receiving process with status 0. */
+static void sigusr1_handler(int junk)
+{
+ exit(0);
+}
+
+/*
+ * Body of the benchmark process: install the once-per-second SIGALRM
+ * reporter, then run the workload selected on the command line
+ * (fork, vfork, or threads by default). 'arg' is unused.
+ */
+static void *bench_proc(void *arg)
+{
+	void (*workload)(void) = bench_thread;
+
+	if (do_fork)
+		workload = bench_fork;
+	else if (do_vfork)
+		workload = bench_vfork;
+
+	signal(SIGALRM, sigalrm_handler);
+	alarm(1);
+
+	workload();
+
+	return NULL;
+}
+
+/*
+ * Long options accepted by main(). Entries with a flag pointer store 1
+ * into that variable; --timeout instead makes getopt_long() return 's'.
+ * The table ends with an all-zero entry.
+ */
+static struct option options[] = {
+	{ "fork",        no_argument,       &do_fork,     1   },
+	{ "vfork",       no_argument,       &do_vfork,    1   },
+	{ "exec",        no_argument,       &do_exec,     1   },
+	{ "timeout",     required_argument, NULL,         's' },
+	{ "exec-target", no_argument,       &exec_target, 1   },
+	{ NULL,          0,                 NULL,         0   },
+};
+
+/* Print the command-line help text to stderr. */
+static void usage(void)
+{
+	fputs("Usage: fork <options> CPU\n\n", stderr);
+	fputs("\t\t--fork\tUse fork() (default threads)\n", stderr);
+	fputs("\t\t--vfork\tUse vfork() (default threads)\n", stderr);
+	fputs("\t\t--exec\tAlso exec() (default no exec)\n", stderr);
+	fputs("\t\t--timeout=X\tDuration in seconds to run (default 30)\n", stderr);
+	fputs("\t\t--exec-target\tInternal option for exec workload\n", stderr);
+}
+
+/*
+ * Parse the command line, optionally change into the directory holding this
+ * binary (so the relative "./exec_target" path resolves), pin to the
+ * requested CPU, and start the benchmark in a child process.
+ *
+ * The parent then sleeps forever; it is terminated by the SIGUSR1 that the
+ * child's SIGALRM handler sends to the process group once the timeout
+ * expires. Invalid option combinations print the usage text and exit with
+ * status 1.
+ */
+int main(int argc, char *argv[])
+{
+	/* getopt_long() returns an int; keep the full value. */
+	int c;
+
+	while (1) {
+		int option_index = 0;
+
+		c = getopt_long(argc, argv, "", options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 0:
+			/* Flag-style option: getopt_long() already stored it. */
+			if (options[option_index].flag != 0)
+				break;
+
+			usage();
+			exit(1);
+			break;
+
+		case 's':
+			timeout = atoi(optarg);
+			break;
+
+		default:
+			usage();
+			exit(1);
+		}
+	}
+
+	/* --fork and --vfork are mutually exclusive. */
+	if (do_fork && do_vfork) {
+		usage();
+		exit(1);
+	}
+	/* --exec only makes sense together with --fork or --vfork. */
+	if (do_exec && !do_fork && !do_vfork) {
+		usage();
+		exit(1);
+	}
+
+	if (do_exec) {
+		/*
+		 * The exec target is run via a relative path, so change into
+		 * the directory part of argv[0], if it has one.
+		 */
+		char *dirname = strdup(argv[0]);
+		char *slash;
+
+		if (dirname == NULL) {
+			perror("strdup");
+			exit(1);
+		}
+
+		slash = strrchr(dirname, '/');
+		if (slash != NULL) {
+			/* A leading slash is the root directory itself: keep it. */
+			if (slash == dirname)
+				slash++;
+			*slash = '\0';
+
+			if (chdir(dirname) == -1) {
+				perror("chdir");
+				exit(1);
+			}
+		}
+
+		free(dirname);
+	}
+
+	if (exec_target) {
+		exit(0);
+	}
+
+	/* Exactly one positional argument selects the CPU; otherwise no pinning. */
+	if (((argc - optind) != 1)) {
+		cpu = -1;
+	} else {
+		cpu = atoi(argv[optind++]);
+	}
+
+	if (do_exec)
+		exec_file = argv[0];
+
+	set_cpu(cpu);
+
+	printf("Using ");
+	if (do_fork)
+		printf("fork");
+	else if (do_vfork)
+		printf("vfork");
+	else
+		printf("clone");
+
+	if (do_exec)
+		printf(" + exec");
+
+	printf(" on cpu %d\n", cpu);
+
+	/* Create a new process group so we can signal everyone for exit */
+	setpgid(getpid(), getpid());
+
+	signal(SIGUSR1, sigusr1_handler);
+
+	start_process_on(bench_proc, NULL, cpu);
+
+	while (1)
+		sleep(3600);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index ac4a52e..eedce33 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -5,8 +5,8 @@ CFLAGS += -I$(CURDIR)
CFLAGS += -D SELFTEST
CFLAGS += -maltivec
-# Use our CFLAGS for the implicit .S rule
-ASFLAGS = $(CFLAGS)
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
EXTRA_SOURCES := validate.c ../harness.c
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 5c72ff9..c0e45d2 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -4,7 +4,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
- $(SIGNAL_CONTEXT_CHK_TESTS)
+ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
new file mode 100644
index 0000000..85d63449
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2015, Laurent Dufour, IBM Corp.
+ *
+ * Test the kernel's signal return code to check that reclaim is done if
+ * sigreturn() is called while in a transaction (suspended, since an active
+ * transaction is already dropped through the system call path).
+ *
+ * The kernel must discard the transaction when entering sigreturn, since
+ * restoring the potential TM SPRs from the signal frame requires not being
+ * in a transaction.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "tm.h"
+#include "utils.h"
+
+
+/*
+ * Signal handler that tries to return while a transaction is suspended.
+ *
+ * The asm presets r3 to 1, issues tbegin., and only on the fall-through path
+ * clears r3 to 0 and issues tsuspend.; r3 is then stored to 'ret'. So ret is
+ * 0 when the tsuspend. path ran and 1 when the branch to label 1 was taken.
+ * NOTE(review): presumably the beq is taken when the transaction fails to
+ * start -- confirm against the ISA description of tbegin.
+ */
+void handler(int sig)
+{
+ uint64_t ret;
+
+ asm __volatile__(
+ "li 3,1 ;"
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "tsuspend. ;"
+ "1: ;"
+ "std%X[ret] 3, %[ret] ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ /* Non-zero means the tsuspend. path was not taken: give up with status 1. */
+ if (ret)
+ exit(1);
+
+ /*
+ * We return from the signal handler while in a suspended transaction
+ */
+}
+
+
+/*
+ * Test body: install 'handler' for SIGSEGV, then provoke a SIGSEGV from
+ * inside a transaction and check which path the code ends up on.
+ *
+ * Skipped when have_htm() reports no HTM support. The function does not
+ * return normally: it calls exit(0) when 'ret' ends up as 2 and exit(1)
+ * otherwise (including when sigaction() fails).
+ */
+int tm_sigreturn(void)
+{
+ struct sigaction sa;
+ uint64_t ret = 0;
+
+ SKIP_IF(!have_htm());
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(SIGSEGV, &sa, NULL))
+ exit(1);
+
+ /*
+ * Fall-through path after tbegin.: store to address 0 (faults), then
+ * record ret = 1 and tend. Path at label 1 (reached via the beq):
+ * record ret = 2. NOTE(review): presumably label 1 is the transaction
+ * failure path -- confirm against the ISA description of tbegin.
+ */
+ asm __volatile__(
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "std 3,0(3) ;" /* trigger SEGV */
+ "li 3,1 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "tend. ;"
+ "b 2f ;"
+ "1: ;"
+ "li 3,2 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "2: ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ /* Only the label-1 path (ret == 2) counts as success. */
+ if (ret != 2)
+ exit(1);
+
+ exit(0);
+}
+
+/* Run tm_sigreturn() under the selftest harness and return its result. */
+int main(void)
+{
+ return test_harness(tm_sigreturn, "tm_sigreturn");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index e6a0fad..156c8e7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -80,7 +80,7 @@ bool is_failure(uint64_t condition_reg)
return ((condition_reg >> 28) & 0xa) == 0xa;
}
-void *ping(void *input)
+void *tm_una_ping(void *input)
{
/*
@@ -280,7 +280,7 @@ void *ping(void *input)
}
/* Thread to force context switch */
-void *pong(void *not_used)
+void *tm_una_pong(void *not_used)
{
/* Wait thread get its name "pong". */
if (DEBUG)
@@ -311,11 +311,11 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
do {
int rc;
- /* Bind 'ping' to CPU 0, as specified in 'attr'. */
- rc = pthread_create(&t0, attr, ping, (void *) &flags);
+ /* Bind to CPU 0, as specified in 'attr'. */
+ rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags);
if (rc)
pr_err(rc, "pthread_create()");
- rc = pthread_setname_np(t0, "ping");
+ rc = pthread_setname_np(t0, "tm_una_ping");
if (rc)
pr_warn(rc, "pthread_setname_np");
rc = pthread_join(t0, &ret_value);
@@ -333,13 +333,15 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
}
}
-int main(int argc, char **argv)
+int tm_unavailable_test(void)
{
int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
pthread_t t1;
pthread_attr_t attr;
cpu_set_t cpuset;
+ SKIP_IF(!have_htm());
+
/* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
@@ -354,12 +356,12 @@ int main(int argc, char **argv)
if (rc)
pr_err(rc, "pthread_attr_setaffinity_np()");
- rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+ rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL);
if (rc)
pr_err(rc, "pthread_create()");
/* Name it for systemtap convenience */
- rc = pthread_setname_np(t1, "pong");
+ rc = pthread_setname_np(t1, "tm_una_pong");
if (rc)
pr_warn(rc, "pthread_create()");
@@ -394,3 +396,9 @@ int main(int argc, char **argv)
exit(0);
}
}
+
+/*
+ * Run tm_unavailable_test() under the selftest harness and return its result.
+ * The harness timeout is set to 220 first (presumably seconds -- confirm
+ * against test_harness_set_timeout()).
+ */
+int main(int argc, char **argv)
+{
+ test_harness_set_timeout(220);
+ return test_harness(tm_unavailable_test, "tm_unavailable_test");
+}
OpenPOWER on IntegriCloud