summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/boot/Makefile16
-rw-r--r--arch/alpha/boot/main.c1
-rw-r--r--arch/alpha/boot/stdio.c306
-rw-r--r--arch/alpha/boot/tools/objstrip.c3
-rw-r--r--arch/alpha/include/asm/cmpxchg.h2
-rw-r--r--arch/alpha/include/asm/types.h1
-rw-r--r--arch/alpha/include/asm/unistd.h2
-rw-r--r--arch/alpha/include/uapi/asm/unistd.h3
-rw-r--r--arch/alpha/kernel/err_ev6.c1
-rw-r--r--arch/alpha/kernel/irq.c1
-rw-r--r--arch/alpha/kernel/osf_sys.c3
-rw-r--r--arch/alpha/kernel/process.c7
-rw-r--r--arch/alpha/kernel/smp.c8
-rw-r--r--arch/alpha/kernel/srmcons.c3
-rw-r--r--arch/alpha/kernel/sys_marvel.c2
-rw-r--r--arch/alpha/kernel/systbls.S3
-rw-r--r--arch/alpha/kernel/traps.c1
-rw-r--r--arch/alpha/oprofile/op_model_ev4.c1
-rw-r--r--arch/alpha/oprofile/op_model_ev5.c1
-rw-r--r--arch/alpha/oprofile/op_model_ev6.c1
-rw-r--r--arch/alpha/oprofile/op_model_ev67.c1
-rw-r--r--arch/arc/Kconfig.debug13
-rw-r--r--arch/arc/include/asm/atomic.h2
-rw-r--r--arch/arc/include/asm/io.h1
-rw-r--r--arch/arc/mm/cache_arc700.c4
-rw-r--r--arch/arm/boot/dts/Makefile2
-rw-r--r--arch/arm/boot/dts/am335x-boneblack.dts4
-rw-r--r--arch/arm/boot/dts/am335x-evmsk.dts2
-rw-r--r--arch/arm/boot/dts/am437x-sk-evm.dts4
-rw-r--r--arch/arm/boot/dts/am57xx-beagle-x15.dts11
-rw-r--r--arch/arm/boot/dts/armada-375.dtsi2
-rw-r--r--arch/arm/boot/dts/armada-38x.dtsi2
-rw-r--r--arch/arm/boot/dts/armada-39x.dtsi2
-rw-r--r--arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts4
-rw-r--r--arch/arm/boot/dts/dove-cubox.dts1
-rw-r--r--arch/arm/boot/dts/dra7.dtsi10
-rw-r--r--arch/arm/boot/dts/exynos4412-odroid-common.dtsi3
-rw-r--r--arch/arm/boot/dts/exynos4412-trats2.dts2
-rw-r--r--arch/arm/boot/dts/exynos5250-snow.dts1
-rw-r--r--arch/arm/boot/dts/exynos5420-peach-pit.dts1
-rw-r--r--arch/arm/boot/dts/exynos5420-trip-points.dtsi2
-rw-r--r--arch/arm/boot/dts/exynos5420.dtsi1
-rw-r--r--arch/arm/boot/dts/exynos5440-trip-points.dtsi2
-rw-r--r--arch/arm/boot/dts/exynos5800-peach-pi.dts1
-rw-r--r--arch/arm/boot/dts/imx23-olinuxino.dts4
-rw-r--r--arch/arm/boot/dts/imx25.dtsi1
-rw-r--r--arch/arm/boot/dts/imx27.dtsi2
-rw-r--r--arch/arm/boot/dts/imx28.dtsi2
-rw-r--r--arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi2
-rw-r--r--arch/arm/boot/dts/imx6qdl-sabreauto.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000.dts2
-rw-r--r--arch/arm/boot/dts/omap3-n900.dts4
-rw-r--r--arch/arm/boot/dts/omap3.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5.dtsi2
-rw-r--r--arch/arm/boot/dts/r8a7791-koelsch.dts2
-rw-r--r--arch/arm/boot/dts/ste-dbx5x0.dtsi17
-rw-r--r--arch/arm/boot/dts/ste-href.dtsi15
-rw-r--r--arch/arm/boot/dts/ste-snowball.dts13
-rw-r--r--arch/arm/boot/dts/tegra124.dtsi8
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts1
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca9.dts11
-rw-r--r--arch/arm/boot/dts/zynq-7000.dtsi4
-rw-r--r--arch/arm/configs/multi_v7_defconfig5
-rw-r--r--arch/arm/configs/omap2plus_defconfig2
-rw-r--r--arch/arm/include/asm/barrier.h2
-rw-r--r--arch/arm/include/asm/dma-iommu.h2
-rw-r--r--arch/arm/include/asm/io.h1
-rw-r--r--arch/arm/include/asm/xen/page.h1
-rw-r--r--arch/arm/kernel/entry-common.S4
-rw-r--r--arch/arm/kernel/perf_event_cpu.c12
-rw-r--r--arch/arm/mach-exynos/common.h2
-rw-r--r--arch/arm/mach-exynos/exynos.c27
-rw-r--r--arch/arm/mach-exynos/platsmp.c39
-rw-r--r--arch/arm/mach-exynos/pm_domains.c4
-rw-r--r--arch/arm/mach-exynos/suspend.c7
-rw-r--r--arch/arm/mach-gemini/common.h4
-rw-r--r--arch/arm/mach-gemini/reset.c4
-rw-r--r--arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c2
-rw-r--r--arch/arm/mach-imx/gpc.c16
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c68
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_43xx_data.c70
-rw-r--r--arch/arm/mach-omap2/prcm43xx.h3
-rw-r--r--arch/arm/mach-omap2/prm-regbits-34xx.h1
-rw-r--r--arch/arm/mach-omap2/prm-regbits-44xx.h1
-rw-r--r--arch/arm/mach-omap2/prminst44xx.c20
-rw-r--r--arch/arm/mach-omap2/timer.c13
-rw-r--r--arch/arm/mach-omap2/vc.c12
-rw-r--r--arch/arm/mach-omap2/vc.h2
-rw-r--r--arch/arm/mach-omap2/vc3xxx_data.c1
-rw-r--r--arch/arm/mach-omap2/vc44xx_data.c1
-rw-r--r--arch/arm/mach-pxa/Kconfig9
-rw-r--r--arch/arm/mach-pxa/Makefile1
-rw-r--r--arch/arm/mach-pxa/include/mach/lubbock.h7
-rw-r--r--arch/arm/mach-pxa/include/mach/mainstone.h6
-rw-r--r--arch/arm/mach-pxa/lubbock.c108
-rw-r--r--arch/arm/mach-pxa/mainstone.c115
-rw-r--r--arch/arm/mach-pxa/pxa_cplds_irqs.c200
-rw-r--r--arch/arm/mach-rockchip/pm.c7
-rw-r--r--arch/arm/mach-rockchip/pm.h4
-rw-r--r--arch/arm/mach-rockchip/rockchip.c19
-rw-r--r--arch/arm/mm/dma-mapping.c13
-rw-r--r--arch/arm/mm/mmu.c20
-rw-r--r--arch/arm/mm/proc-arm1020.S2
-rw-r--r--arch/arm/mm/proc-arm1020e.S2
-rw-r--r--arch/arm/mm/proc-arm925.S3
-rw-r--r--arch/arm/mm/proc-feroceon.S1
-rw-r--r--arch/arm/net/bpf_jit_32.c42
-rw-r--r--arch/arm/xen/enlighten.c1
-rw-r--r--arch/arm/xen/mm.c15
-rw-r--r--arch/arm64/boot/dts/arm/juno-motherboard.dtsi31
-rw-r--r--arch/arm64/crypto/crc32-arm64.c22
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c3
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c3
-rw-r--r--arch/arm64/include/asm/barrier.h2
-rw-r--r--arch/arm64/include/asm/io.h1
-rw-r--r--arch/arm64/kernel/alternative.c53
-rw-r--r--arch/arm64/kernel/perf_event.c8
-rw-r--r--arch/arm64/mm/dump.c2
-rw-r--r--arch/arm64/net/bpf_jit_comp.c2
-rw-r--r--arch/avr32/include/asm/cmpxchg.h2
-rw-r--r--arch/avr32/include/asm/io.h1
-rw-r--r--arch/frv/include/asm/io.h4
-rw-r--r--arch/hexagon/include/asm/cmpxchg.h1
-rw-r--r--arch/ia64/include/asm/barrier.h7
-rw-r--r--arch/ia64/include/asm/irq_remapping.h2
-rw-r--r--arch/ia64/include/uapi/asm/cmpxchg.h2
-rw-r--r--arch/ia64/kernel/msi_ia64.c30
-rw-r--r--arch/ia64/pci/pci.c13
-rw-r--r--arch/m32r/include/asm/cmpxchg.h2
-rw-r--r--arch/m32r/include/asm/io.h1
-rw-r--r--arch/m32r/kernel/smp.c6
-rw-r--r--arch/m68k/include/asm/cmpxchg.h1
-rw-r--r--arch/m68k/include/asm/io_mm.h4
-rw-r--r--arch/m68k/include/asm/io_no.h4
-rw-r--r--arch/metag/include/asm/barrier.h2
-rw-r--r--arch/metag/include/asm/cmpxchg.h2
-rw-r--r--arch/metag/include/asm/io.h3
-rw-r--r--arch/microblaze/include/asm/io.h2
-rw-r--r--arch/mips/Makefile2
-rw-r--r--arch/mips/ath79/prom.c3
-rw-r--r--arch/mips/configs/fuloong2e_defconfig2
-rw-r--r--arch/mips/include/asm/barrier.h4
-rw-r--r--arch/mips/include/asm/cmpxchg.h2
-rw-r--r--arch/mips/include/asm/elf.h4
-rw-r--r--arch/mips/include/asm/smp.h2
-rw-r--r--arch/mips/kernel/elf.c32
-rw-r--r--arch/mips/kernel/irq.c2
-rw-r--r--arch/mips/kernel/ptrace.c2
-rw-r--r--arch/mips/kernel/smp-bmips.c2
-rw-r--r--arch/mips/kernel/smp-cps.c2
-rw-r--r--arch/mips/kernel/smp.c6
-rw-r--r--arch/mips/kernel/traps.c1
-rw-r--r--arch/mips/kvm/emulate.c6
-rw-r--r--arch/mips/lib/strnlen_user.S15
-rw-r--r--arch/mips/math-emu/cp1emu.c4
-rw-r--r--arch/mips/mm/tlb-r4k.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-platform.c4
-rw-r--r--arch/mn10300/include/asm/io.h1
-rw-r--r--arch/nios2/include/asm/io.h1
-rw-r--r--arch/parisc/include/asm/cmpxchg.h2
-rw-r--r--arch/parisc/include/asm/elf.h4
-rw-r--r--arch/parisc/kernel/process.c10
-rw-r--r--arch/parisc/kernel/sys_parisc.c3
-rw-r--r--arch/powerpc/include/asm/barrier.h2
-rw-r--r--arch/powerpc/include/asm/cmpxchg.h1
-rw-r--r--arch/powerpc/kernel/mce.c4
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/kvm/book3s_hv.c5
-rw-r--r--arch/powerpc/mm/hugetlbpage.c25
-rw-r--r--arch/powerpc/mm/pgtable_64.c11
-rw-r--r--arch/s390/crypto/ghash_s390.c25
-rw-r--r--arch/s390/crypto/prng.c2
-rw-r--r--arch/s390/include/asm/barrier.h2
-rw-r--r--arch/s390/include/asm/cmpxchg.h2
-rw-r--r--arch/s390/include/asm/io.h1
-rw-r--r--arch/s390/include/asm/pgtable.h2
-rw-r--r--arch/s390/net/bpf_jit_comp.c19
-rw-r--r--arch/score/include/asm/cmpxchg.h2
-rw-r--r--arch/sh/include/asm/barrier.h2
-rw-r--r--arch/sh/include/asm/cmpxchg.h2
-rw-r--r--arch/sparc/include/asm/barrier_64.h4
-rw-r--r--arch/sparc/include/asm/cmpxchg_32.h1
-rw-r--r--arch/sparc/include/asm/cmpxchg_64.h2
-rw-r--r--arch/sparc/include/asm/cpudata_64.h3
-rw-r--r--arch/sparc/include/asm/io_32.h1
-rw-r--r--arch/sparc/include/asm/io_64.h1
-rw-r--r--arch/sparc/include/asm/pgtable_64.h22
-rw-r--r--arch/sparc/include/asm/topology_64.h3
-rw-r--r--arch/sparc/include/asm/trap_block.h2
-rw-r--r--arch/sparc/kernel/entry.h2
-rw-r--r--arch/sparc/kernel/leon_pci_grpci2.c1
-rw-r--r--arch/sparc/kernel/mdesc.c136
-rw-r--r--arch/sparc/kernel/pci.c59
-rw-r--r--arch/sparc/kernel/setup_64.c21
-rw-r--r--arch/sparc/kernel/smp_64.c13
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S5
-rw-r--r--arch/sparc/mm/init_64.c74
-rw-r--r--arch/tile/include/asm/atomic_64.h3
-rw-r--r--arch/tile/include/asm/io.h2
-rw-r--r--arch/x86/Kbuild5
-rw-r--r--arch/x86/Kconfig231
-rw-r--r--arch/x86/Makefile23
-rw-r--r--arch/x86/boot/compressed/eboot.c2
-rw-r--r--arch/x86/entry/Makefile10
-rw-r--r--arch/x86/entry/calling.h (renamed from arch/x86/include/asm/calling.h)98
-rw-r--r--arch/x86/entry/entry_32.S1248
-rw-r--r--arch/x86/entry/entry_64.S (renamed from arch/x86/kernel/entry_64.S)1125
-rw-r--r--arch/x86/entry/entry_64_compat.S556
-rw-r--r--arch/x86/entry/syscall_32.c (renamed from arch/x86/kernel/syscall_32.c)6
-rw-r--r--arch/x86/entry/syscall_64.c (renamed from arch/x86/kernel/syscall_64.c)0
-rw-r--r--arch/x86/entry/syscalls/Makefile (renamed from arch/x86/syscalls/Makefile)4
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl (renamed from arch/x86/syscalls/syscall_32.tbl)0
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl (renamed from arch/x86/syscalls/syscall_64.tbl)0
-rw-r--r--arch/x86/entry/syscalls/syscallhdr.sh (renamed from arch/x86/syscalls/syscallhdr.sh)0
-rw-r--r--arch/x86/entry/syscalls/syscalltbl.sh (renamed from arch/x86/syscalls/syscalltbl.sh)0
-rw-r--r--arch/x86/entry/thunk_32.S (renamed from arch/x86/lib/thunk_32.S)15
-rw-r--r--arch/x86/entry/thunk_64.S (renamed from arch/x86/lib/thunk_64.S)46
-rw-r--r--arch/x86/entry/vdso/.gitignore (renamed from arch/x86/vdso/.gitignore)0
-rw-r--r--arch/x86/entry/vdso/Makefile (renamed from arch/x86/vdso/Makefile)2
-rwxr-xr-xarch/x86/entry/vdso/checkundef.sh (renamed from arch/x86/vdso/checkundef.sh)0
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c (renamed from arch/x86/vdso/vclock_gettime.c)0
-rw-r--r--arch/x86/entry/vdso/vdso-layout.lds.S (renamed from arch/x86/vdso/vdso-layout.lds.S)0
-rw-r--r--arch/x86/entry/vdso/vdso-note.S (renamed from arch/x86/vdso/vdso-note.S)0
-rw-r--r--arch/x86/entry/vdso/vdso.lds.S (renamed from arch/x86/vdso/vdso.lds.S)0
-rw-r--r--arch/x86/entry/vdso/vdso2c.c (renamed from arch/x86/vdso/vdso2c.c)0
-rw-r--r--arch/x86/entry/vdso/vdso2c.h (renamed from arch/x86/vdso/vdso2c.h)0
-rw-r--r--arch/x86/entry/vdso/vdso32-setup.c (renamed from arch/x86/vdso/vdso32-setup.c)0
-rw-r--r--arch/x86/entry/vdso/vdso32/.gitignore (renamed from arch/x86/vdso/vdso32/.gitignore)0
-rw-r--r--arch/x86/entry/vdso/vdso32/int80.S (renamed from arch/x86/vdso/vdso32/int80.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/note.S (renamed from arch/x86/vdso/vdso32/note.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/sigreturn.S (renamed from arch/x86/vdso/vdso32/sigreturn.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/syscall.S (renamed from arch/x86/vdso/vdso32/syscall.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/sysenter.S (renamed from arch/x86/vdso/vdso32/sysenter.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/vclock_gettime.c (renamed from arch/x86/vdso/vdso32/vclock_gettime.c)0
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso-fakesections.c (renamed from arch/x86/vdso/vdso32/vdso-fakesections.c)0
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso32.lds.S (renamed from arch/x86/vdso/vdso32/vdso32.lds.S)0
-rw-r--r--arch/x86/entry/vdso/vdsox32.lds.S (renamed from arch/x86/vdso/vdsox32.lds.S)0
-rw-r--r--arch/x86/entry/vdso/vgetcpu.c (renamed from arch/x86/vdso/vgetcpu.c)0
-rw-r--r--arch/x86/entry/vdso/vma.c (renamed from arch/x86/vdso/vma.c)0
-rw-r--r--arch/x86/entry/vsyscall/Makefile7
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c (renamed from arch/x86/kernel/vsyscall_64.c)0
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_emu_64.S (renamed from arch/x86/kernel/vsyscall_emu_64.S)0
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_gtod.c (renamed from arch/x86/kernel/vsyscall_gtod.c)0
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_trace.h (renamed from arch/x86/kernel/vsyscall_trace.h)2
-rw-r--r--arch/x86/ia32/Makefile2
-rw-r--r--arch/x86/ia32/ia32entry.S611
-rw-r--r--arch/x86/include/asm/alternative-asm.h18
-rw-r--r--arch/x86/include/asm/apic.h6
-rw-r--r--arch/x86/include/asm/asm.h25
-rw-r--r--arch/x86/include/asm/atomic.h30
-rw-r--r--arch/x86/include/asm/atomic64_64.h8
-rw-r--r--arch/x86/include/asm/barrier.h4
-rw-r--r--arch/x86/include/asm/cacheflush.h6
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/dwarf2.h170
-rw-r--r--arch/x86/include/asm/entry_arch.h5
-rw-r--r--arch/x86/include/asm/frame.h7
-rw-r--r--arch/x86/include/asm/hardirq.h4
-rw-r--r--arch/x86/include/asm/hpet.h16
-rw-r--r--arch/x86/include/asm/hw_irq.h140
-rw-r--r--arch/x86/include/asm/hypervisor.h2
-rw-r--r--arch/x86/include/asm/io.h9
-rw-r--r--arch/x86/include/asm/io_apic.h114
-rw-r--r--arch/x86/include/asm/irq.h4
-rw-r--r--arch/x86/include/asm/irq_remapping.h80
-rw-r--r--arch/x86/include/asm/irq_vectors.h51
-rw-r--r--arch/x86/include/asm/irqdomain.h63
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/mce.h28
-rw-r--r--arch/x86/include/asm/msi.h7
-rw-r--r--arch/x86/include/asm/msr-index.h (renamed from arch/x86/include/uapi/asm/msr-index.h)3
-rw-r--r--arch/x86/include/asm/msr.h12
-rw-r--r--arch/x86/include/asm/mtrr.h15
-rw-r--r--arch/x86/include/asm/paravirt.h29
-rw-r--r--arch/x86/include/asm/paravirt_types.h17
-rw-r--r--arch/x86/include/asm/pat.h9
-rw-r--r--arch/x86/include/asm/pci.h5
-rw-r--r--arch/x86/include/asm/pgtable.h8
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/include/asm/proto.h10
-rw-r--r--arch/x86/include/asm/qspinlock.h57
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h6
-rw-r--r--arch/x86/include/asm/segment.h14
-rw-r--r--arch/x86/include/asm/special_insns.h38
-rw-r--r--arch/x86/include/asm/spinlock.h7
-rw-r--r--arch/x86/include/asm/spinlock_types.h4
-rw-r--r--arch/x86/include/asm/thread_info.h8
-rw-r--r--arch/x86/include/asm/topology.h2
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h6
-rw-r--r--arch/x86/include/asm/traps.h3
-rw-r--r--arch/x86/include/asm/uaccess_32.h4
-rw-r--r--arch/x86/include/asm/x86_init.h21
-rw-r--r--arch/x86/include/asm/xen/page.h5
-rw-r--r--arch/x86/include/uapi/asm/msr.h2
-rw-r--r--arch/x86/include/uapi/asm/mtrr.h8
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c73
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S6
-rw-r--r--arch/x86/kernel/alternative.c9
-rw-r--r--arch/x86/kernel/apb_timer.c4
-rw-r--r--arch/x86/kernel/apic/htirq.c173
-rw-r--r--arch/x86/kernel/apic/io_apic.c1303
-rw-r--r--arch/x86/kernel/apic/msi.c417
-rw-r--r--arch/x86/kernel/apic/vector.c448
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c21
-rw-r--r--arch/x86/kernel/asm-offsets_32.c18
-rw-r--r--arch/x86/kernel/asm-offsets_64.c23
-rw-r--r--arch/x86/kernel/cpu/amd.c6
-rw-r--r--arch/x86/kernel/cpu/common.c16
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c57
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c141
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c44
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c6
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c209
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c48
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c73
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c12
-rw-r--r--arch/x86/kernel/crash.c1
-rw-r--r--arch/x86/kernel/devicetree.c41
-rw-r--r--arch/x86/kernel/entry_32.S1401
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S37
-rw-r--r--arch/x86/kernel/head_64.S24
-rw-r--r--arch/x86/kernel/hpet.c50
-rw-r--r--arch/x86/kernel/i387.c15
-rw-r--r--arch/x86/kernel/i8259.c8
-rw-r--r--arch/x86/kernel/irq.c62
-rw-r--r--arch/x86/kernel/irq_32.c6
-rw-r--r--arch/x86/kernel/irq_64.c6
-rw-r--r--arch/x86/kernel/irq_work.c10
-rw-r--r--arch/x86/kernel/irqinit.c10
-rw-r--r--arch/x86/kernel/kvm.c43
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/mpparse.c7
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c24
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c22
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c23
-rw-r--r--arch/x86/kernel/process.c14
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c3
-rw-r--r--arch/x86/kernel/setup.c3
-rw-r--r--arch/x86/kernel/smp.c19
-rw-r--r--arch/x86/kernel/smpboot.c43
-rw-r--r--arch/x86/kernel/traps.c21
-rw-r--r--arch/x86/kernel/x86_init.c9
-rw-r--r--arch/x86/kvm/cpuid.c4
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/mmu.c16
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/paging_tmpl.h7
-rw-r--r--arch/x86/kvm/svm.c1
-rw-r--r--arch/x86/kvm/vmx.c1
-rw-r--r--arch/x86/kvm/x86.c26
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/Makefile3
-rw-r--r--arch/x86/lib/atomic64_386_32.S7
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S61
-rw-r--r--arch/x86/lib/checksum_32.S52
-rw-r--r--arch/x86/lib/clear_page_64.S7
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S12
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S11
-rw-r--r--arch/x86/lib/copy_page_64.S11
-rw-r--r--arch/x86/lib/copy_user_64.S127
-rw-r--r--arch/x86/lib/copy_user_nocache_64.S136
-rw-r--r--arch/x86/lib/csum-copy_64.S17
-rw-r--r--arch/x86/lib/getuser.S13
-rw-r--r--arch/x86/lib/iomap_copy_64.S3
-rw-r--r--arch/x86/lib/memcpy_64.S3
-rw-r--r--arch/x86/lib/memmove_64.S3
-rw-r--r--arch/x86/lib/memset_64.S5
-rw-r--r--arch/x86/lib/msr-reg.S44
-rw-r--r--arch/x86/lib/putuser.S8
-rw-r--r--arch/x86/lib/rwsem.S49
-rw-r--r--arch/x86/mm/init.c6
-rw-r--r--arch/x86/mm/iomap_32.c12
-rw-r--r--arch/x86/mm/ioremap.c85
-rw-r--r--arch/x86/mm/pageattr-test.c1
-rw-r--r--arch/x86/mm/pageattr.c84
-rw-r--r--arch/x86/mm/pat.c337
-rw-r--r--arch/x86/mm/pat_internal.h2
-rw-r--r--arch/x86/mm/pat_rbtree.c6
-rw-r--r--arch/x86/mm/pgtable.c60
-rw-r--r--arch/x86/net/bpf_jit.S1
-rw-r--r--arch/x86/net/bpf_jit_comp.c35
-rw-r--r--arch/x86/pci/acpi.c37
-rw-r--r--arch/x86/pci/i386.c6
-rw-r--r--arch/x86/pci/intel_mid_pci.c6
-rw-r--r--arch/x86/pci/irq.c13
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_wdt.c5
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c18
-rw-r--r--arch/x86/platform/intel-mid/sfi.c30
-rw-r--r--arch/x86/platform/sfi/sfi.c7
-rw-r--r--arch/x86/platform/uv/uv_irq.c298
-rw-r--r--arch/x86/power/hibernate_asm_64.S8
-rw-r--r--arch/x86/um/Makefile2
-rw-r--r--arch/x86/um/asm/barrier.h3
-rw-r--r--arch/x86/xen/enlighten.c35
-rw-r--r--arch/x86/xen/p2m.c1
-rw-r--r--arch/x86/xen/spinlock.c64
-rw-r--r--arch/x86/xen/suspend.c10
-rw-r--r--arch/x86/xen/xen-asm_64.S28
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--arch/xtensa/include/asm/dma-mapping.h13
-rw-r--r--arch/xtensa/include/asm/io.h1
411 files changed, 7668 insertions, 6563 deletions
diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile
index cd14388..8399bd0 100644
--- a/arch/alpha/boot/Makefile
+++ b/arch/alpha/boot/Makefile
@@ -14,6 +14,9 @@ targets := vmlinux.gz vmlinux \
tools/bootpzh bootloader bootpheader bootpzheader
OBJSTRIP := $(obj)/tools/objstrip
+HOSTCFLAGS := -Wall -I$(objtree)/usr/include
+BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
+
# SRM bootable image. Copy to offset 512 of a partition.
$(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh
( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@
@@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE
$(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE
$(call if_changed,objstrip)
-LDFLAGS_bootloader := -static -uvsprintf -T #-N -relax
-LDFLAGS_bootpheader := -static -uvsprintf -T #-N -relax
-LDFLAGS_bootpzheader := -static -uvsprintf -T #-N -relax
+LDFLAGS_bootloader := -static -T # -N -relax
+LDFLAGS_bootloader := -static -T # -N -relax
+LDFLAGS_bootpheader := -static -T # -N -relax
+LDFLAGS_bootpzheader := -static -T # -N -relax
-OBJ_bootlx := $(obj)/head.o $(obj)/main.o
-OBJ_bootph := $(obj)/head.o $(obj)/bootp.o
-OBJ_bootpzh := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o
+OBJ_bootlx := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o
+OBJ_bootph := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o
+OBJ_bootpzh := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o
$(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE
$(call if_changed,ld)
diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c
index 3baf2d1..dd6eb4a 100644
--- a/arch/alpha/boot/main.c
+++ b/arch/alpha/boot/main.c
@@ -19,7 +19,6 @@
#include "ksize.h"
-extern int vsprintf(char *, const char *, va_list);
extern unsigned long switch_to_osf_pal(unsigned long nr,
struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
unsigned long *vptb);
diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c
new file mode 100644
index 0000000..f844dae
--- /dev/null
+++ b/arch/alpha/boot/stdio.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+
+size_t strnlen(const char * s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+# define do_div(n, base) ({ \
+ unsigned int __base = (base); \
+ unsigned int __rem; \
+ __rem = ((unsigned long long)(n)) % __base; \
+ (n) = ((unsigned long long)(n)) / __base; \
+ __rem; \
+})
+
+
+static int skip_atoi(const char **s)
+{
+ int i, c;
+
+ for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+ i = i*10 + c - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SPECIAL 32 /* 0x */
+#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * str, unsigned long long num, int base, int size, int precision, int type)
+{
+ char c,sign,tmp[66];
+ const char *digits="0123456789abcdefghijklmnopqrstuvwxyz";
+ int i;
+
+ if (type & LARGE)
+ digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 36)
+ return 0;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if ((signed long long)num < 0) {
+ sign = '-';
+ num = - (signed long long)num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else while (num != 0) {
+ tmp[i++] = digits[do_div(num, base)];
+ }
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(ZEROPAD+LEFT)))
+ while(size-->0)
+ *str++ = ' ';
+ if (sign)
+ *str++ = sign;
+ if (type & SPECIAL) {
+ if (base==8)
+ *str++ = '0';
+ else if (base==16) {
+ *str++ = '0';
+ *str++ = digits[33];
+ }
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ *str++ = c;
+ while (i < precision--)
+ *str++ = '0';
+ while (i-- > 0)
+ *str++ = tmp[i];
+ while (size-- > 0)
+ *str++ = ' ';
+ return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long long num;
+ int i, base;
+ char * str;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars for from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+ /* 'z' support added 23/7/1999 S.H. */
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+
+ for (str=buf ; *fmt ; ++fmt) {
+ if (*fmt != '%') {
+ *str++ = *fmt;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-': flags |= LEFT; goto repeat;
+ case '+': flags |= PLUS; goto repeat;
+ case ' ': flags |= SPACE; goto repeat;
+ case '#': flags |= SPECIAL; goto repeat;
+ case '0': flags |= ZEROPAD; goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if ('0' <= *fmt && *fmt <= '9')
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if ('0' <= *fmt && *fmt <= '9')
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'l' && *(fmt + 1) == 'l') {
+ qualifier = 'q';
+ fmt += 2;
+ } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L'
+ || *fmt == 'Z') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ *str++ = ' ';
+ *str++ = (unsigned char) va_arg(args, int);
+ while (--field_width > 0)
+ *str++ = ' ';
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ if (!s)
+ s = "<NULL>";
+
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ *str++ = ' ';
+ for (i = 0; i < len; ++i)
+ *str++ = *s++;
+ while (len < field_width--)
+ *str++ = ' ';
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2*sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ str = number(str,
+ (unsigned long) va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+
+ case 'n':
+ if (qualifier == 'l') {
+ long * ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else if (qualifier == 'Z') {
+ size_t * ip = va_arg(args, size_t *);
+ *ip = (str - buf);
+ } else {
+ int * ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ *str++ = '%';
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'X':
+ flags |= LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ *str++ = '%';
+ if (*fmt)
+ *str++ = *fmt;
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l') {
+ num = va_arg(args, unsigned long);
+ if (flags & SIGN)
+ num = (signed long) num;
+ } else if (qualifier == 'q') {
+ num = va_arg(args, unsigned long long);
+ if (flags & SIGN)
+ num = (signed long long) num;
+ } else if (qualifier == 'Z') {
+ num = va_arg(args, size_t);
+ } else if (qualifier == 'h') {
+ num = (unsigned short) va_arg(args, int);
+ if (flags & SIGN)
+ num = (signed short) num;
+ } else {
+ num = va_arg(args, unsigned int);
+ if (flags & SIGN)
+ num = (signed int) num;
+ }
+ str = number(str, num, base, field_width, precision, flags);
+ }
+ *str = '\0';
+ return str-buf;
+}
+
+int sprintf(char * buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i=vsprintf(buf,fmt,args);
+ va_end(args);
+ return i;
+}
diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c
index 367d53d..dee8269 100644
--- a/arch/alpha/boot/tools/objstrip.c
+++ b/arch/alpha/boot/tools/objstrip.c
@@ -27,6 +27,9 @@
#include <linux/param.h>
#ifdef __ELF__
# include <linux/elf.h>
+# define elfhdr elf64_hdr
+# define elf_phdr elf64_phdr
+# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
#endif
/* bootfile size must be multiple of BLOCK_SIZE: */
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 429e8cd..e511776 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -66,6 +66,4 @@
#undef __ASM__MB
#undef ____cmpxchg
-#define __HAVE_ARCH_CMPXCHG 1
-
#endif /* _ALPHA_CMPXCHG_H */
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index f61e1a5..4cb4b6d 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -2,6 +2,5 @@
#define _ALPHA_TYPES_H
#include <asm-generic/int-ll64.h>
-#include <uapi/asm/types.h>
#endif /* _ALPHA_TYPES_H */
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index c509d30..a56e608 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -3,7 +3,7 @@
#include <uapi/asm/unistd.h>
-#define NR_SYSCALLS 511
+#define NR_SYSCALLS 514
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
index d214a035..aa33bf5 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -472,5 +472,8 @@
#define __NR_sched_setattr 508
#define __NR_sched_getattr 509
#define __NR_renameat2 510
+#define __NR_getrandom 511
+#define __NR_memfd_create 512
+#define __NR_execveat 513
#endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c
index 253cf1a..51267ac 100644
--- a/arch/alpha/kernel/err_ev6.c
+++ b/arch/alpha/kernel/err_ev6.c
@@ -6,7 +6,6 @@
* Error handling code supporting Alpha systems
*/
-#include <linux/init.h>
#include <linux/sched.h>
#include <asm/io.h>
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 7b2be25..51f2c86 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -19,7 +19,6 @@
#include <linux/ptrace.h>
#include <linux/interrupt.h>
#include <linux/random.h>
-#include <linux/init.h>
#include <linux/irq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index e51f578..36dc91a 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
if (tv) {
if (get_tv32((struct timeval *)&kts, tv))
return -EFAULT;
+ kts.tv_nsec *= 1000;
}
if (tz) {
if (copy_from_user(&ktz, tz, sizeof(*tz)))
return -EFAULT;
}
- kts.tv_nsec *= 1000;
-
return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
}
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 1941a07..84d1326 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task)
}
/*
- * Copy an alpha thread..
+ * Copy architecture-specific thread state
*/
-
int
copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long arg,
+ unsigned long kthread_arg,
struct task_struct *p)
{
extern void ret_from_fork(void);
@@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
sizeof(struct switch_stack) + sizeof(struct pt_regs));
childstack->r26 = (unsigned long) ret_from_kernel_thread;
childstack->r9 = usp; /* function */
- childstack->r10 = arg;
+ childstack->r10 = kthread_arg;
childregs->hae = alpha_mv.hae_cache,
childti->pcb.usp = 0;
return 0;
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 99ac36d..2f24447f 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -63,7 +63,6 @@ static struct {
enum ipi_message_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
- IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
};
@@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier)
return -EINVAL;
}
-
static void
send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
{
@@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs)
generic_smp_call_function_interrupt();
break;
- case IPI_CALL_FUNC_SINGLE:
- generic_smp_call_function_single_interrupt();
- break;
-
case IPI_CPU_STOP:
halt();
@@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void arch_send_call_function_single_ipi(int cpu)
{
- send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+ send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
}
static void
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 6f01d9a..72b5951 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -237,8 +237,7 @@ srmcons_init(void)
return -ENODEV;
}
-
-module_init(srmcons_init);
+device_initcall(srmcons_init);
/*
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index f21d61f..24e41bd 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline);
irq = intline;
- msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI);
+ msi_loc = dev->msi_cap;
msg_ctl = 0;
if (msi_loc)
pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl);
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 24789713..9b62e3f 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -529,6 +529,9 @@ sys_call_table:
.quad sys_sched_setattr
.quad sys_sched_getattr
.quad sys_renameat2 /* 510 */
+ .quad sys_getrandom
+ .quad sys_memfd_create
+ .quad sys_execveat
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 9c4c189..74aceea 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -14,7 +14,6 @@
#include <linux/tty.h>
#include <linux/delay.h>
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/kallsyms.h>
#include <linux/ratelimit.h>
diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c
index 18aa9b4..086a0d5 100644
--- a/arch/alpha/oprofile/op_model_ev4.c
+++ b/arch/alpha/oprofile/op_model_ev4.c
@@ -8,7 +8,6 @@
*/
#include <linux/oprofile.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c
index c32f8a0..c300f5e 100644
--- a/arch/alpha/oprofile/op_model_ev5.c
+++ b/arch/alpha/oprofile/op_model_ev5.c
@@ -8,7 +8,6 @@
*/
#include <linux/oprofile.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c
index 1c84cc2..02edf59 100644
--- a/arch/alpha/oprofile/op_model_ev6.c
+++ b/arch/alpha/oprofile/op_model_ev6.c
@@ -8,7 +8,6 @@
*/
#include <linux/oprofile.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c
index 34a57a1..adb1744 100644
--- a/arch/alpha/oprofile/op_model_ev67.c
+++ b/arch/alpha/oprofile/op_model_ev67.c
@@ -9,7 +9,6 @@
*/
#include <linux/oprofile.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
diff --git a/arch/arc/Kconfig.debug b/arch/arc/Kconfig.debug
index a7fc0da..ff6a4b5 100644
--- a/arch/arc/Kconfig.debug
+++ b/arch/arc/Kconfig.debug
@@ -2,19 +2,6 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
-config EARLY_PRINTK
- bool "Early printk" if EMBEDDED
- default y
- help
- Write kernel log output directly into the VGA buffer or to a serial
- port.
-
- This is useful for kernel debugging when your machine crashes very
- early before the console code is initialized. For normal operation
- it is not recommended because it looks ugly and doesn't cooperate
- with klogd/syslogd or the X server. You should normally N here,
- unless you want to debug such a crash.
-
config 16KSTACKS
bool "Use 16Kb for kernel stacks instead of 8Kb"
help
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 067551b..9917a45 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -99,7 +99,7 @@ static inline void atomic_##op(int i, atomic_t *v) \
atomic_ops_unlock(flags); \
}
-#define ATOMIC_OP_RETURN(op, c_op) \
+#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned long flags; \
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index cabd518..7cc4ced 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -20,6 +20,7 @@ extern void iounmap(const void __iomem *addr);
#define ioremap_nocache(phy, sz) ioremap(phy, sz)
#define ioremap_wc(phy, sz) ioremap(phy, sz)
+#define ioremap_wt(phy, sz) ioremap(phy, sz)
/* Change struct page to physical address */
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 8c3a3e0..12b2100 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -266,7 +266,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
* Machine specific helpers for Entire D-Cache or Per Line ops
*/
-static unsigned int __before_dc_op(const int op)
+static inline unsigned int __before_dc_op(const int op)
{
unsigned int reg = reg;
@@ -284,7 +284,7 @@ static unsigned int __before_dc_op(const int op)
return reg;
}
-static void __after_dc_op(const int op, unsigned int reg)
+static inline void __after_dc_op(const int op, unsigned int reg)
{
if (op & OP_FLUSH) /* flush / flush-n-inv both wait */
while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 86217db..992736b 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \
imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \
imx25-karo-tx25.dtb \
imx25-pdk.dtb
-dtb-$(CONFIG_SOC_IMX31) += \
+dtb-$(CONFIG_SOC_IMX27) += \
imx27-apf27.dtb \
imx27-apf27dev.dtb \
imx27-eukrea-mbimxsd27-baseboard.dtb \
diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts
index 5c42d25..901739f 100644
--- a/arch/arm/boot/dts/am335x-boneblack.dts
+++ b/arch/arm/boot/dts/am335x-boneblack.dts
@@ -80,7 +80,3 @@
status = "okay";
};
};
-
-&rtc {
- system-power-controller;
-};
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 87fc7a3..156d05e 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -654,7 +654,7 @@
wlcore: wlcore@2 {
compatible = "ti,wl1271";
reg = <2>;
- interrupt-parent = <&gpio1>;
+ interrupt-parent = <&gpio0>;
interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */
ref-clock-frequency = <38400000>;
};
diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index 8ae29c9..c17097d 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -49,7 +49,7 @@
pinctrl-0 = <&matrix_keypad_pins>;
debounce-delay-ms = <5>;
- col-scan-delay-us = <1500>;
+ col-scan-delay-us = <5>;
row-gpios = <&gpio5 5 GPIO_ACTIVE_HIGH /* Bank5, pin5 */
&gpio5 6 GPIO_ACTIVE_HIGH>; /* Bank5, pin6 */
@@ -473,7 +473,7 @@
interrupt-parent = <&gpio0>;
interrupts = <31 0>;
- wake-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio1 28 GPIO_ACTIVE_LOW>;
touchscreen-size-x = <480>;
touchscreen-size-y = <272>;
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 15f198e..7128fad 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -18,6 +18,7 @@
aliases {
rtc0 = &mcp_rtc;
rtc1 = &tps659038_rtc;
+ rtc2 = &rtc;
};
memory {
@@ -83,7 +84,7 @@
gpio_fan: gpio_fan {
/* Based on 5v 500mA AFB02505HHB */
compatible = "gpio-fan";
- gpios = <&tps659038_gpio 1 GPIO_ACTIVE_HIGH>;
+ gpios = <&tps659038_gpio 2 GPIO_ACTIVE_HIGH>;
gpio-fan,speed-map = <0 0>,
<13000 1>;
#cooling-cells = <2>;
@@ -130,8 +131,8 @@
uart3_pins_default: uart3_pins_default {
pinctrl-single,pins = <
- 0x248 (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_rxd.rxd */
- 0x24c (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_txd.txd */
+ 0x3f8 (PIN_INPUT_SLEW | MUX_MODE2) /* uart2_ctsn.uart3_rxd */
+ 0x3fc (PIN_INPUT_SLEW | MUX_MODE1) /* uart2_rtsn.uart3_txd */
>;
};
@@ -455,7 +456,7 @@
mcp_rtc: rtc@6f {
compatible = "microchip,mcp7941x";
reg = <0x6f>;
- interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>; /* IRQ_SYS_1N */
+ interrupts = <GIC_SPI 2 IRQ_TYPE_EDGE_RISING>; /* IRQ_SYS_1N */
pinctrl-names = "default";
pinctrl-0 = <&mcp79410_pins_default>;
@@ -478,7 +479,7 @@
&uart3 {
status = "okay";
interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
- <&dra7_pmx_core 0x248>;
+ <&dra7_pmx_core 0x3f8>;
pinctrl-names = "default";
pinctrl-0 = <&uart3_pins_default>;
diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi
index c675257..f076ff8 100644
--- a/arch/arm/boot/dts/armada-375.dtsi
+++ b/arch/arm/boot/dts/armada-375.dtsi
@@ -69,7 +69,7 @@
mainpll: mainpll {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <2000000000>;
+ clock-frequency = <1000000000>;
};
/* 25 MHz reference crystal */
refclk: oscillator {
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index ed2dd8b..218a2ac 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -585,7 +585,7 @@
mainpll: mainpll {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <2000000000>;
+ clock-frequency = <1000000000>;
};
/* 25 MHz reference crystal */
diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi
index 0e85fc1..ecd1318 100644
--- a/arch/arm/boot/dts/armada-39x.dtsi
+++ b/arch/arm/boot/dts/armada-39x.dtsi
@@ -502,7 +502,7 @@
mainpll: mainpll {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <2000000000>;
+ clock-frequency = <1000000000>;
};
};
};
diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index e3b08fb..990e8a2 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -105,6 +105,10 @@
};
internal-regs {
+ rtc@10300 {
+ /* No crystal connected to the internal RTC */
+ status = "disabled";
+ };
serial@12000 {
status = "okay";
};
diff --git a/arch/arm/boot/dts/dove-cubox.dts b/arch/arm/boot/dts/dove-cubox.dts
index aae7efc..e6fa251 100644
--- a/arch/arm/boot/dts/dove-cubox.dts
+++ b/arch/arm/boot/dts/dove-cubox.dts
@@ -87,6 +87,7 @@
/* connect xtal input to 25MHz reference */
clocks = <&ref25>;
+ clock-names = "xtal";
/* connect xtal input as source of pll0 and pll1 */
silabs,pll-source = <0 0>, <1 0>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 5332b57..f03a091 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -911,7 +911,7 @@
ti,clock-cycles = <16>;
reg = <0x4ae07ddc 0x4>, <0x4ae07de0 0x4>,
- <0x4ae06014 0x4>, <0x4a003b20 0x8>,
+ <0x4ae06014 0x4>, <0x4a003b20 0xc>,
<0x4ae0c158 0x4>;
reg-names = "setup-address", "control-address",
"int-address", "efuse-address",
@@ -944,7 +944,7 @@
ti,clock-cycles = <16>;
reg = <0x4ae07e34 0x4>, <0x4ae07e24 0x4>,
- <0x4ae06010 0x4>, <0x4a0025cc 0x8>,
+ <0x4ae06010 0x4>, <0x4a0025cc 0xc>,
<0x4a002470 0x4>;
reg-names = "setup-address", "control-address",
"int-address", "efuse-address",
@@ -977,7 +977,7 @@
ti,clock-cycles = <16>;
reg = <0x4ae07e30 0x4>, <0x4ae07e20 0x4>,
- <0x4ae06010 0x4>, <0x4a0025e0 0x8>,
+ <0x4ae06010 0x4>, <0x4a0025e0 0xc>,
<0x4a00246c 0x4>;
reg-names = "setup-address", "control-address",
"int-address", "efuse-address",
@@ -1010,7 +1010,7 @@
ti,clock-cycles = <16>;
reg = <0x4ae07de4 0x4>, <0x4ae07de8 0x4>,
- <0x4ae06010 0x4>, <0x4a003b08 0x8>,
+ <0x4ae06010 0x4>, <0x4a003b08 0xc>,
<0x4ae0c154 0x4>;
reg-names = "setup-address", "control-address",
"int-address", "efuse-address",
@@ -1203,7 +1203,7 @@
status = "disabled";
};
- rtc@48838000 {
+ rtc: rtc@48838000 {
compatible = "ti,am3352-rtc";
reg = <0x48838000 0x100>;
interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
index 8de12af7..d6b49e5 100644
--- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
+++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
@@ -9,6 +9,7 @@
#include <dt-bindings/sound/samsung-i2s.h>
#include <dt-bindings/input/input.h>
+#include <dt-bindings/clock/maxim,max77686.h>
#include "exynos4412.dtsi"
/ {
@@ -105,6 +106,8 @@
rtc@10070000 {
status = "okay";
+ clocks = <&clock CLK_RTC>, <&max77686 MAX77686_CLK_AP>;
+ clock-names = "rtc", "rtc_src";
};
g2d@10800000 {
diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts
index 173ffa4..792394d 100644
--- a/arch/arm/boot/dts/exynos4412-trats2.dts
+++ b/arch/arm/boot/dts/exynos4412-trats2.dts
@@ -736,7 +736,7 @@
display-timings {
timing-0 {
- clock-frequency = <0>;
+ clock-frequency = <57153600>;
hactive = <720>;
vactive = <1280>;
hfront-porch = <5>;
diff --git a/arch/arm/boot/dts/exynos5250-snow.dts b/arch/arm/boot/dts/exynos5250-snow.dts
index 2657e84..1eca97e 100644
--- a/arch/arm/boot/dts/exynos5250-snow.dts
+++ b/arch/arm/boot/dts/exynos5250-snow.dts
@@ -567,6 +567,7 @@
num-slots = <1>;
broken-cd;
cap-sdio-irq;
+ keep-power-in-suspend;
card-detect-delay = <200>;
samsung,dw-mshc-ciu-div = <3>;
samsung,dw-mshc-sdr-timing = <2 3>;
diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts
index 0788d08..146e711 100644
--- a/arch/arm/boot/dts/exynos5420-peach-pit.dts
+++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts
@@ -711,6 +711,7 @@
num-slots = <1>;
broken-cd;
cap-sdio-irq;
+ keep-power-in-suspend;
card-detect-delay = <200>;
clock-frequency = <400000000>;
samsung,dw-mshc-ciu-div = <1>;
diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
index 5d31fc1..2180a01 100644
--- a/arch/arm/boot/dts/exynos5420-trip-points.dtsi
+++ b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
@@ -28,7 +28,7 @@ trips {
type = "active";
};
cpu-crit-0 {
- temperature = <1200000>; /* millicelsius */
+ temperature = <120000>; /* millicelsius */
hysteresis = <0>; /* millicelsius */
type = "critical";
};
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index f67b23f..4531753 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -536,6 +536,7 @@
clock-names = "dp";
phys = <&dp_phy>;
phy-names = "dp";
+ power-domains = <&disp_pd>;
};
mipi_phy: video-phy@10040714 {
diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
index 48adfa8..356e963 100644
--- a/arch/arm/boot/dts/exynos5440-trip-points.dtsi
+++ b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
@@ -18,7 +18,7 @@ trips {
type = "active";
};
cpu-crit-0 {
- temperature = <1050000>; /* millicelsius */
+ temperature = <105000>; /* millicelsius */
hysteresis = <0>; /* millicelsius */
type = "critical";
};
diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts
index 412f41d..02eb8b1 100644
--- a/arch/arm/boot/dts/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts
@@ -674,6 +674,7 @@
num-slots = <1>;
broken-cd;
cap-sdio-irq;
+ keep-power-in-suspend;
card-detect-delay = <200>;
clock-frequency = <400000000>;
samsung,dw-mshc-ciu-div = <1>;
diff --git a/arch/arm/boot/dts/imx23-olinuxino.dts b/arch/arm/boot/dts/imx23-olinuxino.dts
index 7e6eef2..8204539 100644
--- a/arch/arm/boot/dts/imx23-olinuxino.dts
+++ b/arch/arm/boot/dts/imx23-olinuxino.dts
@@ -12,6 +12,7 @@
*/
/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
#include "imx23.dtsi"
/ {
@@ -93,6 +94,7 @@
ahb@80080000 {
usb0: usb@80080000 {
+ dr_mode = "host";
vbus-supply = <&reg_usb0_vbus>;
status = "okay";
};
@@ -122,7 +124,7 @@
user {
label = "green";
- gpios = <&gpio2 1 1>;
+ gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
};
};
};
diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index e4d3aec..677f81d 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@ -428,6 +428,7 @@
pwm4: pwm@53fc8000 {
compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
+ #pwm-cells = <2>;
reg = <0x53fc8000 0x4000>;
clocks = <&clks 108>, <&clks 52>;
clock-names = "ipg", "per";
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index 6951b66..bc215e4 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -533,7 +533,7 @@
fec: ethernet@1002b000 {
compatible = "fsl,imx27-fec";
- reg = <0x1002b000 0x4000>;
+ reg = <0x1002b000 0x1000>;
interrupts = <50>;
clocks = <&clks IMX27_CLK_FEC_IPG_GATE>,
<&clks IMX27_CLK_FEC_AHB_GATE>;
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 25e25f8..4e073e8 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -913,7 +913,7 @@
80 81 68 69
70 71 72 73
74 75 76 77>;
- interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty",
+ interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty",
"saif0", "saif1", "i2c0", "i2c1",
"auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx",
"auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx";
diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
index 19cc269..1ce6133 100644
--- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
@@ -31,6 +31,7 @@
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
gpio = <&gpio4 15 0>;
+ enable-active-high;
};
reg_usb_h1_vbus: regulator@1 {
@@ -40,6 +41,7 @@
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
gpio = <&gpio1 0 0>;
+ enable-active-high;
};
};
diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
index 46b2fed..3b24b126 100644
--- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
@@ -185,7 +185,6 @@
&i2c3 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c3>;
- pinctrl-assert-gpios = <&gpio5 4 GPIO_ACTIVE_HIGH>;
status = "okay";
max7310_a: gpio@30 {
diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts
index 134d3f2..921de66 100644
--- a/arch/arm/boot/dts/omap3-devkit8000.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000.dts
@@ -110,6 +110,8 @@
nand@0,0 {
reg = <0 0 4>; /* CS0, offset 0, IO size 4 */
nand-bus-width = <16>;
+ gpmc,device-width = <2>;
+ ti,nand-ecc-opt = "sw";
gpmc,sync-clk-ps = <0>;
gpmc,cs-on-ns = <0>;
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index a293158..5c16145 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -498,6 +498,8 @@
DRVDD-supply = <&vmmc2>;
IOVDD-supply = <&vio>;
DVDD-supply = <&vio>;
+
+ ai3x-micbias-vg = <1>;
};
tlv320aic3x_aux: tlv320aic3x@19 {
@@ -509,6 +511,8 @@
DRVDD-supply = <&vmmc2>;
IOVDD-supply = <&vio>;
DVDD-supply = <&vio>;
+
+ ai3x-micbias-vg = <2>;
};
tsl2563: tsl2563@29 {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index d18a90f..69a40cf 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -456,6 +456,7 @@
};
mmu_isp: mmu@480bd400 {
+ #iommu-cells = <0>;
compatible = "ti,omap2-iommu";
reg = <0x480bd400 0x80>;
interrupts = <24>;
@@ -464,6 +465,7 @@
};
mmu_iva: mmu@5d000000 {
+ #iommu-cells = <0>;
compatible = "ti,omap2-iommu";
reg = <0x5d000000 0x80>;
interrupts = <28>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index efe5f73..7d24ae0 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -128,7 +128,7 @@
* hierarchy.
*/
ocp {
- compatible = "ti,omap4-l3-noc", "simple-bus";
+ compatible = "ti,omap5-l3-noc", "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
ranges;
diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts
index 74c3212..824ddab 100644
--- a/arch/arm/boot/dts/r8a7791-koelsch.dts
+++ b/arch/arm/boot/dts/r8a7791-koelsch.dts
@@ -545,7 +545,7 @@
compatible = "adi,adv7511w";
reg = <0x39>;
interrupt-parent = <&gpio3>;
- interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+ interrupts = <29 IRQ_TYPE_LEVEL_LOW>;
adi,input-depth = <8>;
adi,input-colorspace = "rgb";
diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index bfd3f1c..2201cd5 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -1017,23 +1017,6 @@
status = "disabled";
};
- vmmci: regulator-gpio {
- compatible = "regulator-gpio";
-
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <2900000>;
- regulator-name = "mmci-reg";
- regulator-type = "voltage";
-
- startup-delay-us = <100>;
- enable-active-high;
-
- states = <1800000 0x1
- 2900000 0x0>;
-
- status = "disabled";
- };
-
mcde@a0350000 {
compatible = "stericsson,mcde";
reg = <0xa0350000 0x1000>, /* MCDE */
diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi
index bf8f0ed..744c1e3 100644
--- a/arch/arm/boot/dts/ste-href.dtsi
+++ b/arch/arm/boot/dts/ste-href.dtsi
@@ -111,6 +111,21 @@
pinctrl-1 = <&i2c3_sleep_mode>;
};
+ vmmci: regulator-gpio {
+ compatible = "regulator-gpio";
+
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2900000>;
+ regulator-name = "mmci-reg";
+ regulator-type = "voltage";
+
+ startup-delay-us = <100>;
+ enable-active-high;
+
+ states = <1800000 0x1
+ 2900000 0x0>;
+ };
+
// External Micro SD slot
sdi0_per1@80126000 {
arm,primecell-periphid = <0x10480180>;
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts
index 206826a..1bc84eb 100644
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -146,8 +146,21 @@
};
vmmci: regulator-gpio {
+ compatible = "regulator-gpio";
+
gpios = <&gpio7 4 0x4>;
enable-gpio = <&gpio6 25 0x4>;
+
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2900000>;
+ regulator-name = "mmci-reg";
+ regulator-type = "voltage";
+
+ startup-delay-us = <100>;
+ enable-active-high;
+
+ states = <1800000 0x1
+ 2900000 0x0>;
};
// External Micro SD slot
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index cf01c81..13cc7ca 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -826,7 +826,7 @@
<&tegra_car TEGRA124_CLK_PLL_U>,
<&tegra_car TEGRA124_CLK_USBD>;
clock-names = "reg", "pll_u", "utmi-pads";
- resets = <&tegra_car 59>, <&tegra_car 22>;
+ resets = <&tegra_car 22>, <&tegra_car 22>;
reset-names = "usb", "utmi-pads";
nvidia,hssync-start-delay = <0>;
nvidia,idle-wait-delay = <17>;
@@ -838,6 +838,7 @@
nvidia,hssquelch-level = <2>;
nvidia,hsdiscon-level = <5>;
nvidia,xcvr-hsslew = <12>;
+ nvidia,has-utmi-pad-registers;
status = "disabled";
};
@@ -862,7 +863,7 @@
<&tegra_car TEGRA124_CLK_PLL_U>,
<&tegra_car TEGRA124_CLK_USBD>;
clock-names = "reg", "pll_u", "utmi-pads";
- resets = <&tegra_car 22>, <&tegra_car 22>;
+ resets = <&tegra_car 58>, <&tegra_car 22>;
reset-names = "usb", "utmi-pads";
nvidia,hssync-start-delay = <0>;
nvidia,idle-wait-delay = <17>;
@@ -874,7 +875,6 @@
nvidia,hssquelch-level = <2>;
nvidia,hsdiscon-level = <5>;
nvidia,xcvr-hsslew = <12>;
- nvidia,has-utmi-pad-registers;
status = "disabled";
};
@@ -899,7 +899,7 @@
<&tegra_car TEGRA124_CLK_PLL_U>,
<&tegra_car TEGRA124_CLK_USBD>;
clock-names = "reg", "pll_u", "utmi-pads";
- resets = <&tegra_car 58>, <&tegra_car 22>;
+ resets = <&tegra_car 59>, <&tegra_car 22>;
reset-names = "usb", "utmi-pads";
nvidia,hssync-start-delay = <0>;
nvidia,idle-wait-delay = <17>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 7a2aeac..107395c 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -191,6 +191,7 @@
compatible = "arm,cortex-a15-pmu";
interrupts = <0 68 4>,
<0 69 4>;
+ interrupt-affinity = <&cpu0>, <&cpu1>;
};
oscclk6a: oscclk6a {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
index 23662b5..d949fac 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
@@ -33,28 +33,28 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu@0 {
+ A9_0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0>;
next-level-cache = <&L2>;
};
- cpu@1 {
+ A9_1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <1>;
next-level-cache = <&L2>;
};
- cpu@2 {
+ A9_2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <2>;
next-level-cache = <&L2>;
};
- cpu@3 {
+ A9_3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <3>;
@@ -170,6 +170,7 @@
compatible = "arm,pl310-cache";
reg = <0x1e00a000 0x1000>;
interrupts = <0 43 4>;
+ cache-unified;
cache-level = <2>;
arm,data-latency = <1 1 1>;
arm,tag-latency = <1 1 1>;
@@ -181,6 +182,8 @@
<0 61 4>,
<0 62 4>,
<0 63 4>;
+ interrupt-affinity = <&A9_0>, <&A9_1>, <&A9_2>, <&A9_3>;
+
};
dcc {
diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi
index a5cd2ed..9ea54b3 100644
--- a/arch/arm/boot/dts/zynq-7000.dtsi
+++ b/arch/arm/boot/dts/zynq-7000.dtsi
@@ -193,7 +193,7 @@
};
gem0: ethernet@e000b000 {
- compatible = "cdns,gem";
+ compatible = "cdns,zynq-gem";
reg = <0xe000b000 0x1000>;
status = "disabled";
interrupts = <0 22 4>;
@@ -204,7 +204,7 @@
};
gem1: ethernet@e000c000 {
- compatible = "cdns,gem";
+ compatible = "cdns,zynq-gem";
reg = <0xe000c000 0x1000>;
status = "disabled";
interrupts = <0 45 4>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index ab86655..fbbb191 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -39,11 +39,14 @@ CONFIG_ARCH_HIP04=y
CONFIG_ARCH_KEYSTONE=y
CONFIG_ARCH_MESON=y
CONFIG_ARCH_MXC=y
+CONFIG_SOC_IMX50=y
CONFIG_SOC_IMX51=y
CONFIG_SOC_IMX53=y
CONFIG_SOC_IMX6Q=y
CONFIG_SOC_IMX6SL=y
+CONFIG_SOC_IMX6SX=y
CONFIG_SOC_VF610=y
+CONFIG_SOC_LS1021A=y
CONFIG_ARCH_OMAP3=y
CONFIG_ARCH_OMAP4=y
CONFIG_SOC_OMAP5=y
@@ -426,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y
CONFIG_USB_EHCI_TEGRA=y
CONFIG_USB_EHCI_HCD_STI=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_ISP1760=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_STI=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 9ff7b54..3743ca2 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -393,7 +393,7 @@ CONFIG_TI_EDMA=y
CONFIG_DMA_OMAP=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXTCON=m
-CONFIG_EXTCON_GPIO=m
+CONFIG_EXTCON_USB_GPIO=m
CONFIG_EXTCON_PALMAS=m
CONFIG_TI_EMIF=m
CONFIG_PWM=y
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index d2f81e6..6c2327e 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -81,7 +81,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
index 8e3fcb9..2ef282f 100644
--- a/arch/arm/include/asm/dma-iommu.h
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -25,7 +25,7 @@ struct dma_iommu_mapping {
};
struct dma_iommu_mapping *
-arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size);
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size);
void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index db58deb..1b7677d 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -336,6 +336,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
#define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE)
#define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED)
#define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC)
+#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE)
#define iounmap __arm_iounmap
/*
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 2f7e6ff..0b579b2 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
unsigned long mfn);
+unsigned long xen_get_swiotlb_free_pages(unsigned int order);
#endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index f8ccc21..4e7f40c 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -33,7 +33,9 @@ ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
disable_irq @ disable interrupts
- ldr r1, [tsk, #TI_FLAGS]
+ ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
+ tst r1, #_TIF_SYSCALL_WORK
+ bne __sys_trace_return
tst r1, #_TIF_WORK_MASK
bne fast_work_pending
asm_trace_hardirqs_on
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 91c7ba1..3b8c283 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -303,9 +303,15 @@ static int probe_current_pmu(struct arm_pmu *pmu)
static int of_pmu_irq_cfg(struct platform_device *pdev)
{
- int i;
- int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+ int i, irq;
+ int *irqs;
+ /* Don't bother with PPIs; they're already affine */
+ irq = platform_get_irq(pdev, 0);
+ if (irq >= 0 && irq_is_percpu(irq))
+ return 0;
+
+ irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
if (!irqs)
return -ENOMEM;
@@ -317,7 +323,7 @@ static int of_pmu_irq_cfg(struct platform_device *pdev)
i);
if (!dn) {
pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
- of_node_full_name(dn), i);
+ of_node_full_name(pdev->dev.of_node), i);
break;
}
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index acd5b56..5f5cd56 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -159,6 +159,8 @@ extern void exynos_enter_aftr(void);
extern struct cpuidle_exynos_data cpuidle_coupled_exynos_data;
+extern void exynos_set_delayed_reset_assertion(bool enable);
+
extern void s5p_init_cpu(void __iomem *cpuid_addr);
extern unsigned int samsung_rev(void);
extern void __iomem *cpu_boot_reg_base(void);
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index bcde0dd..5917a30 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -167,6 +167,33 @@ static void __init exynos_init_io(void)
}
/*
+ * Set or clear the USE_DELAYED_RESET_ASSERTION option. Used by smp code
+ * and suspend.
+ *
+ * This is necessary only on Exynos4 SoCs. When system is running
+ * USE_DELAYED_RESET_ASSERTION should be set so the ARM CLK clock down
+ * feature could properly detect global idle state when secondary CPU is
+ * powered down.
+ *
+ * However this should not be set when such system is going into suspend.
+ */
+void exynos_set_delayed_reset_assertion(bool enable)
+{
+ if (of_machine_is_compatible("samsung,exynos4")) {
+ unsigned int tmp, core_id;
+
+ for (core_id = 0; core_id < num_possible_cpus(); core_id++) {
+ tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id));
+ if (enable)
+ tmp |= S5P_USE_DELAYED_RESET_ASSERTION;
+ else
+ tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION);
+ pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id));
+ }
+ }
+}
+
+/*
* Apparently, these SoCs are not able to wake-up from suspend using
* the PMU. Too bad. Should they suddenly become capable of such a
* feat, the matches below should be moved to suspend.c.
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
index ebd135b..a825bca 100644
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -34,30 +34,6 @@
extern void exynos4_secondary_startup(void);
-/*
- * Set or clear the USE_DELAYED_RESET_ASSERTION option, set on Exynos4 SoCs
- * during hot-(un)plugging CPUx.
- *
- * The feature can be cleared safely during first boot of secondary CPU.
- *
- * Exynos4 SoCs require setting USE_DELAYED_RESET_ASSERTION during powering
- * down a CPU so the CPU idle clock down feature could properly detect global
- * idle state when CPUx is off.
- */
-static void exynos_set_delayed_reset_assertion(u32 core_id, bool enable)
-{
- if (soc_is_exynos4()) {
- unsigned int tmp;
-
- tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id));
- if (enable)
- tmp |= S5P_USE_DELAYED_RESET_ASSERTION;
- else
- tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION);
- pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id));
- }
-}
-
#ifdef CONFIG_HOTPLUG_CPU
static inline void cpu_leave_lowpower(u32 core_id)
{
@@ -73,8 +49,6 @@ static inline void cpu_leave_lowpower(u32 core_id)
: "=&r" (v)
: "Ir" (CR_C), "Ir" (0x40)
: "cc");
-
- exynos_set_delayed_reset_assertion(core_id, false);
}
static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
@@ -87,14 +61,6 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
/* Turn the CPU off on next WFI instruction. */
exynos_cpu_power_down(core_id);
- /*
- * Exynos4 SoCs require setting
- * USE_DELAYED_RESET_ASSERTION so the CPU idle
- * clock down feature could properly detect
- * global idle state when CPUx is off.
- */
- exynos_set_delayed_reset_assertion(core_id, true);
-
wfi();
if (pen_release == core_id) {
@@ -371,9 +337,6 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
udelay(10);
}
- /* No harm if this is called during first boot of secondary CPU */
- exynos_set_delayed_reset_assertion(core_id, false);
-
/*
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
@@ -420,6 +383,8 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)
exynos_sysram_init();
+ exynos_set_delayed_reset_assertion(true);
+
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(scu_base_addr());
diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index cbe56b3..a968653 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -188,7 +188,7 @@ no_clk:
args.np = np;
args.args_count = 0;
child_domain = of_genpd_get_from_provider(&args);
- if (!child_domain)
+ if (IS_ERR(child_domain))
continue;
if (of_parse_phandle_with_args(np, "power-domains",
@@ -196,7 +196,7 @@ no_clk:
continue;
parent_domain = of_genpd_get_from_provider(&args);
- if (!parent_domain)
+ if (IS_ERR(parent_domain))
continue;
if (pm_genpd_add_subdomain(parent_domain, child_domain))
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 3e6aea7..c0b6dcc 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -342,6 +342,8 @@ static void exynos_pm_enter_sleep_mode(void)
static void exynos_pm_prepare(void)
{
+ exynos_set_delayed_reset_assertion(false);
+
/* Set wake-up mask registers */
exynos_pm_set_wakeup_mask();
@@ -482,6 +484,7 @@ early_wakeup:
/* Clear SLEEP mode set in INFORM1 */
pmu_raw_writel(0x0, S5P_INFORM1);
+ exynos_set_delayed_reset_assertion(true);
}
static void exynos3250_pm_resume(void)
@@ -723,8 +726,10 @@ void __init exynos_pm_init(void)
return;
}
- if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL)))
+ if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+ return;
+ }
pm_data = (const struct exynos_pm_data *) match->data;
diff --git a/arch/arm/mach-gemini/common.h b/arch/arm/mach-gemini/common.h
index 38a4526..dd88369 100644
--- a/arch/arm/mach-gemini/common.h
+++ b/arch/arm/mach-gemini/common.h
@@ -12,6 +12,8 @@
#ifndef __GEMINI_COMMON_H__
#define __GEMINI_COMMON_H__
+#include <linux/reboot.h>
+
struct mtd_partition;
extern void gemini_map_io(void);
@@ -26,6 +28,6 @@ extern int platform_register_pflash(unsigned int size,
struct mtd_partition *parts,
unsigned int nr_parts);
-extern void gemini_restart(char mode, const char *cmd);
+extern void gemini_restart(enum reboot_mode mode, const char *cmd);
#endif /* __GEMINI_COMMON_H__ */
diff --git a/arch/arm/mach-gemini/reset.c b/arch/arm/mach-gemini/reset.c
index b266597..21a6d6d 100644
--- a/arch/arm/mach-gemini/reset.c
+++ b/arch/arm/mach-gemini/reset.c
@@ -14,7 +14,9 @@
#include <mach/hardware.h>
#include <mach/global_reg.h>
-void gemini_restart(char mode, const char *cmd)
+#include "common.h"
+
+void gemini_restart(enum reboot_mode mode, const char *cmd)
{
__raw_writel(RESET_GLOBAL | RESET_CPU1,
IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET);
diff --git a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
index fb8d4a2..a5edd7d 100644
--- a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
+++ b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010 Pengutronix, Wolfram Sang <w.sang@pengutronix.de>
+ * Copyright (C) 2010 Pengutronix, Wolfram Sang <kernel@pengutronix.de>
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 4d60005..6d0893a 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void)
struct device_node *np;
np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc");
- if (WARN_ON(!np ||
- !of_find_property(np, "interrupt-controller", NULL)))
- pr_warn("Outdated DT detected, system is about to crash!!!\n");
+ if (WARN_ON(!np))
+ return;
+
+ if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
+ pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+
+ /* map GPC, so that at least CPUidle and WARs keep working */
+ gpc_base = of_iomap(np, 0);
+ }
}
#ifdef CONFIG_PM_GENERIC_DOMAINS
@@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev)
struct regulator *pu_reg;
int ret;
+ /* bail out if DT too old and doesn't provide the necessary info */
+ if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells"))
+ return 0;
+
pu_reg = devm_regulator_get_optional(&pdev->dev, "pu");
if (PTR_ERR(pu_reg) == -ENODEV)
pu_reg = NULL;
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 355b089..752969f 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -171,6 +171,12 @@
*/
#define LINKS_PER_OCP_IF 2
+/*
+ * Address offset (in bytes) between the reset control and the reset
+ * status registers: 4 bytes on OMAP4
+ */
+#define OMAP4_RST_CTRL_ST_OFFSET 4
+
/**
* struct omap_hwmod_soc_ops - fn ptrs for some SoC-specific operations
* @enable_module: function to enable a module (via MODULEMODE)
@@ -3016,10 +3022,12 @@ static int _omap4_deassert_hardreset(struct omap_hwmod *oh,
if (ohri->st_shift)
pr_err("omap_hwmod: %s: %s: hwmod data error: OMAP4 does not support st_shift\n",
oh->name, ohri->name);
- return omap_prm_deassert_hardreset(ohri->rst_shift, 0,
+ return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->rst_shift,
oh->clkdm->pwrdm.ptr->prcm_partition,
oh->clkdm->pwrdm.ptr->prcm_offs,
- oh->prcm.omap4.rstctrl_offs, 0);
+ oh->prcm.omap4.rstctrl_offs,
+ oh->prcm.omap4.rstctrl_offs +
+ OMAP4_RST_CTRL_ST_OFFSET);
}
/**
@@ -3048,27 +3056,6 @@ static int _omap4_is_hardreset_asserted(struct omap_hwmod *oh,
}
/**
- * _am33xx_assert_hardreset - call AM33XX PRM hardreset fn with hwmod args
- * @oh: struct omap_hwmod * to assert hardreset
- * @ohri: hardreset line data
- *
- * Call am33xx_prminst_assert_hardreset() with parameters extracted
- * from the hwmod @oh and the hardreset line data @ohri. Only
- * intended for use as an soc_ops function pointer. Passes along the
- * return value from am33xx_prminst_assert_hardreset(). XXX This
- * function is scheduled for removal when the PRM code is moved into
- * drivers/.
- */
-static int _am33xx_assert_hardreset(struct omap_hwmod *oh,
- struct omap_hwmod_rst_info *ohri)
-
-{
- return omap_prm_assert_hardreset(ohri->rst_shift, 0,
- oh->clkdm->pwrdm.ptr->prcm_offs,
- oh->prcm.omap4.rstctrl_offs);
-}
-
-/**
* _am33xx_deassert_hardreset - call AM33XX PRM hardreset fn with hwmod args
* @oh: struct omap_hwmod * to deassert hardreset
* @ohri: hardreset line data
@@ -3083,32 +3070,13 @@ static int _am33xx_assert_hardreset(struct omap_hwmod *oh,
static int _am33xx_deassert_hardreset(struct omap_hwmod *oh,
struct omap_hwmod_rst_info *ohri)
{
- return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift, 0,
+ return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift,
+ oh->clkdm->pwrdm.ptr->prcm_partition,
oh->clkdm->pwrdm.ptr->prcm_offs,
oh->prcm.omap4.rstctrl_offs,
oh->prcm.omap4.rstst_offs);
}
-/**
- * _am33xx_is_hardreset_asserted - call AM33XX PRM hardreset fn with hwmod args
- * @oh: struct omap_hwmod * to test hardreset
- * @ohri: hardreset line data
- *
- * Call am33xx_prminst_is_hardreset_asserted() with parameters
- * extracted from the hwmod @oh and the hardreset line data @ohri.
- * Only intended for use as an soc_ops function pointer. Passes along
- * the return value from am33xx_prminst_is_hardreset_asserted(). XXX
- * This function is scheduled for removal when the PRM code is moved
- * into drivers/.
- */
-static int _am33xx_is_hardreset_asserted(struct omap_hwmod *oh,
- struct omap_hwmod_rst_info *ohri)
-{
- return omap_prm_is_hardreset_asserted(ohri->rst_shift, 0,
- oh->clkdm->pwrdm.ptr->prcm_offs,
- oh->prcm.omap4.rstctrl_offs);
-}
-
/* Public functions */
u32 omap_hwmod_read(struct omap_hwmod *oh, u16 reg_offs)
@@ -3908,21 +3876,13 @@ void __init omap_hwmod_init(void)
soc_ops.init_clkdm = _init_clkdm;
soc_ops.update_context_lost = _omap4_update_context_lost;
soc_ops.get_context_lost = _omap4_get_context_lost;
- } else if (soc_is_am43xx()) {
+ } else if (cpu_is_ti816x() || soc_is_am33xx() || soc_is_am43xx()) {
soc_ops.enable_module = _omap4_enable_module;
soc_ops.disable_module = _omap4_disable_module;
soc_ops.wait_target_ready = _omap4_wait_target_ready;
soc_ops.assert_hardreset = _omap4_assert_hardreset;
- soc_ops.deassert_hardreset = _omap4_deassert_hardreset;
- soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted;
- soc_ops.init_clkdm = _init_clkdm;
- } else if (cpu_is_ti816x() || soc_is_am33xx()) {
- soc_ops.enable_module = _omap4_enable_module;
- soc_ops.disable_module = _omap4_disable_module;
- soc_ops.wait_target_ready = _omap4_wait_target_ready;
- soc_ops.assert_hardreset = _am33xx_assert_hardreset;
soc_ops.deassert_hardreset = _am33xx_deassert_hardreset;
- soc_ops.is_hardreset_asserted = _am33xx_is_hardreset_asserted;
+ soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted;
soc_ops.init_clkdm = _init_clkdm;
} else {
WARN(1, "omap_hwmod: unknown SoC type\n");
diff --git a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
index e222314..17e8004 100644
--- a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
@@ -544,6 +544,44 @@ static struct omap_hwmod am43xx_hdq1w_hwmod = {
},
};
+static struct omap_hwmod_class_sysconfig am43xx_vpfe_sysc = {
+ .rev_offs = 0x0,
+ .sysc_offs = 0x104,
+ .sysc_flags = SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE,
+ .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+ MSTANDBY_FORCE | MSTANDBY_SMART | MSTANDBY_NO),
+ .sysc_fields = &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class am43xx_vpfe_hwmod_class = {
+ .name = "vpfe",
+ .sysc = &am43xx_vpfe_sysc,
+};
+
+static struct omap_hwmod am43xx_vpfe0_hwmod = {
+ .name = "vpfe0",
+ .class = &am43xx_vpfe_hwmod_class,
+ .clkdm_name = "l3s_clkdm",
+ .prcm = {
+ .omap4 = {
+ .modulemode = MODULEMODE_SWCTRL,
+ .clkctrl_offs = AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET,
+ },
+ },
+};
+
+static struct omap_hwmod am43xx_vpfe1_hwmod = {
+ .name = "vpfe1",
+ .class = &am43xx_vpfe_hwmod_class,
+ .clkdm_name = "l3s_clkdm",
+ .prcm = {
+ .omap4 = {
+ .modulemode = MODULEMODE_SWCTRL,
+ .clkctrl_offs = AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET,
+ },
+ },
+};
+
/* Interfaces */
static struct omap_hwmod_ocp_if am43xx_l3_main__l4_hs = {
.master = &am33xx_l3_main_hwmod,
@@ -825,6 +863,34 @@ static struct omap_hwmod_ocp_if am43xx_l4_ls__hdq1w = {
.user = OCP_USER_MPU | OCP_USER_SDMA,
};
+static struct omap_hwmod_ocp_if am43xx_l3__vpfe0 = {
+ .master = &am43xx_vpfe0_hwmod,
+ .slave = &am33xx_l3_main_hwmod,
+ .clk = "l3_gclk",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l3__vpfe1 = {
+ .master = &am43xx_vpfe1_hwmod,
+ .slave = &am33xx_l3_main_hwmod,
+ .clk = "l3_gclk",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe0 = {
+ .master = &am33xx_l4_ls_hwmod,
+ .slave = &am43xx_vpfe0_hwmod,
+ .clk = "l4ls_gclk",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe1 = {
+ .master = &am33xx_l4_ls_hwmod,
+ .slave = &am43xx_vpfe1_hwmod,
+ .clk = "l4ls_gclk",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
&am33xx_l4_wkup__synctimer,
&am43xx_l4_ls__timer8,
@@ -925,6 +991,10 @@ static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
&am43xx_l4_ls__dss_dispc,
&am43xx_l4_ls__dss_rfbi,
&am43xx_l4_ls__hdq1w,
+ &am43xx_l3__vpfe0,
+ &am43xx_l3__vpfe1,
+ &am43xx_l4_ls__vpfe0,
+ &am43xx_l4_ls__vpfe1,
NULL,
};
diff --git a/arch/arm/mach-omap2/prcm43xx.h b/arch/arm/mach-omap2/prcm43xx.h
index 48df3b5..d026199 100644
--- a/arch/arm/mach-omap2/prcm43xx.h
+++ b/arch/arm/mach-omap2/prcm43xx.h
@@ -144,5 +144,6 @@
#define AM43XX_CM_PER_USBPHYOCP2SCP1_CLKCTRL_OFFSET 0x05C0
#define AM43XX_CM_PER_DSS_CLKCTRL_OFFSET 0x0a20
#define AM43XX_CM_PER_HDQ1W_CLKCTRL_OFFSET 0x04a0
-
+#define AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET 0x0068
+#define AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET 0x0070
#endif
diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h
index cbefbd7..661d753 100644
--- a/arch/arm/mach-omap2/prm-regbits-34xx.h
+++ b/arch/arm/mach-omap2/prm-regbits-34xx.h
@@ -112,6 +112,7 @@
#define OMAP3430_VC_CMD_ONLP_SHIFT 16
#define OMAP3430_VC_CMD_RET_SHIFT 8
#define OMAP3430_VC_CMD_OFF_SHIFT 0
+#define OMAP3430_SREN_MASK (1 << 4)
#define OMAP3430_HSEN_MASK (1 << 3)
#define OMAP3430_MCODE_MASK (0x7 << 0)
#define OMAP3430_VALID_MASK (1 << 24)
diff --git a/arch/arm/mach-omap2/prm-regbits-44xx.h b/arch/arm/mach-omap2/prm-regbits-44xx.h
index b1c7a33..e794828 100644
--- a/arch/arm/mach-omap2/prm-regbits-44xx.h
+++ b/arch/arm/mach-omap2/prm-regbits-44xx.h
@@ -35,6 +35,7 @@
#define OMAP4430_GLOBAL_WARM_SW_RST_SHIFT 1
#define OMAP4430_GLOBAL_WUEN_MASK (1 << 16)
#define OMAP4430_HSMCODE_MASK (0x7 << 0)
+#define OMAP4430_SRMODEEN_MASK (1 << 4)
#define OMAP4430_HSMODEEN_MASK (1 << 3)
#define OMAP4430_HSSCLL_SHIFT 24
#define OMAP4430_ICEPICK_RST_SHIFT 9
diff --git a/arch/arm/mach-omap2/prminst44xx.c b/arch/arm/mach-omap2/prminst44xx.c
index c4859c4..d0b15db 100644
--- a/arch/arm/mach-omap2/prminst44xx.c
+++ b/arch/arm/mach-omap2/prminst44xx.c
@@ -87,12 +87,6 @@ u32 omap4_prminst_rmw_inst_reg_bits(u32 mask, u32 bits, u8 part, s16 inst,
return v;
}
-/*
- * Address offset (in bytes) between the reset control and the reset
- * status registers: 4 bytes on OMAP4
- */
-#define OMAP4_RST_CTRL_ST_OFFSET 4
-
/**
* omap4_prminst_is_hardreset_asserted - read the HW reset line state of
* submodules contained in the hwmod module
@@ -141,11 +135,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst,
* omap4_prminst_deassert_hardreset - deassert a submodule hardreset line and
* wait
* @shift: register bit shift corresponding to the reset line to deassert
- * @st_shift: status bit offset, not used for OMAP4+
+ * @st_shift: status bit offset corresponding to the reset line
* @part: PRM partition
* @inst: PRM instance offset
* @rstctrl_offs: reset register offset
- * @st_offs: reset status register offset, not used for OMAP4+
+ * @rstst_offs: reset status register offset
*
* Some IPs like dsp, ipu or iva contain processors that require an HW
* reset line to be asserted / deasserted in order to fully enable the
@@ -157,11 +151,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst,
* of reset, or -EBUSY if the submodule did not exit reset promptly.
*/
int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst,
- u16 rstctrl_offs, u16 st_offs)
+ u16 rstctrl_offs, u16 rstst_offs)
{
int c;
u32 mask = 1 << shift;
- u16 rstst_offs = rstctrl_offs + OMAP4_RST_CTRL_ST_OFFSET;
+ u32 st_mask = 1 << st_shift;
/* Check the current status to avoid de-asserting the line twice */
if (omap4_prminst_is_hardreset_asserted(shift, part, inst,
@@ -169,13 +163,13 @@ int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst,
return -EEXIST;
/* Clear the reset status by writing 1 to the status bit */
- omap4_prminst_rmw_inst_reg_bits(0xffffffff, mask, part, inst,
+ omap4_prminst_rmw_inst_reg_bits(0xffffffff, st_mask, part, inst,
rstst_offs);
/* de-assert the reset control line */
omap4_prminst_rmw_inst_reg_bits(mask, 0, part, inst, rstctrl_offs);
/* wait the status to be set */
- omap_test_timeout(omap4_prminst_is_hardreset_asserted(shift, part, inst,
- rstst_offs),
+ omap_test_timeout(omap4_prminst_is_hardreset_asserted(st_shift, part,
+ inst, rstst_offs),
MAX_MODULE_HARDRESET_WAIT, c);
return (c == MAX_MODULE_HARDRESET_WAIT) ? -EBUSY : 0;
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index cef67af..cac46d8 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -298,14 +298,11 @@ static int __init omap_dm_timer_init_one(struct omap_dm_timer *timer,
if (IS_ERR(src))
return PTR_ERR(src);
- if (clk_get_parent(timer->fclk) != src) {
- r = clk_set_parent(timer->fclk, src);
- if (r < 0) {
- pr_warn("%s: %s cannot set source\n", __func__,
- oh->name);
- clk_put(src);
- return r;
- }
+ r = clk_set_parent(timer->fclk, src);
+ if (r < 0) {
+ pr_warn("%s: %s cannot set source\n", __func__, oh->name);
+ clk_put(src);
+ return r;
}
clk_put(src);
diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c
index be9ef83..076fd20 100644
--- a/arch/arm/mach-omap2/vc.c
+++ b/arch/arm/mach-omap2/vc.c
@@ -316,7 +316,8 @@ static void __init omap3_vc_init_pmic_signaling(struct voltagedomain *voltdm)
* idle. And we can also scale voltages to zero for off-idle.
* Note that no actual voltage scaling during off-idle will
* happen unless the board specific twl4030 PMIC scripts are
- * loaded.
+ * loaded. See also omap_vc_i2c_init for comments regarding
+ * erratum i531.
*/
val = voltdm->read(OMAP3_PRM_VOLTCTRL_OFFSET);
if (!(val & OMAP3430_PRM_VOLTCTRL_SEL_OFF)) {
@@ -704,9 +705,16 @@ static void __init omap_vc_i2c_init(struct voltagedomain *voltdm)
return;
}
+ /*
+ * Note that for omap3 OMAP3430_SREN_MASK clears SREN to work around
+ * erratum i531 "Extra Power Consumed When Repeated Start Operation
+ * Mode Is Enabled on I2C Interface Dedicated for Smart Reflex (I2C4)".
+ * Otherwise I2C4 eventually leads into about 23mW extra power being
+ * consumed even during off idle using VMODE.
+ */
i2c_high_speed = voltdm->pmic->i2c_high_speed;
if (i2c_high_speed)
- voltdm->rmw(vc->common->i2c_cfg_hsen_mask,
+ voltdm->rmw(vc->common->i2c_cfg_clear_mask,
vc->common->i2c_cfg_hsen_mask,
vc->common->i2c_cfg_reg);
diff --git a/arch/arm/mach-omap2/vc.h b/arch/arm/mach-omap2/vc.h
index cdbdd78..89b83b7 100644
--- a/arch/arm/mach-omap2/vc.h
+++ b/arch/arm/mach-omap2/vc.h
@@ -34,6 +34,7 @@ struct voltagedomain;
* @cmd_ret_shift: RET field shift in PRM_VC_CMD_VAL_* register
* @cmd_off_shift: OFF field shift in PRM_VC_CMD_VAL_* register
* @i2c_cfg_reg: I2C configuration register offset
+ * @i2c_cfg_clear_mask: high-speed mode bit clear mask in I2C config register
* @i2c_cfg_hsen_mask: high-speed mode bit field mask in I2C config register
* @i2c_mcode_mask: MCODE field mask for I2C config register
*
@@ -52,6 +53,7 @@ struct omap_vc_common {
u8 cmd_ret_shift;
u8 cmd_off_shift;
u8 i2c_cfg_reg;
+ u8 i2c_cfg_clear_mask;
u8 i2c_cfg_hsen_mask;
u8 i2c_mcode_mask;
};
diff --git a/arch/arm/mach-omap2/vc3xxx_data.c b/arch/arm/mach-omap2/vc3xxx_data.c
index 75bc4aa..71d74c9 100644
--- a/arch/arm/mach-omap2/vc3xxx_data.c
+++ b/arch/arm/mach-omap2/vc3xxx_data.c
@@ -40,6 +40,7 @@ static struct omap_vc_common omap3_vc_common = {
.cmd_onlp_shift = OMAP3430_VC_CMD_ONLP_SHIFT,
.cmd_ret_shift = OMAP3430_VC_CMD_RET_SHIFT,
.cmd_off_shift = OMAP3430_VC_CMD_OFF_SHIFT,
+ .i2c_cfg_clear_mask = OMAP3430_SREN_MASK | OMAP3430_HSEN_MASK,
.i2c_cfg_hsen_mask = OMAP3430_HSEN_MASK,
.i2c_cfg_reg = OMAP3_PRM_VC_I2C_CFG_OFFSET,
.i2c_mcode_mask = OMAP3430_MCODE_MASK,
diff --git a/arch/arm/mach-omap2/vc44xx_data.c b/arch/arm/mach-omap2/vc44xx_data.c
index 085e5d6..2abd5fa 100644
--- a/arch/arm/mach-omap2/vc44xx_data.c
+++ b/arch/arm/mach-omap2/vc44xx_data.c
@@ -42,6 +42,7 @@ static const struct omap_vc_common omap4_vc_common = {
.cmd_ret_shift = OMAP4430_RET_SHIFT,
.cmd_off_shift = OMAP4430_OFF_SHIFT,
.i2c_cfg_reg = OMAP4_PRM_VC_CFG_I2C_MODE_OFFSET,
+ .i2c_cfg_clear_mask = OMAP4430_SRMODEEN_MASK | OMAP4430_HSMODEEN_MASK,
.i2c_cfg_hsen_mask = OMAP4430_HSMODEEN_MASK,
.i2c_mcode_mask = OMAP4430_HSMCODE_MASK,
};
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 8896e71..f096836 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -691,4 +691,13 @@ config SHARPSL_PM_MAX1111
config PXA310_ULPI
bool
+config PXA_SYSTEMS_CPLDS
+ tristate "Motherboard cplds"
+ default ARCH_LUBBOCK || MACH_MAINSTONE
+ help
+ This driver supports the Lubbock and Mainstone multifunction chip
+ found on the pxa25x development platform system (Lubbock) and pxa27x
+ development platform system (Mainstone). This IO board supports the
+ interrupts handling, ethernet controller, flash chips, etc ...
+
endif
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index eb0bf76..4087d33 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -90,4 +90,5 @@ obj-$(CONFIG_MACH_RAUMFELD_CONNECTOR) += raumfeld.o
obj-$(CONFIG_MACH_RAUMFELD_SPEAKER) += raumfeld.o
obj-$(CONFIG_MACH_ZIPIT2) += z2.o
+obj-$(CONFIG_PXA_SYSTEMS_CPLDS) += pxa_cplds_irqs.o
obj-$(CONFIG_TOSA_BT) += tosa-bt.o
diff --git a/arch/arm/mach-pxa/include/mach/lubbock.h b/arch/arm/mach-pxa/include/mach/lubbock.h
index 958cd6af..1eecf79 100644
--- a/arch/arm/mach-pxa/include/mach/lubbock.h
+++ b/arch/arm/mach-pxa/include/mach/lubbock.h
@@ -37,7 +37,9 @@
#define LUB_GP __LUB_REG(LUBBOCK_FPGA_PHYS + 0x100)
/* Board specific IRQs */
-#define LUBBOCK_IRQ(x) (IRQ_BOARD_START + (x))
+#define LUBBOCK_NR_IRQS IRQ_BOARD_START
+
+#define LUBBOCK_IRQ(x) (LUBBOCK_NR_IRQS + (x))
#define LUBBOCK_SD_IRQ LUBBOCK_IRQ(0)
#define LUBBOCK_SA1111_IRQ LUBBOCK_IRQ(1)
#define LUBBOCK_USB_IRQ LUBBOCK_IRQ(2) /* usb connect */
@@ -47,8 +49,7 @@
#define LUBBOCK_USB_DISC_IRQ LUBBOCK_IRQ(6) /* usb disconnect */
#define LUBBOCK_LAST_IRQ LUBBOCK_IRQ(6)
-#define LUBBOCK_SA1111_IRQ_BASE (IRQ_BOARD_START + 16)
-#define LUBBOCK_NR_IRQS (IRQ_BOARD_START + 16 + 55)
+#define LUBBOCK_SA1111_IRQ_BASE (LUBBOCK_NR_IRQS + 32)
#ifndef __ASSEMBLY__
extern void lubbock_set_misc_wr(unsigned int mask, unsigned int set);
diff --git a/arch/arm/mach-pxa/include/mach/mainstone.h b/arch/arm/mach-pxa/include/mach/mainstone.h
index 1bfc4e8..e82a7d3 100644
--- a/arch/arm/mach-pxa/include/mach/mainstone.h
+++ b/arch/arm/mach-pxa/include/mach/mainstone.h
@@ -120,7 +120,9 @@
#define MST_PCMCIA_PWR_VCC_50 0x4 /* voltage VCC = 5.0V */
/* board specific IRQs */
-#define MAINSTONE_IRQ(x) (IRQ_BOARD_START + (x))
+#define MAINSTONE_NR_IRQS IRQ_BOARD_START
+
+#define MAINSTONE_IRQ(x) (MAINSTONE_NR_IRQS + (x))
#define MAINSTONE_MMC_IRQ MAINSTONE_IRQ(0)
#define MAINSTONE_USIM_IRQ MAINSTONE_IRQ(1)
#define MAINSTONE_USBC_IRQ MAINSTONE_IRQ(2)
@@ -136,6 +138,4 @@
#define MAINSTONE_S1_STSCHG_IRQ MAINSTONE_IRQ(14)
#define MAINSTONE_S1_IRQ MAINSTONE_IRQ(15)
-#define MAINSTONE_NR_IRQS (IRQ_BOARD_START + 16)
-
#endif
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index d8a1be6..4ac9ab8 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -12,6 +12,7 @@
* published by the Free Software Foundation.
*/
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -123,84 +124,6 @@ void lubbock_set_misc_wr(unsigned int mask, unsigned int set)
}
EXPORT_SYMBOL(lubbock_set_misc_wr);
-static unsigned long lubbock_irq_enabled;
-
-static void lubbock_mask_irq(struct irq_data *d)
-{
- int lubbock_irq = (d->irq - LUBBOCK_IRQ(0));
- LUB_IRQ_MASK_EN = (lubbock_irq_enabled &= ~(1 << lubbock_irq));
-}
-
-static void lubbock_unmask_irq(struct irq_data *d)
-{
- int lubbock_irq = (d->irq - LUBBOCK_IRQ(0));
- /* the irq can be acknowledged only if deasserted, so it's done here */
- LUB_IRQ_SET_CLR &= ~(1 << lubbock_irq);
- LUB_IRQ_MASK_EN = (lubbock_irq_enabled |= (1 << lubbock_irq));
-}
-
-static struct irq_chip lubbock_irq_chip = {
- .name = "FPGA",
- .irq_ack = lubbock_mask_irq,
- .irq_mask = lubbock_mask_irq,
- .irq_unmask = lubbock_unmask_irq,
-};
-
-static void lubbock_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
- unsigned long pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled;
- do {
- /* clear our parent irq */
- desc->irq_data.chip->irq_ack(&desc->irq_data);
- if (likely(pending)) {
- irq = LUBBOCK_IRQ(0) + __ffs(pending);
- generic_handle_irq(irq);
- }
- pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled;
- } while (pending);
-}
-
-static void __init lubbock_init_irq(void)
-{
- int irq;
-
- pxa25x_init_irq();
-
- /* setup extra lubbock irqs */
- for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_LAST_IRQ; irq++) {
- irq_set_chip_and_handler(irq, &lubbock_irq_chip,
- handle_level_irq);
- set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
- }
-
- irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), lubbock_irq_handler);
- irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING);
-}
-
-#ifdef CONFIG_PM
-
-static void lubbock_irq_resume(void)
-{
- LUB_IRQ_MASK_EN = lubbock_irq_enabled;
-}
-
-static struct syscore_ops lubbock_irq_syscore_ops = {
- .resume = lubbock_irq_resume,
-};
-
-static int __init lubbock_irq_device_init(void)
-{
- if (machine_is_lubbock()) {
- register_syscore_ops(&lubbock_irq_syscore_ops);
- return 0;
- }
- return -ENODEV;
-}
-
-device_initcall(lubbock_irq_device_init);
-
-#endif
-
static int lubbock_udc_is_connected(void)
{
return (LUB_MISC_RD & (1 << 9)) == 0;
@@ -383,11 +306,38 @@ static struct platform_device lubbock_flash_device[2] = {
},
};
+static struct resource lubbock_cplds_resources[] = {
+ [0] = {
+ .start = LUBBOCK_FPGA_PHYS + 0xc0,
+ .end = LUBBOCK_FPGA_PHYS + 0xe0 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = PXA_GPIO_TO_IRQ(0),
+ .end = PXA_GPIO_TO_IRQ(0),
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
+ },
+ [2] = {
+ .start = LUBBOCK_IRQ(0),
+ .end = LUBBOCK_IRQ(6),
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device lubbock_cplds_device = {
+ .name = "pxa_cplds_irqs",
+ .id = -1,
+ .resource = &lubbock_cplds_resources[0],
+ .num_resources = 3,
+};
+
+
static struct platform_device *devices[] __initdata = {
&sa1111_device,
&smc91x_device,
&lubbock_flash_device[0],
&lubbock_flash_device[1],
+ &lubbock_cplds_device,
};
static struct pxafb_mode_info sharp_lm8v31_mode = {
@@ -648,7 +598,7 @@ MACHINE_START(LUBBOCK, "Intel DBPXA250 Development Platform (aka Lubbock)")
/* Maintainer: MontaVista Software Inc. */
.map_io = lubbock_map_io,
.nr_irqs = LUBBOCK_NR_IRQS,
- .init_irq = lubbock_init_irq,
+ .init_irq = pxa25x_init_irq,
.handle_irq = pxa25x_handle_irq,
.init_time = pxa_timer_init,
.init_machine = lubbock_init,
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index 78b84c0..2c0658c 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -13,6 +13,7 @@
* published by the Free Software Foundation.
*/
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/syscore_ops.h>
@@ -122,92 +123,6 @@ static unsigned long mainstone_pin_config[] = {
GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
};
-static unsigned long mainstone_irq_enabled;
-
-static void mainstone_mask_irq(struct irq_data *d)
-{
- int mainstone_irq = (d->irq - MAINSTONE_IRQ(0));
- MST_INTMSKENA = (mainstone_irq_enabled &= ~(1 << mainstone_irq));
-}
-
-static void mainstone_unmask_irq(struct irq_data *d)
-{
- int mainstone_irq = (d->irq - MAINSTONE_IRQ(0));
- /* the irq can be acknowledged only if deasserted, so it's done here */
- MST_INTSETCLR &= ~(1 << mainstone_irq);
- MST_INTMSKENA = (mainstone_irq_enabled |= (1 << mainstone_irq));
-}
-
-static struct irq_chip mainstone_irq_chip = {
- .name = "FPGA",
- .irq_ack = mainstone_mask_irq,
- .irq_mask = mainstone_mask_irq,
- .irq_unmask = mainstone_unmask_irq,
-};
-
-static void mainstone_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
- unsigned long pending = MST_INTSETCLR & mainstone_irq_enabled;
- do {
- /* clear useless edge notification */
- desc->irq_data.chip->irq_ack(&desc->irq_data);
- if (likely(pending)) {
- irq = MAINSTONE_IRQ(0) + __ffs(pending);
- generic_handle_irq(irq);
- }
- pending = MST_INTSETCLR & mainstone_irq_enabled;
- } while (pending);
-}
-
-static void __init mainstone_init_irq(void)
-{
- int irq;
-
- pxa27x_init_irq();
-
- /* setup extra Mainstone irqs */
- for(irq = MAINSTONE_IRQ(0); irq <= MAINSTONE_IRQ(15); irq++) {
- irq_set_chip_and_handler(irq, &mainstone_irq_chip,
- handle_level_irq);
- if (irq == MAINSTONE_IRQ(10) || irq == MAINSTONE_IRQ(14))
- set_irq_flags(irq, IRQF_VALID | IRQF_PROBE | IRQF_NOAUTOEN);
- else
- set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
- }
- set_irq_flags(MAINSTONE_IRQ(8), 0);
- set_irq_flags(MAINSTONE_IRQ(12), 0);
-
- MST_INTMSKENA = 0;
- MST_INTSETCLR = 0;
-
- irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), mainstone_irq_handler);
- irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING);
-}
-
-#ifdef CONFIG_PM
-
-static void mainstone_irq_resume(void)
-{
- MST_INTMSKENA = mainstone_irq_enabled;
-}
-
-static struct syscore_ops mainstone_irq_syscore_ops = {
- .resume = mainstone_irq_resume,
-};
-
-static int __init mainstone_irq_device_init(void)
-{
- if (machine_is_mainstone())
- register_syscore_ops(&mainstone_irq_syscore_ops);
-
- return 0;
-}
-
-device_initcall(mainstone_irq_device_init);
-
-#endif
-
-
static struct resource smc91x_resources[] = {
[0] = {
.start = (MST_ETH_PHYS + 0x300),
@@ -487,11 +402,37 @@ static struct platform_device mst_gpio_keys_device = {
},
};
+static struct resource mst_cplds_resources[] = {
+ [0] = {
+ .start = MST_FPGA_PHYS + 0xc0,
+ .end = MST_FPGA_PHYS + 0xe0 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = PXA_GPIO_TO_IRQ(0),
+ .end = PXA_GPIO_TO_IRQ(0),
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
+ },
+ [2] = {
+ .start = MAINSTONE_IRQ(0),
+ .end = MAINSTONE_IRQ(15),
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device mst_cplds_device = {
+ .name = "pxa_cplds_irqs",
+ .id = -1,
+ .resource = &mst_cplds_resources[0],
+ .num_resources = 3,
+};
+
static struct platform_device *platform_devices[] __initdata = {
&smc91x_device,
&mst_flash_device[0],
&mst_flash_device[1],
&mst_gpio_keys_device,
+ &mst_cplds_device,
};
static struct pxaohci_platform_data mainstone_ohci_platform_data = {
@@ -718,7 +659,7 @@ MACHINE_START(MAINSTONE, "Intel HCDDBBVA0 Development Platform (aka Mainstone)")
.atag_offset = 0x100, /* BLOB boot parameter setting */
.map_io = mainstone_map_io,
.nr_irqs = MAINSTONE_NR_IRQS,
- .init_irq = mainstone_init_irq,
+ .init_irq = pxa27x_init_irq,
.handle_irq = pxa27x_handle_irq,
.init_time = pxa_timer_init,
.init_machine = mainstone_init,
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
new file mode 100644
index 0000000..2385052
--- /dev/null
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -0,0 +1,200 @@
+/*
+ * Intel Reference Systems cplds
+ *
+ * Copyright (C) 2014 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Cplds motherboard driver, supporting lubbock and mainstone SoC board.
+ */
+
+#include <linux/bitops.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#define FPGA_IRQ_MASK_EN 0x0
+#define FPGA_IRQ_SET_CLR 0x10
+
+#define CPLDS_NB_IRQ 32
+
+struct cplds {
+ void __iomem *base;
+ int irq;
+ unsigned int irq_mask;
+ struct gpio_desc *gpio0;
+ struct irq_domain *irqdomain;
+};
+
+static irqreturn_t cplds_irq_handler(int in_irq, void *d)
+{
+ struct cplds *fpga = d;
+ unsigned long pending;
+ unsigned int bit;
+
+ pending = readl(fpga->base + FPGA_IRQ_SET_CLR) & fpga->irq_mask;
+ for_each_set_bit(bit, &pending, CPLDS_NB_IRQ)
+ generic_handle_irq(irq_find_mapping(fpga->irqdomain, bit));
+
+ return IRQ_HANDLED;
+}
+
+static void cplds_irq_mask_ack(struct irq_data *d)
+{
+ struct cplds *fpga = irq_data_get_irq_chip_data(d);
+ unsigned int cplds_irq = irqd_to_hwirq(d);
+ unsigned int set, bit = BIT(cplds_irq);
+
+ fpga->irq_mask &= ~bit;
+ writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+ set = readl(fpga->base + FPGA_IRQ_SET_CLR);
+ writel(set & ~bit, fpga->base + FPGA_IRQ_SET_CLR);
+}
+
+static void cplds_irq_unmask(struct irq_data *d)
+{
+ struct cplds *fpga = irq_data_get_irq_chip_data(d);
+ unsigned int cplds_irq = irqd_to_hwirq(d);
+ unsigned int bit = BIT(cplds_irq);
+
+ fpga->irq_mask |= bit;
+ writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+}
+
+static struct irq_chip cplds_irq_chip = {
+ .name = "pxa_cplds",
+ .irq_mask_ack = cplds_irq_mask_ack,
+ .irq_unmask = cplds_irq_unmask,
+ .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE,
+};
+
+static int cplds_irq_domain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct cplds *fpga = d->host_data;
+
+ irq_set_chip_and_handler(irq, &cplds_irq_chip, handle_level_irq);
+ irq_set_chip_data(irq, fpga);
+
+ return 0;
+}
+
+static const struct irq_domain_ops cplds_irq_domain_ops = {
+ .xlate = irq_domain_xlate_twocell,
+ .map = cplds_irq_domain_map,
+};
+
+static int cplds_resume(struct platform_device *pdev)
+{
+ struct cplds *fpga = platform_get_drvdata(pdev);
+
+ writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+
+ return 0;
+}
+
+static int cplds_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ struct cplds *fpga;
+ int ret;
+ int base_irq;
+ unsigned long irqflags = 0;
+
+ fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL);
+ if (!fpga)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+ if (res) {
+ fpga->irq = (unsigned int)res->start;
+ irqflags = res->flags;
+ }
+ if (!fpga->irq)
+ return -ENODEV;
+
+ base_irq = platform_get_irq(pdev, 1);
+ if (base_irq < 0)
+ base_irq = 0;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ fpga->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(fpga->base))
+ return PTR_ERR(fpga->base);
+
+ platform_set_drvdata(pdev, fpga);
+
+ writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+ writel(0, fpga->base + FPGA_IRQ_SET_CLR);
+
+ ret = devm_request_irq(&pdev->dev, fpga->irq, cplds_irq_handler,
+ irqflags, dev_name(&pdev->dev), fpga);
+ if (ret == -ENOSYS)
+ return -EPROBE_DEFER;
+
+ if (ret) {
+ dev_err(&pdev->dev, "couldn't request main irq%d: %d\n",
+ fpga->irq, ret);
+ return ret;
+ }
+
+ irq_set_irq_wake(fpga->irq, 1);
+ fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node,
+ CPLDS_NB_IRQ,
+ &cplds_irq_domain_ops, fpga);
+ if (!fpga->irqdomain)
+ return -ENODEV;
+
+ if (base_irq) {
+ ret = irq_create_strict_mappings(fpga->irqdomain, base_irq, 0,
+ CPLDS_NB_IRQ);
+ if (ret) {
+ dev_err(&pdev->dev, "couldn't create the irq mapping %d..%d\n",
+ base_irq, base_irq + CPLDS_NB_IRQ);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int cplds_remove(struct platform_device *pdev)
+{
+ struct cplds *fpga = platform_get_drvdata(pdev);
+
+ irq_set_chip_and_handler(fpga->irq, NULL, NULL);
+
+ return 0;
+}
+
+static const struct of_device_id cplds_id_table[] = {
+ { .compatible = "intel,lubbock-cplds-irqs", },
+ { .compatible = "intel,mainstone-cplds-irqs", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, cplds_id_table);
+
+static struct platform_driver cplds_driver = {
+ .driver = {
+ .name = "pxa_cplds_irqs",
+ .of_match_table = of_match_ptr(cplds_id_table),
+ },
+ .probe = cplds_probe,
+ .remove = cplds_remove,
+ .resume = cplds_resume,
+};
+
+module_platform_driver(cplds_driver);
+
+MODULE_DESCRIPTION("PXA Cplds interrupts driver");
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_LICENSE("GPL");
diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c
index b07d886..b0dcbe2 100644
--- a/arch/arm/mach-rockchip/pm.c
+++ b/arch/arm/mach-rockchip/pm.c
@@ -83,6 +83,13 @@ static void rk3288_slp_mode_set(int level)
SGRF_PCLK_WDT_GATE | SGRF_FAST_BOOT_EN
| SGRF_PCLK_WDT_GATE_WRITE | SGRF_FAST_BOOT_EN_WRITE);
+ /*
+ * The dapswjdp can not auto reset before resume, that cause it may
+ * access some illegal address during resume. Let's disable it before
+ * suspend, and the MASKROM will enable it back.
+ */
+ regmap_write(sgrf_regmap, RK3288_SGRF_CPU_CON0, SGRF_DAPDEVICEEN_WRITE);
+
/* booting address of resuming system is from this register value */
regmap_write(sgrf_regmap, RK3288_SGRF_FAST_BOOT_ADDR,
rk3288_bootram_phy);
diff --git a/arch/arm/mach-rockchip/pm.h b/arch/arm/mach-rockchip/pm.h
index 03ff31d..3e8d39c 100644
--- a/arch/arm/mach-rockchip/pm.h
+++ b/arch/arm/mach-rockchip/pm.h
@@ -55,6 +55,10 @@ static inline void rockchip_suspend_init(void)
#define SGRF_FAST_BOOT_EN BIT(8)
#define SGRF_FAST_BOOT_EN_WRITE BIT(24)
+#define RK3288_SGRF_CPU_CON0 (0x40)
+#define SGRF_DAPDEVICEEN BIT(0)
+#define SGRF_DAPDEVICEEN_WRITE BIT(16)
+
#define RK3288_CRU_MODE_CON 0x50
#define RK3288_CRU_SEL0_CON 0x60
#define RK3288_CRU_SEL1_CON 0x64
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index d360ec0..b6cf3b4 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -30,11 +30,30 @@
#include "pm.h"
#define RK3288_GRF_SOC_CON0 0x244
+#define RK3288_TIMER6_7_PHYS 0xff810000
static void __init rockchip_timer_init(void)
{
if (of_machine_is_compatible("rockchip,rk3288")) {
struct regmap *grf;
+ void __iomem *reg_base;
+
+ /*
+ * Most/all uboot versions for rk3288 don't enable timer7
+ * which is needed for the architected timer to work.
+ * So make sure it is running during early boot.
+ */
+ reg_base = ioremap(RK3288_TIMER6_7_PHYS, SZ_16K);
+ if (reg_base) {
+ writel(0, reg_base + 0x30);
+ writel(0xffffffff, reg_base + 0x20);
+ writel(0xffffffff, reg_base + 0x24);
+ writel(1, reg_base + 0x30);
+ dsb();
+ iounmap(reg_base);
+ } else {
+ pr_err("rockchip: could not map timer7 registers\n");
+ }
/*
* Disable auto jtag/sdmmc switching that causes issues
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 09c5fe3..7e7583d 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1878,7 +1878,7 @@ struct dma_map_ops iommu_coherent_ops = {
* arm_iommu_attach_device function.
*/
struct dma_iommu_mapping *
-arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size)
{
unsigned int bits = size >> PAGE_SHIFT;
unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
@@ -1886,6 +1886,10 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
int extensions = 1;
int err = -ENOMEM;
+ /* currently only 32-bit DMA address space is supported */
+ if (size > DMA_BIT_MASK(32) + 1)
+ return ERR_PTR(-ERANGE);
+
if (!bitmap_size)
return ERR_PTR(-EINVAL);
@@ -2057,13 +2061,6 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
if (!iommu)
return false;
- /*
- * currently arm_iommu_create_mapping() takes a max of size_t
- * for size param. So check this limit for now.
- */
- if (size > SIZE_MAX)
- return false;
-
mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
if (IS_ERR(mapping)) {
pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4e6ef89..7186382 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void)
}
/*
- * Find the first non-section-aligned page, and point
+ * Find the first non-pmd-aligned page, and point
* memblock_limit at it. This relies on rounding the
- * limit down to be section-aligned, which happens at
- * the end of this function.
+ * limit down to be pmd-aligned, which happens at the
+ * end of this function.
*
* With this algorithm, the start or end of almost any
- * bank can be non-section-aligned. The only exception
- * is that the start of the bank 0 must be section-
+ * bank can be non-pmd-aligned. The only exception is
+ * that the start of the bank 0 must be section-
* aligned, since otherwise memory would need to be
* allocated when mapping the start of bank 0, which
* occurs before any free memory is mapped.
*/
if (!memblock_limit) {
- if (!IS_ALIGNED(block_start, SECTION_SIZE))
+ if (!IS_ALIGNED(block_start, PMD_SIZE))
memblock_limit = block_start;
- else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+ else if (!IS_ALIGNED(block_end, PMD_SIZE))
memblock_limit = arm_lowmem_limit;
}
@@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void)
high_memory = __va(arm_lowmem_limit - 1) + 1;
/*
- * Round the memblock limit down to a section size. This
+ * Round the memblock limit down to a pmd size. This
* helps to ensure that we will allocate memory from the
- * last full section, which should be mapped.
+ * last full pmd, which should be mapped.
*/
if (memblock_limit)
- memblock_limit = round_down(memblock_limit, SECTION_SIZE);
+ memblock_limit = round_down(memblock_limit, PMD_SIZE);
if (!memblock_limit)
memblock_limit = arm_lowmem_limit;
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index aa0519e..774ef13 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -22,8 +22,6 @@
*
* These are the low level assembler for performing cache and TLB
* functions on the arm1020.
- *
- * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/init.h>
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index bff4c7f..ae3c27b 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -22,8 +22,6 @@
*
* These are the low level assembler for performing cache and TLB
* functions on the arm1020e.
- *
- * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/init.h>
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ede8c54..32a47cc 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -441,9 +441,6 @@ ENTRY(cpu_arm925_set_pte_ext)
.type __arm925_setup, #function
__arm925_setup:
mov r0, #0
-#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE)
- orr r0,r0,#1 << 7
-#endif
/* Transparent on, D-cache clean & flush mode. See NOTE2 above */
orr r0,r0,#1 << 1 @ transparent mode on
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index e494d6d..92e08bf 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -602,7 +602,6 @@ __\name\()_proc_info:
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
initfn __feroceon_setup, __\name\()_proc_info
- .long __feroceon_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index e1268f9..e0e2358 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -54,6 +54,7 @@
#define SEEN_DATA (1 << (BPF_MEMWORDS + 3))
#define FLAG_NEED_X_RESET (1 << 0)
+#define FLAG_IMM_OVERFLOW (1 << 1)
struct jit_ctx {
const struct bpf_prog *skf;
@@ -293,6 +294,15 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
/* PC in ARM mode == address of the instruction + 8 */
imm = offset - (8 + ctx->idx * 4);
+ if (imm & ~0xfff) {
+ /*
+ * literal pool is too far, signal it into flags. we
+ * can only detect it on the second pass unfortunately.
+ */
+ ctx->flags |= FLAG_IMM_OVERFLOW;
+ return 0;
+ }
+
return imm;
}
@@ -449,10 +459,21 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
return;
}
#endif
- if (rm != ARM_R0)
- emit(ARM_MOV_R(ARM_R0, rm), ctx);
+
+ /*
+ * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
+ * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into
+ * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm
+ * before using it as a source for ARM_R1.
+ *
+ * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is
+ * ARM_R5 (r_X) so there is no particular register overlap
+ * issues.
+ */
if (rn != ARM_R1)
emit(ARM_MOV_R(ARM_R1, rn), ctx);
+ if (rm != ARM_R0)
+ emit(ARM_MOV_R(ARM_R0, rm), ctx);
ctx->seen |= SEEN_CALL;
emit_mov_i(ARM_R3, (u32)jit_udiv, ctx);
@@ -855,6 +876,14 @@ b_epilogue:
default:
return -1;
}
+
+ if (ctx->flags & FLAG_IMM_OVERFLOW)
+ /*
+ * this instruction generated an overflow when
+ * trying to access the literal pool, so
+ * delegate this filter to the kernel interpreter.
+ */
+ return -1;
}
/* compute offsets only during the first pass */
@@ -917,7 +946,14 @@ void bpf_jit_compile(struct bpf_prog *fp)
ctx.idx = 0;
build_prologue(&ctx);
- build_body(&ctx);
+ if (build_body(&ctx) < 0) {
+#if __LINUX_ARM_ARCH__ < 7
+ if (ctx.imm_count)
+ kfree(ctx.imms);
+#endif
+ bpf_jit_binary_free(header);
+ goto out;
+ }
build_epilogue(&ctx);
flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 224081c..7d0f070 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -272,6 +272,7 @@ void xen_arch_pre_suspend(void) { }
void xen_arch_post_suspend(int suspend_cancelled) { }
void xen_timer_resume(void) { }
void xen_arch_resume(void) { }
+void xen_arch_suspend(void) { }
/* In the hypervisor.S file. */
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 793551d..4983250 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -4,6 +4,7 @@
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/export.h>
+#include <linux/memblock.h>
#include <linux/of_address.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -21,6 +22,20 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
+unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+ struct memblock_region *reg;
+ gfp_t flags = __GFP_NOWARN;
+
+ for_each_memblock(memory, reg) {
+ if (reg->base < (phys_addr_t)0xffffffff) {
+ flags |= __GFP_DMA;
+ break;
+ }
+ }
+ return __get_free_pages(flags, order);
+}
+
enum dma_cache_op {
DMA_UNMAP,
DMA_MAP,
diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
index c138b95..351c95b 100644
--- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
@@ -21,6 +21,20 @@
clock-output-names = "juno_mb:clk25mhz";
};
+ v2m_refclk1mhz: refclk1mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000>;
+ clock-output-names = "juno_mb:refclk1mhz";
+ };
+
+ v2m_refclk32khz: refclk32khz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "juno_mb:refclk32khz";
+ };
+
motherboard {
compatible = "arm,vexpress,v2p-p1", "simple-bus";
#address-cells = <2>; /* SMB chipselect number and offset */
@@ -66,6 +80,15 @@
#size-cells = <1>;
ranges = <0 3 0 0x200000>;
+ v2m_sysctl: sysctl@020000 {
+ compatible = "arm,sp810", "arm,primecell";
+ reg = <0x020000 0x1000>;
+ clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&mb_clk24mhz>;
+ clock-names = "refclk", "timclk", "apb_pclk";
+ #clock-cells = <1>;
+ clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
+ };
+
mmci@050000 {
compatible = "arm,pl180", "arm,primecell";
reg = <0x050000 0x1000>;
@@ -106,16 +129,16 @@
compatible = "arm,sp804", "arm,primecell";
reg = <0x110000 0x10000>;
interrupts = <9>;
- clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
- clock-names = "timclken1", "apb_pclk";
+ clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&mb_clk24mhz>;
+ clock-names = "timclken1", "timclken2", "apb_pclk";
};
v2m_timer23: timer@120000 {
compatible = "arm,sp804", "arm,primecell";
reg = <0x120000 0x10000>;
interrupts = <9>;
- clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
- clock-names = "timclken1", "apb_pclk";
+ clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&mb_clk24mhz>;
+ clock-names = "timclken1", "timclken2", "apb_pclk";
};
rtc@170000 {
diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
index 9499199..6a37c3c 100644
--- a/arch/arm64/crypto/crc32-arm64.c
+++ b/arch/arm64/crypto/crc32-arm64.c
@@ -147,13 +147,21 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+ put_unaligned_le32(ctx->crc, out);
+ return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
put_unaligned_le32(~ctx->crc, out);
return 0;
}
static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
- put_unaligned_le32(~crc32_arm64_le_hw(crc, data, len), out);
+ put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
return 0;
}
@@ -199,6 +207,14 @@ static int crc32_cra_init(struct crypto_tfm *tfm)
{
struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+ mctx->key = 0;
+ return 0;
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+ struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
mctx->key = ~0;
return 0;
}
@@ -229,7 +245,7 @@ static struct shash_alg crc32c_alg = {
.setkey = chksum_setkey,
.init = chksum_init,
.update = chksumc_update,
- .final = chksum_final,
+ .final = chksumc_final,
.finup = chksumc_finup,
.digest = chksumc_digest,
.descsize = sizeof(struct chksum_desc_ctx),
@@ -241,7 +257,7 @@ static struct shash_alg crc32c_alg = {
.cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
- .cra_init = crc32_cra_init,
+ .cra_init = crc32c_cra_init,
}
};
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 114e7cc..aefda98 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -74,6 +74,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
+ struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+
+ sctx->finalize = 0;
kernel_neon_begin_partial(16);
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 1340e44c..7cd5875 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -75,6 +75,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
static int sha256_ce_final(struct shash_desc *desc, u8 *out)
{
+ struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+ sctx->finalize = 0;
kernel_neon_begin_partial(28);
sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 71f19c4..0fa47c4 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -114,7 +114,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define nop() asm volatile("nop");
#define smp_mb__before_atomic() smp_mb()
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 540f7c0..7116d39 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -170,6 +170,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
+#define ioremap_wt(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define iounmap __iounmap
/*
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 21033bb..28f8365 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -24,7 +24,6 @@
#include <asm/cacheflush.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
-#include <asm/insn.h>
#include <linux/stop_machine.h>
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
@@ -34,48 +33,6 @@ struct alt_region {
struct alt_instr *end;
};
-/*
- * Decode the imm field of a b/bl instruction, and return the byte
- * offset as a signed value (so it can be used when computing a new
- * branch target).
- */
-static s32 get_branch_offset(u32 insn)
-{
- s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn);
-
- /* sign-extend the immediate before turning it into a byte offset */
- return (imm << 6) >> 4;
-}
-
-static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr)
-{
- u32 insn;
-
- aarch64_insn_read(altinsnptr, &insn);
-
- /* Stop the world on instructions we don't support... */
- BUG_ON(aarch64_insn_is_cbz(insn));
- BUG_ON(aarch64_insn_is_cbnz(insn));
- BUG_ON(aarch64_insn_is_bcond(insn));
- /* ... and there is probably more. */
-
- if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
- enum aarch64_insn_branch_type type;
- unsigned long target;
-
- if (aarch64_insn_is_b(insn))
- type = AARCH64_INSN_BRANCH_NOLINK;
- else
- type = AARCH64_INSN_BRANCH_LINK;
-
- target = (unsigned long)altinsnptr + get_branch_offset(insn);
- insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr,
- target, type);
- }
-
- return insn;
-}
-
static int __apply_alternatives(void *alt_region)
{
struct alt_instr *alt;
@@ -83,9 +40,6 @@ static int __apply_alternatives(void *alt_region)
u8 *origptr, *replptr;
for (alt = region->begin; alt < region->end; alt++) {
- u32 insn;
- int i;
-
if (!cpus_have_cap(alt->cpufeature))
continue;
@@ -95,12 +49,7 @@ static int __apply_alternatives(void *alt_region)
origptr = (u8 *)&alt->orig_offset + alt->orig_offset;
replptr = (u8 *)&alt->alt_offset + alt->alt_offset;
-
- for (i = 0; i < alt->alt_len; i += sizeof(insn)) {
- insn = get_alt_insn(origptr + i, replptr + i);
- aarch64_insn_write(origptr + i, insn);
- }
-
+ memcpy(origptr, replptr, alt->alt_len);
flush_icache_range((uintptr_t)origptr,
(uintptr_t)(origptr + alt->alt_len));
}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 23f25ac..cce18c8 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1315,15 +1315,15 @@ static int armpmu_device_probe(struct platform_device *pdev)
if (!cpu_pmu)
return -ENODEV;
- irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
- if (!irqs)
- return -ENOMEM;
-
/* Don't bother with PPIs; they're already affine */
irq = platform_get_irq(pdev, 0);
if (irq >= 0 && irq_is_percpu(irq))
return 0;
+ irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+ if (!irqs)
+ return -ENOMEM;
+
for (i = 0; i < pdev->num_resources; ++i) {
struct device_node *dn;
int cpu;
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 74c2567..f3d6221 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -328,10 +328,12 @@ static int ptdump_init(void)
for (j = 0; j < pg_level[i].num; j++)
pg_level[i].mask |= pg_level[i].bits[j].mask;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
address_markers[VMEMMAP_START_NR].start_address =
(unsigned long)virt_to_page(PAGE_OFFSET);
address_markers[VMEMMAP_END_NR].start_address =
(unsigned long)virt_to_page(high_memory);
+#endif
pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
&ptdump_fops);
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index edba042..dc6a484 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -487,7 +487,7 @@ emit_cond_jmp:
return -EINVAL;
}
- imm64 = (u64)insn1.imm << 32 | imm;
+ imm64 = (u64)insn1.imm << 32 | (u32)imm;
emit_a64_mov_i64(dst, imm64, ctx);
return 1;
diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h
index 962a6ae..366bbea 100644
--- a/arch/avr32/include/asm/cmpxchg.h
+++ b/arch/avr32/include/asm/cmpxchg.h
@@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
if something tries to do an invalid cmpxchg(). */
extern void __cmpxchg_called_with_bad_pointer(void);
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
diff --git a/arch/avr32/include/asm/io.h b/arch/avr32/include/asm/io.h
index 4f5ec2b..e998ff5 100644
--- a/arch/avr32/include/asm/io.h
+++ b/arch/avr32/include/asm/io.h
@@ -296,6 +296,7 @@ extern void __iounmap(void __iomem *addr);
__iounmap(addr)
#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
#define cached(addr) P1SEGADDR(addr)
#define uncached(addr) P2SEGADDR(addr)
diff --git a/arch/frv/include/asm/io.h b/arch/frv/include/asm/io.h
index 0b78bc8..a31b63e 100644
--- a/arch/frv/include/asm/io.h
+++ b/arch/frv/include/asm/io.h
@@ -17,6 +17,8 @@
#ifdef __KERNEL__
+#define ARCH_HAS_IOREMAP_WT
+
#include <linux/types.h>
#include <asm/virtconvert.h>
#include <asm/string.h>
@@ -265,7 +267,7 @@ static inline void __iomem *ioremap_nocache(unsigned long physaddr, unsigned lon
return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
}
-static inline void __iomem *ioremap_writethrough(unsigned long physaddr, unsigned long size)
+static inline void __iomem *ioremap_wt(unsigned long physaddr, unsigned long size)
{
return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
}
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 9e78029..a6e34e2 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
* looks just like atomic_cmpxchg on our arch currently with a bunch of
* variable casting.
*/
-#define __HAVE_ARCH_CMPXCHG 1
#define cmpxchg(ptr, old, new) \
({ \
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index f6769eb..843ba43 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -77,12 +77,7 @@ do { \
___p1; \
})
-/*
- * XXX check on this ---I suspect what Linus really wants here is
- * acquire vs release semantics but we can't discuss this stuff with
- * Linus just yet. Grrr...
- */
-#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
/*
* The group barrier in front of the rsm & ssm are necessary to ensure
diff --git a/arch/ia64/include/asm/irq_remapping.h b/arch/ia64/include/asm/irq_remapping.h
index e3b3556..a8687b1 100644
--- a/arch/ia64/include/asm/irq_remapping.h
+++ b/arch/ia64/include/asm/irq_remapping.h
@@ -1,6 +1,4 @@
#ifndef __IA64_INTR_REMAPPING_H
#define __IA64_INTR_REMAPPING_H
#define irq_remapping_enabled 0
-#define dmar_alloc_hwirq create_irq
-#define dmar_free_hwirq destroy_irq
#endif
diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h
index f35109b..a0e3620 100644
--- a/arch/ia64/include/uapi/asm/cmpxchg.h
+++ b/arch/ia64/include/uapi/asm/cmpxchg.h
@@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void);
* indicated by comparing RETURN with OLD.
*/
-#define __HAVE_ARCH_CMPXCHG 1
-
/*
* This function doesn't exist, so you'll get a linker error
* if something tries to do an invalid cmpxchg().
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 9dd7464..d70bf15 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -165,7 +165,7 @@ static struct irq_chip dmar_msi_type = {
.irq_retrigger = ia64_msi_retrigger_irq,
};
-static int
+static void
msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
{
struct irq_cfg *cfg = irq_cfg + irq;
@@ -186,21 +186,29 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
MSI_DATA_LEVEL_ASSERT |
MSI_DATA_DELIVERY_FIXED |
MSI_DATA_VECTOR(cfg->vector);
- return 0;
}
-int arch_setup_dmar_msi(unsigned int irq)
+int dmar_alloc_hwirq(int id, int node, void *arg)
{
- int ret;
+ int irq;
struct msi_msg msg;
- ret = msi_compose_msg(NULL, irq, &msg);
- if (ret < 0)
- return ret;
- dmar_msi_write(irq, &msg);
- irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
- "edge");
- return 0;
+ irq = create_irq();
+ if (irq > 0) {
+ irq_set_handler_data(irq, arg);
+ irq_set_chip_and_handler_name(irq, &dmar_msi_type,
+ handle_edge_irq, "edge");
+ msi_compose_msg(NULL, irq, &msg);
+ dmar_msi_write(irq, &msg);
+ }
+
+ return irq;
+}
+
+void dmar_free_hwirq(int irq)
+{
+ irq_set_handler_data(irq, NULL);
+ destroy_irq(irq);
}
#endif /* CONFIG_INTEL_IOMMU */
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index d4e162d..7cc3be9 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
- struct pci_controller *controller = bridge->bus->sysdata;
-
- ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+ /*
+ * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+ * here, pci_create_root_bus() has been called by someone else and
+ * sysdata is likely to be different from what we expect. Let it go in
+ * that case.
+ */
+ if (!bridge->dev.parent) {
+ struct pci_controller *controller = bridge->bus->sysdata;
+ ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+ }
return 0;
}
diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h
index de651db..14bf9b7 100644
--- a/arch/m32r/include/asm/cmpxchg.h
+++ b/arch/m32r/include/asm/cmpxchg.h
@@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size)
((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \
sizeof(*(ptr))))
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new)
{
diff --git a/arch/m32r/include/asm/io.h b/arch/m32r/include/asm/io.h
index 9cc00db..0c3f25e 100644
--- a/arch/m32r/include/asm/io.h
+++ b/arch/m32r/include/asm/io.h
@@ -68,6 +68,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
extern void iounmap(volatile void __iomem *addr);
#define ioremap_nocache(off,size) ioremap(off,size)
#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
/*
* IO bus memory addresses are also 1:1 with the physical address
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index ce7aea3..c18ddc7 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -45,7 +45,7 @@ static volatile unsigned long flushcache_cpumask = 0;
/*
* For flush_tlb_others()
*/
-static volatile cpumask_t flush_cpumask;
+static cpumask_t flush_cpumask;
static struct mm_struct *flush_mm;
static struct vm_area_struct *flush_vma;
static volatile unsigned long flush_va;
@@ -415,7 +415,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
*/
send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0);
- while (!cpumask_empty((cpumask_t*)&flush_cpumask)) {
+ while (!cpumask_empty(&flush_cpumask)) {
/* nothing. lockup detection does not belong here */
mb();
}
@@ -468,7 +468,7 @@ void smp_invalidate_interrupt(void)
__flush_tlb_page(va);
}
}
- cpumask_clear_cpu(cpu_id, (cpumask_t*)&flush_cpumask);
+ cpumask_clear_cpu(cpu_id, &flush_cpumask);
}
/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index bc755bc..83b1df8 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *,
* indicated by comparing RETURN with OLD.
*/
#ifdef CONFIG_RMW_INSNS
-#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
unsigned long new, int size)
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index 8955b40..618c85d3 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -20,6 +20,8 @@
#ifdef __KERNEL__
+#define ARCH_HAS_IOREMAP_WT
+
#include <linux/compiler.h>
#include <asm/raw_io.h>
#include <asm/virtconvert.h>
@@ -465,7 +467,7 @@ static inline void __iomem *ioremap_nocache(unsigned long physaddr, unsigned lon
{
return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
}
-static inline void __iomem *ioremap_writethrough(unsigned long physaddr,
+static inline void __iomem *ioremap_wt(unsigned long physaddr,
unsigned long size)
{
return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h
index a93c8cd..ad7bd40 100644
--- a/arch/m68k/include/asm/io_no.h
+++ b/arch/m68k/include/asm/io_no.h
@@ -3,6 +3,8 @@
#ifdef __KERNEL__
+#define ARCH_HAS_IOREMAP_WT
+
#include <asm/virtconvert.h>
#include <asm-generic/iomap.h>
@@ -153,7 +155,7 @@ static inline void *ioremap_nocache(unsigned long physaddr, unsigned long size)
{
return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
}
-static inline void *ioremap_writethrough(unsigned long physaddr, unsigned long size)
+static inline void *ioremap_wt(unsigned long physaddr, unsigned long size)
{
return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
}
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index d703d8e..5a696e5 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -84,7 +84,7 @@ static inline void fence(void)
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
index b1bc1be..be29e3e 100644
--- a/arch/metag/include/asm/cmpxchg.h
+++ b/arch/metag/include/asm/cmpxchg.h
@@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
return old;
}
-#define __HAVE_ARCH_CMPXCHG 1
-
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
index d5779b0..9890f21 100644
--- a/arch/metag/include/asm/io.h
+++ b/arch/metag/include/asm/io.h
@@ -160,6 +160,9 @@ extern void __iounmap(void __iomem *addr);
#define ioremap_wc(offset, size) \
__ioremap((offset), (size), _PAGE_WR_COMBINE)
+#define ioremap_wt(offset, size) \
+ __ioremap((offset), (size), 0)
+
#define iounmap(addr) \
__iounmap(addr)
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 940f5fc..39b6315 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -39,10 +39,10 @@ extern resource_size_t isa_mem_base;
extern void iounmap(void __iomem *addr);
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-#define ioremap_writethrough(addr, size) ioremap((addr), (size))
#define ioremap_nocache(addr, size) ioremap((addr), (size))
#define ioremap_fullcache(addr, size) ioremap((addr), (size))
#define ioremap_wc(addr, size) ioremap((addr), (size))
+#define ioremap_wt(addr, size) ioremap((addr), (size))
#endif /* CONFIG_MMU */
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 5200f64..ae2dd59 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -277,7 +277,7 @@ LDFLAGS += -m $(ld-emul)
ifdef CONFIG_MIPS
CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
- sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/")
+ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
ifdef CONFIG_64BIT
CHECKFLAGS += -m64
endif
diff --git a/arch/mips/ath79/prom.c b/arch/mips/ath79/prom.c
index e1fe630..597899a 100644
--- a/arch/mips/ath79/prom.c
+++ b/arch/mips/ath79/prom.c
@@ -1,6 +1,7 @@
/*
* Atheros AR71XX/AR724X/AR913X specific prom routines
*
+ * Copyright (C) 2015 Laurent Fasnacht <l@libres.ch>
* Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
* Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
*
@@ -25,12 +26,14 @@ void __init prom_init(void)
{
fw_init_cmdline();
+#ifdef CONFIG_BLK_DEV_INITRD
/* Read the initrd address from the firmware environment */
initrd_start = fw_getenvl("initrd_start");
if (initrd_start) {
initrd_start = KSEG0ADDR(initrd_start);
initrd_end = initrd_start + fw_getenvl("initrd_size");
}
+#endif
}
void __init prom_free_prom_memory(void)
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index 0026806..b2a577e 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m
CONFIG_USB_C67X00_HCD=m
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_ISP1760_HCD=m
+CONFIG_USB_ISP1760=m
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=m
CONFIG_USB_R8A66597_HCD=m
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 2b8bbbc..7ecba84 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -112,8 +112,8 @@
#define __WEAK_LLSC_MB " \n"
#endif
-#define set_mb(var, value) \
- do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) \
+ do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 412f945..b71ab4a 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \
})
-#define __HAVE_ARCH_CMPXCHG 1
-
#define __cmpxchg_asm(ld, st, m, old, new) \
({ \
__typeof(*(m)) __ret; \
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index a594d8e..f19e890 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -304,7 +304,7 @@ do { \
\
current->thread.abi = &mips_abi; \
\
- current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31; \
+ current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \
} while (0)
#endif /* CONFIG_32BIT */
@@ -366,7 +366,7 @@ do { \
else \
current->thread.abi = &mips_abi; \
\
- current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31; \
+ current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \
\
p = personality(current->personality); \
if (p != PER_LINUX32 && p != PER_LINUX) \
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index bb02fac..2b25d1b 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -45,7 +45,7 @@ extern int __cpu_logical_map[NR_CPUS];
#define SMP_DUMP 0x8
#define SMP_ASK_C0COUNT 0x10
-extern volatile cpumask_t cpu_callin_map;
+extern cpumask_t cpu_callin_map;
/* Mask of CPUs which are currently definitely operating coherently */
extern cpumask_t cpu_coherent_mask;
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index be4899f..4a4d9e0 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -76,14 +76,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
/* Lets see if this is an O32 ELF */
if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
- /* FR = 1 for N32 */
- if (ehdr32->e_flags & EF_MIPS_ABI2)
- state->overall_fp_mode = FP_FR1;
- else
- /* Set a good default FPU mode for O32 */
- state->overall_fp_mode = cpu_has_mips_r6 ?
- FP_FRE : FP_FR0;
-
if (ehdr32->e_flags & EF_MIPS_FP64) {
/*
* Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
@@ -104,9 +96,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
(char *)&abiflags,
sizeof(abiflags));
} else {
- /* FR=1 is really the only option for 64-bit */
- state->overall_fp_mode = FP_FR1;
-
if (phdr64->p_type != PT_MIPS_ABIFLAGS)
return 0;
if (phdr64->p_filesz < sizeof(abiflags))
@@ -137,6 +126,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
struct elf32_hdr *ehdr = _ehdr;
struct mode_req prog_req, interp_req;
int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
+ bool is_mips64;
if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
return 0;
@@ -152,10 +142,22 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
abi0 = abi1 = fp_abi;
}
- /* ABI limits. O32 = FP_64A, N32/N64 = FP_SOFT */
- max_abi = ((ehdr->e_ident[EI_CLASS] == ELFCLASS32) &&
- (!(ehdr->e_flags & EF_MIPS_ABI2))) ?
- MIPS_ABI_FP_64A : MIPS_ABI_FP_SOFT;
+ is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ||
+ (ehdr->e_flags & EF_MIPS_ABI2);
+
+ if (is_mips64) {
+ /* MIPS64 code always uses FR=1, thus the default is easy */
+ state->overall_fp_mode = FP_FR1;
+
+ /* Disallow access to the various FPXX & FP64 ABIs */
+ max_abi = MIPS_ABI_FP_SOFT;
+ } else {
+ /* Default to a mode capable of running code expecting FR=0 */
+ state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
+
+ /* Allow all ABIs we know about */
+ max_abi = MIPS_ABI_FP_64A;
+ }
if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
(abi1 > max_abi && abi1 != MIPS_ABI_FP_UNKNOWN))
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index d2bfbc2..51f57d8 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -29,7 +29,7 @@
int kgdb_early_setup;
#endif
-static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];
+static DECLARE_BITMAP(irq_map, NR_IRQS);
int allocate_irqno(void)
{
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index d544e77..e933a30 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -176,7 +176,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
__get_user(value, data + 64);
fcr31 = child->thread.fpu.fcr31;
- mask = current_cpu_data.fpu_msk31;
+ mask = boot_cpu_data.fpu_msk31;
child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
/* FIR may not be written. */
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index fd528d7..336708a 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = {
static void bmips_wr_vec(unsigned long dst, char *start, char *end)
{
memcpy((void *)dst, start, end - start);
- dma_cache_wback((unsigned long)start, end - start);
+ dma_cache_wback(dst, end - start);
local_flush_icache_range(dst, dst + (end - start));
instruction_hazard();
}
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 7e011f9..4251d39 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -92,7 +92,7 @@ static void __init cps_smp_setup(void)
#ifdef CONFIG_MIPS_MT_FPAFF
/* If we have an FPU, enroll ourselves in the FPU-full mask */
if (cpu_has_fpu)
- cpu_set(0, mt_fpu_cpumask);
+ cpumask_set_cpu(0, &mt_fpu_cpumask);
#endif /* CONFIG_MIPS_MT_FPAFF */
}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 193ace7..faa46eb 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -43,7 +43,7 @@
#include <asm/time.h>
#include <asm/setup.h>
-volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */
+cpumask_t cpu_callin_map; /* Bitmask of started secondaries */
int __cpu_number_map[NR_CPUS]; /* Map physical to logical */
EXPORT_SYMBOL(__cpu_number_map);
@@ -218,8 +218,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
/*
* Trust is futile. We should really have timeouts ...
*/
- while (!cpumask_test_cpu(cpu, &cpu_callin_map))
+ while (!cpumask_test_cpu(cpu, &cpu_callin_map)) {
udelay(100);
+ schedule();
+ }
synchronise_count_master(cpu);
return 0;
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ba32e48..d2d1c19 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -269,7 +269,6 @@ static void __show_regs(const struct pt_regs *regs)
*/
printk("epc : %0*lx %pS\n", field, regs->cp0_epc,
(void *) regs->cp0_epc);
- printk(" %s\n", print_tainted());
printk("ra : %0*lx %pS\n", field, regs->regs[31],
(void *) regs->regs[31]);
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 6230f37..4b50c57 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -2389,7 +2389,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
{
unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr];
enum emulation_result er = EMULATE_DONE;
- unsigned long curr_pc;
if (run->mmio.len > sizeof(*gpr)) {
kvm_err("Bad MMIO length: %d", run->mmio.len);
@@ -2397,11 +2396,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
goto done;
}
- /*
- * Update PC and hold onto current PC in case there is
- * an error and we want to rollback the PC
- */
- curr_pc = vcpu->arch.pc;
er = update_pc(vcpu, vcpu->arch.pending_load_cause);
if (er == EMULATE_FAIL)
return er;
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index 7d12c0d..77e6494 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm)
FEXPORT(__strnlen_\func\()_nocheck_asm)
move v0, a0
PTR_ADDU a1, a0 # stop pointer
-1: beq v0, a1, 1f # limit reached?
+1:
+#ifdef CONFIG_CPU_DADDI_WORKAROUNDS
+ .set noat
+ li AT, 1
+#endif
+ beq v0, a1, 1f # limit reached?
.ifeqs "\func", "kernel"
EX(lb, t0, (v0), .Lfault\@)
.else
@@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm)
.endif
.set noreorder
bnez t0, 1b
-1: PTR_ADDIU v0, 1
+1:
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
+ PTR_ADDIU v0, 1
+#else
+ PTR_ADDU v0, AT
+ .set at
+#endif
.set reorder
PTR_SUBU v0, a0
jr ra
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index d31c537..22b9b2c 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -889,7 +889,7 @@ static inline void cop1_cfc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
break;
case FPCREG_RID:
- value = current_cpu_data.fpu_id;
+ value = boot_cpu_data.fpu_id;
break;
default:
@@ -921,7 +921,7 @@ static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
(void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
/* Preserve read-only bits. */
- mask = current_cpu_data.fpu_msk31;
+ mask = boot_cpu_data.fpu_msk31;
fcr31 = (value & ~mask) | (fcr31 & mask);
break;
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index a27a088..08318ec 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -495,7 +495,7 @@ static void r4k_tlb_configure(void)
if (cpu_has_rixi) {
/*
- * Enable the no read, no exec bits, and enable large virtual
+ * Enable the no read, no exec bits, and enable large physical
* address.
*/
#ifdef CONFIG_64BIT
diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c
index 0134db2..5a2a821 100644
--- a/arch/mips/sgi-ip32/ip32-platform.c
+++ b/arch/mips/sgi-ip32/ip32-platform.c
@@ -130,9 +130,9 @@ struct platform_device ip32_rtc_device = {
.resource = ip32_rtc_resources,
};
-+static int __init sgio2_rtc_devinit(void)
+static __init int sgio2_rtc_devinit(void)
{
return platform_device_register(&ip32_rtc_device);
}
-device_initcall(sgio2_cmos_devinit);
+device_initcall(sgio2_rtc_devinit);
diff --git a/arch/mn10300/include/asm/io.h b/arch/mn10300/include/asm/io.h
index cc4a2ba..07c5b4a 100644
--- a/arch/mn10300/include/asm/io.h
+++ b/arch/mn10300/include/asm/io.h
@@ -282,6 +282,7 @@ static inline void __iomem *ioremap_nocache(unsigned long offset, unsigned long
}
#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
static inline void iounmap(void __iomem *addr)
{
diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h
index 6e24d7c..c5a62da 100644
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h
@@ -46,6 +46,7 @@ static inline void iounmap(void __iomem *addr)
}
#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
/* Pages to physical address... */
#define page_to_phys(page) virt_to_phys(page_to_virt(page))
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index dbd1335..0a90b96 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
#define xchg(ptr, x) \
((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
-#define __HAVE_ARCH_CMPXCHG 1
-
/* bug catcher for when unsupported size is used - won't link */
extern void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 3391d06..78c9fd3 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -348,6 +348,10 @@ struct pt_regs; /* forward declaration... */
#define ELF_HWCAP 0
+#define STACK_RND_MASK (is_32bit_task() ? \
+ 0x7ff >> (PAGE_SHIFT - 12) : \
+ 0x3ffff >> (PAGE_SHIFT - 12))
+
struct mm_struct;
extern unsigned long arch_randomize_brk(struct mm_struct *);
#define arch_randomize_brk arch_randomize_brk
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 8a488c2..809905a 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -181,9 +181,12 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
return 1;
}
+/*
+ * Copy architecture-specific thread state
+ */
int
copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long arg, struct task_struct *p)
+ unsigned long kthread_arg, struct task_struct *p)
{
struct pt_regs *cregs = &(p->thread.regs);
void *stack = task_stack_page(p);
@@ -195,11 +198,10 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
extern void * const child_return;
if (unlikely(p->flags & PF_KTHREAD)) {
+ /* kernel thread */
memset(cregs, 0, sizeof(struct pt_regs));
if (!usp) /* idle thread */
return 0;
-
- /* kernel thread */
/* Must exit via ret_from_kernel_thread in order
* to call schedule_tail()
*/
@@ -215,7 +217,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
#else
cregs->gr[26] = usp;
#endif
- cregs->gr[25] = arg;
+ cregs->gr[25] = kthread_arg;
} else {
/* user thread */
/* usp must be word aligned. This also prevents users from
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index e1ffea2..5aba01a 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -77,6 +77,9 @@ static unsigned long mmap_upper_limit(void)
if (stack_base > STACK_SIZE_MAX)
stack_base = STACK_SIZE_MAX;
+ /* Add space for stack randomization. */
+ stack_base += (STACK_RND_MASK << PAGE_SHIFT);
+
return PAGE_ALIGN(STACK_TOP - stack_base);
}
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index a3bf5be..39505d6 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index d463c68..ad6263c 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
* Compare and exchange - if *p == old, set it to new,
* and return the old value of *p.
*/
-#define __HAVE_ARCH_CMPXCHG 1
static __always_inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 15c99b6..b2eb468 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -73,7 +73,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
uint64_t nip, uint64_t addr)
{
uint64_t srr1;
- int index = __this_cpu_inc_return(mce_nest_count);
+ int index = __this_cpu_inc_return(mce_nest_count) - 1;
struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
/*
@@ -184,7 +184,7 @@ void machine_check_queue_event(void)
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;
- index = __this_cpu_inc_return(mce_queue_count);
+ index = __this_cpu_inc_return(mce_queue_count) - 1;
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
__this_cpu_dec(mce_queue_count);
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index f096e72..1db6851 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -213,6 +213,7 @@ SECTIONS
*(.opd)
}
+ . = ALIGN(256);
.got : AT(ADDR(.got) - LOAD_OFFSET) {
__toc_start = .;
#ifndef CONFIG_RELOCATABLE
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48d3c5d..df81caa 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc)
*/
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu, *vnext;
int i;
int srcu_idx;
@@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if ((threads_per_core > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+ arch.run_list) {
vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ce968b..3385e3d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -689,27 +689,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
- pte_t *ptep;
- struct page *page;
+ pte_t *ptep, pte;
unsigned shift;
unsigned long mask, flags;
+ struct page *page = ERR_PTR(-EINVAL);
+
+ local_irq_save(flags);
+ ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+ if (!ptep)
+ goto no_page;
+ pte = READ_ONCE(*ptep);
/*
+ * Verify it is a huge page else bail.
* Transparent hugepages are handled by generic code. We can skip them
* here.
*/
- local_irq_save(flags);
- ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+ if (!shift || pmd_trans_huge(__pmd(pte_val(pte))))
+ goto no_page;
- /* Verify it is a huge page else bail. */
- if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
- local_irq_restore(flags);
- return ERR_PTR(-EINVAL);
+ if (!pte_present(pte)) {
+ page = NULL;
+ goto no_page;
}
mask = (1UL << shift) - 1;
- page = pte_page(*ptep);
+ page = pte_page(pte);
if (page)
page += (address & mask) / PAGE_SIZE;
+no_page:
local_irq_restore(flags);
return page;
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 59daa5e..6bfadf1 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -839,6 +839,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
* hash fault look at them.
*/
memset(pgtable, 0, PTE_FRAG_SIZE);
+ /*
+ * Serialize against find_linux_pte_or_hugepte which does lock-less
+ * lookup in page tables with local interrupts disabled. For huge pages
+ * it casts pmd_t to pte_t. Since format of pte_t is different from
+ * pmd_t we want to prevent transit from pmd pointing to page table
+ * to pmd pointing to huge page (and back) while interrupts are disabled.
+ * We clear pmd to possibly replace it with page table pointer in
+ * different code paths. So make sure we wait for the parallel
+ * find_linux_pte_or_hugepage to finish.
+ */
+ kick_all_cpus_sync();
return old_pmd;
}
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 7940dc9..b258110 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -16,11 +16,12 @@
#define GHASH_DIGEST_SIZE 16
struct ghash_ctx {
- u8 icv[16];
- u8 key[16];
+ u8 key[GHASH_BLOCK_SIZE];
};
struct ghash_desc_ctx {
+ u8 icv[GHASH_BLOCK_SIZE];
+ u8 key[GHASH_BLOCK_SIZE];
u8 buffer[GHASH_BLOCK_SIZE];
u32 bytes;
};
@@ -28,8 +29,10 @@ struct ghash_desc_ctx {
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
memset(dctx, 0, sizeof(*dctx));
+ memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
return 0;
}
@@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm,
}
memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
- memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
return 0;
}
@@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
unsigned int n;
u8 *buf = dctx->buffer;
int ret;
@@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc,
src += n;
if (!dctx->bytes) {
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf,
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
GHASH_BLOCK_SIZE);
if (ret != GHASH_BLOCK_SIZE)
return -EIO;
@@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc,
n = srclen & ~(GHASH_BLOCK_SIZE - 1);
if (n) {
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n);
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
if (ret != n)
return -EIO;
src += n;
@@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc,
return 0;
}
-static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+static int ghash_flush(struct ghash_desc_ctx *dctx)
{
u8 *buf = dctx->buffer;
int ret;
@@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
memset(pos, 0, dctx->bytes);
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE);
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
if (ret != GHASH_BLOCK_SIZE)
return -EIO;
+
+ dctx->bytes = 0;
}
- dctx->bytes = 0;
return 0;
}
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
int ret;
- ret = ghash_flush(ctx, dctx);
+ ret = ghash_flush(dctx);
if (!ret)
- memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE);
+ memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
return ret;
}
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 1f374b3..9d5192c 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -125,7 +125,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
/* fill page with urandom bytes */
get_random_bytes(pg, PAGE_SIZE);
/* exor page with stckf values */
- for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) {
+ for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
u64 *p = ((u64 *)pg) + n;
*p ^= get_tod_clock_fast();
}
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 8d72471..e6f8615 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 4eadec4..411464f 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -32,8 +32,6 @@
__old; \
})
-#define __HAVE_ARCH_CMPXCHG
-
#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
({ \
register __typeof__(*(p1)) __old1 asm("2") = (o1); \
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 30fd5c8..cb5fdf3 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -29,6 +29,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define ioremap_nocache(addr, size) ioremap(addr, size)
#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
{
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index fc64239..ef24a21 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -494,7 +494,7 @@ static inline int pmd_large(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
}
-static inline int pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
{
unsigned long origin_mask;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 7690dc8..20c146d 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -443,8 +443,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
/*
* Compile one eBPF instruction into s390x code
+ *
+ * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
+ * stack space for the large switch statement.
*/
-static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
{
struct bpf_insn *insn = &fp->insnsi[i];
int jmp_off, last, insn_count = 1;
@@ -588,8 +591,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9160000, dst_reg, rc_reg);
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */
- case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
@@ -602,10 +605,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
- /* llgfr %dst,%src (u32 cast) */
- EMIT4(0xb9160000, dst_reg, src_reg);
/* dlgr %w0,%dst */
- EMIT4(0xb9870000, REG_W0, dst_reg);
+ EMIT4(0xb9870000, REG_W0, src_reg);
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
@@ -632,8 +633,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9160000, dst_reg, rc_reg);
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */
- case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
@@ -649,7 +650,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9040000, REG_W1, dst_reg);
/* dlg %w0,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64((u32) imm));
+ EMIT_CONST_U64(imm));
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
diff --git a/arch/score/include/asm/cmpxchg.h b/arch/score/include/asm/cmpxchg.h
index f384839..cc3f642 100644
--- a/arch/score/include/asm/cmpxchg.h
+++ b/arch/score/include/asm/cmpxchg.h
@@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
(unsigned long)(o), \
(unsigned long)(n)))
-#define __HAVE_ARCH_CMPXCHG 1
-
#include <asm-generic/cmpxchg-local.h>
#endif /* _ASM_SCORE_CMPXCHG_H */
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 4371530..bf91037 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -32,7 +32,7 @@
#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
#endif
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#include <asm-generic/barrier.h>
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index f6bd140..85c97b18 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void);
* if something tries to do an invalid cmpxchg(). */
extern void __cmpxchg_called_with_bad_pointer(void);
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
unsigned long new, int size)
{
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 7664894..809941e 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -40,8 +40,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define dma_rmb() rmb()
#define dma_wmb() wmb()
-#define set_mb(__var, __value) \
- do { __var = __value; membar_safe("#StoreLoad"); } while(0)
+#define smp_store_mb(__var, __value) \
+ do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)
#ifdef CONFIG_SMP
#define smp_mb() mb()
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index d38b52d..83ffb83 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
*
* Cribbed from <asm-parisc/atomic.h>
*/
-#define __HAVE_ARCH_CMPXCHG 1
/* bug catcher for when unsupported size is used - won't link */
void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h
index 0e1ed6c..faa2f61 100644
--- a/arch/sparc/include/asm/cmpxchg_64.h
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
#include <asm-generic/cmpxchg-local.h>
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long
__cmpxchg_u32(volatile int *m, int old, int new)
{
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index a6e424d..a6cfdab 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -24,7 +24,8 @@ typedef struct {
unsigned int icache_line_size;
unsigned int ecache_size;
unsigned int ecache_line_size;
- int core_id;
+ unsigned short sock_id;
+ unsigned short core_id;
int proc_id;
} cpuinfo_sparc;
diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h
index 407ac14..57f26c3 100644
--- a/arch/sparc/include/asm/io_32.h
+++ b/arch/sparc/include/asm/io_32.h
@@ -129,6 +129,7 @@ static inline void sbus_memcpy_toio(volatile void __iomem *dst,
void __iomem *ioremap(unsigned long offset, unsigned long size);
#define ioremap_nocache(X,Y) ioremap((X),(Y))
#define ioremap_wc(X,Y) ioremap((X),(Y))
+#define ioremap_wt(X,Y) ioremap((X),(Y))
void iounmap(volatile void __iomem *addr);
/* Create a virtual mapping cookie for an IO port range */
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 50d4840..c32fa3f 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -402,6 +402,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
#define ioremap_nocache(X,Y) ioremap((X),(Y))
#define ioremap_wc(X,Y) ioremap((X),(Y))
+#define ioremap_wt(X,Y) ioremap((X),(Y))
static inline void iounmap(volatile void __iomem *addr)
{
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index dc165eb..2a52c91 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
" sllx %1, 32, %1\n"
" or %0, %1, %0\n"
" .previous\n"
+ " .section .sun_m7_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%4), %1\n"
+ " sethi %%hi(%4), %0\n"
+ " .word 662b\n"
+ " or %1, %%ulo(%4), %1\n"
+ " or %0, %%lo(%4), %0\n"
+ " .word 663b\n"
+ " sllx %1, 32, %1\n"
+ " or %0, %1, %0\n"
+ " .previous\n"
: "=r" (mask), "=r" (tmp)
: "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U |
_PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U),
"i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V |
+ _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V),
+ "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
+ _PAGE_CP_4V | _PAGE_E_4V |
_PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V));
return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
@@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
" andn %0, %4, %0\n"
" or %0, %5, %0\n"
" .previous\n"
+ " .section .sun_m7_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " andn %0, %6, %0\n"
+ " or %0, %5, %0\n"
+ " .previous\n"
: "=r" (val)
: "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U),
- "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V));
+ "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V),
+ "i" (_PAGE_CP_4V));
return __pgprot(val);
}
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index ed8f071..d1761df 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
#ifdef CONFIG_SMP
#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).core_id)
-#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
+#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu])
#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#endif /* CONFIG_SMP */
extern cpumask_t cpu_core_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_map[NR_CPUS];
static inline const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_core_map[cpu];
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
index 6fd4436..ec9c04d 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry {
};
extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
__sun4v_2insn_patch_end;
+extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch,
+ __sun_m7_2insn_patch_end;
#endif /* !(__ASSEMBLY__) */
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 07cc49e5..0f67942 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
struct sun4v_1insn_patch_entry *);
void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
struct sun4v_2insn_patch_entry *);
+void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *,
+ struct sun4v_2insn_patch_entry *);
extern unsigned int dcache_parity_tl1_occurred;
extern unsigned int icache_parity_tl1_occurred;
diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c
index 94e392b..814fb17 100644
--- a/arch/sparc/kernel/leon_pci_grpci2.c
+++ b/arch/sparc/kernel/leon_pci_grpci2.c
@@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev)
err = -ENOMEM;
goto err1;
}
- memset(grpci2priv, 0, sizeof(*grpci2priv));
priv->regs = regs;
priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */
priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT;
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 26c80e1..6f80936 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp)
}
}
-static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
+static void find_back_node_value(struct mdesc_handle *hp, u64 node,
+ char *srch_val,
+ void (*func)(struct mdesc_handle *, u64, int),
+ u64 val, int depth)
{
- u64 a;
-
- mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
- u64 t = mdesc_arc_target(hp, a);
- const char *name;
- const u64 *id;
+ u64 arc;
- name = mdesc_node_name(hp, t);
- if (!strcmp(name, "cpu")) {
- id = mdesc_get_property(hp, t, "id", NULL);
- if (*id < NR_CPUS)
- cpu_data(*id).core_id = core_id;
- } else {
- u64 j;
+ /* Since we have an estimate of recursion depth, do a sanity check. */
+ if (depth == 0)
+ return;
- mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) {
- u64 n = mdesc_arc_target(hp, j);
- const char *n_name;
+ mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) {
+ u64 n = mdesc_arc_target(hp, arc);
+ const char *name = mdesc_node_name(hp, n);
- n_name = mdesc_node_name(hp, n);
- if (strcmp(n_name, "cpu"))
- continue;
+ if (!strcmp(srch_val, name))
+ (*func)(hp, n, val);
- id = mdesc_get_property(hp, n, "id", NULL);
- if (*id < NR_CPUS)
- cpu_data(*id).core_id = core_id;
- }
- }
+ find_back_node_value(hp, n, srch_val, func, val, depth-1);
}
}
+static void __mark_core_id(struct mdesc_handle *hp, u64 node,
+ int core_id)
+{
+ const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+
+ if (*id < num_possible_cpus())
+ cpu_data(*id).core_id = core_id;
+}
+
+static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
+ int sock_id)
+{
+ const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+
+ if (*id < num_possible_cpus())
+ cpu_data(*id).sock_id = sock_id;
+}
+
+static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
+ int core_id)
+{
+ find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
+}
+
+static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
+ int sock_id)
+{
+ find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
+}
+
static void set_core_ids(struct mdesc_handle *hp)
{
int idx;
u64 mp;
idx = 1;
+
+ /* Identify unique cores by looking for cpus backpointed to by
+ * level 1 instruction caches.
+ */
mdesc_for_each_node_by_name(hp, mp, "cache") {
const u64 *level;
const char *type;
@@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp)
continue;
mark_core_ids(hp, mp, idx);
+ idx++;
+ }
+}
+
+static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
+{
+ u64 mp;
+ int idx = 1;
+ int fnd = 0;
+
+ /* Identify unique sockets by looking for cpus backpointed to by
+ * shared level n caches.
+ */
+ mdesc_for_each_node_by_name(hp, mp, "cache") {
+ const u64 *cur_lvl;
+
+ cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
+ if (*cur_lvl != level)
+ continue;
+
+ mark_sock_ids(hp, mp, idx);
+ idx++;
+ fnd = 1;
+ }
+ return fnd;
+}
+
+static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp)
+{
+ int idx = 1;
+ mdesc_for_each_node_by_name(hp, mp, "socket") {
+ u64 a;
+
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ u64 t = mdesc_arc_target(hp, a);
+ const char *name;
+ const u64 *id;
+
+ name = mdesc_node_name(hp, t);
+ if (strcmp(name, "cpu"))
+ continue;
+
+ id = mdesc_get_property(hp, t, "id", NULL);
+ if (*id < num_possible_cpus())
+ cpu_data(*id).sock_id = idx;
+ }
idx++;
}
}
+static void set_sock_ids(struct mdesc_handle *hp)
+{
+ u64 mp;
+
+ /* If machine description exposes sockets data use it.
+ * Otherwise fallback to use shared L3 or L2 caches.
+ */
+ mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
+ if (mp != MDESC_NODE_NULL)
+ return set_sock_ids_by_socket(hp, mp);
+
+ if (!set_sock_ids_by_cache(hp, 3))
+ set_sock_ids_by_cache(hp, 2);
+}
+
static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
{
u64 a;
@@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name)
continue;
mark_proc_ids(hp, mp, idx);
-
idx++;
}
}
@@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask)
set_core_ids(hp);
set_proc_ids(hp);
+ set_sock_ids(hp);
mdesc_release(hp);
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 6f7251f..c928bc6 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -1002,6 +1002,38 @@ static int __init pcibios_init(void)
subsys_initcall(pcibios_init);
#ifdef CONFIG_SYSFS
+
+#define SLOT_NAME_SIZE 11 /* Max decimal digits + null in u32 */
+
+static void pcie_bus_slot_names(struct pci_bus *pbus)
+{
+ struct pci_dev *pdev;
+ struct pci_bus *bus;
+
+ list_for_each_entry(pdev, &pbus->devices, bus_list) {
+ char name[SLOT_NAME_SIZE];
+ struct pci_slot *pci_slot;
+ const u32 *slot_num;
+ int len;
+
+ slot_num = of_get_property(pdev->dev.of_node,
+ "physical-slot#", &len);
+
+ if (slot_num == NULL || len != 4)
+ continue;
+
+ snprintf(name, sizeof(name), "%u", slot_num[0]);
+ pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL);
+
+ if (IS_ERR(pci_slot))
+ pr_err("PCI: pci_create_slot returned %ld.\n",
+ PTR_ERR(pci_slot));
+ }
+
+ list_for_each_entry(bus, &pbus->children, node)
+ pcie_bus_slot_names(bus);
+}
+
static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus)
{
const struct pci_slot_names {
@@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void)
while ((pbus = pci_find_next_bus(pbus)) != NULL) {
struct device_node *node;
+ struct pci_dev *pdev;
+
+ pdev = list_first_entry(&pbus->devices, struct pci_dev,
+ bus_list);
- if (pbus->self) {
- /* PCI->PCI bridge */
- node = pbus->self->dev.of_node;
+ if (pdev && pci_is_pcie(pdev)) {
+ pcie_bus_slot_names(pbus);
} else {
- struct pci_pbm_info *pbm = pbus->sysdata;
- /* Host PCI controller */
- node = pbm->op->dev.of_node;
- }
+ if (pbus->self) {
+
+ /* PCI->PCI bridge */
+ node = pbus->self->dev.of_node;
+
+ } else {
+ struct pci_pbm_info *pbm = pbus->sysdata;
- pci_bus_slot_names(node, pbus);
+ /* Host PCI controller */
+ node = pbm->op->dev.of_node;
+ }
+
+ pci_bus_slot_names(node, pbus);
+ }
}
return 0;
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index c38d19f..f7b2617 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
}
}
+void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
+ struct sun4v_2insn_patch_entry *end)
+{
+ while (start < end) {
+ unsigned long addr = start->addr;
+
+ *(unsigned int *) (addr + 0) = start->insns[0];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 0));
+
+ *(unsigned int *) (addr + 4) = start->insns[1];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 4));
+
+ start++;
+ }
+}
+
static void __init sun4v_patch(void)
{
extern void sun4v_hvapi_init(void);
@@ -267,6 +285,9 @@ static void __init sun4v_patch(void)
sun4v_patch_2insn_range(&__sun4v_2insn_patch,
&__sun4v_2insn_patch_end);
+ if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7)
+ sun_m7_patch_2insn_range(&__sun_m7_2insn_patch,
+ &__sun_m7_2insn_patch_end);
sun4v_hvapi_init();
}
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 61139d9..19cd08d 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL(cpu_core_sib_map);
static cpumask_t smp_commenced_mask;
@@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void)
}
}
+ for_each_present_cpu(i) {
+ unsigned int j;
+
+ for_each_present_cpu(j) {
+ if (cpu_data(i).sock_id == cpu_data(j).sock_id)
+ cpumask_set_cpu(j, &cpu_core_sib_map[i]);
+ }
+ }
+
for_each_present_cpu(i) {
unsigned int j;
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 0924305..f1a2f68 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -138,6 +138,11 @@ SECTIONS
*(.pause_3insn_patch)
__pause_3insn_patch_end = .;
}
+ .sun_m7_2insn_patch : {
+ __sun_m7_2insn_patch = .;
+ *(.sun_m7_2insn_patch)
+ __sun_m7_2insn_patch_end = .;
+ }
PERCPU_SECTION(SMP_CACHE_BYTES)
. = ALIGN(PAGE_SIZE);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 4ca0d6b..559cb744 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -54,6 +54,7 @@
#include "init_64.h"
unsigned long kern_linear_pte_xor[4] __read_mostly;
+static unsigned long page_cache4v_flag;
/* A bitmap, two bits for every 256MB of physical memory. These two
* bits determine what page size we use for kernel linear
@@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void)
static void __init sun4v_linear_pte_xor_finalize(void)
{
+ unsigned long pagecv_flag;
+
+ /* Bit 9 of TTE is no longer CV bit on M7 processor and it instead
+ * enables MCD error. Do not set bit 9 on M7 processor.
+ */
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_SPARC_M7:
+ pagecv_flag = 0x00;
+ break;
+ default:
+ pagecv_flag = _PAGE_CV_4V;
+ break;
+ }
#ifndef CONFIG_DEBUG_PAGEALLOC
if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
PAGE_OFFSET;
- kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
@@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
PAGE_OFFSET;
- kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
@@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
PAGE_OFFSET;
- kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
@@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void)
return available;
}
+#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
+#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
+#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
+#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
+#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
+#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
+
/* We need to exclude reserved regions. This exclusion will include
* vmlinux and initrd. To be more precise the initrd size could be used to
* compute a new lower limit because it is freed later during initialization.
@@ -2034,6 +2055,25 @@ void __init paging_init(void)
memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
#endif
+ /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde
+ * bit on M7 processor. This is a conflicting usage of the same
+ * bit. Enabling TTE.cv on M7 would turn on Memory Corruption
+ * Detection error on all pages and this will lead to problems
+ * later. Kernel does not run with MCD enabled and hence rest
+ * of the required steps to fully configure memory corruption
+ * detection are not taken. We need to ensure TTE.mcde is not
+ * set on M7 processor. Compute the value of cacheability
+ * flag for use later taking this into consideration.
+ */
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_SPARC_M7:
+ page_cache4v_flag = _PAGE_CP_4V;
+ break;
+ default:
+ page_cache4v_flag = _PAGE_CACHE_4V;
+ break;
+ }
+
if (tlb_type == hypervisor)
sun4v_pgprot_init();
else
@@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
}
#endif
-#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
-#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
-#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
-#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
-#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
-#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
-
pgprot_t PAGE_KERNEL __read_mostly;
EXPORT_SYMBOL(PAGE_KERNEL);
@@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
_PAGE_P_4U | _PAGE_W_4U);
if (tlb_type == hypervisor)
pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
- _PAGE_CP_4V | _PAGE_CV_4V |
- _PAGE_P_4V | _PAGE_W_4V);
+ page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V);
pte_base |= _PAGE_PMD_HUGE;
@@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void)
int i;
PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
- _PAGE_CACHE_4V | _PAGE_P_4V |
+ page_cache4v_flag | _PAGE_P_4V |
__ACCESS_BITS_4V | __DIRTY_BITS_4V |
_PAGE_EXEC_4V);
PAGE_KERNEL_LOCKED = PAGE_KERNEL;
_PAGE_IE = _PAGE_IE_4V;
_PAGE_E = _PAGE_E_4V;
- _PAGE_CACHE = _PAGE_CACHE_4V;
+ _PAGE_CACHE = page_cache4v_flag;
#ifdef CONFIG_DEBUG_PAGEALLOC
kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
@@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void)
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
PAGE_OFFSET;
#endif
- kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
- _PAGE_P_4V | _PAGE_W_4V);
+ kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V |
+ _PAGE_W_4V);
for (i = 1; i < 4; i++)
kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
@@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void)
_PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
_PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
- page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V;
- page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag;
+ page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
__ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
- page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
__ACCESS_BITS_4V | _PAGE_EXEC_4V);
- page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
__ACCESS_BITS_4V | _PAGE_EXEC_4V);
page_exec_bit = _PAGE_EXEC_4V;
@@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr)
_PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
if (tlb_type == hypervisor)
val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
- _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V |
+ page_cache4v_flag | _PAGE_P_4V |
_PAGE_EXEC_4V | _PAGE_W_4V);
return val | paddr;
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 7b11c5f..0496970 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-/* Define this to indicate that cmpxchg is an efficient operation. */
-#define __HAVE_ARCH_CMPXCHG
-
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_TILE_ATOMIC_64_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index 6ef4eca..dc61de1 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -54,7 +54,7 @@ extern void iounmap(volatile void __iomem *addr);
#define ioremap_nocache(physaddr, size) ioremap(physaddr, size)
#define ioremap_wc(physaddr, size) ioremap(physaddr, size)
-#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size)
+#define ioremap_wt(physaddr, size) ioremap(physaddr, size)
#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size)
#define mmiowb()
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 3942f74..1538562 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,3 +1,6 @@
+
+obj-y += entry/
+
obj-$(CONFIG_KVM) += kvm/
# Xen paravirtualization support
@@ -11,7 +14,7 @@ obj-y += kernel/
obj-y += mm/
obj-y += crypto/
-obj-y += vdso/
+
obj-$(CONFIG_IA32_EMULATION) += ia32/
obj-y += platform/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 066d9bd..7e39f9b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -9,140 +9,141 @@ config 64BIT
config X86_32
def_bool y
depends on !64BIT
- select CLKSRC_I8253
- select HAVE_UID16
config X86_64
def_bool y
depends on 64BIT
- select X86_DEV_DMA_OPS
- select ARCH_USE_CMPXCHG_LOCKREF
- select HAVE_LIVEPATCH
### Arch settings
config X86
def_bool y
- select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
- select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
+ select ACPI_LEGACY_TABLES_LOOKUP if ACPI
+ select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
+ select ANON_INODES
+ select ARCH_CLOCKSOURCE_DATA
+ select ARCH_DISCARD_MEMBLOCK
+ select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_SG_CHAIN
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
- select HAVE_AOUT if X86_32
- select HAVE_UNSTABLE_SCHED_CLOCK
- select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
- select ARCH_SUPPORTS_INT128 if X86_64
- select HAVE_IDE
- select HAVE_OPROFILE
- select HAVE_PCSPKR_PLATFORM
- select HAVE_PERF_EVENTS
- select HAVE_IOREMAP_PROT
- select HAVE_KPROBES
- select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
- select ARCH_DISCARD_MEMBLOCK
- select ARCH_WANT_OPTIONAL_GPIOLIB
+ select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_INT128 if X86_64
+ select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_USE_CMPXCHG_LOCKREF if X86_64
+ select ARCH_USE_QUEUED_RWLOCKS
+ select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_FRAME_POINTERS
- select HAVE_DMA_ATTRS
- select HAVE_DMA_CONTIGUOUS
- select HAVE_KRETPROBES
+ select ARCH_WANT_IPC_PARSE_VERSION if X86_32
+ select ARCH_WANT_OPTIONAL_GPIOLIB
+ select BUILDTIME_EXTABLE_SORT
+ select CLKEVT_I8253
+ select CLKSRC_I8253 if X86_32
+ select CLOCKSOURCE_VALIDATE_LAST_CYCLE
+ select CLOCKSOURCE_WATCHDOG
+ select CLONE_BACKWARDS if X86_32
+ select COMPAT_OLD_SIGACTION if IA32_EMULATION
+ select DCACHE_WORD_ACCESS
+ select GENERIC_CLOCKEVENTS
+ select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
+ select GENERIC_CLOCKEVENTS_MIN_ADJUST
+ select GENERIC_CMOS_UPDATE
+ select GENERIC_CPU_AUTOPROBE
select GENERIC_EARLY_IOREMAP
- select HAVE_OPTPROBES
- select HAVE_KPROBES_ON_FTRACE
- select HAVE_FTRACE_MCOUNT_RECORD
- select HAVE_FENTRY if X86_64
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_IOMAP
+ select GENERIC_IRQ_PROBE
+ select GENERIC_IRQ_SHOW
+ select GENERIC_PENDING_IRQ if SMP
+ select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
+ select GENERIC_TIME_VSYSCALL
+ select HAVE_ACPI_APEI if ACPI
+ select HAVE_ACPI_APEI_NMI if ACPI
+ select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_AOUT if X86_32
+ select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
+ select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
+ select HAVE_ARCH_KGDB
+ select HAVE_ARCH_KMEMCHECK
+ select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_SOFT_DIRTY if X86_64
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_BPF_JIT if X86_64
+ select HAVE_CC_STACKPROTECTOR
+ select HAVE_CMPXCHG_DOUBLE
+ select HAVE_CMPXCHG_LOCAL
+ select HAVE_CONTEXT_TRACKING if X86_64
select HAVE_C_RECORDMCOUNT
+ select HAVE_DEBUG_KMEMLEAK
+ select HAVE_DEBUG_STACKOVERFLOW
+ select HAVE_DMA_API_DEBUG
+ select HAVE_DMA_ATTRS
+ select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
- select HAVE_FUNCTION_TRACER
- select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_FUNCTION_GRAPH_FP_TEST
- select HAVE_SYSCALL_TRACEPOINTS
- select SYSCTL_EXCEPTION_TRACE
- select HAVE_KVM
- select HAVE_ARCH_KGDB
- select HAVE_ARCH_TRACEHOOK
- select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
- select USER_STACKTRACE_SUPPORT
- select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_DMA_API_DEBUG
- select HAVE_KERNEL_GZIP
+ select HAVE_FENTRY if X86_64
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_GRAPH_FP_TEST
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_TRACER
+ select HAVE_GENERIC_DMA_COHERENT if X86_32
+ select HAVE_HW_BREAKPOINT
+ select HAVE_IDE
+ select HAVE_IOREMAP_PROT
+ select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
+ select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KERNEL_BZIP2
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZ4
select HAVE_KERNEL_LZMA
- select HAVE_KERNEL_XZ
select HAVE_KERNEL_LZO
- select HAVE_KERNEL_LZ4
- select HAVE_HW_BREAKPOINT
+ select HAVE_KERNEL_XZ
+ select HAVE_KPROBES
+ select HAVE_KPROBES_ON_FTRACE
+ select HAVE_KRETPROBES
+ select HAVE_KVM
+ select HAVE_LIVEPATCH if X86_64
+ select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MIXED_BREAKPOINTS_REGS
- select PERF_EVENTS
+ select HAVE_OPROFILE
+ select HAVE_OPTPROBES
+ select HAVE_PCSPKR_PLATFORM
+ select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HAVE_DEBUG_KMEMLEAK
- select ANON_INODES
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
- select HAVE_CMPXCHG_LOCAL
- select HAVE_CMPXCHG_DOUBLE
- select HAVE_ARCH_KMEMCHECK
- select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
+ select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_UID16 if X86_32
+ select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
- select ARCH_HAS_ELF_RANDOMIZE
- select HAVE_ARCH_JUMP_LABEL
- select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
- select SPARSE_IRQ
- select GENERIC_FIND_FIRST_BIT
- select GENERIC_IRQ_PROBE
- select GENERIC_PENDING_IRQ if SMP
- select GENERIC_IRQ_SHOW
- select GENERIC_CLOCKEVENTS_MIN_ADJUST
select IRQ_FORCED_THREADING
- select HAVE_BPF_JIT if X86_64
- select HAVE_ARCH_TRANSPARENT_HUGEPAGE
- select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE)
- select ARCH_HAS_SG_CHAIN
- select CLKEVT_I8253
- select ARCH_HAVE_NMI_SAFE_CMPXCHG
- select GENERIC_IOMAP
- select DCACHE_WORD_ACCESS
- select GENERIC_SMP_IDLE_THREAD
- select ARCH_WANT_IPC_PARSE_VERSION if X86_32
- select HAVE_ARCH_SECCOMP_FILTER
- select BUILDTIME_EXTABLE_SORT
- select GENERIC_CMOS_UPDATE
- select HAVE_ARCH_SOFT_DIRTY if X86_64
- select CLOCKSOURCE_WATCHDOG
- select GENERIC_CLOCKEVENTS
- select ARCH_CLOCKSOURCE_DATA
- select CLOCKSOURCE_VALIDATE_LAST_CYCLE
- select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
- select GENERIC_TIME_VSYSCALL
- select GENERIC_STRNCPY_FROM_USER
- select GENERIC_STRNLEN_USER
- select HAVE_CONTEXT_TRACKING if X86_64
- select HAVE_IRQ_TIME_ACCOUNTING
- select VIRT_TO_BUS
- select MODULES_USE_ELF_REL if X86_32
- select MODULES_USE_ELF_RELA if X86_64
- select CLONE_BACKWARDS if X86_32
- select ARCH_USE_BUILTIN_BSWAP
- select ARCH_USE_QUEUE_RWLOCK
- select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
- select OLD_SIGACTION if X86_32
- select COMPAT_OLD_SIGACTION if IA32_EMULATION
+ select MODULES_USE_ELF_RELA if X86_64
+ select MODULES_USE_ELF_REL if X86_32
+ select OLD_SIGACTION if X86_32
+ select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
+ select PERF_EVENTS
select RTC_LIB
- select HAVE_DEBUG_STACKOVERFLOW
- select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
- select HAVE_CC_STACKPROTECTOR
- select GENERIC_CPU_AUTOPROBE
- select HAVE_ARCH_AUDITSYSCALL
- select ARCH_SUPPORTS_ATOMIC_RMW
- select HAVE_ACPI_APEI if ACPI
- select HAVE_ACPI_APEI_NMI if ACPI
- select ACPI_LEGACY_TABLES_LOOKUP if ACPI
- select X86_FEATURE_NAMES if PROC_FS
+ select SPARSE_IRQ
select SRCU
+ select SYSCTL_EXCEPTION_TRACE
+ select USER_STACKTRACE_SUPPORT
+ select VIRT_TO_BUS
+ select X86_DEV_DMA_OPS if X86_64
+ select X86_FEATURE_NAMES if PROC_FS
config INSTRUCTION_DECODER
def_bool y
@@ -260,10 +261,6 @@ config X86_64_SMP
def_bool y
depends on X86_64 && SMP
-config X86_HT
- def_bool y
- depends on SMP
-
config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !CC_STACKPROTECTOR
@@ -341,7 +338,7 @@ config X86_FEATURE_NAMES
config X86_X2APIC
bool "Support x2apic"
- depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP
+ depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST)
---help---
This enables x2apic support on CPUs that have this feature.
@@ -467,7 +464,6 @@ config X86_INTEL_CE
select X86_REBOOTFIXUPS
select OF
select OF_EARLY_FLATTREE
- select IRQ_DOMAIN
---help---
Select for the Intel CE media processor (CE4100) SOC.
This option compiles in support for the CE4100 SOC for settop
@@ -667,7 +663,7 @@ config PARAVIRT_DEBUG
config PARAVIRT_SPINLOCKS
bool "Paravirtualization layer for spinlocks"
depends on PARAVIRT && SMP
- select UNINLINE_SPIN_UNLOCK
+ select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
---help---
Paravirtualized spinlocks allow a pvops backend to replace the
spinlock implementation with something virtualization-friendly
@@ -852,11 +848,12 @@ config NR_CPUS
default "1" if !SMP
default "8192" if MAXSMP
default "32" if SMP && X86_BIGSMP
- default "8" if SMP
+ default "8" if SMP && X86_32
+ default "64" if SMP
---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
- supported value is 4096, otherwise the maximum value is 512. The
+ supported value is 8192, otherwise the maximum value is 512. The
minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds
@@ -864,7 +861,7 @@ config NR_CPUS
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
- depends on X86_HT
+ depends on SMP
---help---
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -874,7 +871,7 @@ config SCHED_SMT
config SCHED_MC
def_bool y
prompt "Multi-core scheduler support"
- depends on X86_HT
+ depends on SMP
---help---
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
@@ -915,12 +912,12 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
- select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+ select IRQ_DOMAIN_HIERARCHY
+ select PCI_MSI_IRQ_DOMAIN if PCI_MSI
config X86_IO_APIC
def_bool y
depends on X86_LOCAL_APIC || X86_UP_IOAPIC
- select IRQ_DOMAIN
config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
bool "Reroute for broken boot IRQs"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2fda005..118e6de 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -77,6 +77,12 @@ else
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
+ # Align jump targets to 1 byte, not the default 16 bytes:
+ KBUILD_CFLAGS += -falign-jumps=1
+
+ # Pack loops tightly as well:
+ KBUILD_CFLAGS += -falign-loops=1
+
# Don't autogenerate traditional x87 instructions
KBUILD_CFLAGS += $(call cc-option,-mno-80387)
KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
@@ -84,6 +90,9 @@ else
# Use -mpreferred-stack-boundary=3 if supported.
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
+ # Use -mskip-rax-setup if supported.
+ KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
+
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
@@ -140,12 +149,6 @@ endif
sp-$(CONFIG_X86_32) := esp
sp-$(CONFIG_X86_64) := rsp
-# do binutils support CFI?
-cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
-# is .cfi_signal_frame supported too?
-cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
-cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
-
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
@@ -153,8 +156,8 @@ asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr)
LDFLAGS := -m elf_$(UTS_MACHINE)
@@ -178,7 +181,7 @@ archscripts: scripts_basic
# Syscall table generation
archheaders:
- $(Q)$(MAKE) $(build)=arch/x86/syscalls all
+ $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
archprepare:
ifeq ($(CONFIG_KEXEC_FILE),y)
@@ -241,7 +244,7 @@ install:
PHONY += vdso_install
vdso_install:
- $(Q)$(MAKE) $(build)=arch/x86/vdso $@
+ $(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@
archclean:
$(Q)rm -rf $(objtree)/arch/i386
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index ef17683..48304b8 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1109,6 +1109,8 @@ struct boot_params *make_boot_params(struct efi_config *c)
if (!cmdline_ptr)
goto fail;
hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
+ /* Fill in upper bits of command line address, NOP on 32 bit */
+ boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
hdr->ramdisk_image = 0;
hdr->ramdisk_size = 0;
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
new file mode 100644
index 0000000..7a14497
--- /dev/null
+++ b/arch/x86/entry/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the x86 low level entry code
+#
+obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+
+obj-y += vdso/
+obj-y += vsyscall/
+
+obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
+
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/entry/calling.h
index 1c8b50e..f4e6308 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/entry/calling.h
@@ -46,8 +46,6 @@ For 32-bit we have the following conventions - kernel is built with
*/
-#include <asm/dwarf2.h>
-
#ifdef CONFIG_X86_64
/*
@@ -91,28 +89,27 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
- subq $15*8+\addskip, %rsp
- CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+ addq $-(15*8+\addskip), %rsp
.endm
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
.if \r11
- movq_cfi r11, 6*8+\offset
+ movq %r11, 6*8+\offset(%rsp)
.endif
.if \r8910
- movq_cfi r10, 7*8+\offset
- movq_cfi r9, 8*8+\offset
- movq_cfi r8, 9*8+\offset
+ movq %r10, 7*8+\offset(%rsp)
+ movq %r9, 8*8+\offset(%rsp)
+ movq %r8, 9*8+\offset(%rsp)
.endif
.if \rax
- movq_cfi rax, 10*8+\offset
+ movq %rax, 10*8+\offset(%rsp)
.endif
.if \rcx
- movq_cfi rcx, 11*8+\offset
+ movq %rcx, 11*8+\offset(%rsp)
.endif
- movq_cfi rdx, 12*8+\offset
- movq_cfi rsi, 13*8+\offset
- movq_cfi rdi, 14*8+\offset
+ movq %rdx, 12*8+\offset(%rsp)
+ movq %rsi, 13*8+\offset(%rsp)
+ movq %rdi, 14*8+\offset(%rsp)
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
@@ -131,24 +128,24 @@ For 32-bit we have the following conventions - kernel is built with
.endm
.macro SAVE_EXTRA_REGS offset=0
- movq_cfi r15, 0*8+\offset
- movq_cfi r14, 1*8+\offset
- movq_cfi r13, 2*8+\offset
- movq_cfi r12, 3*8+\offset
- movq_cfi rbp, 4*8+\offset
- movq_cfi rbx, 5*8+\offset
+ movq %r15, 0*8+\offset(%rsp)
+ movq %r14, 1*8+\offset(%rsp)
+ movq %r13, 2*8+\offset(%rsp)
+ movq %r12, 3*8+\offset(%rsp)
+ movq %rbp, 4*8+\offset(%rsp)
+ movq %rbx, 5*8+\offset(%rsp)
.endm
.macro SAVE_EXTRA_REGS_RBP offset=0
- movq_cfi rbp, 4*8+\offset
+ movq %rbp, 4*8+\offset(%rsp)
.endm
.macro RESTORE_EXTRA_REGS offset=0
- movq_cfi_restore 0*8+\offset, r15
- movq_cfi_restore 1*8+\offset, r14
- movq_cfi_restore 2*8+\offset, r13
- movq_cfi_restore 3*8+\offset, r12
- movq_cfi_restore 4*8+\offset, rbp
- movq_cfi_restore 5*8+\offset, rbx
+ movq 0*8+\offset(%rsp), %r15
+ movq 1*8+\offset(%rsp), %r14
+ movq 2*8+\offset(%rsp), %r13
+ movq 3*8+\offset(%rsp), %r12
+ movq 4*8+\offset(%rsp), %rbp
+ movq 5*8+\offset(%rsp), %rbx
.endm
.macro ZERO_EXTRA_REGS
@@ -162,24 +159,24 @@ For 32-bit we have the following conventions - kernel is built with
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
- movq_cfi_restore 6*8, r11
+ movq 6*8(%rsp), %r11
.endif
.if \rstor_r8910
- movq_cfi_restore 7*8, r10
- movq_cfi_restore 8*8, r9
- movq_cfi_restore 9*8, r8
+ movq 7*8(%rsp), %r10
+ movq 8*8(%rsp), %r9
+ movq 9*8(%rsp), %r8
.endif
.if \rstor_rax
- movq_cfi_restore 10*8, rax
+ movq 10*8(%rsp), %rax
.endif
.if \rstor_rcx
- movq_cfi_restore 11*8, rcx
+ movq 11*8(%rsp), %rcx
.endif
.if \rstor_rdx
- movq_cfi_restore 12*8, rdx
+ movq 12*8(%rsp), %rdx
.endif
- movq_cfi_restore 13*8, rsi
- movq_cfi_restore 14*8, rdi
+ movq 13*8(%rsp), %rsi
+ movq 14*8(%rsp), %rdi
.endm
.macro RESTORE_C_REGS
RESTORE_C_REGS_HELPER 1,1,1,1,1
@@ -204,8 +201,7 @@ For 32-bit we have the following conventions - kernel is built with
.endm
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
- addq $15*8+\addskip, %rsp
- CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
+ subq $-(15*8+\addskip), %rsp
.endm
.macro icebp
@@ -224,23 +220,23 @@ For 32-bit we have the following conventions - kernel is built with
*/
.macro SAVE_ALL
- pushl_cfi_reg eax
- pushl_cfi_reg ebp
- pushl_cfi_reg edi
- pushl_cfi_reg esi
- pushl_cfi_reg edx
- pushl_cfi_reg ecx
- pushl_cfi_reg ebx
+ pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
.endm
.macro RESTORE_ALL
- popl_cfi_reg ebx
- popl_cfi_reg ecx
- popl_cfi_reg edx
- popl_cfi_reg esi
- popl_cfi_reg edi
- popl_cfi_reg ebp
- popl_cfi_reg eax
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
.endm
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
new file mode 100644
index 0000000..21dc60a
--- /dev/null
+++ b/arch/x86/entry/entry_32.S
@@ -0,0 +1,1248 @@
+/*
+ * Copyright (C) 1991,1992 Linus Torvalds
+ *
+ * entry_32.S contains the system-call and low-level fault and trap handling routines.
+ *
+ * Stack layout in 'syscall_exit':
+ * ptrace needs to have all registers on the stack.
+ * If the order here is changed, it needs to be
+ * updated in fork.c:copy_process(), signal.c:do_signal(),
+ * ptrace.c and ptrace.h
+ *
+ * 0(%esp) - %ebx
+ * 4(%esp) - %ecx
+ * 8(%esp) - %edx
+ * C(%esp) - %esi
+ * 10(%esp) - %edi
+ * 14(%esp) - %ebp
+ * 18(%esp) - %eax
+ * 1C(%esp) - %ds
+ * 20(%esp) - %es
+ * 24(%esp) - %fs
+ * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
+ * 2C(%esp) - orig_eax
+ * 30(%esp) - %eip
+ * 34(%esp) - %cs
+ * 38(%esp) - %eflags
+ * 3C(%esp) - %oldesp
+ * 40(%esp) - %oldss
+ */
+
+#include <linux/linkage.h>
+#include <linux/err.h>
+#include <asm/thread_info.h>
+#include <asm/irqflags.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
+#include <asm/ftrace.h>
+#include <asm/irq_vectors.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/smap.h>
+
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE 0x40000000
+
+#ifndef CONFIG_AUDITSYSCALL
+# define sysenter_audit syscall_trace_entry
+# define sysexit_audit syscall_exit_work
+#endif
+
+ .section .entry.text, "ax"
+
+/*
+ * We use macros for low-level operations which need to be overridden
+ * for paravirtualization. The following will never clobber any registers:
+ * INTERRUPT_RETURN (aka. "iret")
+ * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
+ * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ *
+ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
+ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
+ * Allowing a register to be clobbered can shrink the paravirt replacement
+ * enough to patch inline, increasing performance.
+ */
+
+#ifdef CONFIG_PREEMPT
+# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
+#else
+# define preempt_stop(clobbers)
+# define resume_kernel restore_all
+#endif
+
+.macro TRACE_IRQS_IRET
+#ifdef CONFIG_TRACE_IRQFLAGS
+ testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off?
+ jz 1f
+ TRACE_IRQS_ON
+1:
+#endif
+.endm
+
+/*
+ * User gs save/restore
+ *
+ * %gs is used for userland TLS and kernel only uses it for stack
+ * canary which is required to be at %gs:20 by gcc. Read the comment
+ * at the top of stackprotector.h for more info.
+ *
+ * Local labels 98 and 99 are used.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+
+ /* unfortunately push/pop can't be no-op */
+.macro PUSH_GS
+ pushl $0
+.endm
+.macro POP_GS pop=0
+ addl $(4 + \pop), %esp
+.endm
+.macro POP_GS_EX
+.endm
+
+ /* all the rest are no-op */
+.macro PTGS_TO_GS
+.endm
+.macro PTGS_TO_GS_EX
+.endm
+.macro GS_TO_REG reg
+.endm
+.macro REG_TO_PTGS reg
+.endm
+.macro SET_KERNEL_GS reg
+.endm
+
+#else /* CONFIG_X86_32_LAZY_GS */
+
+.macro PUSH_GS
+ pushl %gs
+.endm
+
+.macro POP_GS pop=0
+98: popl %gs
+ .if \pop <> 0
+ add $\pop, %esp
+ .endif
+.endm
+.macro POP_GS_EX
+.pushsection .fixup, "ax"
+99: movl $0, (%esp)
+ jmp 98b
+.popsection
+ _ASM_EXTABLE(98b, 99b)
+.endm
+
+.macro PTGS_TO_GS
+98: mov PT_GS(%esp), %gs
+.endm
+.macro PTGS_TO_GS_EX
+.pushsection .fixup, "ax"
+99: movl $0, PT_GS(%esp)
+ jmp 98b
+.popsection
+ _ASM_EXTABLE(98b, 99b)
+.endm
+
+.macro GS_TO_REG reg
+ movl %gs, \reg
+.endm
+.macro REG_TO_PTGS reg
+ movl \reg, PT_GS(%esp)
+.endm
+.macro SET_KERNEL_GS reg
+ movl $(__KERNEL_STACK_CANARY), \reg
+ movl \reg, %gs
+.endm
+
+#endif /* CONFIG_X86_32_LAZY_GS */
+
+.macro SAVE_ALL
+ cld
+ PUSH_GS
+ pushl %fs
+ pushl %es
+ pushl %ds
+ pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+ movl $(__USER_DS), %edx
+ movl %edx, %ds
+ movl %edx, %es
+ movl $(__KERNEL_PERCPU), %edx
+ movl %edx, %fs
+ SET_KERNEL_GS %edx
+.endm
+
+.macro RESTORE_INT_REGS
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+.endm
+
+.macro RESTORE_REGS pop=0
+ RESTORE_INT_REGS
+1: popl %ds
+2: popl %es
+3: popl %fs
+ POP_GS \pop
+.pushsection .fixup, "ax"
+4: movl $0, (%esp)
+ jmp 1b
+5: movl $0, (%esp)
+ jmp 2b
+6: movl $0, (%esp)
+ jmp 3b
+.popsection
+ _ASM_EXTABLE(1b, 4b)
+ _ASM_EXTABLE(2b, 5b)
+ _ASM_EXTABLE(3b, 6b)
+ POP_GS_EX
+.endm
+
+ENTRY(ret_from_fork)
+ pushl %eax
+ call schedule_tail
+ GET_THREAD_INFO(%ebp)
+ popl %eax
+ pushl $0x0202 # Reset kernel eflags
+ popfl
+ jmp syscall_exit
+END(ret_from_fork)
+
+ENTRY(ret_from_kernel_thread)
+ pushl %eax
+ call schedule_tail
+ GET_THREAD_INFO(%ebp)
+ popl %eax
+ pushl $0x0202 # Reset kernel eflags
+ popfl
+ movl PT_EBP(%esp), %eax
+ call *PT_EBX(%esp)
+ movl $0, PT_EAX(%esp)
+ jmp syscall_exit
+ENDPROC(ret_from_kernel_thread)
+
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible which is why some of this is
+ * less clear than it otherwise should be.
+ */
+
+ # userspace resumption stub bypassing syscall exit tracing
+ ALIGN
+ret_from_exception:
+ preempt_stop(CLBR_ANY)
+ret_from_intr:
+ GET_THREAD_INFO(%ebp)
+#ifdef CONFIG_VM86
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb PT_CS(%esp), %al
+ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+#else
+ /*
+ * We can be coming here from child spawned by kernel_thread().
+ */
+ movl PT_CS(%esp), %eax
+ andl $SEGMENT_RPL_MASK, %eax
+#endif
+ cmpl $USER_RPL, %eax
+ jb resume_kernel # not returning to v8086 or userspace
+
+ENTRY(resume_userspace)
+ LOCKDEP_SYS_EXIT
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
+ # int/exception return?
+ jne work_pending
+ jmp restore_all
+END(ret_from_exception)
+
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+ DISABLE_INTERRUPTS(CLBR_ANY)
+need_resched:
+ cmpl $0, PER_CPU_VAR(__preempt_count)
+ jnz restore_all
+ testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz restore_all
+ call preempt_schedule_irq
+ jmp need_resched
+END(resume_kernel)
+#endif
+
+/*
+ * SYSENTER_RETURN points to after the SYSENTER instruction
+ * in the vsyscall page. See vsyscall-sysentry.S, which defines
+ * the symbol.
+ */
+
+ # SYSENTER call handler stub
+ENTRY(entry_SYSENTER_32)
+ movl TSS_sysenter_sp0(%esp), %esp
+sysenter_past_esp:
+ /*
+ * Interrupts are disabled here, but we can't trace it until
+ * enough kernel state to call TRACE_IRQS_OFF can be called - but
+ * we immediately enable interrupts at that point anyway.
+ */
+ pushl $__USER_DS
+ pushl %ebp
+ pushfl
+ orl $X86_EFLAGS_IF, (%esp)
+ pushl $__USER_CS
+ /*
+ * Push current_thread_info()->sysenter_return to the stack.
+ * A tiny bit of offset fixup is necessary: TI_sysenter_return
+ * is relative to thread_info, which is at the bottom of the
+ * kernel stack page. 4*4 means the 4 words pushed above;
+ * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
+ * and THREAD_SIZE takes us to the bottom.
+ */
+ pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
+
+ pushl %eax
+ SAVE_ALL
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
+/*
+ * Load the potential sixth argument from user stack.
+ * Careful about security.
+ */
+ cmpl $__PAGE_OFFSET-3, %ebp
+ jae syscall_fault
+ ASM_STAC
+1: movl (%ebp), %ebp
+ ASM_CLAC
+ movl %ebp, PT_EBP(%esp)
+ _ASM_EXTABLE(1b, syscall_fault)
+
+ GET_THREAD_INFO(%ebp)
+
+ testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
+ jnz sysenter_audit
+sysenter_do_call:
+ cmpl $(NR_syscalls), %eax
+ jae sysenter_badsys
+ call *sys_call_table(, %eax, 4)
+sysenter_after_call:
+ movl %eax, PT_EAX(%esp)
+ LOCKDEP_SYS_EXIT
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testl $_TIF_ALLWORK_MASK, %ecx
+ jnz sysexit_audit
+sysenter_exit:
+/* if something modifies registers it must also disable sysexit */
+ movl PT_EIP(%esp), %edx
+ movl PT_OLDESP(%esp), %ecx
+ xorl %ebp, %ebp
+ TRACE_IRQS_ON
+1: mov PT_FS(%esp), %fs
+ PTGS_TO_GS
+ ENABLE_INTERRUPTS_SYSEXIT
+
+#ifdef CONFIG_AUDITSYSCALL
+sysenter_audit:
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp)
+ jnz syscall_trace_entry
+ /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
+ movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
+ /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
+ pushl PT_ESI(%esp) /* a3: 5th arg */
+ pushl PT_EDX+4(%esp) /* a2: 4th arg */
+ call __audit_syscall_entry
+ popl %ecx /* get that remapped edx off the stack */
+ popl %ecx /* get that remapped esi off the stack */
+ movl PT_EAX(%esp), %eax /* reload syscall number */
+ jmp sysenter_do_call
+
+sysexit_audit:
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
+ jnz syscall_exit_work
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ movl %eax, %edx /* second arg, syscall return value */
+ cmpl $-MAX_ERRNO, %eax /* is it an error ? */
+ setbe %al /* 1 if so, 0 if not */
+ movzbl %al, %eax /* zero-extend that */
+ call __audit_syscall_exit
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
+ jnz syscall_exit_work
+ movl PT_EAX(%esp), %eax /* reload syscall return value */
+ jmp sysenter_exit
+#endif
+
+.pushsection .fixup, "ax"
+2: movl $0, PT_FS(%esp)
+ jmp 1b
+.popsection
+ _ASM_EXTABLE(1b, 2b)
+ PTGS_TO_GS_EX
+ENDPROC(entry_SYSENTER_32)
+
+ # system call handler stub
+ENTRY(entry_INT80_32)
+ ASM_CLAC
+ pushl %eax # save orig_eax
+ SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+ # system call tracing in operation / emulation
+ testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
+ jnz syscall_trace_entry
+ cmpl $(NR_syscalls), %eax
+ jae syscall_badsys
+syscall_call:
+ call *sys_call_table(, %eax, 4)
+syscall_after_call:
+ movl %eax, PT_EAX(%esp) # store the return value
+syscall_exit:
+ LOCKDEP_SYS_EXIT
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testl $_TIF_ALLWORK_MASK, %ecx # current->work
+ jnz syscall_exit_work
+
+restore_all:
+ TRACE_IRQS_IRET
+restore_all_notrace:
+#ifdef CONFIG_X86_ESPFIX32
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
+ /*
+ * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
+ * are returning to the kernel.
+ * See comments in process.c:copy_thread() for details.
+ */
+ movb PT_OLDSS(%esp), %ah
+ movb PT_CS(%esp), %al
+ andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
+ cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
+ je ldt_ss # returning to user-space with LDT SS
+#endif
+restore_nocheck:
+ RESTORE_REGS 4 # skip orig_eax/error_code
+irq_return:
+ INTERRUPT_RETURN
+.section .fixup, "ax"
+ENTRY(iret_exc )
+ pushl $0 # no error code
+ pushl $do_iret_error
+ jmp error_code
+.previous
+ _ASM_EXTABLE(irq_return, iret_exc)
+
+#ifdef CONFIG_X86_ESPFIX32
+ldt_ss:
+#ifdef CONFIG_PARAVIRT
+ /*
+ * The kernel can't run on a non-flat stack if paravirt mode
+ * is active. Rather than try to fixup the high bits of
+ * ESP, bypass this code entirely. This may break DOSemu
+ * and/or Wine support in a paravirt VM, although the option
+ * is still available to implement the setting of the high
+ * 16-bits in the INTERRUPT_RETURN paravirt-op.
+ */
+ cmpl $0, pv_info+PARAVIRT_enabled
+ jne restore_nocheck
+#endif
+
+/*
+ * Setup and switch to ESPFIX stack
+ *
+ * We're returning to userspace with a 16 bit stack. The CPU will not
+ * restore the high word of ESP for us on executing iret... This is an
+ * "official" bug of all the x86-compatible CPUs, which we can work
+ * around to make dosemu and wine happy. We do this by preloading the
+ * high word of ESP with the high word of the userspace ESP while
+ * compensating for the offset by changing to the ESPFIX segment with
+ * a base address that matches for the difference.
+ */
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
+ mov %esp, %edx /* load kernel esp */
+ mov PT_OLDESP(%esp), %eax /* load userspace esp */
+ mov %dx, %ax /* eax: new kernel esp */
+ sub %eax, %edx /* offset (low word is 0) */
+ shr $16, %edx
+ mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
+ mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
+ pushl $__ESPFIX_SS
+ pushl %eax /* new kernel esp */
+ /*
+ * Disable interrupts, but do not irqtrace this section: we
+ * will soon execute iret and the tracer was already set to
+ * the irqstate after the IRET:
+ */
+ DISABLE_INTERRUPTS(CLBR_EAX)
+ lss (%esp), %esp /* switch to espfix segment */
+ jmp restore_nocheck
+#endif
+ENDPROC(entry_INT80_32)
+
+ # perform work that needs to be done immediately before resumption
+ ALIGN
+work_pending:
+ testb $_TIF_NEED_RESCHED, %cl
+ jz work_notifysig
+work_resched:
+ call schedule
+ LOCKDEP_SYS_EXIT
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
+ # than syscall tracing?
+ jz restore_all
+ testb $_TIF_NEED_RESCHED, %cl
+ jnz work_resched
+
+work_notifysig: # deal with pending signals and
+ # notify-resume requests
+#ifdef CONFIG_VM86
+ testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
+ movl %esp, %eax
+ jnz work_notifysig_v86 # returning to kernel-space or
+ # vm86-space
+1:
+#else
+ movl %esp, %eax
+#endif
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ movb PT_CS(%esp), %bl
+ andb $SEGMENT_RPL_MASK, %bl
+ cmpb $USER_RPL, %bl
+ jb resume_kernel
+ xorl %edx, %edx
+ call do_notify_resume
+ jmp resume_userspace
+
+#ifdef CONFIG_VM86
+ ALIGN
+work_notifysig_v86:
+ pushl %ecx # save ti_flags for do_notify_resume
+ call save_v86_state # %eax contains pt_regs pointer
+ popl %ecx
+ movl %eax, %esp
+ jmp 1b
+#endif
+END(work_pending)
+
+ # perform syscall exit tracing
+ ALIGN
+syscall_trace_entry:
+ movl $-ENOSYS, PT_EAX(%esp)
+ movl %esp, %eax
+ call syscall_trace_enter
+ /* What it returned is what we'll actually use. */
+ cmpl $(NR_syscalls), %eax
+ jnae syscall_call
+ jmp syscall_exit
+END(syscall_trace_entry)
+
+ # perform syscall exit tracing
+ ALIGN
+syscall_exit_work:
+ testl $_TIF_WORK_SYSCALL_EXIT, %ecx
+ jz work_pending
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
+ # schedule() instead
+ movl %esp, %eax
+ call syscall_trace_leave
+ jmp resume_userspace
+END(syscall_exit_work)
+
+syscall_fault:
+ ASM_CLAC
+ GET_THREAD_INFO(%ebp)
+ movl $-EFAULT, PT_EAX(%esp)
+ jmp resume_userspace
+END(syscall_fault)
+
+syscall_badsys:
+ movl $-ENOSYS, %eax
+ jmp syscall_after_call
+END(syscall_badsys)
+
+sysenter_badsys:
+ movl $-ENOSYS, %eax
+ jmp sysenter_after_call
+END(sysenter_badsys)
+
+.macro FIXUP_ESPFIX_STACK
+/*
+ * Switch back for ESPFIX stack to the normal zerobased stack
+ *
+ * We can't call C functions using the ESPFIX stack. This code reads
+ * the high word of the segment base from the GDT and swiches to the
+ * normal stack and adjusts ESP with the matching offset.
+ */
+#ifdef CONFIG_X86_ESPFIX32
+ /* fixup the stack */
+ mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
+ mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
+ shl $16, %eax
+ addl %esp, %eax /* the adjusted stack pointer */
+ pushl $__KERNEL_DS
+ pushl %eax
+ lss (%esp), %esp /* switch to the normal stack segment */
+#endif
+.endm
+.macro UNWIND_ESPFIX_STACK
+#ifdef CONFIG_X86_ESPFIX32
+ movl %ss, %eax
+ /* see if on espfix stack */
+ cmpw $__ESPFIX_SS, %ax
+ jne 27f
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ /* switch to normal stack */
+ FIXUP_ESPFIX_STACK
+27:
+#endif
+.endm
+
+/*
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
+ */
+ .align 8
+ENTRY(irq_entries_start)
+ vector=FIRST_EXTERNAL_VECTOR
+ .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+ pushl $(~vector+0x80) /* Note: always in signed byte range */
+ vector=vector+1
+ jmp common_interrupt
+ .align 8
+ .endr
+END(irq_entries_start)
+
+/*
+ * the CPU automatically disables interrupts when executing an IRQ vector,
+ * so IRQ-flags tracing has to follow that:
+ */
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+common_interrupt:
+ ASM_CLAC
+ addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ movl %esp, %eax
+ call do_IRQ
+ jmp ret_from_intr
+ENDPROC(common_interrupt)
+
+#define BUILD_INTERRUPT3(name, nr, fn) \
+ENTRY(name) \
+ ASM_CLAC; \
+ pushl $~(nr); \
+ SAVE_ALL; \
+ TRACE_IRQS_OFF \
+ movl %esp, %eax; \
+ call fn; \
+ jmp ret_from_intr; \
+ENDPROC(name)
+
+
+#ifdef CONFIG_TRACING
+# define TRACE_BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
+#else
+# define TRACE_BUILD_INTERRUPT(name, nr)
+#endif
+
+#define BUILD_INTERRUPT(name, nr) \
+ BUILD_INTERRUPT3(name, nr, smp_##name); \
+ TRACE_BUILD_INTERRUPT(name, nr)
+
+/* The include is where all of the SMP etc. interrupts come from */
+#include <asm/entry_arch.h>
+
+ENTRY(coprocessor_error)
+ ASM_CLAC
+ pushl $0
+ pushl $do_coprocessor_error
+ jmp error_code
+END(coprocessor_error)
+
+ENTRY(simd_coprocessor_error)
+ ASM_CLAC
+ pushl $0
+#ifdef CONFIG_X86_INVD_BUG
+ /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
+ ALTERNATIVE "pushl $do_general_protection", \
+ "pushl $do_simd_coprocessor_error", \
+ X86_FEATURE_XMM
+#else
+ pushl $do_simd_coprocessor_error
+#endif
+ jmp error_code
+END(simd_coprocessor_error)
+
+ENTRY(device_not_available)
+ ASM_CLAC
+ pushl $-1 # mark this as an int
+ pushl $do_device_not_available
+ jmp error_code
+END(device_not_available)
+
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_iret)
+ iret
+ _ASM_EXTABLE(native_iret, iret_exc)
+END(native_iret)
+
+ENTRY(native_irq_enable_sysexit)
+ sti
+ sysexit
+END(native_irq_enable_sysexit)
+#endif
+
+ENTRY(overflow)
+ ASM_CLAC
+ pushl $0
+ pushl $do_overflow
+ jmp error_code
+END(overflow)
+
+ENTRY(bounds)
+ ASM_CLAC
+ pushl $0
+ pushl $do_bounds
+ jmp error_code
+END(bounds)
+
+ENTRY(invalid_op)
+ ASM_CLAC
+ pushl $0
+ pushl $do_invalid_op
+ jmp error_code
+END(invalid_op)
+
+ENTRY(coprocessor_segment_overrun)
+ ASM_CLAC
+ pushl $0
+ pushl $do_coprocessor_segment_overrun
+ jmp error_code
+END(coprocessor_segment_overrun)
+
+ENTRY(invalid_TSS)
+ ASM_CLAC
+ pushl $do_invalid_TSS
+ jmp error_code
+END(invalid_TSS)
+
+ENTRY(segment_not_present)
+ ASM_CLAC
+ pushl $do_segment_not_present
+ jmp error_code
+END(segment_not_present)
+
+ENTRY(stack_segment)
+ ASM_CLAC
+ pushl $do_stack_segment
+ jmp error_code
+END(stack_segment)
+
+ENTRY(alignment_check)
+ ASM_CLAC
+ pushl $do_alignment_check
+ jmp error_code
+END(alignment_check)
+
+ENTRY(divide_error)
+ ASM_CLAC
+ pushl $0 # no error code
+ pushl $do_divide_error
+ jmp error_code
+END(divide_error)
+
+#ifdef CONFIG_X86_MCE
+ENTRY(machine_check)
+ ASM_CLAC
+ pushl $0
+ pushl machine_check_vector
+ jmp error_code
+END(machine_check)
+#endif
+
+ENTRY(spurious_interrupt_bug)
+ ASM_CLAC
+ pushl $0
+ pushl $do_spurious_interrupt_bug
+ jmp error_code
+END(spurious_interrupt_bug)
+
+#ifdef CONFIG_XEN
+/*
+ * Xen doesn't set %esp to be precisely what the normal SYSENTER
+ * entry point expects, so fix it up before using the normal path.
+ */
+ENTRY(xen_sysenter_target)
+ addl $5*4, %esp /* remove xen-provided frame */
+ jmp sysenter_past_esp
+
+ENTRY(xen_hypervisor_callback)
+ pushl $-1 /* orig_ax = -1 => not a system call */
+ SAVE_ALL
+ TRACE_IRQS_OFF
+
+ /*
+ * Check to see if we got the event in the critical
+ * region in xen_iret_direct, after we've reenabled
+ * events and checked for pending events. This simulates
+ * iret instruction's behaviour where it delivers a
+ * pending interrupt when enabling interrupts:
+ */
+ movl PT_EIP(%esp), %eax
+ cmpl $xen_iret_start_crit, %eax
+ jb 1f
+ cmpl $xen_iret_end_crit, %eax
+ jae 1f
+
+ jmp xen_iret_crit_fixup
+
+ENTRY(xen_do_upcall)
+1: mov %esp, %eax
+ call xen_evtchn_do_upcall
+#ifndef CONFIG_PREEMPT
+ call xen_maybe_preempt_hcall
+#endif
+ jmp ret_from_intr
+ENDPROC(xen_hypervisor_callback)
+
+/*
+ * Hypervisor uses this for application faults while it executes.
+ * We get here for two reasons:
+ * 1. Fault while reloading DS, ES, FS or GS
+ * 2. Fault while executing IRET
+ * Category 1 we fix up by reattempting the load, and zeroing the segment
+ * register if the load fails.
+ * Category 2 we fix up by jumping to do_iret_error. We cannot use the
+ * normal Linux return path in this case because if we use the IRET hypercall
+ * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+ * We distinguish between categories by maintaining a status value in EAX.
+ */
+ENTRY(xen_failsafe_callback)
+ pushl %eax
+ movl $1, %eax
+1: mov 4(%esp), %ds
+2: mov 8(%esp), %es
+3: mov 12(%esp), %fs
+4: mov 16(%esp), %gs
+ /* EAX == 0 => Category 1 (Bad segment)
+ EAX != 0 => Category 2 (Bad IRET) */
+ testl %eax, %eax
+ popl %eax
+ lea 16(%esp), %esp
+ jz 5f
+ jmp iret_exc
+5: pushl $-1 /* orig_ax = -1 => not a system call */
+ SAVE_ALL
+ jmp ret_from_exception
+
+.section .fixup, "ax"
+6: xorl %eax, %eax
+ movl %eax, 4(%esp)
+ jmp 1b
+7: xorl %eax, %eax
+ movl %eax, 8(%esp)
+ jmp 2b
+8: xorl %eax, %eax
+ movl %eax, 12(%esp)
+ jmp 3b
+9: xorl %eax, %eax
+ movl %eax, 16(%esp)
+ jmp 4b
+.previous
+ _ASM_EXTABLE(1b, 6b)
+ _ASM_EXTABLE(2b, 7b)
+ _ASM_EXTABLE(3b, 8b)
+ _ASM_EXTABLE(4b, 9b)
+ENDPROC(xen_failsafe_callback)
+
+BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
+ xen_evtchn_do_upcall)
+
+#endif /* CONFIG_XEN */
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
+ hyperv_vector_handler)
+
+#endif /* CONFIG_HYPERV */
+
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(mcount)
+ ret
+END(mcount)
+
+ENTRY(ftrace_caller)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ pushl $0 /* Pass NULL as regs pointer */
+ movl 4*4(%esp), %eax
+ movl 0x4(%ebp), %edx
+ movl function_trace_op, %ecx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+.globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+
+ addl $4, %esp /* skip NULL pointer */
+ popl %edx
+ popl %ecx
+ popl %eax
+ftrace_ret:
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ jmp ftrace_stub
+#endif
+
+.globl ftrace_stub
+ftrace_stub:
+ ret
+END(ftrace_caller)
+
+ENTRY(ftrace_regs_caller)
+ pushf /* push flags before compare (in cs location) */
+
+ /*
+ * i386 does not save SS and ESP when coming from kernel.
+ * Instead, to get sp, &regs->sp is used (see ptrace.h).
+ * Unfortunately, that means eflags must be at the same location
+ * as the current return ip is. We move the return ip into the
+ * ip location, and move flags into the return ip location.
+ */
+ pushl 4(%esp) /* save return ip into ip slot */
+
+ pushl $0 /* Load 0 into orig_ax */
+ pushl %gs
+ pushl %fs
+ pushl %es
+ pushl %ds
+ pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+
+ movl 13*4(%esp), %eax /* Get the saved flags */
+ movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
+ /* clobbering return ip */
+ movl $__KERNEL_CS, 13*4(%esp)
+
+ movl 12*4(%esp), %eax /* Load ip (1st parameter) */
+ subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
+ movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
+ movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+ pushl %esp /* Save pt_regs as 4th parameter */
+
+GLOBAL(ftrace_regs_call)
+ call ftrace_stub
+
+ addl $4, %esp /* Skip pt_regs */
+ movl 14*4(%esp), %eax /* Move flags back into cs */
+ movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
+ movl 12*4(%esp), %eax /* Get return ip from regs->ip */
+ movl %eax, 14*4(%esp) /* Put return ip back for ret */
+
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ popl %ds
+ popl %es
+ popl %fs
+ popl %gs
+ addl $8, %esp /* Skip orig_ax and ip */
+ popf /* Pop flags at end (no addl to corrupt flags) */
+ jmp ftrace_ret
+
+ popf
+ jmp ftrace_stub
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(mcount)
+ cmpl $__PAGE_OFFSET, %esp
+ jb ftrace_stub /* Paging not enabled yet? */
+
+ cmpl $ftrace_stub, ftrace_trace_function
+ jnz trace
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ cmpl $ftrace_stub, ftrace_graph_return
+ jnz ftrace_graph_caller
+
+ cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
+ jnz ftrace_graph_caller
+#endif
+.globl ftrace_stub
+ftrace_stub:
+ ret
+
+ /* taken from glibc */
+trace:
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+ call *ftrace_trace_function
+
+ popl %edx
+ popl %ecx
+ popl %eax
+ jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ lea 0x4(%ebp), %edx
+ movl (%ebp), %ecx
+ subl $MCOUNT_INSN_SIZE, %eax
+ call prepare_ftrace_return
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+END(ftrace_graph_caller)
+
+.globl return_to_handler
+return_to_handler:
+ pushl %eax
+ pushl %edx
+ movl %ebp, %eax
+ call ftrace_return_to_handler
+ movl %eax, %ecx
+ popl %edx
+ popl %eax
+ jmp *%ecx
+#endif
+
+#ifdef CONFIG_TRACING
+ENTRY(trace_page_fault)
+ ASM_CLAC
+ pushl $trace_do_page_fault
+ jmp error_code
+END(trace_page_fault)
+#endif
+
+ENTRY(page_fault)
+ ASM_CLAC
+ pushl $do_page_fault
+ ALIGN
+error_code:
+ /* the function address is in %gs's slot on the stack */
+ pushl %fs
+ pushl %es
+ pushl %ds
+ pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+ cld
+ movl $(__KERNEL_PERCPU), %ecx
+ movl %ecx, %fs
+ UNWIND_ESPFIX_STACK
+ GS_TO_REG %ecx
+ movl PT_GS(%esp), %edi # get the function address
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+ REG_TO_PTGS %ecx
+ SET_KERNEL_GS %ecx
+ movl $(__USER_DS), %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
+ TRACE_IRQS_OFF
+ movl %esp, %eax # pt_regs pointer
+ call *%edi
+ jmp ret_from_exception
+END(page_fault)
+
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+.macro FIX_STACK offset ok label
+ cmpw $__KERNEL_CS, 4(%esp)
+ jne \ok
+\label:
+ movl TSS_sysenter_sp0 + \offset(%esp), %esp
+ pushfl
+ pushl $__KERNEL_CS
+ pushl $sysenter_past_esp
+.endm
+
+ENTRY(debug)
+ ASM_CLAC
+ cmpl $entry_SYSENTER_32, (%esp)
+ jne debug_stack_correct
+ FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
+debug_stack_correct:
+ pushl $-1 # mark this as an int
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ xorl %edx, %edx # error code 0
+ movl %esp, %eax # pt_regs pointer
+ call do_debug
+ jmp ret_from_exception
+END(debug)
+
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+ ASM_CLAC
+#ifdef CONFIG_X86_ESPFIX32
+ pushl %eax
+ movl %ss, %eax
+ cmpw $__ESPFIX_SS, %ax
+ popl %eax
+ je nmi_espfix_stack
+#endif
+ cmpl $entry_SYSENTER_32, (%esp)
+ je nmi_stack_fixup
+ pushl %eax
+ movl %esp, %eax
+ /*
+ * Do not access memory above the end of our stack page,
+ * it might not exist.
+ */
+ andl $(THREAD_SIZE-1), %eax
+ cmpl $(THREAD_SIZE-20), %eax
+ popl %eax
+ jae nmi_stack_correct
+ cmpl $entry_SYSENTER_32, 12(%esp)
+ je nmi_debug_stack_check
+nmi_stack_correct:
+ pushl %eax
+ SAVE_ALL
+ xorl %edx, %edx # zero error code
+ movl %esp, %eax # pt_regs pointer
+ call do_nmi
+ jmp restore_all_notrace
+
+nmi_stack_fixup:
+ FIX_STACK 12, nmi_stack_correct, 1
+ jmp nmi_stack_correct
+
+nmi_debug_stack_check:
+ cmpw $__KERNEL_CS, 16(%esp)
+ jne nmi_stack_correct
+ cmpl $debug, (%esp)
+ jb nmi_stack_correct
+ cmpl $debug_esp_fix_insn, (%esp)
+ ja nmi_stack_correct
+ FIX_STACK 24, nmi_stack_correct, 1
+ jmp nmi_stack_correct
+
+#ifdef CONFIG_X86_ESPFIX32
+nmi_espfix_stack:
+ /*
+ * create the pointer to lss back
+ */
+ pushl %ss
+ pushl %esp
+ addl $4, (%esp)
+ /* copy the iret frame of 12 bytes */
+ .rept 3
+ pushl 16(%esp)
+ .endr
+ pushl %eax
+ SAVE_ALL
+ FIXUP_ESPFIX_STACK # %eax == %esp
+ xorl %edx, %edx # zero error code
+ call do_nmi
+ RESTORE_REGS
+ lss 12+4(%esp), %esp # back to espfix stack
+ jmp irq_return
+#endif
+END(nmi)
+
+ENTRY(int3)
+ ASM_CLAC
+ pushl $-1 # mark this as an int
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ xorl %edx, %edx # zero error code
+ movl %esp, %eax # pt_regs pointer
+ call do_int3
+ jmp ret_from_exception
+END(int3)
+
+ENTRY(general_protection)
+ pushl $do_general_protection
+ jmp error_code
+END(general_protection)
+
+#ifdef CONFIG_KVM_GUEST
+ENTRY(async_page_fault)
+ ASM_CLAC
+ pushl $do_async_page_fault
+ jmp error_code
+END(async_page_fault)
+#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/entry/entry_64.S
index 02c2eff..3bb2c43 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -4,34 +4,25 @@
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- */
-
-/*
+ *
* entry.S contains the system-call and fault low-level handling routines.
*
* Some of this is documented in Documentation/x86/entry_64.txt
*
- * NOTE: This code handles signal-recognition, which happens every time
- * after an interrupt and after each system call.
- *
* A note on terminology:
- * - iret frame: Architecture defined interrupt frame from SS to RIP
- * at the top of the kernel process stack.
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
+ * at the top of the kernel process stack.
*
* Some macro usage:
- * - CFI macros are used to generate dwarf2 unwind information for better
- * backtraces. They don't change any code.
- * - ENTRY/END Define functions in the symbol table.
- * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
- * - idtentry - Define exception entry points.
+ * - ENTRY/END: Define functions in the symbol table.
+ * - TRACE_IRQ_*: Trace hardirq state for lock debugging.
+ * - idtentry: Define exception entry points.
*/
-
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
-#include <asm/dwarf2.h>
-#include <asm/calling.h>
+#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
@@ -49,13 +40,12 @@
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
-#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_64BIT 0x80000000
-#define __AUDIT_ARCH_LE 0x40000000
-
- .code64
- .section .entry.text, "ax"
+#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_64BIT 0x80000000
+#define __AUDIT_ARCH_LE 0x40000000
+.code64
+.section .entry.text, "ax"
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
@@ -64,11 +54,10 @@ ENTRY(native_usergs_sysret64)
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
-
.macro TRACE_IRQS_IRETQ
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9,EFLAGS(%rsp) /* interrupts off? */
- jnc 1f
+ bt $9, EFLAGS(%rsp) /* interrupts off? */
+ jnc 1f
TRACE_IRQS_ON
1:
#endif
@@ -88,89 +77,34 @@ ENDPROC(native_usergs_sysret64)
#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
.macro TRACE_IRQS_OFF_DEBUG
- call debug_stack_set_zero
+ call debug_stack_set_zero
TRACE_IRQS_OFF
- call debug_stack_reset
+ call debug_stack_reset
.endm
.macro TRACE_IRQS_ON_DEBUG
- call debug_stack_set_zero
+ call debug_stack_set_zero
TRACE_IRQS_ON
- call debug_stack_reset
+ call debug_stack_reset
.endm
.macro TRACE_IRQS_IRETQ_DEBUG
- bt $9,EFLAGS(%rsp) /* interrupts off? */
- jnc 1f
+ bt $9, EFLAGS(%rsp) /* interrupts off? */
+ jnc 1f
TRACE_IRQS_ON_DEBUG
1:
.endm
#else
-# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
-# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
-# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
+# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
#endif
/*
- * empty frame
- */
- .macro EMPTY_FRAME start=1 offset=0
- .if \start
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,8+\offset
- .else
- CFI_DEF_CFA_OFFSET 8+\offset
- .endif
- .endm
-
-/*
- * initial frame state for interrupts (and exceptions without error code)
- */
- .macro INTR_FRAME start=1 offset=0
- EMPTY_FRAME \start, 5*8+\offset
- /*CFI_REL_OFFSET ss, 4*8+\offset*/
- CFI_REL_OFFSET rsp, 3*8+\offset
- /*CFI_REL_OFFSET rflags, 2*8+\offset*/
- /*CFI_REL_OFFSET cs, 1*8+\offset*/
- CFI_REL_OFFSET rip, 0*8+\offset
- .endm
-
-/*
- * initial frame state for exceptions with error code (and interrupts
- * with vector already pushed)
- */
- .macro XCPT_FRAME start=1 offset=0
- INTR_FRAME \start, 1*8+\offset
- .endm
-
-/*
- * frame that enables passing a complete pt_regs to a C function.
- */
- .macro DEFAULT_FRAME start=1 offset=0
- XCPT_FRAME \start, ORIG_RAX+\offset
- CFI_REL_OFFSET rdi, RDI+\offset
- CFI_REL_OFFSET rsi, RSI+\offset
- CFI_REL_OFFSET rdx, RDX+\offset
- CFI_REL_OFFSET rcx, RCX+\offset
- CFI_REL_OFFSET rax, RAX+\offset
- CFI_REL_OFFSET r8, R8+\offset
- CFI_REL_OFFSET r9, R9+\offset
- CFI_REL_OFFSET r10, R10+\offset
- CFI_REL_OFFSET r11, R11+\offset
- CFI_REL_OFFSET rbx, RBX+\offset
- CFI_REL_OFFSET rbp, RBP+\offset
- CFI_REL_OFFSET r12, R12+\offset
- CFI_REL_OFFSET r13, R13+\offset
- CFI_REL_OFFSET r14, R14+\offset
- CFI_REL_OFFSET r15, R15+\offset
- .endm
-
-/*
- * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
+ * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
- * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
* are not needed). SYSCALL does not save anything on the stack
@@ -186,7 +120,7 @@ ENDPROC(native_usergs_sysret64)
* r10 arg3 (needs to be moved to rcx to conform to C ABI)
* r8 arg4
* r9 arg5
- * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
+ * (note: r12-r15, rbp, rbx are callee-preserved in C ABI)
*
* Only called from user space.
*
@@ -195,13 +129,7 @@ ENDPROC(native_usergs_sysret64)
* with them due to bugs in both AMD and Intel CPUs.
*/
-ENTRY(system_call)
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,0
- CFI_REGISTER rip,rcx
- /*CFI_REGISTER rflags,r11*/
-
+ENTRY(entry_SYSCALL_64)
/*
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
@@ -213,14 +141,14 @@ ENTRY(system_call)
* after the swapgs, so that it can do the swapgs
* for the guest and jump here on syscall.
*/
-GLOBAL(system_call_after_swapgs)
+GLOBAL(entry_SYSCALL_64_after_swapgs)
- movq %rsp,PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(kernel_stack),%rsp
+ movq %rsp, PER_CPU_VAR(rsp_scratch)
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
- pushq_cfi $__USER_DS /* pt_regs->ss */
- pushq_cfi PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
+ pushq $__USER_DS /* pt_regs->ss */
+ pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
/*
* Re-enable interrupts.
* We use 'rsp_scratch' as a scratch space, hence irq-off block above
@@ -229,36 +157,34 @@ GLOBAL(system_call_after_swapgs)
* with using rsp_scratch:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq_cfi %r11 /* pt_regs->flags */
- pushq_cfi $__USER_CS /* pt_regs->cs */
- pushq_cfi %rcx /* pt_regs->ip */
- CFI_REL_OFFSET rip,0
- pushq_cfi_reg rax /* pt_regs->orig_ax */
- pushq_cfi_reg rdi /* pt_regs->di */
- pushq_cfi_reg rsi /* pt_regs->si */
- pushq_cfi_reg rdx /* pt_regs->dx */
- pushq_cfi_reg rcx /* pt_regs->cx */
- pushq_cfi $-ENOSYS /* pt_regs->ax */
- pushq_cfi_reg r8 /* pt_regs->r8 */
- pushq_cfi_reg r9 /* pt_regs->r9 */
- pushq_cfi_reg r10 /* pt_regs->r10 */
- pushq_cfi_reg r11 /* pt_regs->r11 */
- sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */
- CFI_ADJUST_CFA_OFFSET 6*8
-
- testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz tracesys
-system_call_fastpath:
+ pushq %r11 /* pt_regs->flags */
+ pushq $__USER_CS /* pt_regs->cs */
+ pushq %rcx /* pt_regs->ip */
+ pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+ pushq %r8 /* pt_regs->r8 */
+ pushq %r9 /* pt_regs->r9 */
+ pushq %r10 /* pt_regs->r10 */
+ pushq %r11 /* pt_regs->r11 */
+ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz tracesys
+entry_SYSCALL_64_fastpath:
#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max,%rax
+ cmpq $__NR_syscall_max, %rax
#else
- andl $__SYSCALL_MASK,%eax
- cmpl $__NR_syscall_max,%eax
+ andl $__SYSCALL_MASK, %eax
+ cmpl $__NR_syscall_max, %eax
#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10,%rcx
- call *sys_call_table(,%rax,8)
- movq %rax,RAX(%rsp)
+ ja 1f /* return -ENOSYS (already in pt_regs->ax) */
+ movq %r10, %rcx
+ call *sys_call_table(, %rax, 8)
+ movq %rax, RAX(%rsp)
1:
/*
* Syscall return path ending with SYSRET (fast path).
@@ -279,19 +205,15 @@ system_call_fastpath:
* flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
* very bad.
*/
- testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
-
- CFI_REMEMBER_STATE
+ testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RIP(%rsp),%rcx
- CFI_REGISTER rip,rcx
- movq EFLAGS(%rsp),%r11
- /*CFI_REGISTER rflags,r11*/
- movq RSP(%rsp),%rsp
+ movq RIP(%rsp), %rcx
+ movq EFLAGS(%rsp), %r11
+ movq RSP(%rsp), %rsp
/*
- * 64bit SYSRET restores rip from rcx,
+ * 64-bit SYSRET restores rip from rcx,
* rflags from r11 (but RF and VM bits are forced to 0),
* cs and ss are loaded from MSRs.
* Restoration of rflags re-enables interrupts.
@@ -307,25 +229,23 @@ system_call_fastpath:
*/
USERGS_SYSRET64
- CFI_RESTORE_STATE
-
/* Do syscall entry tracing */
tracesys:
- movq %rsp, %rdi
- movl $AUDIT_ARCH_X86_64, %esi
- call syscall_trace_enter_phase1
- test %rax, %rax
- jnz tracesys_phase2 /* if needed, run the slow path */
- RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
- movq ORIG_RAX(%rsp), %rax
- jmp system_call_fastpath /* and return to the fast path */
+ movq %rsp, %rdi
+ movl $AUDIT_ARCH_X86_64, %esi
+ call syscall_trace_enter_phase1
+ test %rax, %rax
+ jnz tracesys_phase2 /* if needed, run the slow path */
+ RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
+ movq ORIG_RAX(%rsp), %rax
+ jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
tracesys_phase2:
SAVE_EXTRA_REGS
- movq %rsp, %rdi
- movl $AUDIT_ARCH_X86_64, %esi
- movq %rax,%rdx
- call syscall_trace_enter_phase2
+ movq %rsp, %rdi
+ movl $AUDIT_ARCH_X86_64, %esi
+ movq %rax, %rdx
+ call syscall_trace_enter_phase2
/*
* Reload registers from stack in case ptrace changed them.
@@ -335,15 +255,15 @@ tracesys_phase2:
RESTORE_C_REGS_EXCEPT_RAX
RESTORE_EXTRA_REGS
#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max,%rax
+ cmpq $__NR_syscall_max, %rax
#else
- andl $__SYSCALL_MASK,%eax
- cmpl $__NR_syscall_max,%eax
+ andl $__SYSCALL_MASK, %eax
+ cmpl $__NR_syscall_max, %eax
#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10,%rcx /* fixup for C */
- call *sys_call_table(,%rax,8)
- movq %rax,RAX(%rsp)
+ ja 1f /* return -ENOSYS (already in pt_regs->ax) */
+ movq %r10, %rcx /* fixup for C */
+ call *sys_call_table(, %rax, 8)
+ movq %rax, RAX(%rsp)
1:
/* Use IRET because user could have changed pt_regs->foo */
@@ -355,31 +275,33 @@ GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
TRACE_IRQS_OFF
- movl $_TIF_ALLWORK_MASK,%edi
+ movl $_TIF_ALLWORK_MASK, %edi
/* edi: mask to check */
GLOBAL(int_with_check)
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
- movl TI_flags(%rcx),%edx
- andl %edi,%edx
- jnz int_careful
- andl $~TS_COMPAT,TI_status(%rcx)
+ movl TI_flags(%rcx), %edx
+ andl %edi, %edx
+ jnz int_careful
+ andl $~TS_COMPAT, TI_status(%rcx)
jmp syscall_return
- /* Either reschedule or signal or syscall exit tracking needed. */
- /* First do a reschedule test. */
- /* edx: work, edi: workmask */
+ /*
+ * Either reschedule or signal or syscall exit tracking needed.
+ * First do a reschedule test.
+ * edx: work, edi: workmask
+ */
int_careful:
- bt $TIF_NEED_RESCHED,%edx
- jnc int_very_careful
+ bt $TIF_NEED_RESCHED, %edx
+ jnc int_very_careful
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq_cfi %rdi
+ pushq %rdi
SCHEDULE_USER
- popq_cfi %rdi
+ popq %rdi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- jmp int_with_check
+ jmp int_with_check
/* handle signals and tracing -- both require a full pt_regs */
int_very_careful:
@@ -387,27 +309,27 @@ int_very_careful:
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_EXTRA_REGS
/* Check for syscall exit trace */
- testl $_TIF_WORK_SYSCALL_EXIT,%edx
- jz int_signal
- pushq_cfi %rdi
- leaq 8(%rsp),%rdi # &ptregs -> arg1
- call syscall_trace_leave
- popq_cfi %rdi
- andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
- jmp int_restore_rest
+ testl $_TIF_WORK_SYSCALL_EXIT, %edx
+ jz int_signal
+ pushq %rdi
+ leaq 8(%rsp), %rdi /* &ptregs -> arg1 */
+ call syscall_trace_leave
+ popq %rdi
+ andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU), %edi
+ jmp int_restore_rest
int_signal:
- testl $_TIF_DO_NOTIFY_MASK,%edx
- jz 1f
- movq %rsp,%rdi # &ptregs -> arg1
- xorl %esi,%esi # oldset -> arg2
- call do_notify_resume
-1: movl $_TIF_WORK_MASK,%edi
+ testl $_TIF_DO_NOTIFY_MASK, %edx
+ jz 1f
+ movq %rsp, %rdi /* &ptregs -> arg1 */
+ xorl %esi, %esi /* oldset -> arg2 */
+ call do_notify_resume
+1: movl $_TIF_WORK_MASK, %edi
int_restore_rest:
RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- jmp int_with_check
+ jmp int_with_check
syscall_return:
/* The IRETQ could re-enable interrupts: */
@@ -418,34 +340,37 @@ syscall_return:
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context.
*/
- movq RCX(%rsp),%rcx
- cmpq %rcx,RIP(%rsp) /* RCX == RIP */
- jne opportunistic_sysret_failed
+ movq RCX(%rsp), %rcx
+ movq RIP(%rsp), %r11
+ cmpq %rcx, %r11 /* RCX == RIP */
+ jne opportunistic_sysret_failed
/*
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
* in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP. It's not worth
- * testing for canonicalness exactly -- this check detects any
- * of the 17 high bits set, which is true for non-canonical
- * or kernel addresses. (This will pessimize vsyscall=native.
- * Big deal.)
+ * the kernel, since userspace controls RSP.
*
- * If virtual addresses ever become wider, this will need
+ * If width of "canonical tail" ever becomes variable, this will need
* to be updated to remain correct on both old and new CPUs.
*/
.ifne __VIRTUAL_MASK_SHIFT - 47
.error "virtual address width changed -- SYSRET checks need update"
.endif
- shr $__VIRTUAL_MASK_SHIFT, %rcx
- jnz opportunistic_sysret_failed
- cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */
- jne opportunistic_sysret_failed
+ /* Change top 16 bits to be the sign-extension of 47th bit */
+ shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
+ sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
- movq R11(%rsp),%r11
- cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */
- jne opportunistic_sysret_failed
+ /* If this changed %rcx, it was not canonical */
+ cmpq %rcx, %r11
+ jne opportunistic_sysret_failed
+
+ cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
+ jne opportunistic_sysret_failed
+
+ movq R11(%rsp), %r11
+ cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
+ jne opportunistic_sysret_failed
/*
* SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
@@ -454,47 +379,41 @@ syscall_return:
* with register state that satisfies the opportunistic SYSRET
* conditions. For example, single-stepping this user code:
*
- * movq $stuck_here,%rcx
+ * movq $stuck_here, %rcx
* pushfq
* popq %r11
* stuck_here:
*
* would never get past 'stuck_here'.
*/
- testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz opportunistic_sysret_failed
+ testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
+ jnz opportunistic_sysret_failed
/* nothing to check for RSP */
- cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */
- jne opportunistic_sysret_failed
+ cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
+ jne opportunistic_sysret_failed
/*
- * We win! This label is here just for ease of understanding
- * perf profiles. Nothing jumps here.
+ * We win! This label is here just for ease of understanding
+ * perf profiles. Nothing jumps here.
*/
syscall_return_via_sysret:
- CFI_REMEMBER_STATE
- /* r11 is already restored (see code above) */
- RESTORE_C_REGS_EXCEPT_R11
- movq RSP(%rsp),%rsp
+ /* rcx and r11 are already restored (see code above) */
+ RESTORE_C_REGS_EXCEPT_RCX_R11
+ movq RSP(%rsp), %rsp
USERGS_SYSRET64
- CFI_RESTORE_STATE
opportunistic_sysret_failed:
SWAPGS
jmp restore_c_regs_and_iret
- CFI_ENDPROC
-END(system_call)
+END(entry_SYSCALL_64)
.macro FORK_LIKE func
ENTRY(stub_\func)
- CFI_STARTPROC
- DEFAULT_FRAME 0, 8 /* offset 8: return address */
SAVE_EXTRA_REGS 8
- jmp sys_\func
- CFI_ENDPROC
+ jmp sys_\func
END(stub_\func)
.endm
@@ -503,8 +422,6 @@ END(stub_\func)
FORK_LIKE vfork
ENTRY(stub_execve)
- CFI_STARTPROC
- DEFAULT_FRAME 0, 8
call sys_execve
return_from_execve:
testl %eax, %eax
@@ -514,11 +431,9 @@ return_from_execve:
1:
/* must use IRET code path (pt_regs->cs may have changed) */
addq $8, %rsp
- CFI_ADJUST_CFA_OFFSET -8
ZERO_EXTRA_REGS
- movq %rax,RAX(%rsp)
+ movq %rax, RAX(%rsp)
jmp int_ret_from_sys_call
- CFI_ENDPROC
END(stub_execve)
/*
* Remaining execve stubs are only 7 bytes long.
@@ -526,47 +441,25 @@ END(stub_execve)
*/
.align 8
GLOBAL(stub_execveat)
- CFI_STARTPROC
- DEFAULT_FRAME 0, 8
call sys_execveat
jmp return_from_execve
- CFI_ENDPROC
END(stub_execveat)
-#ifdef CONFIG_X86_X32_ABI
+#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
.align 8
GLOBAL(stub_x32_execve)
- CFI_STARTPROC
- DEFAULT_FRAME 0, 8
- call compat_sys_execve
- jmp return_from_execve
- CFI_ENDPROC
-END(stub_x32_execve)
- .align 8
-GLOBAL(stub_x32_execveat)
- CFI_STARTPROC
- DEFAULT_FRAME 0, 8
- call compat_sys_execveat
- jmp return_from_execve
- CFI_ENDPROC
-END(stub_x32_execveat)
-#endif
-
-#ifdef CONFIG_IA32_EMULATION
- .align 8
GLOBAL(stub32_execve)
- CFI_STARTPROC
call compat_sys_execve
jmp return_from_execve
- CFI_ENDPROC
END(stub32_execve)
+END(stub_x32_execve)
.align 8
+GLOBAL(stub_x32_execveat)
GLOBAL(stub32_execveat)
- CFI_STARTPROC
call compat_sys_execveat
jmp return_from_execve
- CFI_ENDPROC
END(stub32_execveat)
+END(stub_x32_execveat)
#endif
/*
@@ -574,8 +467,6 @@ END(stub32_execveat)
* This cannot be done with SYSRET, so use the IRET return path instead.
*/
ENTRY(stub_rt_sigreturn)
- CFI_STARTPROC
- DEFAULT_FRAME 0, 8
/*
* SAVE_EXTRA_REGS result is not normally needed:
* sigreturn overwrites all pt_regs->GPREGS.
@@ -584,24 +475,19 @@ ENTRY(stub_rt_sigreturn)
* we SAVE_EXTRA_REGS here.
*/
SAVE_EXTRA_REGS 8
- call sys_rt_sigreturn
+ call sys_rt_sigreturn
return_from_stub:
addq $8, %rsp
- CFI_ADJUST_CFA_OFFSET -8
RESTORE_EXTRA_REGS
- movq %rax,RAX(%rsp)
- jmp int_ret_from_sys_call
- CFI_ENDPROC
+ movq %rax, RAX(%rsp)
+ jmp int_ret_from_sys_call
END(stub_rt_sigreturn)
#ifdef CONFIG_X86_X32_ABI
ENTRY(stub_x32_rt_sigreturn)
- CFI_STARTPROC
- DEFAULT_FRAME 0, 8
SAVE_EXTRA_REGS 8
- call sys32_x32_rt_sigreturn
- jmp return_from_stub
- CFI_ENDPROC
+ call sys32_x32_rt_sigreturn
+ jmp return_from_stub
END(stub_x32_rt_sigreturn)
#endif
@@ -611,36 +497,36 @@ END(stub_x32_rt_sigreturn)
* rdi: prev task we switched from
*/
ENTRY(ret_from_fork)
- DEFAULT_FRAME
- LOCK ; btr $TIF_FORK,TI_flags(%r8)
+ LOCK ; btr $TIF_FORK, TI_flags(%r8)
- pushq_cfi $0x0002
- popfq_cfi # reset kernel eflags
+ pushq $0x0002
+ popfq /* reset kernel eflags */
- call schedule_tail # rdi: 'prev' task parameter
+ call schedule_tail /* rdi: 'prev' task parameter */
RESTORE_EXTRA_REGS
- testl $3,CS(%rsp) # from kernel_thread?
+ testb $3, CS(%rsp) /* from kernel_thread? */
/*
* By the time we get here, we have no idea whether our pt_regs,
* ti flags, and ti status came from the 64-bit SYSCALL fast path,
- * the slow path, or one of the ia32entry paths.
+ * the slow path, or one of the 32-bit compat paths.
* Use IRET code path to return, since it can safely handle
* all of the above.
*/
jnz int_ret_from_sys_call
- /* We came from kernel_thread */
- /* nb: we depend on RESTORE_EXTRA_REGS above */
- movq %rbp, %rdi
- call *%rbx
- movl $0, RAX(%rsp)
+ /*
+ * We came from kernel_thread
+ * nb: we depend on RESTORE_EXTRA_REGS above
+ */
+ movq %rbp, %rdi
+ call *%rbx
+ movl $0, RAX(%rsp)
RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
- CFI_ENDPROC
+ jmp int_ret_from_sys_call
END(ret_from_fork)
/*
@@ -649,16 +535,13 @@ END(ret_from_fork)
*/
.align 8
ENTRY(irq_entries_start)
- INTR_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
- pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
+ pushq $(~vector+0x80) /* Note: always in signed byte range */
vector=vector+1
jmp common_interrupt
- CFI_ADJUST_CFA_OFFSET -8
.align 8
.endr
- CFI_ENDPROC
END(irq_entries_start)
/*
@@ -684,10 +567,10 @@ END(irq_entries_start)
/* this goes to 0(%rsp) for unwinder, not for saving the value: */
SAVE_EXTRA_REGS_RBP -RBP
- leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */
+ leaq -RBP(%rsp), %rdi /* arg1 for \func (pointer to pt_regs) */
- testl $3, CS-RBP(%rsp)
- je 1f
+ testb $3, CS-RBP(%rsp)
+ jz 1f
SWAPGS
1:
/*
@@ -697,24 +580,14 @@ END(irq_entries_start)
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
- movq %rsp, %rsi
- incl PER_CPU_VAR(irq_count)
- cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
- CFI_DEF_CFA_REGISTER rsi
- pushq %rsi
- /*
- * For debugger:
- * "CFA (Current Frame Address) is the value on stack + offset"
- */
- CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
- 0x77 /* DW_OP_breg7 (rsp) */, 0, \
- 0x06 /* DW_OP_deref */, \
- 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
- 0x22 /* DW_OP_plus */
+ movq %rsp, %rsi
+ incl PER_CPU_VAR(irq_count)
+ cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
+ pushq %rsi
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
- call \func
+ call \func
.endm
/*
@@ -723,42 +596,36 @@ END(irq_entries_start)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
- XCPT_FRAME
ASM_CLAC
- addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
+ addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
interrupt do_IRQ
/* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- decl PER_CPU_VAR(irq_count)
+ decl PER_CPU_VAR(irq_count)
/* Restore saved previous stack */
- popq %rsi
- CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
+ popq %rsi
/* return code expects complete pt_regs - adjust rsp accordingly: */
- leaq -RBP(%rsi),%rsp
- CFI_DEF_CFA_REGISTER rsp
- CFI_ADJUST_CFA_OFFSET RBP
+ leaq -RBP(%rsi), %rsp
- testl $3,CS(%rsp)
- je retint_kernel
+ testb $3, CS(%rsp)
+ jz retint_kernel
/* Interrupt came from user space */
-
+retint_user:
GET_THREAD_INFO(%rcx)
- /*
- * %rcx: thread info. Interrupts off.
- */
+
+ /* %rcx: thread info. Interrupts are off. */
retint_with_reschedule:
- movl $_TIF_WORK_MASK,%edi
+ movl $_TIF_WORK_MASK, %edi
retint_check:
LOCKDEP_SYS_EXIT_IRQ
- movl TI_flags(%rcx),%edx
- andl %edi,%edx
- CFI_REMEMBER_STATE
- jnz retint_careful
+ movl TI_flags(%rcx), %edx
+ andl %edi, %edx
+ jnz retint_careful
-retint_swapgs: /* return to user-space */
+retint_swapgs: /* return to user-space */
/*
* The iretq could re-enable interrupts:
*/
@@ -773,9 +640,9 @@ retint_kernel:
#ifdef CONFIG_PREEMPT
/* Interrupts are off */
/* Check if we need preemption */
- bt $9,EFLAGS(%rsp) /* interrupts were off? */
+ bt $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
-0: cmpl $0,PER_CPU_VAR(__preempt_count)
+0: cmpl $0, PER_CPU_VAR(__preempt_count)
jnz 1f
call preempt_schedule_irq
jmp 0b
@@ -793,8 +660,6 @@ retint_kernel:
restore_c_regs_and_iret:
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
-
-irq_return:
INTERRUPT_RETURN
ENTRY(native_iret)
@@ -803,8 +668,8 @@ ENTRY(native_iret)
* 64-bit mode SS:RSP on the exception stack is always valid.
*/
#ifdef CONFIG_X86_ESPFIX64
- testb $4,(SS-RIP)(%rsp)
- jnz native_irq_return_ldt
+ testb $4, (SS-RIP)(%rsp)
+ jnz native_irq_return_ldt
#endif
.global native_irq_return_iret
@@ -819,62 +684,60 @@ native_irq_return_iret:
#ifdef CONFIG_X86_ESPFIX64
native_irq_return_ldt:
- pushq_cfi %rax
- pushq_cfi %rdi
+ pushq %rax
+ pushq %rdi
SWAPGS
- movq PER_CPU_VAR(espfix_waddr),%rdi
- movq %rax,(0*8)(%rdi) /* RAX */
- movq (2*8)(%rsp),%rax /* RIP */
- movq %rax,(1*8)(%rdi)
- movq (3*8)(%rsp),%rax /* CS */
- movq %rax,(2*8)(%rdi)
- movq (4*8)(%rsp),%rax /* RFLAGS */
- movq %rax,(3*8)(%rdi)
- movq (6*8)(%rsp),%rax /* SS */
- movq %rax,(5*8)(%rdi)
- movq (5*8)(%rsp),%rax /* RSP */
- movq %rax,(4*8)(%rdi)
- andl $0xffff0000,%eax
- popq_cfi %rdi
- orq PER_CPU_VAR(espfix_stack),%rax
+ movq PER_CPU_VAR(espfix_waddr), %rdi
+ movq %rax, (0*8)(%rdi) /* RAX */
+ movq (2*8)(%rsp), %rax /* RIP */
+ movq %rax, (1*8)(%rdi)
+ movq (3*8)(%rsp), %rax /* CS */
+ movq %rax, (2*8)(%rdi)
+ movq (4*8)(%rsp), %rax /* RFLAGS */
+ movq %rax, (3*8)(%rdi)
+ movq (6*8)(%rsp), %rax /* SS */
+ movq %rax, (5*8)(%rdi)
+ movq (5*8)(%rsp), %rax /* RSP */
+ movq %rax, (4*8)(%rdi)
+ andl $0xffff0000, %eax
+ popq %rdi
+ orq PER_CPU_VAR(espfix_stack), %rax
SWAPGS
- movq %rax,%rsp
- popq_cfi %rax
- jmp native_irq_return_iret
+ movq %rax, %rsp
+ popq %rax
+ jmp native_irq_return_iret
#endif
/* edi: workmask, edx: work */
retint_careful:
- CFI_RESTORE_STATE
- bt $TIF_NEED_RESCHED,%edx
- jnc retint_signal
+ bt $TIF_NEED_RESCHED, %edx
+ jnc retint_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq_cfi %rdi
+ pushq %rdi
SCHEDULE_USER
- popq_cfi %rdi
+ popq %rdi
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- jmp retint_check
+ jmp retint_check
retint_signal:
- testl $_TIF_DO_NOTIFY_MASK,%edx
- jz retint_swapgs
+ testl $_TIF_DO_NOTIFY_MASK, %edx
+ jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_EXTRA_REGS
- movq $-1,ORIG_RAX(%rsp)
- xorl %esi,%esi # oldset
- movq %rsp,%rdi # &pt_regs
- call do_notify_resume
+ movq $-1, ORIG_RAX(%rsp)
+ xorl %esi, %esi /* oldset */
+ movq %rsp, %rdi /* &pt_regs */
+ call do_notify_resume
RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
- jmp retint_with_reschedule
+ jmp retint_with_reschedule
- CFI_ENDPROC
END(common_interrupt)
/*
@@ -882,13 +745,11 @@ END(common_interrupt)
*/
.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
- INTR_FRAME
ASM_CLAC
- pushq_cfi $~(\num)
+ pushq $~(\num)
.Lcommon_\sym:
interrupt \do_sym
- jmp ret_from_intr
- CFI_ENDPROC
+ jmp ret_from_intr
END(\sym)
.endm
@@ -910,53 +771,45 @@ trace_apicinterrupt \num \sym
.endm
#ifdef CONFIG_SMP
-apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
- irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
-apicinterrupt3 REBOOT_VECTOR \
- reboot_interrupt smp_reboot_interrupt
+apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt
#endif
#ifdef CONFIG_X86_UV
-apicinterrupt3 UV_BAU_MESSAGE \
- uv_bau_message_intr1 uv_bau_message_interrupt
+apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
#endif
-apicinterrupt LOCAL_TIMER_VECTOR \
- apic_timer_interrupt smp_apic_timer_interrupt
-apicinterrupt X86_PLATFORM_IPI_VECTOR \
- x86_platform_ipi smp_x86_platform_ipi
+
+apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt
+apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_HAVE_KVM
-apicinterrupt3 POSTED_INTR_VECTOR \
- kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
-apicinterrupt THRESHOLD_APIC_VECTOR \
- threshold_interrupt smp_threshold_interrupt
+apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
-apicinterrupt THERMAL_APIC_VECTOR \
- thermal_interrupt smp_thermal_interrupt
+apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt
#endif
#ifdef CONFIG_SMP
-apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
- call_function_single_interrupt smp_call_function_single_interrupt
-apicinterrupt CALL_FUNCTION_VECTOR \
- call_function_interrupt smp_call_function_interrupt
-apicinterrupt RESCHEDULE_VECTOR \
- reschedule_interrupt smp_reschedule_interrupt
+apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt
+apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt
+apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
#endif
-apicinterrupt ERROR_APIC_VECTOR \
- error_interrupt smp_error_interrupt
-apicinterrupt SPURIOUS_APIC_VECTOR \
- spurious_interrupt smp_spurious_interrupt
+apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
+apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
#ifdef CONFIG_IRQ_WORK
-apicinterrupt IRQ_WORK_VECTOR \
- irq_work_interrupt smp_irq_work_interrupt
+apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
#endif
/*
@@ -971,100 +824,87 @@ ENTRY(\sym)
.error "using shift_ist requires paranoid=1"
.endif
- .if \has_error_code
- XCPT_FRAME
- .else
- INTR_FRAME
- .endif
-
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
.ifeq \has_error_code
- pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
ALLOC_PT_GPREGS_ON_STACK
.if \paranoid
.if \paranoid == 1
- CFI_REMEMBER_STATE
- testl $3, CS(%rsp) /* If coming from userspace, switch */
- jnz 1f /* stacks. */
+ testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
+ jnz 1f
.endif
- call paranoid_entry
+ call paranoid_entry
.else
- call error_entry
+ call error_entry
.endif
/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
- DEFAULT_FRAME 0
-
.if \paranoid
.if \shift_ist != -1
- TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
+ TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
.else
TRACE_IRQS_OFF
.endif
.endif
- movq %rsp,%rdi /* pt_regs pointer */
+ movq %rsp, %rdi /* pt_regs pointer */
.if \has_error_code
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ movq ORIG_RAX(%rsp), %rsi /* get error code */
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
.else
- xorl %esi,%esi /* no error code */
+ xorl %esi, %esi /* no error code */
.endif
.if \shift_ist != -1
- subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
+ subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
.endif
- call \do_sym
+ call \do_sym
.if \shift_ist != -1
- addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
+ addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
.endif
/* these procedures expect "no swapgs" flag in ebx */
.if \paranoid
- jmp paranoid_exit
+ jmp paranoid_exit
.else
- jmp error_exit
+ jmp error_exit
.endif
.if \paranoid == 1
- CFI_RESTORE_STATE
/*
* Paranoid entry from userspace. Switch stacks and treat it
* as a normal entry. This means that paranoid handlers
* run in real process context if user_mode(regs).
*/
1:
- call error_entry
+ call error_entry
- DEFAULT_FRAME 0
- movq %rsp,%rdi /* pt_regs pointer */
- call sync_regs
- movq %rax,%rsp /* switch stack */
+ movq %rsp, %rdi /* pt_regs pointer */
+ call sync_regs
+ movq %rax, %rsp /* switch stack */
- movq %rsp,%rdi /* pt_regs pointer */
+ movq %rsp, %rdi /* pt_regs pointer */
.if \has_error_code
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ movq ORIG_RAX(%rsp), %rsi /* get error code */
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
.else
- xorl %esi,%esi /* no error code */
+ xorl %esi, %esi /* no error code */
.endif
- call \do_sym
+ call \do_sym
- jmp error_exit /* %ebx: no swapgs flag */
+ jmp error_exit /* %ebx: no swapgs flag */
.endif
-
- CFI_ENDPROC
END(\sym)
.endm
@@ -1079,65 +919,58 @@ idtentry \sym \do_sym has_error_code=\has_error_code
.endm
#endif
-idtentry divide_error do_divide_error has_error_code=0
-idtentry overflow do_overflow has_error_code=0
-idtentry bounds do_bounds has_error_code=0
-idtentry invalid_op do_invalid_op has_error_code=0
-idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=2
-idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
-idtentry invalid_TSS do_invalid_TSS has_error_code=1
-idtentry segment_not_present do_segment_not_present has_error_code=1
-idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
-idtentry coprocessor_error do_coprocessor_error has_error_code=0
-idtentry alignment_check do_alignment_check has_error_code=1
-idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
-
-
- /* Reload gs selector with exception handling */
- /* edi: new selector */
+idtentry divide_error do_divide_error has_error_code=0
+idtentry overflow do_overflow has_error_code=0
+idtentry bounds do_bounds has_error_code=0
+idtentry invalid_op do_invalid_op has_error_code=0
+idtentry device_not_available do_device_not_available has_error_code=0
+idtentry double_fault do_double_fault has_error_code=1 paranoid=2
+idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
+idtentry invalid_TSS do_invalid_TSS has_error_code=1
+idtentry segment_not_present do_segment_not_present has_error_code=1
+idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
+idtentry coprocessor_error do_coprocessor_error has_error_code=0
+idtentry alignment_check do_alignment_check has_error_code=1
+idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
+
+
+ /*
+ * Reload gs selector with exception handling
+ * edi: new selector
+ */
ENTRY(native_load_gs_index)
- CFI_STARTPROC
- pushfq_cfi
+ pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
SWAPGS
gs_change:
- movl %edi,%gs
-2: mfence /* workaround */
+ movl %edi, %gs
+2: mfence /* workaround */
SWAPGS
- popfq_cfi
+ popfq
ret
- CFI_ENDPROC
END(native_load_gs_index)
- _ASM_EXTABLE(gs_change,bad_gs)
- .section .fixup,"ax"
+ _ASM_EXTABLE(gs_change, bad_gs)
+ .section .fixup, "ax"
/* running with kernelgs */
bad_gs:
- SWAPGS /* switch back to user gs */
- xorl %eax,%eax
- movl %eax,%gs
- jmp 2b
+ SWAPGS /* switch back to user gs */
+ xorl %eax, %eax
+ movl %eax, %gs
+ jmp 2b
.previous
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(do_softirq_own_stack)
- CFI_STARTPROC
- pushq_cfi %rbp
- CFI_REL_OFFSET rbp,0
- mov %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- incl PER_CPU_VAR(irq_count)
- cmove PER_CPU_VAR(irq_stack_ptr),%rsp
- push %rbp # backlink for old unwinder
- call __do_softirq
+ pushq %rbp
+ mov %rsp, %rbp
+ incl PER_CPU_VAR(irq_count)
+ cmove PER_CPU_VAR(irq_stack_ptr), %rsp
+ push %rbp /* frame pointer backlink */
+ call __do_softirq
leaveq
- CFI_RESTORE rbp
- CFI_DEF_CFA_REGISTER rsp
- CFI_ADJUST_CFA_OFFSET -8
- decl PER_CPU_VAR(irq_count)
+ decl PER_CPU_VAR(irq_count)
ret
- CFI_ENDPROC
END(do_softirq_own_stack)
#ifdef CONFIG_XEN
@@ -1156,29 +989,24 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
* existing activation in its critical region -- if so, we pop the current
* activation and restart the handler using the previous one.
*/
-ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
- CFI_STARTPROC
+ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
+
/*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
* see the correct pointer to the pt_regs
*/
- movq %rdi, %rsp # we don't return, adjust the stack frame
- CFI_ENDPROC
- DEFAULT_FRAME
-11: incl PER_CPU_VAR(irq_count)
- movq %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
- pushq %rbp # backlink for old unwinder
- call xen_evtchn_do_upcall
- popq %rsp
- CFI_DEF_CFA_REGISTER rsp
- decl PER_CPU_VAR(irq_count)
+ movq %rdi, %rsp /* we don't return, adjust the stack frame */
+11: incl PER_CPU_VAR(irq_count)
+ movq %rsp, %rbp
+ cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
+ pushq %rbp /* frame pointer backlink */
+ call xen_evtchn_do_upcall
+ popq %rsp
+ decl PER_CPU_VAR(irq_count)
#ifndef CONFIG_PREEMPT
- call xen_maybe_preempt_hcall
+ call xen_maybe_preempt_hcall
#endif
- jmp error_exit
- CFI_ENDPROC
+ jmp error_exit
END(xen_do_hypervisor_callback)
/*
@@ -1195,51 +1023,35 @@ END(xen_do_hypervisor_callback)
* with its current contents: any discrepancy means we in category 1.
*/
ENTRY(xen_failsafe_callback)
- INTR_FRAME 1 (6*8)
- /*CFI_REL_OFFSET gs,GS*/
- /*CFI_REL_OFFSET fs,FS*/
- /*CFI_REL_OFFSET es,ES*/
- /*CFI_REL_OFFSET ds,DS*/
- CFI_REL_OFFSET r11,8
- CFI_REL_OFFSET rcx,0
- movw %ds,%cx
- cmpw %cx,0x10(%rsp)
- CFI_REMEMBER_STATE
- jne 1f
- movw %es,%cx
- cmpw %cx,0x18(%rsp)
- jne 1f
- movw %fs,%cx
- cmpw %cx,0x20(%rsp)
- jne 1f
- movw %gs,%cx
- cmpw %cx,0x28(%rsp)
- jne 1f
+ movl %ds, %ecx
+ cmpw %cx, 0x10(%rsp)
+ jne 1f
+ movl %es, %ecx
+ cmpw %cx, 0x18(%rsp)
+ jne 1f
+ movl %fs, %ecx
+ cmpw %cx, 0x20(%rsp)
+ jne 1f
+ movl %gs, %ecx
+ cmpw %cx, 0x28(%rsp)
+ jne 1f
/* All segments match their saved values => Category 2 (Bad IRET). */
- movq (%rsp),%rcx
- CFI_RESTORE rcx
- movq 8(%rsp),%r11
- CFI_RESTORE r11
- addq $0x30,%rsp
- CFI_ADJUST_CFA_OFFSET -0x30
- pushq_cfi $0 /* RIP */
- pushq_cfi %r11
- pushq_cfi %rcx
- jmp general_protection
- CFI_RESTORE_STATE
+ movq (%rsp), %rcx
+ movq 8(%rsp), %r11
+ addq $0x30, %rsp
+ pushq $0 /* RIP */
+ pushq %r11
+ pushq %rcx
+ jmp general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
- movq (%rsp),%rcx
- CFI_RESTORE rcx
- movq 8(%rsp),%r11
- CFI_RESTORE r11
- addq $0x30,%rsp
- CFI_ADJUST_CFA_OFFSET -0x30
- pushq_cfi $-1 /* orig_ax = -1 => not a system call */
+ movq (%rsp), %rcx
+ movq 8(%rsp), %r11
+ addq $0x30, %rsp
+ pushq $-1 /* orig_ax = -1 => not a system call */
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
- jmp error_exit
- CFI_ENDPROC
+ jmp error_exit
END(xen_failsafe_callback)
apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
@@ -1252,21 +1064,25 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
#endif /* CONFIG_HYPERV */
-idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry stack_segment do_stack_segment has_error_code=1
+idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry stack_segment do_stack_segment has_error_code=1
+
#ifdef CONFIG_XEN
-idtentry xen_debug do_debug has_error_code=0
-idtentry xen_int3 do_int3 has_error_code=0
-idtentry xen_stack_segment do_stack_segment has_error_code=1
+idtentry xen_debug do_debug has_error_code=0
+idtentry xen_int3 do_int3 has_error_code=0
+idtentry xen_stack_segment do_stack_segment has_error_code=1
#endif
-idtentry general_protection do_general_protection has_error_code=1
-trace_idtentry page_fault do_page_fault has_error_code=1
+
+idtentry general_protection do_general_protection has_error_code=1
+trace_idtentry page_fault do_page_fault has_error_code=1
+
#ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1
+idtentry async_page_fault do_async_page_fault has_error_code=1
#endif
+
#ifdef CONFIG_X86_MCE
-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
+idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
#endif
/*
@@ -1275,19 +1091,17 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(paranoid_entry)
- XCPT_FRAME 1 15*8
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
- movl $1,%ebx
- movl $MSR_GS_BASE,%ecx
+ movl $1, %ebx
+ movl $MSR_GS_BASE, %ecx
rdmsr
- testl %edx,%edx
- js 1f /* negative -> in kernel */
+ testl %edx, %edx
+ js 1f /* negative -> in kernel */
SWAPGS
- xorl %ebx,%ebx
+ xorl %ebx, %ebx
1: ret
- CFI_ENDPROC
END(paranoid_entry)
/*
@@ -1299,17 +1113,17 @@ END(paranoid_entry)
* in syscall entry), so checking for preemption here would
* be complicated. Fortunately, we there's no good reason
* to try to handle preemption here.
+ *
+ * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
*/
-/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
ENTRY(paranoid_exit)
- DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF_DEBUG
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_exit_no_swapgs
+ testl %ebx, %ebx /* swapgs needed? */
+ jnz paranoid_exit_no_swapgs
TRACE_IRQS_IRETQ
SWAPGS_UNSAFE_STACK
- jmp paranoid_exit_restore
+ jmp paranoid_exit_restore
paranoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
paranoid_exit_restore:
@@ -1317,24 +1131,24 @@ paranoid_exit_restore:
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
INTERRUPT_RETURN
- CFI_ENDPROC
END(paranoid_exit)
/*
* Save all registers in pt_regs, and switch gs if needed.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ * Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
- XCPT_FRAME 1 15*8
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
- xorl %ebx,%ebx
- testl $3,CS+8(%rsp)
- je error_kernelspace
-error_swapgs:
+ xorl %ebx, %ebx
+ testb $3, CS+8(%rsp)
+ jz error_kernelspace
+
+ /* We entered from user mode */
SWAPGS
-error_sti:
+
+error_entry_done:
TRACE_IRQS_OFF
ret
@@ -1345,56 +1159,66 @@ error_sti:
* for these here too.
*/
error_kernelspace:
- CFI_REL_OFFSET rcx, RCX+8
- incl %ebx
- leaq native_irq_return_iret(%rip),%rcx
- cmpq %rcx,RIP+8(%rsp)
- je error_bad_iret
- movl %ecx,%eax /* zero extend */
- cmpq %rax,RIP+8(%rsp)
- je bstep_iret
- cmpq $gs_change,RIP+8(%rsp)
- je error_swapgs
- jmp error_sti
+ incl %ebx
+ leaq native_irq_return_iret(%rip), %rcx
+ cmpq %rcx, RIP+8(%rsp)
+ je error_bad_iret
+ movl %ecx, %eax /* zero extend */
+ cmpq %rax, RIP+8(%rsp)
+ je bstep_iret
+ cmpq $gs_change, RIP+8(%rsp)
+ jne error_entry_done
+
+ /*
+ * hack: gs_change can fail with user gsbase. If this happens, fix up
+ * gsbase and proceed. We'll fix up the exception and land in
+ * gs_change's error handler with kernel gsbase.
+ */
+ SWAPGS
+ jmp error_entry_done
bstep_iret:
/* Fix truncated RIP */
- movq %rcx,RIP+8(%rsp)
+ movq %rcx, RIP+8(%rsp)
/* fall through */
error_bad_iret:
+ /*
+ * We came from an IRET to user mode, so we have user gsbase.
+ * Switch to kernel gsbase:
+ */
SWAPGS
- mov %rsp,%rdi
- call fixup_bad_iret
- mov %rax,%rsp
- decl %ebx /* Return to usergs */
- jmp error_sti
- CFI_ENDPROC
+
+ /*
+ * Pretend that the exception came from user mode: set up pt_regs
+ * as if we faulted immediately after IRET and clear EBX so that
+ * error_exit knows that we will be returning to user mode.
+ */
+ mov %rsp, %rdi
+ call fixup_bad_iret
+ mov %rax, %rsp
+ decl %ebx
+ jmp error_entry_done
END(error_entry)
-/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
+/*
+ * On entry, EBS is a "return to kernel mode" flag:
+ * 1: already in kernel mode, don't need SWAPGS
+ * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
+ */
ENTRY(error_exit)
- DEFAULT_FRAME
- movl %ebx,%eax
+ movl %ebx, %eax
RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
- testl %eax,%eax
- jne retint_kernel
- LOCKDEP_SYS_EXIT_IRQ
- movl TI_flags(%rcx),%edx
- movl $_TIF_WORK_MASK,%edi
- andl %edi,%edx
- jnz retint_careful
- jmp retint_swapgs
- CFI_ENDPROC
+ testl %eax, %eax
+ jnz retint_kernel
+ jmp retint_user
END(error_exit)
/* Runs on exception stack */
ENTRY(nmi)
- INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
@@ -1429,22 +1253,21 @@ ENTRY(nmi)
*/
/* Use %rdx as our temp variable throughout */
- pushq_cfi %rdx
- CFI_REL_OFFSET rdx, 0
+ pushq %rdx
/*
* If %cs was not the kernel segment, then the NMI triggered in user
* space, which means it is definitely not nested.
*/
- cmpl $__KERNEL_CS, 16(%rsp)
- jne first_nmi
+ cmpl $__KERNEL_CS, 16(%rsp)
+ jne first_nmi
/*
* Check the special variable on the stack to see if NMIs are
* executing.
*/
- cmpl $1, -8(%rsp)
- je nested_nmi
+ cmpl $1, -8(%rsp)
+ je nested_nmi
/*
* Now test if the previous stack was an NMI stack.
@@ -1458,51 +1281,46 @@ ENTRY(nmi)
cmpq %rdx, 4*8(%rsp)
/* If the stack pointer is above the NMI stack, this is a normal NMI */
ja first_nmi
+
subq $EXCEPTION_STKSZ, %rdx
cmpq %rdx, 4*8(%rsp)
/* If it is below the NMI stack, it is a normal NMI */
jb first_nmi
/* Ah, it is within the NMI stack, treat it as nested */
- CFI_REMEMBER_STATE
-
nested_nmi:
/*
* Do nothing if we interrupted the fixup in repeat_nmi.
* It's about to repeat the NMI handler, so we are fine
* with ignoring this one.
*/
- movq $repeat_nmi, %rdx
- cmpq 8(%rsp), %rdx
- ja 1f
- movq $end_repeat_nmi, %rdx
- cmpq 8(%rsp), %rdx
- ja nested_nmi_out
+ movq $repeat_nmi, %rdx
+ cmpq 8(%rsp), %rdx
+ ja 1f
+ movq $end_repeat_nmi, %rdx
+ cmpq 8(%rsp), %rdx
+ ja nested_nmi_out
1:
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
- leaq -1*8(%rsp), %rdx
- movq %rdx, %rsp
- CFI_ADJUST_CFA_OFFSET 1*8
- leaq -10*8(%rsp), %rdx
- pushq_cfi $__KERNEL_DS
- pushq_cfi %rdx
- pushfq_cfi
- pushq_cfi $__KERNEL_CS
- pushq_cfi $repeat_nmi
+ leaq -1*8(%rsp), %rdx
+ movq %rdx, %rsp
+ leaq -10*8(%rsp), %rdx
+ pushq $__KERNEL_DS
+ pushq %rdx
+ pushfq
+ pushq $__KERNEL_CS
+ pushq $repeat_nmi
/* Put stack back */
- addq $(6*8), %rsp
- CFI_ADJUST_CFA_OFFSET -6*8
+ addq $(6*8), %rsp
nested_nmi_out:
- popq_cfi %rdx
- CFI_RESTORE rdx
+ popq %rdx
/* No need to check faults here */
INTERRUPT_RETURN
- CFI_RESTORE_STATE
first_nmi:
/*
* Because nested NMIs will use the pushed location that we
@@ -1540,23 +1358,18 @@ first_nmi:
* is also used by nested NMIs and can not be trusted on exit.
*/
/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
- movq (%rsp), %rdx
- CFI_RESTORE rdx
+ movq (%rsp), %rdx
/* Set the NMI executing variable on the stack. */
- pushq_cfi $1
+ pushq $1
- /*
- * Leave room for the "copied" frame
- */
- subq $(5*8), %rsp
- CFI_ADJUST_CFA_OFFSET 5*8
+ /* Leave room for the "copied" frame */
+ subq $(5*8), %rsp
/* Copy the stack frame to the Saved frame */
.rept 5
- pushq_cfi 11*8(%rsp)
+ pushq 11*8(%rsp)
.endr
- CFI_DEF_CFA_OFFSET 5*8
/* Everything up to here is safe from nested NMIs */
@@ -1575,16 +1388,14 @@ repeat_nmi:
* is benign for the non-repeat case, where 1 was pushed just above
* to this very stack slot).
*/
- movq $1, 10*8(%rsp)
+ movq $1, 10*8(%rsp)
/* Make another copy, this one may be modified by nested NMIs */
- addq $(10*8), %rsp
- CFI_ADJUST_CFA_OFFSET -10*8
+ addq $(10*8), %rsp
.rept 5
- pushq_cfi -6*8(%rsp)
+ pushq -6*8(%rsp)
.endr
- subq $(5*8), %rsp
- CFI_DEF_CFA_OFFSET 5*8
+ subq $(5*8), %rsp
end_repeat_nmi:
/*
@@ -1592,7 +1403,7 @@ end_repeat_nmi:
* NMI if the first NMI took an exception and reset our iret stack
* so that we repeat another NMI.
*/
- pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
ALLOC_PT_GPREGS_ON_STACK
/*
@@ -1602,8 +1413,7 @@ end_repeat_nmi:
* setting NEED_RESCHED or anything that normal interrupts and
* exceptions might do.
*/
- call paranoid_entry
- DEFAULT_FRAME 0
+ call paranoid_entry
/*
* Save off the CR2 register. If we take a page fault in the NMI then
@@ -1614,22 +1424,21 @@ end_repeat_nmi:
* origin fault. Save it off and restore it if it changes.
* Use the r12 callee-saved register.
*/
- movq %cr2, %r12
+ movq %cr2, %r12
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
- movq %rsp,%rdi
- movq $-1,%rsi
- call do_nmi
+ movq %rsp, %rdi
+ movq $-1, %rsi
+ call do_nmi
/* Did the NMI take a page fault? Restore cr2 if it did */
- movq %cr2, %rcx
- cmpq %rcx, %r12
- je 1f
- movq %r12, %cr2
+ movq %cr2, %rcx
+ cmpq %rcx, %r12
+ je 1f
+ movq %r12, %cr2
1:
-
- testl %ebx,%ebx /* swapgs needed? */
- jnz nmi_restore
+ testl %ebx, %ebx /* swapgs needed? */
+ jnz nmi_restore
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
@@ -1639,15 +1448,11 @@ nmi_restore:
REMOVE_PT_GPREGS_FROM_STACK 6*8
/* Clear the NMI executing stack variable */
- movq $0, 5*8(%rsp)
- jmp irq_return
- CFI_ENDPROC
+ movq $0, 5*8(%rsp)
+ INTERRUPT_RETURN
END(nmi)
ENTRY(ignore_sysret)
- CFI_STARTPROC
- mov $-ENOSYS,%eax
+ mov $-ENOSYS, %eax
sysret
- CFI_ENDPROC
END(ignore_sysret)
-
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
new file mode 100644
index 0000000..bb187a6
--- /dev/null
+++ b/arch/x86/entry/entry_64_compat.S
@@ -0,0 +1,556 @@
+/*
+ * Compatibility mode system call entry point for x86-64.
+ *
+ * Copyright 2000-2002 Andi Kleen, SuSE Labs.
+ */
+#include "calling.h"
+#include <asm/asm-offsets.h>
+#include <asm/current.h>
+#include <asm/errno.h>
+#include <asm/ia32_unistd.h>
+#include <asm/thread_info.h>
+#include <asm/segment.h>
+#include <asm/irqflags.h>
+#include <asm/asm.h>
+#include <asm/smap.h>
+#include <linux/linkage.h>
+#include <linux/err.h>
+
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE 0x40000000
+
+#ifndef CONFIG_AUDITSYSCALL
+# define sysexit_audit ia32_ret_from_sys_call
+# define sysretl_audit ia32_ret_from_sys_call
+#endif
+
+ .section .entry.text, "ax"
+
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_usergs_sysret32)
+ swapgs
+ sysretl
+ENDPROC(native_usergs_sysret32)
+#endif
+
+/*
+ * 32-bit SYSENTER instruction entry.
+ *
+ * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
+ * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old rip (!!!) and rflags.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp user stack
+ * 0(%ebp) arg6
+ *
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
+ENTRY(entry_SYSENTER_compat)
+ /*
+ * Interrupts are off on entry.
+ * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+ * it is too small to ever cause noticeable irq latency.
+ */
+ SWAPGS_UNSAFE_STACK
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
+ /* Zero-extending 32-bit regs, do not remove */
+ movl %ebp, %ebp
+ movl %eax, %eax
+
+ movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
+
+ /* Construct struct pt_regs on stack */
+ pushq $__USER32_DS /* pt_regs->ss */
+ pushq %rbp /* pt_regs->sp */
+ pushfq /* pt_regs->flags */
+ pushq $__USER32_CS /* pt_regs->cs */
+ pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */
+ pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+ cld
+ sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
+
+ /*
+ * no need to do an access_ok check here because rbp has been
+ * 32-bit zero extended
+ */
+ ASM_STAC
+1: movl (%rbp), %ebp
+ _ASM_EXTABLE(1b, ia32_badarg)
+ ASM_CLAC
+
+ /*
+ * Sysenter doesn't filter flags, so we need to clear NT
+ * ourselves. To save a few cycles, we can check whether
+ * NT was set instead of doing an unconditional popfq.
+ */
+ testl $X86_EFLAGS_NT, EFLAGS(%rsp)
+ jnz sysenter_fix_flags
+sysenter_flags_fixed:
+
+ orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz sysenter_tracesys
+
+sysenter_do_call:
+ /* 32-bit syscall -> 64-bit C ABI argument conversion */
+ movl %edi, %r8d /* arg5 */
+ movl %ebp, %r9d /* arg6 */
+ xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
+ movl %ebx, %edi /* arg1 */
+ movl %edx, %edx /* arg3 (zero extension) */
+sysenter_dispatch:
+ cmpq $(IA32_NR_syscalls-1), %rax
+ ja 1f
+ call *ia32_sys_call_table(, %rax, 8)
+ movq %rax, RAX(%rsp)
+1:
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz sysexit_audit
+sysexit_from_sys_call:
+ /*
+ * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
+ * NMI between STI and SYSEXIT has poorly specified behavior,
+ * and and NMI followed by an IRQ with usergs is fatal. So
+ * we just pretend we're using SYSEXIT but we really use
+ * SYSRETL instead.
+ *
+ * This code path is still called 'sysexit' because it pairs
+ * with 'sysenter' and it uses the SYSENTER calling convention.
+ */
+ andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ movl RIP(%rsp), %ecx /* User %eip */
+ RESTORE_RSI_RDI
+ xorl %edx, %edx /* Do not leak kernel information */
+ xorq %r8, %r8
+ xorq %r9, %r9
+ xorq %r10, %r10
+ movl EFLAGS(%rsp), %r11d /* User eflags */
+ TRACE_IRQS_ON
+
+ /*
+ * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
+ * since it avoids a dicey window with interrupts enabled.
+ */
+ movl RSP(%rsp), %esp
+
+ /*
+ * USERGS_SYSRET32 does:
+ * gsbase = user's gs base
+ * eip = ecx
+ * rflags = r11
+ * cs = __USER32_CS
+ * ss = __USER_DS
+ *
+ * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
+ *
+ * pop %ebp
+ * pop %edx
+ * pop %ecx
+ *
+ * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
+ * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
+ * address (already known to user code), and R12-R15 are
+ * callee-saved and therefore don't contain any interesting
+ * kernel data.
+ */
+ USERGS_SYSRET32
+
+#ifdef CONFIG_AUDITSYSCALL
+ .macro auditsys_entry_common
+ /*
+ * At this point, registers hold syscall args in the 32-bit syscall ABI:
+ * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,EBP.
+ *
+ * We want to pass them to __audit_syscall_entry(), which is a 64-bit
+ * C function with 5 parameters, so shuffle them to match what
+ * the function expects: RDI,RSI,RDX,RCX,R8.
+ */
+ movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */
+ xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */
+ /* arg3 (RDX) <= 2nd syscall arg (ECX) */
+ movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */
+ movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */
+ call __audit_syscall_entry
+
+ /*
+ * We are going to jump back to the syscall dispatch code.
+ * Prepare syscall args as required by the 64-bit C ABI.
+ * Registers clobbered by __audit_syscall_entry() are
+ * loaded from pt_regs on stack:
+ */
+ movl ORIG_RAX(%rsp), %eax /* syscall number */
+ movl %ebx, %edi /* arg1 */
+ movl RCX(%rsp), %esi /* arg2 */
+ movl RDX(%rsp), %edx /* arg3 */
+ movl RSI(%rsp), %ecx /* arg4 */
+ movl RDI(%rsp), %r8d /* arg5 */
+ movl %ebp, %r9d /* arg6 */
+ .endm
+
+ .macro auditsys_exit exit
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz ia32_ret_from_sys_call
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ movl %eax, %esi /* second arg, syscall return value */
+ cmpl $-MAX_ERRNO, %eax /* is it an error ? */
+ jbe 1f
+ movslq %eax, %rsi /* if error sign extend to 64 bits */
+1: setbe %al /* 1 if error, 0 if not */
+ movzbl %al, %edi /* zero-extend that into %edi */
+ call __audit_syscall_exit
+ movq RAX(%rsp), %rax /* reload syscall return value */
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jz \exit
+ xorl %eax, %eax /* Do not leak kernel information */
+ movq %rax, R11(%rsp)
+ movq %rax, R10(%rsp)
+ movq %rax, R9(%rsp)
+ movq %rax, R8(%rsp)
+ jmp int_with_check
+ .endm
+
+sysenter_auditsys:
+ auditsys_entry_common
+ jmp sysenter_dispatch
+
+sysexit_audit:
+ auditsys_exit sysexit_from_sys_call
+#endif
+
+sysenter_fix_flags:
+ pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
+ popfq
+ jmp sysenter_flags_fixed
+
+sysenter_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jz sysenter_auditsys
+#endif
+ SAVE_EXTRA_REGS
+ xorl %eax, %eax /* Do not leak kernel information */
+ movq %rax, R11(%rsp)
+ movq %rax, R10(%rsp)
+ movq %rax, R9(%rsp)
+ movq %rax, R8(%rsp)
+ movq %rsp, %rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+
+ /* Reload arg registers from stack. (see sysenter_tracesys) */
+ movl RCX(%rsp), %ecx
+ movl RDX(%rsp), %edx
+ movl RSI(%rsp), %esi
+ movl RDI(%rsp), %edi
+ movl %eax, %eax /* zero extension */
+
+ RESTORE_EXTRA_REGS
+ jmp sysenter_do_call
+ENDPROC(entry_SYSENTER_compat)
+
+/*
+ * 32-bit SYSCALL instruction entry.
+ *
+ * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
+ * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
+ * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
+ *
+ * Arguments:
+ * eax system call number
+ * ecx return address
+ * ebx arg1
+ * ebp arg2 (note: not saved in the stack frame, should not be touched)
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * esp user stack
+ * 0(%esp) arg6
+ *
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
+ENTRY(entry_SYSCALL_compat)
+ /*
+ * Interrupts are off on entry.
+ * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+ * it is too small to ever cause noticeable irq latency.
+ */
+ SWAPGS_UNSAFE_STACK
+ movl %esp, %r8d
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
+ /* Zero-extending 32-bit regs, do not remove */
+ movl %eax, %eax
+
+ /* Construct struct pt_regs on stack */
+ pushq $__USER32_DS /* pt_regs->ss */
+ pushq %r8 /* pt_regs->sp */
+ pushq %r11 /* pt_regs->flags */
+ pushq $__USER32_CS /* pt_regs->cs */
+ pushq %rcx /* pt_regs->ip */
+ pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rbp /* pt_regs->cx */
+ movl %ebp, %ecx
+ pushq $-ENOSYS /* pt_regs->ax */
+ sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
+
+ /*
+ * No need to do an access_ok check here because r8 has been
+ * 32-bit zero extended:
+ */
+ ASM_STAC
+1: movl (%r8), %ebp
+ _ASM_EXTABLE(1b, ia32_badarg)
+ ASM_CLAC
+ orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz cstar_tracesys
+
+cstar_do_call:
+ /* 32-bit syscall -> 64-bit C ABI argument conversion */
+ movl %edi, %r8d /* arg5 */
+ movl %ebp, %r9d /* arg6 */
+ xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
+ movl %ebx, %edi /* arg1 */
+ movl %edx, %edx /* arg3 (zero extension) */
+
+cstar_dispatch:
+ cmpq $(IA32_NR_syscalls-1), %rax
+ ja 1f
+
+ call *ia32_sys_call_table(, %rax, 8)
+ movq %rax, RAX(%rsp)
+1:
+ movl RCX(%rsp), %ebp
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz sysretl_audit
+
+sysretl_from_sys_call:
+ andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ RESTORE_RSI_RDI_RDX
+ movl RIP(%rsp), %ecx
+ movl EFLAGS(%rsp), %r11d
+ xorq %r10, %r10
+ xorq %r9, %r9
+ xorq %r8, %r8
+ TRACE_IRQS_ON
+ movl RSP(%rsp), %esp
+ /*
+ * 64-bit->32-bit SYSRET restores eip from ecx,
+ * eflags from r11 (but RF and VM bits are forced to 0),
+ * cs and ss are loaded from MSRs.
+ * (Note: 32-bit->32-bit SYSRET is different: since r11
+ * does not exist, it merely sets eflags.IF=1).
+ *
+ * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+ * descriptor is not reinitialized. This means that we must
+ * avoid SYSRET with SS == NULL, which could happen if we schedule,
+ * exit the kernel, and re-enter using an interrupt vector. (All
+ * interrupt entries on x86_64 set SS to NULL.) We prevent that
+ * from happening by reloading SS in __switch_to.
+ */
+ USERGS_SYSRET32
+
+#ifdef CONFIG_AUDITSYSCALL
+cstar_auditsys:
+ auditsys_entry_common
+ jmp cstar_dispatch
+
+sysretl_audit:
+ auditsys_exit sysretl_from_sys_call
+#endif
+
+cstar_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jz cstar_auditsys
+#endif
+ SAVE_EXTRA_REGS
+ xorl %eax, %eax /* Do not leak kernel information */
+ movq %rax, R11(%rsp)
+ movq %rax, R10(%rsp)
+ movq %rax, R9(%rsp)
+ movq %rax, R8(%rsp)
+ movq %rsp, %rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+
+ /* Reload arg registers from stack. (see sysenter_tracesys) */
+ movl RCX(%rsp), %ecx
+ movl RDX(%rsp), %edx
+ movl RSI(%rsp), %esi
+ movl RDI(%rsp), %edi
+ movl %eax, %eax /* zero extension */
+
+ RESTORE_EXTRA_REGS
+ jmp cstar_do_call
+END(entry_SYSCALL_compat)
+
+ia32_badarg:
+ ASM_CLAC
+ movq $-EFAULT, RAX(%rsp)
+ia32_ret_from_sys_call:
+ xorl %eax, %eax /* Do not leak kernel information */
+ movq %rax, R11(%rsp)
+ movq %rax, R10(%rsp)
+ movq %rax, R9(%rsp)
+ movq %rax, R8(%rsp)
+ jmp int_ret_from_sys_call
+
+/*
+ * Emulated IA32 system calls via int 0x80.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp arg6 (note: not saved in the stack frame, should not be touched)
+ *
+ * Notes:
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except eax must be saved (but ptrace may violate that).
+ * Arguments are zero extended. For system calls that want sign extension and
+ * take long arguments a wrapper is needed. Most calls can just be called
+ * directly.
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
+
+ENTRY(entry_INT80_compat)
+ /*
+ * Interrupts are off on entry.
+ * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+ * it is too small to ever cause noticeable irq latency.
+ */
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ SWAPGS
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
+ /* Zero-extending 32-bit regs, do not remove */
+ movl %eax, %eax
+
+ /* Construct struct pt_regs on stack (iret frame is already on stack) */
+ pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+ pushq $0 /* pt_regs->r8 */
+ pushq $0 /* pt_regs->r9 */
+ pushq $0 /* pt_regs->r10 */
+ pushq $0 /* pt_regs->r11 */
+ cld
+ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+
+ orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz ia32_tracesys
+
+ia32_do_call:
+ /* 32-bit syscall -> 64-bit C ABI argument conversion */
+ movl %edi, %r8d /* arg5 */
+ movl %ebp, %r9d /* arg6 */
+ xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
+ movl %ebx, %edi /* arg1 */
+ movl %edx, %edx /* arg3 (zero extension) */
+ cmpq $(IA32_NR_syscalls-1), %rax
+ ja 1f
+
+ call *ia32_sys_call_table(, %rax, 8)
+ movq %rax, RAX(%rsp)
+1:
+ jmp int_ret_from_sys_call
+
+ia32_tracesys:
+ SAVE_EXTRA_REGS
+ movq %rsp, %rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ /*
+ * Reload arg registers from stack in case ptrace changed them.
+ * Don't reload %eax because syscall_trace_enter() returned
+ * the %rax value we should see. But do truncate it to 32 bits.
+ * If it's -1 to make us punt the syscall, then (u32)-1 is still
+ * an appropriately invalid value.
+ */
+ movl RCX(%rsp), %ecx
+ movl RDX(%rsp), %edx
+ movl RSI(%rsp), %esi
+ movl RDI(%rsp), %edi
+ movl %eax, %eax /* zero extension */
+ RESTORE_EXTRA_REGS
+ jmp ia32_do_call
+END(entry_INT80_compat)
+
+ .macro PTREGSCALL label, func
+ ALIGN
+GLOBAL(\label)
+ leaq \func(%rip), %rax
+ jmp ia32_ptregs_common
+ .endm
+
+ PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
+ PTREGSCALL stub32_sigreturn, sys32_sigreturn
+ PTREGSCALL stub32_fork, sys_fork
+ PTREGSCALL stub32_vfork, sys_vfork
+
+ ALIGN
+GLOBAL(stub32_clone)
+ leaq sys_clone(%rip), %rax
+ /*
+ * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
+ * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
+ *
+ * The native 64-bit kernel's sys_clone() implements the latter,
+ * so we need to swap arguments here before calling it:
+ */
+ xchg %r8, %rcx
+ jmp ia32_ptregs_common
+
+ ALIGN
+ia32_ptregs_common:
+ SAVE_EXTRA_REGS 8
+ call *%rax
+ RESTORE_EXTRA_REGS 8
+ ret
+END(ia32_ptregs_common)
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/entry/syscall_32.c
index 3777189..8ea34f9 100644
--- a/arch/x86/kernel/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -10,7 +10,7 @@
#else
#define SYM(sym, compat) sym
#define ia32_sys_call_table sys_call_table
-#define __NR_ia32_syscall_max __NR_syscall_max
+#define __NR_syscall_compat_max __NR_syscall_max
#endif
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
@@ -23,11 +23,11 @@ typedef asmlinkage void (*sys_call_ptr_t)(void);
extern asmlinkage void sys_ni_syscall(void);
-__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
+__visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
- [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
+ [0 ... __NR_syscall_compat_max] = &sys_ni_syscall,
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/entry/syscall_64.c
index 4ac730b..4ac730b 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index a55abb9..57aa59f 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -1,5 +1,5 @@
-out := $(obj)/../include/generated/asm
-uapi := $(obj)/../include/generated/uapi/asm
+out := $(obj)/../../include/generated/asm
+uapi := $(obj)/../../include/generated/uapi/asm
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ef8187f..ef8187f 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 9ef32d5..9ef32d5 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
diff --git a/arch/x86/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh
index 31fd5f1..31fd5f1 100644
--- a/arch/x86/syscalls/syscallhdr.sh
+++ b/arch/x86/entry/syscalls/syscallhdr.sh
diff --git a/arch/x86/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 0e7f8ec..0e7f8ec 100644
--- a/arch/x86/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/entry/thunk_32.S
index 5eb7150..e9acf5f 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -6,16 +6,14 @@
*/
#include <linux/linkage.h>
#include <asm/asm.h>
- #include <asm/dwarf2.h>
/* put return address in eax (arg1) */
.macro THUNK name, func, put_ret_addr_in_eax=0
.globl \name
\name:
- CFI_STARTPROC
- pushl_cfi_reg eax
- pushl_cfi_reg ecx
- pushl_cfi_reg edx
+ pushl %eax
+ pushl %ecx
+ pushl %edx
.if \put_ret_addr_in_eax
/* Place EIP in the arg1 */
@@ -23,11 +21,10 @@
.endif
call \func
- popl_cfi_reg edx
- popl_cfi_reg ecx
- popl_cfi_reg eax
+ popl %edx
+ popl %ecx
+ popl %eax
ret
- CFI_ENDPROC
_ASM_NOKPROBE(\name)
.endm
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/entry/thunk_64.S
index f89ba4e9..3e95681 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -6,35 +6,32 @@
* Subject to the GNU public license, v.2. No warranty of any kind.
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/calling.h>
+#include "calling.h"
#include <asm/asm.h>
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func, put_ret_addr_in_rdi=0
.globl \name
\name:
- CFI_STARTPROC
/* this one pushes 9 elems, the next one would be %rIP */
- pushq_cfi_reg rdi
- pushq_cfi_reg rsi
- pushq_cfi_reg rdx
- pushq_cfi_reg rcx
- pushq_cfi_reg rax
- pushq_cfi_reg r8
- pushq_cfi_reg r9
- pushq_cfi_reg r10
- pushq_cfi_reg r11
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %rax
+ pushq %r8
+ pushq %r9
+ pushq %r10
+ pushq %r11
.if \put_ret_addr_in_rdi
/* 9*8(%rsp) is return addr on stack */
- movq_cfi_restore 9*8, rdi
+ movq 9*8(%rsp), %rdi
.endif
call \func
jmp restore
- CFI_ENDPROC
_ASM_NOKPROBE(\name)
.endm
@@ -57,19 +54,16 @@
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|| defined(CONFIG_PREEMPT)
- CFI_STARTPROC
- CFI_ADJUST_CFA_OFFSET 9*8
restore:
- popq_cfi_reg r11
- popq_cfi_reg r10
- popq_cfi_reg r9
- popq_cfi_reg r8
- popq_cfi_reg rax
- popq_cfi_reg rcx
- popq_cfi_reg rdx
- popq_cfi_reg rsi
- popq_cfi_reg rdi
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
ret
- CFI_ENDPROC
_ASM_NOKPROBE(restore)
#endif
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore
index aae8ffd..aae8ffd 100644
--- a/arch/x86/vdso/.gitignore
+++ b/arch/x86/entry/vdso/.gitignore
diff --git a/arch/x86/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 275a3a8..e970320 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
hostprogs-y += vdso2c
quiet_cmd_vdso2c = VDSO2C $@
diff --git a/arch/x86/vdso/checkundef.sh b/arch/x86/entry/vdso/checkundef.sh
index 7ee90a9..7ee90a9 100755
--- a/arch/x86/vdso/checkundef.sh
+++ b/arch/x86/entry/vdso/checkundef.sh
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 9793322..9793322 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index de2c921..de2c921 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
diff --git a/arch/x86/vdso/vdso-note.S b/arch/x86/entry/vdso/vdso-note.S
index 79a071e..79a071e 100644
--- a/arch/x86/vdso/vdso-note.S
+++ b/arch/x86/entry/vdso/vdso-note.S
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index 6807932..6807932 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 8627db2..8627db2 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 0224987..0224987 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index e904c27..e904c27 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
diff --git a/arch/x86/vdso/vdso32/.gitignore b/arch/x86/entry/vdso/vdso32/.gitignore
index e45fba9..e45fba9 100644
--- a/arch/x86/vdso/vdso32/.gitignore
+++ b/arch/x86/entry/vdso/vdso32/.gitignore
diff --git a/arch/x86/vdso/vdso32/int80.S b/arch/x86/entry/vdso/vdso32/int80.S
index b15b7c0..b15b7c0 100644
--- a/arch/x86/vdso/vdso32/int80.S
+++ b/arch/x86/entry/vdso/vdso32/int80.S
diff --git a/arch/x86/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S
index c83f257..c83f257 100644
--- a/arch/x86/vdso/vdso32/note.S
+++ b/arch/x86/entry/vdso/vdso32/note.S
diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S
index d7ec4e2..d7ec4e2 100644
--- a/arch/x86/vdso/vdso32/sigreturn.S
+++ b/arch/x86/entry/vdso/vdso32/sigreturn.S
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/entry/vdso/vdso32/syscall.S
index 6b286bb..6b286bb 100644
--- a/arch/x86/vdso/vdso32/syscall.S
+++ b/arch/x86/entry/vdso/vdso32/syscall.S
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/entry/vdso/vdso32/sysenter.S
index e354bce..e354bce 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/entry/vdso/vdso32/sysenter.S
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 175cc72..175cc72 100644
--- a/arch/x86/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
diff --git a/arch/x86/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
index 541468e..541468e 100644
--- a/arch/x86/vdso/vdso32/vdso-fakesections.c
+++ b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 31056cf..31056cf 100644
--- a/arch/x86/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 697c11e..697c11e 100644
--- a/arch/x86/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index 8ec3d1f..8ec3d1f 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
diff --git a/arch/x86/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 1c9f750..1c9f750 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile
new file mode 100644
index 0000000..a9f4856
--- /dev/null
+++ b/arch/x86/entry/vsyscall/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the x86 low level vsyscall code
+#
+obj-y := vsyscall_gtod.o
+
+obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
+
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 2dcc6ff..2dcc6ff 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
index c9596a9..c9596a9 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 51e3304..51e3304 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/entry/vsyscall/vsyscall_trace.h
index a8b2ede..9dd7359 100644
--- a/arch/x86/kernel/vsyscall_trace.h
+++ b/arch/x86/entry/vsyscall/vsyscall_trace.h
@@ -24,6 +24,6 @@ TRACE_EVENT(emulate_vsyscall,
#endif
#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
+#define TRACE_INCLUDE_PATH ../../arch/x86/entry/vsyscall/
#define TRACE_INCLUDE_FILE vsyscall_trace
#include <trace/define_trace.h>
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index bb635c6..cd4339b 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -2,7 +2,7 @@
# Makefile for the ia32 kernel emulation subsystem.
#
-obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
+obj-$(CONFIG_IA32_EMULATION) := sys_ia32.o ia32_signal.o
obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
deleted file mode 100644
index 72bf268..0000000
--- a/arch/x86/ia32/ia32entry.S
+++ /dev/null
@@ -1,611 +0,0 @@
-/*
- * Compatibility mode system call entry point for x86-64.
- *
- * Copyright 2000-2002 Andi Kleen, SuSE Labs.
- */
-
-#include <asm/dwarf2.h>
-#include <asm/calling.h>
-#include <asm/asm-offsets.h>
-#include <asm/current.h>
-#include <asm/errno.h>
-#include <asm/ia32_unistd.h>
-#include <asm/thread_info.h>
-#include <asm/segment.h>
-#include <asm/irqflags.h>
-#include <asm/asm.h>
-#include <asm/smap.h>
-#include <linux/linkage.h>
-#include <linux/err.h>
-
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_LE 0x40000000
-
-#ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit ia32_ret_from_sys_call
-#define sysretl_audit ia32_ret_from_sys_call
-#endif
-
- .section .entry.text, "ax"
-
- /* clobbers %rax */
- .macro CLEAR_RREGS _r9=rax
- xorl %eax,%eax
- movq %rax,R11(%rsp)
- movq %rax,R10(%rsp)
- movq %\_r9,R9(%rsp)
- movq %rax,R8(%rsp)
- .endm
-
- /*
- * Reload arg registers from stack in case ptrace changed them.
- * We don't reload %eax because syscall_trace_enter() returned
- * the %rax value we should see. Instead, we just truncate that
- * value to 32 bits again as we did on entry from user mode.
- * If it's a new value set by user_regset during entry tracing,
- * this matches the normal truncation of the user-mode value.
- * If it's -1 to make us punt the syscall, then (u32)-1 is still
- * an appropriately invalid value.
- */
- .macro LOAD_ARGS32 _r9=0
- .if \_r9
- movl R9(%rsp),%r9d
- .endif
- movl RCX(%rsp),%ecx
- movl RDX(%rsp),%edx
- movl RSI(%rsp),%esi
- movl RDI(%rsp),%edi
- movl %eax,%eax /* zero extension */
- .endm
-
- .macro CFI_STARTPROC32 simple
- CFI_STARTPROC \simple
- CFI_UNDEFINED r8
- CFI_UNDEFINED r9
- CFI_UNDEFINED r10
- CFI_UNDEFINED r11
- CFI_UNDEFINED r12
- CFI_UNDEFINED r13
- CFI_UNDEFINED r14
- CFI_UNDEFINED r15
- .endm
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret32)
- swapgs
- sysretl
-ENDPROC(native_usergs_sysret32)
-
-ENTRY(native_irq_enable_sysexit)
- swapgs
- sti
- sysexit
-ENDPROC(native_irq_enable_sysexit)
-#endif
-
-/*
- * 32bit SYSENTER instruction entry.
- *
- * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
- * IF and VM in rflags are cleared (IOW: interrupts are off).
- * SYSENTER does not save anything on the stack,
- * and does not save old rip (!!!) and rflags.
- *
- * Arguments:
- * eax system call number
- * ebx arg1
- * ecx arg2
- * edx arg3
- * esi arg4
- * edi arg5
- * ebp user stack
- * 0(%ebp) arg6
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. We set up a complete hardware stack frame to share code
- * with the int 0x80 path.
- */
-ENTRY(ia32_sysenter_target)
- CFI_STARTPROC32 simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,0
- CFI_REGISTER rsp,rbp
-
- /*
- * Interrupts are off on entry.
- * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
- * it is too small to ever cause noticeable irq latency.
- */
- SWAPGS_UNSAFE_STACK
- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
- ENABLE_INTERRUPTS(CLBR_NONE)
-
- /* Zero-extending 32-bit regs, do not remove */
- movl %ebp, %ebp
- movl %eax, %eax
-
- movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
- CFI_REGISTER rip,r10
-
- /* Construct struct pt_regs on stack */
- pushq_cfi $__USER32_DS /* pt_regs->ss */
- pushq_cfi %rbp /* pt_regs->sp */
- CFI_REL_OFFSET rsp,0
- pushfq_cfi /* pt_regs->flags */
- pushq_cfi $__USER32_CS /* pt_regs->cs */
- pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */
- CFI_REL_OFFSET rip,0
- pushq_cfi_reg rax /* pt_regs->orig_ax */
- pushq_cfi_reg rdi /* pt_regs->di */
- pushq_cfi_reg rsi /* pt_regs->si */
- pushq_cfi_reg rdx /* pt_regs->dx */
- pushq_cfi_reg rcx /* pt_regs->cx */
- pushq_cfi_reg rax /* pt_regs->ax */
- cld
- sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
- CFI_ADJUST_CFA_OFFSET 10*8
-
- /*
- * no need to do an access_ok check here because rbp has been
- * 32bit zero extended
- */
- ASM_STAC
-1: movl (%rbp),%ebp
- _ASM_EXTABLE(1b,ia32_badarg)
- ASM_CLAC
-
- /*
- * Sysenter doesn't filter flags, so we need to clear NT
- * ourselves. To save a few cycles, we can check whether
- * NT was set instead of doing an unconditional popfq.
- */
- testl $X86_EFLAGS_NT,EFLAGS(%rsp)
- jnz sysenter_fix_flags
-sysenter_flags_fixed:
-
- orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
- testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- CFI_REMEMBER_STATE
- jnz sysenter_tracesys
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
-sysenter_do_call:
- /* 32bit syscall -> 64bit C ABI argument conversion */
- movl %edi,%r8d /* arg5 */
- movl %ebp,%r9d /* arg6 */
- xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
- movl %ebx,%edi /* arg1 */
- movl %edx,%edx /* arg3 (zero extension) */
-sysenter_dispatch:
- call *ia32_sys_call_table(,%rax,8)
- movq %rax,RAX(%rsp)
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz sysexit_audit
-sysexit_from_sys_call:
- /*
- * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
- * NMI between STI and SYSEXIT has poorly specified behavior,
- * and and NMI followed by an IRQ with usergs is fatal. So
- * we just pretend we're using SYSEXIT but we really use
- * SYSRETL instead.
- *
- * This code path is still called 'sysexit' because it pairs
- * with 'sysenter' and it uses the SYSENTER calling convention.
- */
- andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
- movl RIP(%rsp),%ecx /* User %eip */
- CFI_REGISTER rip,rcx
- RESTORE_RSI_RDI
- xorl %edx,%edx /* avoid info leaks */
- xorq %r8,%r8
- xorq %r9,%r9
- xorq %r10,%r10
- movl EFLAGS(%rsp),%r11d /* User eflags */
- /*CFI_RESTORE rflags*/
- TRACE_IRQS_ON
-
- /*
- * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
- * since it avoids a dicey window with interrupts enabled.
- */
- movl RSP(%rsp),%esp
-
- /*
- * USERGS_SYSRET32 does:
- * gsbase = user's gs base
- * eip = ecx
- * rflags = r11
- * cs = __USER32_CS
- * ss = __USER_DS
- *
- * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
- *
- * pop %ebp
- * pop %edx
- * pop %ecx
- *
- * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
- * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
- * address (already known to user code), and R12-R15 are
- * callee-saved and therefore don't contain any interesting
- * kernel data.
- */
- USERGS_SYSRET32
-
- CFI_RESTORE_STATE
-
-#ifdef CONFIG_AUDITSYSCALL
- .macro auditsys_entry_common
- movl %esi,%r8d /* 5th arg: 4th syscall arg */
- movl %ecx,%r9d /*swap with edx*/
- movl %edx,%ecx /* 4th arg: 3rd syscall arg */
- movl %r9d,%edx /* 3rd arg: 2nd syscall arg */
- movl %ebx,%esi /* 2nd arg: 1st syscall arg */
- movl %eax,%edi /* 1st arg: syscall number */
- call __audit_syscall_entry
- movl RAX(%rsp),%eax /* reload syscall number */
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
- movl %ebx,%edi /* reload 1st syscall arg */
- movl RCX(%rsp),%esi /* reload 2nd syscall arg */
- movl RDX(%rsp),%edx /* reload 3rd syscall arg */
- movl RSI(%rsp),%ecx /* reload 4th syscall arg */
- movl RDI(%rsp),%r8d /* reload 5th syscall arg */
- .endm
-
- .macro auditsys_exit exit
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz ia32_ret_from_sys_call
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- movl %eax,%esi /* second arg, syscall return value */
- cmpl $-MAX_ERRNO,%eax /* is it an error ? */
- jbe 1f
- movslq %eax, %rsi /* if error sign extend to 64 bits */
-1: setbe %al /* 1 if error, 0 if not */
- movzbl %al,%edi /* zero-extend that into %edi */
- call __audit_syscall_exit
- movq RAX(%rsp),%rax /* reload syscall return value */
- movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jz \exit
- CLEAR_RREGS
- jmp int_with_check
- .endm
-
-sysenter_auditsys:
- auditsys_entry_common
- movl %ebp,%r9d /* reload 6th syscall arg */
- jmp sysenter_dispatch
-
-sysexit_audit:
- auditsys_exit sysexit_from_sys_call
-#endif
-
-sysenter_fix_flags:
- pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
- popfq_cfi
- jmp sysenter_flags_fixed
-
-sysenter_tracesys:
-#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jz sysenter_auditsys
-#endif
- SAVE_EXTRA_REGS
- CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
- movq %rsp,%rdi /* &pt_regs -> arg1 */
- call syscall_trace_enter
- LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
- RESTORE_EXTRA_REGS
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
- jmp sysenter_do_call
- CFI_ENDPROC
-ENDPROC(ia32_sysenter_target)
-
-/*
- * 32bit SYSCALL instruction entry.
- *
- * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
- * then loads new ss, cs, and rip from previously programmed MSRs.
- * rflags gets masked by a value from another MSR (so CLD and CLAC
- * are not needed). SYSCALL does not save anything on the stack
- * and does not change rsp.
- *
- * Note: rflags saving+masking-with-MSR happens only in Long mode
- * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
- * Don't get confused: rflags saving+masking depends on Long Mode Active bit
- * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
- * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
- *
- * Arguments:
- * eax system call number
- * ecx return address
- * ebx arg1
- * ebp arg2 (note: not saved in the stack frame, should not be touched)
- * edx arg3
- * esi arg4
- * edi arg5
- * esp user stack
- * 0(%esp) arg6
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. We set up a complete hardware stack frame to share code
- * with the int 0x80 path.
- */
-ENTRY(ia32_cstar_target)
- CFI_STARTPROC32 simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,0
- CFI_REGISTER rip,rcx
- /*CFI_REGISTER rflags,r11*/
-
- /*
- * Interrupts are off on entry.
- * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
- * it is too small to ever cause noticeable irq latency.
- */
- SWAPGS_UNSAFE_STACK
- movl %esp,%r8d
- CFI_REGISTER rsp,r8
- movq PER_CPU_VAR(kernel_stack),%rsp
- ENABLE_INTERRUPTS(CLBR_NONE)
-
- /* Zero-extending 32-bit regs, do not remove */
- movl %eax,%eax
-
- /* Construct struct pt_regs on stack */
- pushq_cfi $__USER32_DS /* pt_regs->ss */
- pushq_cfi %r8 /* pt_regs->sp */
- CFI_REL_OFFSET rsp,0
- pushq_cfi %r11 /* pt_regs->flags */
- pushq_cfi $__USER32_CS /* pt_regs->cs */
- pushq_cfi %rcx /* pt_regs->ip */
- CFI_REL_OFFSET rip,0
- pushq_cfi_reg rax /* pt_regs->orig_ax */
- pushq_cfi_reg rdi /* pt_regs->di */
- pushq_cfi_reg rsi /* pt_regs->si */
- pushq_cfi_reg rdx /* pt_regs->dx */
- pushq_cfi_reg rbp /* pt_regs->cx */
- movl %ebp,%ecx
- pushq_cfi_reg rax /* pt_regs->ax */
- sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
- CFI_ADJUST_CFA_OFFSET 10*8
-
- /*
- * no need to do an access_ok check here because r8 has been
- * 32bit zero extended
- */
- ASM_STAC
-1: movl (%r8),%r9d
- _ASM_EXTABLE(1b,ia32_badarg)
- ASM_CLAC
- orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
- testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- CFI_REMEMBER_STATE
- jnz cstar_tracesys
- cmpq $IA32_NR_syscalls-1,%rax
- ja ia32_badsys
-cstar_do_call:
- /* 32bit syscall -> 64bit C ABI argument conversion */
- movl %edi,%r8d /* arg5 */
- /* r9 already loaded */ /* arg6 */
- xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
- movl %ebx,%edi /* arg1 */
- movl %edx,%edx /* arg3 (zero extension) */
-cstar_dispatch:
- call *ia32_sys_call_table(,%rax,8)
- movq %rax,RAX(%rsp)
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz sysretl_audit
-sysretl_from_sys_call:
- andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
- RESTORE_RSI_RDI_RDX
- movl RIP(%rsp),%ecx
- CFI_REGISTER rip,rcx
- movl EFLAGS(%rsp),%r11d
- /*CFI_REGISTER rflags,r11*/
- xorq %r10,%r10
- xorq %r9,%r9
- xorq %r8,%r8
- TRACE_IRQS_ON
- movl RSP(%rsp),%esp
- CFI_RESTORE rsp
- /*
- * 64bit->32bit SYSRET restores eip from ecx,
- * eflags from r11 (but RF and VM bits are forced to 0),
- * cs and ss are loaded from MSRs.
- * (Note: 32bit->32bit SYSRET is different: since r11
- * does not exist, it merely sets eflags.IF=1).
- *
- * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
- * descriptor is not reinitialized. This means that we must
- * avoid SYSRET with SS == NULL, which could happen if we schedule,
- * exit the kernel, and re-enter using an interrupt vector. (All
- * interrupt entries on x86_64 set SS to NULL.) We prevent that
- * from happening by reloading SS in __switch_to.
- */
- USERGS_SYSRET32
-
-#ifdef CONFIG_AUDITSYSCALL
-cstar_auditsys:
- CFI_RESTORE_STATE
- movl %r9d,R9(%rsp) /* register to be clobbered by call */
- auditsys_entry_common
- movl R9(%rsp),%r9d /* reload 6th syscall arg */
- jmp cstar_dispatch
-
-sysretl_audit:
- auditsys_exit sysretl_from_sys_call
-#endif
-
-cstar_tracesys:
-#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jz cstar_auditsys
-#endif
- xchgl %r9d,%ebp
- SAVE_EXTRA_REGS
- CLEAR_RREGS r9
- movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
- movq %rsp,%rdi /* &pt_regs -> arg1 */
- call syscall_trace_enter
- LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
- RESTORE_EXTRA_REGS
- xchgl %ebp,%r9d
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
- jmp cstar_do_call
-END(ia32_cstar_target)
-
-ia32_badarg:
- ASM_CLAC
- movq $-EFAULT,%rax
- jmp ia32_sysret
- CFI_ENDPROC
-
-/*
- * Emulated IA32 system calls via int 0x80.
- *
- * Arguments:
- * eax system call number
- * ebx arg1
- * ecx arg2
- * edx arg3
- * esi arg4
- * edi arg5
- * ebp arg6 (note: not saved in the stack frame, should not be touched)
- *
- * Notes:
- * Uses the same stack frame as the x86-64 version.
- * All registers except eax must be saved (but ptrace may violate that).
- * Arguments are zero extended. For system calls that want sign extension and
- * take long arguments a wrapper is needed. Most calls can just be called
- * directly.
- * Assumes it is only called from user space and entered with interrupts off.
- */
-
-ENTRY(ia32_syscall)
- CFI_STARTPROC32 simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,5*8
- /*CFI_REL_OFFSET ss,4*8 */
- CFI_REL_OFFSET rsp,3*8
- /*CFI_REL_OFFSET rflags,2*8 */
- /*CFI_REL_OFFSET cs,1*8 */
- CFI_REL_OFFSET rip,0*8
-
- /*
- * Interrupts are off on entry.
- * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
- * it is too small to ever cause noticeable irq latency.
- */
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- SWAPGS
- ENABLE_INTERRUPTS(CLBR_NONE)
-
- /* Zero-extending 32-bit regs, do not remove */
- movl %eax,%eax
-
- /* Construct struct pt_regs on stack (iret frame is already on stack) */
- pushq_cfi_reg rax /* pt_regs->orig_ax */
- pushq_cfi_reg rdi /* pt_regs->di */
- pushq_cfi_reg rsi /* pt_regs->si */
- pushq_cfi_reg rdx /* pt_regs->dx */
- pushq_cfi_reg rcx /* pt_regs->cx */
- pushq_cfi_reg rax /* pt_regs->ax */
- cld
- sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
- CFI_ADJUST_CFA_OFFSET 10*8
-
- orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
- testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz ia32_tracesys
- cmpq $(IA32_NR_syscalls-1),%rax
- ja ia32_badsys
-ia32_do_call:
- /* 32bit syscall -> 64bit C ABI argument conversion */
- movl %edi,%r8d /* arg5 */
- movl %ebp,%r9d /* arg6 */
- xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
- movl %ebx,%edi /* arg1 */
- movl %edx,%edx /* arg3 (zero extension) */
- call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
-ia32_sysret:
- movq %rax,RAX(%rsp)
-ia32_ret_from_sys_call:
- CLEAR_RREGS
- jmp int_ret_from_sys_call
-
-ia32_tracesys:
- SAVE_EXTRA_REGS
- CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
- movq %rsp,%rdi /* &pt_regs -> arg1 */
- call syscall_trace_enter
- LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
- RESTORE_EXTRA_REGS
- cmpq $(IA32_NR_syscalls-1),%rax
- ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
- jmp ia32_do_call
-END(ia32_syscall)
-
-ia32_badsys:
- movq $0,ORIG_RAX(%rsp)
- movq $-ENOSYS,%rax
- jmp ia32_sysret
-
- CFI_ENDPROC
-
- .macro PTREGSCALL label, func
- ALIGN
-GLOBAL(\label)
- leaq \func(%rip),%rax
- jmp ia32_ptregs_common
- .endm
-
- CFI_STARTPROC32
-
- PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
- PTREGSCALL stub32_sigreturn, sys32_sigreturn
- PTREGSCALL stub32_fork, sys_fork
- PTREGSCALL stub32_vfork, sys_vfork
-
- ALIGN
-GLOBAL(stub32_clone)
- leaq sys_clone(%rip),%rax
- mov %r8, %rcx
- jmp ia32_ptregs_common
-
- ALIGN
-ia32_ptregs_common:
- CFI_ENDPROC
- CFI_STARTPROC32 simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SIZEOF_PTREGS
- CFI_REL_OFFSET rax,RAX
- CFI_REL_OFFSET rcx,RCX
- CFI_REL_OFFSET rdx,RDX
- CFI_REL_OFFSET rsi,RSI
- CFI_REL_OFFSET rdi,RDI
- CFI_REL_OFFSET rip,RIP
-/* CFI_REL_OFFSET cs,CS*/
-/* CFI_REL_OFFSET rflags,EFLAGS*/
- CFI_REL_OFFSET rsp,RSP
-/* CFI_REL_OFFSET ss,SS*/
- SAVE_EXTRA_REGS 8
- call *%rax
- RESTORE_EXTRA_REGS 8
- ret
- CFI_ENDPROC
-END(ia32_ptregs_common)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index bdf02ee..e7636ba 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,6 +18,12 @@
.endm
#endif
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
.long \orig - .
.long \alt - .
@@ -27,6 +33,12 @@
.byte \pad_len
.endm
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
.macro ALTERNATIVE oldinstr, newinstr, feature
140:
\oldinstr
@@ -55,6 +67,12 @@
*/
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+/*
+ * Same as ALTERNATIVE macro above but for two alternatives. If CPU
+ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+ * @feature2, it replaces @oldinstr with @feature2.
+ */
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 976b86a..c839363 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -644,6 +644,12 @@ static inline void entering_ack_irq(void)
entering_irq();
}
+static inline void ipi_entering_ack_irq(void)
+{
+ ack_APIC_irq();
+ irq_enter();
+}
+
static inline void exiting_irq(void)
{
irq_exit();
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7730c1c..189679a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -63,6 +63,31 @@
_ASM_ALIGN ; \
_ASM_PTR (entry); \
.popsection
+
+.macro ALIGN_DESTINATION
+ /* check for bad alignment of destination */
+ movl %edi,%ecx
+ andl $7,%ecx
+ jz 102f /* already aligned */
+ subl $8,%ecx
+ negl %ecx
+ subl %ecx,%edx
+100: movb (%rsi),%al
+101: movb %al,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz 100b
+102:
+ .section .fixup,"ax"
+103: addl %ecx,%edx /* ecx is zerorest also */
+ jmp copy_user_handle_tail
+ .previous
+
+ _ASM_EXTABLE(100b,103b)
+ _ASM_EXTABLE(101b,103b)
+ .endm
+
#else
# define _ASM_EXTABLE(from,to) \
" .pushsection \"__ex_table\",\"a\"\n" \
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 5e5cd12..e916895 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -22,7 +22,7 @@
*
* Atomically reads the value of @v.
*/
-static inline int atomic_read(const atomic_t *v)
+static __always_inline int atomic_read(const atomic_t *v)
{
return ACCESS_ONCE((v)->counter);
}
@@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v)
*
* Atomically sets the value of @v to @i.
*/
-static inline void atomic_set(atomic_t *v, int i)
+static __always_inline void atomic_set(atomic_t *v, int i)
{
v->counter = i;
}
@@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i)
*
* Atomically adds @i to @v.
*/
-static inline void atomic_add(int i, atomic_t *v)
+static __always_inline void atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
: "+m" (v->counter)
@@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline void atomic_sub(int i, atomic_t *v)
+static __always_inline void atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
: "+m" (v->counter)
@@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static inline int atomic_sub_and_test(int i, atomic_t *v)
+static __always_inline int atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");
}
@@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v)
*
* Atomically increments @v by 1.
*/
-static inline void atomic_inc(atomic_t *v)
+static __always_inline void atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter));
@@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v)
*
* Atomically decrements @v by 1.
*/
-static inline void atomic_dec(atomic_t *v)
+static __always_inline void atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter));
@@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-static inline int atomic_dec_and_test(atomic_t *v)
+static __always_inline int atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
}
@@ -126,7 +126,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-static inline int atomic_inc_and_test(atomic_t *v)
+static __always_inline int atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
}
@@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static inline int atomic_add_negative(int i, atomic_t *v)
+static __always_inline int atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");
}
@@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
-static inline int atomic_add_return(int i, atomic_t *v)
+static __always_inline int atomic_add_return(int i, atomic_t *v)
{
return i + xadd(&v->counter, i);
}
@@ -164,7 +164,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
*
* Atomically subtracts @i from @v and returns @v - @i
*/
-static inline int atomic_sub_return(int i, atomic_t *v)
+static __always_inline int atomic_sub_return(int i, atomic_t *v)
{
return atomic_add_return(-i, v);
}
@@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
#define atomic_inc_return(v) (atomic_add_return(1, v))
#define atomic_dec_return(v) (atomic_sub_return(1, v))
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
return cmpxchg(&v->counter, old, new);
}
@@ -191,7 +191,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
* Atomically adds @a to @v, so long as @v was not already @u.
* Returns the old value of @v.
*/
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
int c, old;
c = atomic_read(v);
@@ -213,7 +213,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
* Atomically adds 1 to @v
* Returns the new value of @u
*/
-static inline short int atomic_inc_short(short int *v)
+static __always_inline short int atomic_inc_short(short int *v)
{
asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
return *v;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index f8d273e..b965f9e 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -40,7 +40,7 @@ static inline void atomic64_set(atomic64_t *v, long i)
*
* Atomically adds @i to @v.
*/
-static inline void atomic64_add(long i, atomic64_t *v)
+static __always_inline void atomic64_add(long i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "addq %1,%0"
: "=m" (v->counter)
@@ -81,7 +81,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
-static inline void atomic64_inc(atomic64_t *v)
+static __always_inline void atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
@@ -94,7 +94,7 @@ static inline void atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
-static inline void atomic64_dec(atomic64_t *v)
+static __always_inline void atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
@@ -148,7 +148,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
-static inline long atomic64_add_return(long i, atomic64_t *v)
+static __always_inline long atomic64_add_return(long i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
}
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 959e45b..e51a8f8 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -35,12 +35,12 @@
#define smp_mb() mb()
#define smp_rmb() dma_rmb()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#endif /* SMP */
#define read_barrier_depends() do { } while (0)
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 47c8e32..b6f7457 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -8,7 +8,7 @@
/*
* The set_memory_* API can be used to change various attributes of a virtual
* address range. The attributes include:
- * Cachability : UnCached, WriteCombining, WriteBack
+ * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack
* Executability : eXeutable, NoteXecutable
* Read/Write : ReadOnly, ReadWrite
* Presence : NotPresent
@@ -35,9 +35,11 @@
int _set_memory_uc(unsigned long addr, int numpages);
int _set_memory_wc(unsigned long addr, int numpages);
+int _set_memory_wt(unsigned long addr, int numpages);
int _set_memory_wb(unsigned long addr, int numpages);
int set_memory_uc(unsigned long addr, int numpages);
int set_memory_wc(unsigned long addr, int numpages);
+int set_memory_wt(unsigned long addr, int numpages);
int set_memory_wb(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
@@ -48,10 +50,12 @@ int set_memory_4k(unsigned long addr, int numpages);
int set_memory_array_uc(unsigned long *addr, int addrinarray);
int set_memory_array_wc(unsigned long *addr, int addrinarray);
+int set_memory_array_wt(unsigned long *addr, int addrinarray);
int set_memory_array_wb(unsigned long *addr, int addrinarray);
int set_pages_array_uc(struct page **pages, int addrinarray);
int set_pages_array_wc(struct page **pages, int addrinarray);
+int set_pages_array_wt(struct page **pages, int addrinarray);
int set_pages_array_wb(struct page **pages, int addrinarray);
/*
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 99c105d7..ad19841 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -4,8 +4,6 @@
#include <linux/compiler.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
-#define __HAVE_ARCH_CMPXCHG 1
-
/*
* Non-existant functions to indicate usage errors at link time
* (or compile-time if the compiler implements __compiletime_error().
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
deleted file mode 100644
index de1cdaf..0000000
--- a/arch/x86/include/asm/dwarf2.h
+++ /dev/null
@@ -1,170 +0,0 @@
-#ifndef _ASM_X86_DWARF2_H
-#define _ASM_X86_DWARF2_H
-
-#ifndef __ASSEMBLY__
-#warning "asm/dwarf2.h should be only included in pure assembly files"
-#endif
-
-/*
- * Macros for dwarf2 CFI unwind table entries.
- * See "as.info" for details on these pseudo ops. Unfortunately
- * they are only supported in very new binutils, so define them
- * away for older version.
- */
-
-#ifdef CONFIG_AS_CFI
-
-#define CFI_STARTPROC .cfi_startproc
-#define CFI_ENDPROC .cfi_endproc
-#define CFI_DEF_CFA .cfi_def_cfa
-#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
-#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
-#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
-#define CFI_OFFSET .cfi_offset
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_REGISTER .cfi_register
-#define CFI_RESTORE .cfi_restore
-#define CFI_REMEMBER_STATE .cfi_remember_state
-#define CFI_RESTORE_STATE .cfi_restore_state
-#define CFI_UNDEFINED .cfi_undefined
-#define CFI_ESCAPE .cfi_escape
-
-#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
-#define CFI_SIGNAL_FRAME .cfi_signal_frame
-#else
-#define CFI_SIGNAL_FRAME
-#endif
-
-#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__)
- /*
- * Emit CFI data in .debug_frame sections, not .eh_frame sections.
- * The latter we currently just discard since we don't do DWARF
- * unwinding at runtime. So only the offline DWARF information is
- * useful to anyone. Note we should not use this directive if this
- * file is used in the vDSO assembly, or if vmlinux.lds.S gets
- * changed so it doesn't discard .eh_frame.
- */
- .cfi_sections .debug_frame
-#endif
-
-#else
-
-/*
- * Due to the structure of pre-exisiting code, don't use assembler line
- * comment character # to ignore the arguments. Instead, use a dummy macro.
- */
-.macro cfi_ignore a=0, b=0, c=0, d=0
-.endm
-
-#define CFI_STARTPROC cfi_ignore
-#define CFI_ENDPROC cfi_ignore
-#define CFI_DEF_CFA cfi_ignore
-#define CFI_DEF_CFA_REGISTER cfi_ignore
-#define CFI_DEF_CFA_OFFSET cfi_ignore
-#define CFI_ADJUST_CFA_OFFSET cfi_ignore
-#define CFI_OFFSET cfi_ignore
-#define CFI_REL_OFFSET cfi_ignore
-#define CFI_REGISTER cfi_ignore
-#define CFI_RESTORE cfi_ignore
-#define CFI_REMEMBER_STATE cfi_ignore
-#define CFI_RESTORE_STATE cfi_ignore
-#define CFI_UNDEFINED cfi_ignore
-#define CFI_ESCAPE cfi_ignore
-#define CFI_SIGNAL_FRAME cfi_ignore
-
-#endif
-
-/*
- * An attempt to make CFI annotations more or less
- * correct and shorter. It is implied that you know
- * what you're doing if you use them.
- */
-#ifdef __ASSEMBLY__
-#ifdef CONFIG_X86_64
- .macro pushq_cfi reg
- pushq \reg
- CFI_ADJUST_CFA_OFFSET 8
- .endm
-
- .macro pushq_cfi_reg reg
- pushq %\reg
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET \reg, 0
- .endm
-
- .macro popq_cfi reg
- popq \reg
- CFI_ADJUST_CFA_OFFSET -8
- .endm
-
- .macro popq_cfi_reg reg
- popq %\reg
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE \reg
- .endm
-
- .macro pushfq_cfi
- pushfq
- CFI_ADJUST_CFA_OFFSET 8
- .endm
-
- .macro popfq_cfi
- popfq
- CFI_ADJUST_CFA_OFFSET -8
- .endm
-
- .macro movq_cfi reg offset=0
- movq %\reg, \offset(%rsp)
- CFI_REL_OFFSET \reg, \offset
- .endm
-
- .macro movq_cfi_restore offset reg
- movq \offset(%rsp), %\reg
- CFI_RESTORE \reg
- .endm
-#else /*!CONFIG_X86_64*/
- .macro pushl_cfi reg
- pushl \reg
- CFI_ADJUST_CFA_OFFSET 4
- .endm
-
- .macro pushl_cfi_reg reg
- pushl %\reg
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET \reg, 0
- .endm
-
- .macro popl_cfi reg
- popl \reg
- CFI_ADJUST_CFA_OFFSET -4
- .endm
-
- .macro popl_cfi_reg reg
- popl %\reg
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE \reg
- .endm
-
- .macro pushfl_cfi
- pushfl
- CFI_ADJUST_CFA_OFFSET 4
- .endm
-
- .macro popfl_cfi
- popfl
- CFI_ADJUST_CFA_OFFSET -4
- .endm
-
- .macro movl_cfi reg offset=0
- movl %\reg, \offset(%esp)
- CFI_REL_OFFSET \reg, \offset
- .endm
-
- .macro movl_cfi_restore offset reg
- movl \offset(%esp), %\reg
- CFI_RESTORE \reg
- .endm
-#endif /*!CONFIG_X86_64*/
-#endif /*__ASSEMBLY__*/
-
-#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index dc5fa66..df00299 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_HAVE_KVM
BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
smp_kvm_posted_intr_ipi)
+BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
+ smp_kvm_posted_intr_wakeup_ipi)
#endif
/*
@@ -50,4 +52,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
#endif
+#ifdef CONFIG_X86_MCE_AMD
+BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
+#endif
#endif
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 3b629f4..793179c 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -1,20 +1,17 @@
#ifdef __ASSEMBLY__
#include <asm/asm.h>
-#include <asm/dwarf2.h>
/* The annotation hides the frame from the unwinder and makes it look
like a ordinary ebp save/restore. This avoids some special cases for
frame pointer later */
#ifdef CONFIG_FRAME_POINTER
.macro FRAME
- __ASM_SIZE(push,_cfi) %__ASM_REG(bp)
- CFI_REL_OFFSET __ASM_REG(bp), 0
+ __ASM_SIZE(push,) %__ASM_REG(bp)
__ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
.endm
.macro ENDFRAME
- __ASM_SIZE(pop,_cfi) %__ASM_REG(bp)
- CFI_RESTORE __ASM_REG(bp)
+ __ASM_SIZE(pop,) %__ASM_REG(bp)
.endm
#else
.macro FRAME
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 0f5fb6b..7178043 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,6 +14,7 @@ typedef struct {
#endif
#ifdef CONFIG_HAVE_KVM
unsigned int kvm_posted_intr_ipis;
+ unsigned int kvm_posted_intr_wakeup_ipis;
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
@@ -33,6 +34,9 @@ typedef struct {
#ifdef CONFIG_X86_MCE_THRESHOLD
unsigned int irq_threshold_count;
#endif
+#ifdef CONFIG_X86_MCE_AMD
+ unsigned int irq_deferred_error_count;
+#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
unsigned int irq_hv_callback_count;
#endif
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 36f7125..5fa9fb0 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -74,20 +74,16 @@ extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);
struct irq_data;
+struct hpet_dev;
+struct irq_domain;
+
extern void hpet_msi_unmask(struct irq_data *data);
extern void hpet_msi_mask(struct irq_data *data);
-struct hpet_dev;
extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
-
-#ifdef CONFIG_PCI_MSI
-extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
-#else
-static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
-{
- return -EINVAL;
-}
-#endif
+extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
+extern int hpet_assign_irq(struct irq_domain *domain,
+ struct hpet_dev *dev, int dev_num);
#ifdef CONFIG_HPET_EMULATE_RTC
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index e9571dd..6615032 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,7 @@
extern asmlinkage void apic_timer_interrupt(void);
extern asmlinkage void x86_platform_ipi(void);
extern asmlinkage void kvm_posted_intr_ipi(void);
+extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
@@ -36,43 +37,10 @@ extern asmlinkage void spurious_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
extern asmlinkage void reschedule_interrupt(void);
-extern asmlinkage void invalidate_interrupt(void);
-extern asmlinkage void invalidate_interrupt0(void);
-extern asmlinkage void invalidate_interrupt1(void);
-extern asmlinkage void invalidate_interrupt2(void);
-extern asmlinkage void invalidate_interrupt3(void);
-extern asmlinkage void invalidate_interrupt4(void);
-extern asmlinkage void invalidate_interrupt5(void);
-extern asmlinkage void invalidate_interrupt6(void);
-extern asmlinkage void invalidate_interrupt7(void);
-extern asmlinkage void invalidate_interrupt8(void);
-extern asmlinkage void invalidate_interrupt9(void);
-extern asmlinkage void invalidate_interrupt10(void);
-extern asmlinkage void invalidate_interrupt11(void);
-extern asmlinkage void invalidate_interrupt12(void);
-extern asmlinkage void invalidate_interrupt13(void);
-extern asmlinkage void invalidate_interrupt14(void);
-extern asmlinkage void invalidate_interrupt15(void);
-extern asmlinkage void invalidate_interrupt16(void);
-extern asmlinkage void invalidate_interrupt17(void);
-extern asmlinkage void invalidate_interrupt18(void);
-extern asmlinkage void invalidate_interrupt19(void);
-extern asmlinkage void invalidate_interrupt20(void);
-extern asmlinkage void invalidate_interrupt21(void);
-extern asmlinkage void invalidate_interrupt22(void);
-extern asmlinkage void invalidate_interrupt23(void);
-extern asmlinkage void invalidate_interrupt24(void);
-extern asmlinkage void invalidate_interrupt25(void);
-extern asmlinkage void invalidate_interrupt26(void);
-extern asmlinkage void invalidate_interrupt27(void);
-extern asmlinkage void invalidate_interrupt28(void);
-extern asmlinkage void invalidate_interrupt29(void);
-extern asmlinkage void invalidate_interrupt30(void);
-extern asmlinkage void invalidate_interrupt31(void);
-
extern asmlinkage void irq_move_cleanup_interrupt(void);
extern asmlinkage void reboot_interrupt(void);
extern asmlinkage void threshold_interrupt(void);
+extern asmlinkage void deferred_error_interrupt(void);
extern asmlinkage void call_function_interrupt(void);
extern asmlinkage void call_function_single_interrupt(void);
@@ -87,60 +55,93 @@ extern void trace_spurious_interrupt(void);
extern void trace_thermal_interrupt(void);
extern void trace_reschedule_interrupt(void);
extern void trace_threshold_interrupt(void);
+extern void trace_deferred_error_interrupt(void);
extern void trace_call_function_interrupt(void);
extern void trace_call_function_single_interrupt(void);
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
#define trace_reboot_interrupt reboot_interrupt
#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
+#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
#endif /* CONFIG_TRACING */
-#ifdef CONFIG_IRQ_REMAP
-/* Intel specific interrupt remapping information */
-struct irq_2_iommu {
- struct intel_iommu *iommu;
- u16 irte_index;
- u16 sub_handle;
- u8 irte_mask;
-};
-
-/* AMD specific interrupt remapping information */
-struct irq_2_irte {
- u16 devid; /* Device ID for IRTE table */
- u16 index; /* Index into IRTE table*/
-};
-#endif /* CONFIG_IRQ_REMAP */
-
#ifdef CONFIG_X86_LOCAL_APIC
struct irq_data;
+struct pci_dev;
+struct msi_desc;
+
+enum irq_alloc_type {
+ X86_IRQ_ALLOC_TYPE_IOAPIC = 1,
+ X86_IRQ_ALLOC_TYPE_HPET,
+ X86_IRQ_ALLOC_TYPE_MSI,
+ X86_IRQ_ALLOC_TYPE_MSIX,
+ X86_IRQ_ALLOC_TYPE_DMAR,
+ X86_IRQ_ALLOC_TYPE_UV,
+};
-struct irq_cfg {
- cpumask_var_t domain;
- cpumask_var_t old_domain;
- u8 vector;
- u8 move_in_progress : 1;
-#ifdef CONFIG_IRQ_REMAP
- u8 remapped : 1;
+struct irq_alloc_info {
+ enum irq_alloc_type type;
+ u32 flags;
+ const struct cpumask *mask; /* CPU mask for vector allocation */
union {
- struct irq_2_iommu irq_2_iommu;
- struct irq_2_irte irq_2_irte;
- };
+ int unused;
+#ifdef CONFIG_HPET_TIMER
+ struct {
+ int hpet_id;
+ int hpet_index;
+ void *hpet_data;
+ };
#endif
- union {
-#ifdef CONFIG_X86_IO_APIC
+#ifdef CONFIG_PCI_MSI
struct {
- struct list_head irq_2_pin;
+ struct pci_dev *msi_dev;
+ irq_hw_number_t msi_hwirq;
+ };
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ struct {
+ int ioapic_id;
+ int ioapic_pin;
+ int ioapic_node;
+ u32 ioapic_trigger : 1;
+ u32 ioapic_polarity : 1;
+ u32 ioapic_valid : 1;
+ struct IO_APIC_route_entry *ioapic_entry;
+ };
+#endif
+#ifdef CONFIG_DMAR_TABLE
+ struct {
+ int dmar_id;
+ void *dmar_data;
+ };
+#endif
+#ifdef CONFIG_HT_IRQ
+ struct {
+ int ht_pos;
+ int ht_idx;
+ struct pci_dev *ht_dev;
+ void *ht_update;
+ };
+#endif
+#ifdef CONFIG_X86_UV
+ struct {
+ int uv_limit;
+ int uv_blade;
+ unsigned long uv_offset;
+ char *uv_name;
};
#endif
};
};
+struct irq_cfg {
+ unsigned int dest_apicid;
+ u8 vector;
+};
+
extern struct irq_cfg *irq_cfg(unsigned int irq);
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
-extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
extern void lock_vector_lock(void);
extern void unlock_vector_lock(void);
-extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
-extern void clear_irq_vector(int irq, struct irq_cfg *cfg);
extern void setup_vector_irq(int cpu);
#ifdef CONFIG_SMP
extern void send_cleanup_vector(struct irq_cfg *);
@@ -150,10 +151,7 @@ static inline void send_cleanup_vector(struct irq_cfg *c) { }
static inline void irq_complete_move(struct irq_cfg *c) { }
#endif
-extern int apic_retrigger_irq(struct irq_data *data);
extern void apic_ack_edge(struct irq_data *data);
-extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- unsigned int *dest_id);
#else /* CONFIG_X86_LOCAL_APIC */
static inline void lock_vector_lock(void) {}
static inline void unlock_vector_lock(void) {}
@@ -163,8 +161,7 @@ static inline void unlock_vector_lock(void) {}
extern atomic_t irq_err_count;
extern atomic_t irq_mis_count;
-/* EISA */
-extern void eisa_set_level_irq(unsigned int irq);
+extern void elcr_set_level_irq(unsigned int irq);
/* SMP */
extern __visible void smp_apic_timer_interrupt(struct pt_regs *);
@@ -178,7 +175,6 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
extern __visible void smp_reschedule_interrupt(struct pt_regs *);
extern __visible void smp_call_function_interrupt(struct pt_regs *);
extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
-extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif
extern char irq_entries_start[];
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index e42f758..055ea99 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper;
/* Recognized hypervisors */
extern const struct hypervisor_x86 x86_hyper_vmware;
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_xen;
extern const struct hypervisor_x86 x86_hyper_kvm;
extern void init_hypervisor(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 34a5b93..83ec9b1 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -35,11 +35,13 @@
*/
#define ARCH_HAS_IOREMAP_WC
+#define ARCH_HAS_IOREMAP_WT
#include <linux/string.h>
#include <linux/compiler.h>
#include <asm/page.h>
#include <asm/early_ioremap.h>
+#include <asm/pgtable_types.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -177,6 +179,7 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
* look at pci_iomap().
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
unsigned long prot_val);
@@ -197,8 +200,6 @@ extern void set_iounmap_nonlazy(void);
#include <asm-generic/iomap.h>
-#include <linux/vmalloc.h>
-
/*
* Convert a virtual cached pointer to an uncached pointer
*/
@@ -320,6 +321,7 @@ extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
enum page_cache_mode pcm);
extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size);
extern bool is_early_ioremap_ptep(pte_t *ptep);
@@ -338,6 +340,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
#define IO_SPACE_LIMIT 0xffff
#ifdef CONFIG_MTRR
+extern int __must_check arch_phys_wc_index(int handle);
+#define arch_phys_wc_index arch_phys_wc_index
+
extern int __must_check arch_phys_wc_add(unsigned long base,
unsigned long size);
extern void arch_phys_wc_del(int handle);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 2f91685..6cbf2cf 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -95,9 +95,22 @@ struct IR_IO_APIC_route_entry {
index : 15;
} __attribute__ ((packed));
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
+struct irq_alloc_info;
+struct ioapic_domain_cfg;
+
+#define IOAPIC_AUTO -1
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
+
+#define IOAPIC_MASKED 1
+#define IOAPIC_UNMASKED 0
+
+#define IOAPIC_POL_HIGH 0
+#define IOAPIC_POL_LOW 1
+
+#define IOAPIC_DEST_MODE_PHYSICAL 0
+#define IOAPIC_DEST_MODE_LOGICAL 1
+
#define IOAPIC_MAP_ALLOC 0x1
#define IOAPIC_MAP_CHECK 0x2
@@ -110,9 +123,6 @@ extern int nr_ioapics;
extern int mpc_ioapic_id(int ioapic);
extern unsigned int mpc_ioapic_addr(int ioapic);
-extern struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic);
-
-#define MP_MAX_IOAPIC_PIN 127
/* # of MP IRQ source entries */
extern int mp_irq_entries;
@@ -120,9 +130,6 @@ extern int mp_irq_entries;
/* MP IRQ source entries */
extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
-/* Older SiS APIC requires we rewrite the index register */
-extern int sis_apic_bug;
-
/* 1 if "noapic" boot option passed */
extern int skip_ioapic_setup;
@@ -132,6 +139,8 @@ extern int noioapicquirk;
/* -1 if "noapic" boot option passed */
extern int noioapicreroute;
+extern u32 gsi_top;
+
extern unsigned long io_apic_irqs;
#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs))
@@ -147,13 +156,6 @@ struct irq_cfg;
extern void ioapic_insert_resources(void);
extern int arch_early_ioapic_init(void);
-extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
- unsigned int, int,
- struct io_apic_irq_attr *);
-extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
-
-extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
-
extern int save_ioapic_entries(void);
extern void mask_ioapic_entries(void);
extern int restore_ioapic_entries(void);
@@ -161,82 +163,32 @@ extern int restore_ioapic_entries(void);
extern void setup_ioapic_ids_from_mpc(void);
extern void setup_ioapic_ids_from_mpc_nocheck(void);
-struct io_apic_irq_attr {
- int ioapic;
- int ioapic_pin;
- int trigger;
- int polarity;
-};
-
-enum ioapic_domain_type {
- IOAPIC_DOMAIN_INVALID,
- IOAPIC_DOMAIN_LEGACY,
- IOAPIC_DOMAIN_STRICT,
- IOAPIC_DOMAIN_DYNAMIC,
-};
-
-struct device_node;
-struct irq_domain;
-struct irq_domain_ops;
-
-struct ioapic_domain_cfg {
- enum ioapic_domain_type type;
- const struct irq_domain_ops *ops;
- struct device_node *dev;
-};
-
-struct mp_ioapic_gsi{
- u32 gsi_base;
- u32 gsi_end;
-};
-extern u32 gsi_top;
-
extern int mp_find_ioapic(u32 gsi);
extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
-extern u32 mp_pin_to_gsi(int ioapic, int pin);
-extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
+extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags,
+ struct irq_alloc_info *info);
extern void mp_unmap_irq(int irq);
extern int mp_register_ioapic(int id, u32 address, u32 gsi_base,
struct ioapic_domain_cfg *cfg);
extern int mp_unregister_ioapic(u32 gsi_base);
extern int mp_ioapic_registered(u32 gsi_base);
-extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq);
-extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
-extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node);
-extern void __init pre_init_apic_IRQ0(void);
+
+extern void ioapic_set_alloc_attr(struct irq_alloc_info *info,
+ int node, int trigger, int polarity);
extern void mp_save_irq(struct mpc_intsrc *m);
extern void disable_ioapic_support(void);
-extern void __init native_io_apic_init_mappings(void);
+extern void __init io_apic_init_mappings(void);
extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
-extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
-extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
extern void native_disable_io_apic(void);
-extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
-extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
-extern int native_ioapic_set_affinity(struct irq_data *,
- const struct cpumask *,
- bool);
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
return x86_io_apic_ops.read(apic, reg);
}
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- x86_io_apic_ops.write(apic, reg, value);
-}
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
- x86_io_apic_ops.modify(apic, reg, value);
-}
-
-extern void io_apic_eoi(unsigned int apic, unsigned int vector);
-
extern void setup_IO_APIC(void);
extern void enable_IO_APIC(void);
extern void disable_IO_APIC(void);
@@ -253,8 +205,12 @@ static inline int arch_early_ioapic_init(void) { return 0; }
static inline void print_IO_APICs(void) {}
#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
-static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
-static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
+static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags,
+ struct irq_alloc_info *info)
+{
+ return gsi;
+}
+
static inline void mp_unmap_irq(int irq) { }
static inline int save_ioapic_entries(void)
@@ -268,17 +224,11 @@ static inline int restore_ioapic_entries(void)
return -ENOMEM;
}
-static inline void mp_save_irq(struct mpc_intsrc *m) { };
+static inline void mp_save_irq(struct mpc_intsrc *m) { }
static inline void disable_ioapic_support(void) { }
-#define native_io_apic_init_mappings NULL
+static inline void io_apic_init_mappings(void) { }
#define native_io_apic_read NULL
-#define native_io_apic_write NULL
-#define native_io_apic_modify NULL
#define native_disable_io_apic NULL
-#define native_io_apic_print_entries NULL
-#define native_ioapic_set_affinity NULL
-#define native_setup_ioapic_entry NULL
-#define native_eoi_ioapic_pin NULL
static inline void setup_IO_APIC(void) { }
static inline void enable_IO_APIC(void) { }
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index a80cbb8..8008d06 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -30,6 +30,10 @@ extern void fixup_irqs(void);
extern void irq_force_complete_move(int);
#endif
+#ifdef CONFIG_HAVE_KVM
+extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
+#endif
+
extern void (*x86_platform_ipi_callback)(void);
extern void native_init_IRQ(void);
extern bool handle_irq(unsigned irq, struct pt_regs *regs);
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 6224d31..046c7fb 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -22,84 +22,72 @@
#ifndef __X86_IRQ_REMAPPING_H
#define __X86_IRQ_REMAPPING_H
+#include <asm/irqdomain.h>
+#include <asm/hw_irq.h>
#include <asm/io_apic.h>
-struct IO_APIC_route_entry;
-struct io_apic_irq_attr;
-struct irq_chip;
struct msi_msg;
-struct pci_dev;
-struct irq_cfg;
+struct irq_alloc_info;
+
+enum irq_remap_cap {
+ IRQ_POSTING_CAP = 0,
+};
#ifdef CONFIG_IRQ_REMAP
+extern bool irq_remapping_cap(enum irq_remap_cap cap);
extern void set_irq_remapping_broken(void);
extern int irq_remapping_prepare(void);
extern int irq_remapping_enable(void);
extern void irq_remapping_disable(void);
extern int irq_remapping_reenable(int);
extern int irq_remap_enable_fault_handling(void);
-extern int setup_ioapic_remapped_entry(int irq,
- struct IO_APIC_route_entry *entry,
- unsigned int destination,
- int vector,
- struct io_apic_irq_attr *attr);
-extern void free_remapped_irq(int irq);
-extern void compose_remapped_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id);
-extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
extern void panic_if_irq_remap(const char *msg);
-extern bool setup_remapped_irq(int irq,
- struct irq_cfg *cfg,
- struct irq_chip *chip);
-void irq_remap_modify_chip_defaults(struct irq_chip *chip);
+extern struct irq_domain *
+irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info);
+extern struct irq_domain *
+irq_remapping_get_irq_domain(struct irq_alloc_info *info);
+
+/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
+extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent);
+
+/* Get parent irqdomain for interrupt remapping irqdomain */
+static inline struct irq_domain *arch_get_ir_parent_domain(void)
+{
+ return x86_vector_domain;
+}
+
+struct vcpu_data {
+ u64 pi_desc_addr; /* Physical address of PI Descriptor */
+ u32 vector; /* Guest vector of the interrupt */
+};
#else /* CONFIG_IRQ_REMAP */
+static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }
static inline void set_irq_remapping_broken(void) { }
static inline int irq_remapping_prepare(void) { return -ENODEV; }
static inline int irq_remapping_enable(void) { return -ENODEV; }
static inline void irq_remapping_disable(void) { }
static inline int irq_remapping_reenable(int eim) { return -ENODEV; }
static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; }
-static inline int setup_ioapic_remapped_entry(int irq,
- struct IO_APIC_route_entry *entry,
- unsigned int destination,
- int vector,
- struct io_apic_irq_attr *attr)
-{
- return -ENODEV;
-}
-static inline void free_remapped_irq(int irq) { }
-static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id)
-{
-}
-static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
-{
- return -ENODEV;
-}
static inline void panic_if_irq_remap(const char *msg)
{
}
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+static inline struct irq_domain *
+irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info)
{
+ return NULL;
}
-static inline bool setup_remapped_irq(int irq,
- struct irq_cfg *cfg,
- struct irq_chip *chip)
+static inline struct irq_domain *
+irq_remapping_get_irq_domain(struct irq_alloc_info *info)
{
- return false;
+ return NULL;
}
-#endif /* CONFIG_IRQ_REMAP */
-
-#define dmar_alloc_hwirq() irq_alloc_hwirq(-1)
-#define dmar_free_hwirq irq_free_hwirq
+#endif /* CONFIG_IRQ_REMAP */
#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 666c89e..4c2d2eb 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -47,31 +47,12 @@
#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
#define IA32_SYSCALL_VECTOR 0x80
-#ifdef CONFIG_X86_32
-# define SYSCALL_VECTOR 0x80
-#endif
/*
* Vectors 0x30-0x3f are used for ISA interrupts.
* round up to the next 16-vector boundary
*/
-#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15)
-
-#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
-#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
-#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
-#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
-#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
-#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
-#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
-#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
-#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
-#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
-#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
-#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
-#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
-#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
-#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
+#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq)
/*
* Special IRQ vectors used by the SMP architecture, 0xf0-0xff
@@ -102,21 +83,23 @@
*/
#define X86_PLATFORM_IPI_VECTOR 0xf7
-/* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
-#define POSTED_INTR_VECTOR 0xf2
-#endif
-
+#define POSTED_INTR_WAKEUP_VECTOR 0xf1
/*
* IRQ work vector:
*/
#define IRQ_WORK_VECTOR 0xf6
#define UV_BAU_MESSAGE 0xf5
+#define DEFERRED_ERROR_VECTOR 0xf4
/* Vector on which hypervisor callbacks will be delivered */
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
+/* Vector for KVM to deliver posted interrupt IPI */
+#ifdef CONFIG_HAVE_KVM
+#define POSTED_INTR_VECTOR 0xf2
+#endif
+
/*
* Local APIC timer IRQ vector is on a different priority level,
* to work around the 'lost local interrupt if more than 2 IRQ
@@ -155,18 +138,22 @@ static inline int invalid_vm86_irq(int irq)
* static arrays.
*/
-#define NR_IRQS_LEGACY 16
+#define NR_IRQS_LEGACY 16
-#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
+#define CPU_VECTOR_LIMIT (64 * NR_CPUS)
+#define IO_APIC_VECTOR_LIMIT (32 * MAX_IO_APICS)
-#ifdef CONFIG_X86_IO_APIC
-# define CPU_VECTOR_LIMIT (64 * NR_CPUS)
-# define NR_IRQS \
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_PCI_MSI)
+#define NR_IRQS \
(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
-#else /* !CONFIG_X86_IO_APIC: */
-# define NR_IRQS NR_IRQS_LEGACY
+#elif defined(CONFIG_X86_IO_APIC)
+#define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
+#elif defined(CONFIG_PCI_MSI)
+#define NR_IRQS (NR_VECTORS + CPU_VECTOR_LIMIT)
+#else
+#define NR_IRQS NR_IRQS_LEGACY
#endif
#endif /* _ASM_X86_IRQ_VECTORS_H */
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
new file mode 100644
index 0000000..d26075b
--- /dev/null
+++ b/arch/x86/include/asm/irqdomain.h
@@ -0,0 +1,63 @@
+#ifndef _ASM_IRQDOMAIN_H
+#define _ASM_IRQDOMAIN_H
+
+#include <linux/irqdomain.h>
+#include <asm/hw_irq.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+enum {
+ /* Allocate contiguous CPU vectors */
+ X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1,
+};
+
+extern struct irq_domain *x86_vector_domain;
+
+extern void init_irq_alloc_info(struct irq_alloc_info *info,
+ const struct cpumask *mask);
+extern void copy_irq_alloc_info(struct irq_alloc_info *dst,
+ struct irq_alloc_info *src);
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+struct device_node;
+struct irq_data;
+
+enum ioapic_domain_type {
+ IOAPIC_DOMAIN_INVALID,
+ IOAPIC_DOMAIN_LEGACY,
+ IOAPIC_DOMAIN_STRICT,
+ IOAPIC_DOMAIN_DYNAMIC,
+};
+
+struct ioapic_domain_cfg {
+ enum ioapic_domain_type type;
+ const struct irq_domain_ops *ops;
+ struct device_node *dev;
+};
+
+extern const struct irq_domain_ops mp_ioapic_irqdomain_ops;
+
+extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg);
+extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs);
+extern void mp_irqdomain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data);
+extern void mp_irqdomain_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data);
+extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
+#endif /* CONFIG_X86_IO_APIC */
+
+#ifdef CONFIG_PCI_MSI
+extern void arch_init_msi_domain(struct irq_domain *domain);
+#else
+static inline void arch_init_msi_domain(struct irq_domain *domain) { }
+#endif
+
+#ifdef CONFIG_HT_IRQ
+extern void arch_init_htirq_domain(struct irq_domain *domain);
+#else
+static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dea2e7e..f4a555b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -207,6 +207,7 @@ union kvm_mmu_page_role {
unsigned nxe:1;
unsigned cr0_wp:1;
unsigned smep_andnot_wp:1;
+ unsigned smap_andnot_wp:1;
};
};
@@ -400,6 +401,7 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_header_cache;
struct fpu guest_fpu;
+ bool eager_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@@ -743,6 +745,7 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+ void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 1f5a86d..982dfc3 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -17,11 +17,16 @@
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */
+#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */
/* MCG_STATUS register defines */
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+#define MCG_STATUS_LMCES (1ULL<<3) /* LMCE signaled */
+
+/* MCG_EXT_CTL register defines */
+#define MCG_EXT_CTL_LMCE_EN (1ULL<<0) /* Enable LMCE */
/* MCi_STATUS register defines */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
@@ -104,6 +109,7 @@ struct mce_log {
struct mca_config {
bool dont_log_ce;
bool cmci_disabled;
+ bool lmce_disabled;
bool ignore_ce;
bool disabled;
bool ser;
@@ -117,8 +123,19 @@ struct mca_config {
};
struct mce_vendor_flags {
- __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */
- __reserved_0 : 63;
+ /*
+ * overflow recovery cpuid bit indicates that overflow
+ * conditions are not fatal
+ */
+ __u64 overflow_recov : 1,
+
+ /*
+ * SUCCOR stands for S/W UnCorrectable error COntainment
+ * and Recovery. It indicates support for data poisoning
+ * in HW and deferred error interrupts.
+ */
+ succor : 1,
+ __reserved_0 : 62;
};
extern struct mce_vendor_flags mce_flags;
@@ -168,12 +185,16 @@ void cmci_clear(void);
void cmci_reenable(void);
void cmci_rediscover(void);
void cmci_recheck(void);
+void lmce_clear(void);
+void lmce_enable(void);
#else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {}
static inline void cmci_reenable(void) {}
static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
+static inline void lmce_clear(void) {}
+static inline void lmce_enable(void) {}
#endif
#ifdef CONFIG_X86_MCE_AMD
@@ -223,6 +244,9 @@ void do_machine_check(struct pt_regs *, long);
extern void (*mce_threshold_vector)(void);
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
+/* Deferred error interrupt handler */
+extern void (*deferred_error_int_vector)(void);
+
/*
* Thermal handler
*/
diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
new file mode 100644
index 0000000..93724cc
--- /dev/null
+++ b/arch/x86/include/asm/msi.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_X86_MSI_H
+#define _ASM_X86_MSI_H
+#include <asm/hw_irq.h>
+
+typedef struct irq_alloc_info msi_alloc_info_t;
+
+#endif /* _ASM_X86_MSI_H */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index c469490..9ebc3d0 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,7 @@
#define MSR_IA32_MCG_CAP 0x00000179
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_IA32_MCG_EXT_CTL 0x000004d0
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
@@ -140,6 +141,7 @@
#define MSR_CORE_C3_RESIDENCY 0x000003fc
#define MSR_CORE_C6_RESIDENCY 0x000003fd
#define MSR_CORE_C7_RESIDENCY 0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
#define MSR_PKG_C2_RESIDENCY 0x0000060d
#define MSR_PKG_C8_RESIDENCY 0x00000630
#define MSR_PKG_C9_RESIDENCY 0x00000631
@@ -379,6 +381,7 @@
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
+#define FEATURE_CONTROL_LMCE (1<<20)
#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index de36f22..e6a707e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -1,13 +1,14 @@
#ifndef _ASM_X86_MSR_H
#define _ASM_X86_MSR_H
-#include <uapi/asm/msr.h>
+#include "msr-index.h"
#ifndef __ASSEMBLY__
#include <asm/asm.h>
#include <asm/errno.h>
#include <asm/cpumask.h>
+#include <uapi/asm/msr.h>
struct msr {
union {
@@ -205,8 +206,13 @@ do { \
#endif /* !CONFIG_PARAVIRT */
-#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \
- (u32)((val) >> 32))
+/*
+ * 64-bit version of wrmsr_safe():
+ */
+static inline int wrmsrl_safe(u32 msr, u64 val)
+{
+ return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
+}
#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high))
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index f768f62..b94f6f6 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -31,7 +31,7 @@
* arch_phys_wc_add and arch_phys_wc_del.
*/
# ifdef CONFIG_MTRR
-extern u8 mtrr_type_lookup(u64 addr, u64 end);
+extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);
extern int mtrr_add(unsigned long base, unsigned long size,
@@ -48,14 +48,13 @@ extern void mtrr_aps_init(void);
extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
-extern int phys_wc_to_mtrr_index(int handle);
# else
-static inline u8 mtrr_type_lookup(u64 addr, u64 end)
+static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform)
{
/*
* Return no-MTRRs:
*/
- return 0xff;
+ return MTRR_TYPE_INVALID;
}
#define mtrr_save_fixed_ranges(arg) do {} while (0)
#define mtrr_save_state() do {} while (0)
@@ -84,10 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
-static inline int phys_wc_to_mtrr_index(int handle)
-{
- return -1;
-}
#define mtrr_ap_init() do {} while (0)
#define mtrr_bp_init() do {} while (0)
@@ -127,4 +122,8 @@ struct mtrr_gentry32 {
_IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32)
#endif /* CONFIG_COMPAT */
+/* Bit fields for enabled in struct mtrr_state_type */
+#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01
+#define MTRR_STATE_MTRR_ENABLED 0x02
+
#endif /* _ASM_X86_MTRR_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8957810..d143bfa 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
+ u32 val)
+{
+ PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
+}
+
+static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+ PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
+}
+
+static __always_inline void pv_wait(u8 *ptr, u8 val)
+{
+ PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
+}
+
+static __always_inline void pv_kick(int cpu)
+{
+ PVOP_VCALL1(pv_lock_ops.kick, cpu);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
__ticket_t ticket)
{
@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
}
-#endif
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f7b0b5c..a6b8f9f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -160,13 +160,14 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
unsigned long long (*read_tscp)(unsigned int *aux);
+#ifdef CONFIG_X86_32
/*
* Atomically enable interrupts and return to userspace. This
- * is only ever used to return to 32-bit processes; in a
- * 64-bit kernel, it's used for 32-on-64 compat processes, but
- * never native 64-bit processes. (Jump, not call.)
+ * is only used in 32-bit kernels. 64-bit kernels use
+ * usergs_sysret32 instead.
*/
void (*irq_enable_sysexit)(void);
+#endif
/*
* Switch to usermode gs and return to 64-bit usermode using
@@ -333,9 +334,19 @@ struct arch_spinlock;
typedef u16 __ticket_t;
#endif
+struct qspinlock;
+
struct pv_lock_ops {
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
+ struct paravirt_callee_save queued_spin_unlock;
+
+ void (*wait)(u8 *ptr, u8 val);
+ void (*kick)(int cpu);
+#else /* !CONFIG_QUEUED_SPINLOCKS */
struct paravirt_callee_save lock_spinning;
void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
};
/* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 91bc4ba..ca6c228 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -4,14 +4,9 @@
#include <linux/types.h>
#include <asm/pgtable_types.h>
-#ifdef CONFIG_X86_PAT
-extern int pat_enabled;
-#else
-static const int pat_enabled;
-#endif
-
+bool pat_enabled(void);
extern void pat_init(void);
-void pat_init_cache_modes(void);
+void pat_init_cache_modes(u64);
extern int reserve_memtype(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 4e370a5..d8c80ff 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -96,15 +96,10 @@ extern void pci_iommu_alloc(void);
#ifdef CONFIG_PCI_MSI
/* implemented in arch/x86/kernel/apic/io_apic. */
struct msi_desc;
-void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq,
- unsigned int dest, struct msi_msg *msg, u8 hpet_id);
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
void native_restore_msi_irqs(struct pci_dev *dev);
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- unsigned int irq_base, unsigned int irq_offset);
#else
-#define native_compose_msi_msg NULL
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index fe57e7a..2562e30 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -398,11 +398,17 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
* requested memtype:
* - request is uncached, return cannot be write-back
* - request is write-combine, return cannot be write-back
+ * - request is write-through, return cannot be write-back
+ * - request is write-through, return cannot be write-combine
*/
if ((pcm == _PAGE_CACHE_MODE_UC_MINUS &&
new_pcm == _PAGE_CACHE_MODE_WB) ||
(pcm == _PAGE_CACHE_MODE_WC &&
- new_pcm == _PAGE_CACHE_MODE_WB)) {
+ new_pcm == _PAGE_CACHE_MODE_WB) ||
+ (pcm == _PAGE_CACHE_MODE_WT &&
+ new_pcm == _PAGE_CACHE_MODE_WB) ||
+ (pcm == _PAGE_CACHE_MODE_WT &&
+ new_pcm == _PAGE_CACHE_MODE_WC)) {
return 0;
}
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 78f0c8c..13f310b 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -367,6 +367,9 @@ extern int nx_enabled;
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);
+#define pgprot_writethrough pgprot_writethrough
+extern pgprot_t pgprot_writethrough(pgprot_t prot);
+
/* Indicate that x86 has its own track and untrack pfn vma functions */
#define __HAVE_PFNMAP_TRACKING
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index a90f897..a4a7728 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -5,12 +5,14 @@
/* misc architecture specific prototypes */
-void system_call(void);
void syscall_init(void);
-void ia32_syscall(void);
-void ia32_cstar_target(void);
-void ia32_sysenter_target(void);
+void entry_SYSCALL_64(void);
+void entry_SYSCALL_compat(void);
+void entry_INT80_32(void);
+void entry_INT80_compat(void);
+void entry_SYSENTER_32(void);
+void entry_SYSENTER_compat(void);
void x86_configure_nx(void);
void x86_report_nx(void);
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
new file mode 100644
index 0000000..9d51fae
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock.h
@@ -0,0 +1,57 @@
+#ifndef _ASM_X86_QSPINLOCK_H
+#define _ASM_X86_QSPINLOCK_H
+
+#include <asm/cpufeature.h>
+#include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#define queued_spin_unlock queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * A smp_store_release() on the least-significant byte.
+ */
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+ smp_store_release((u8 *)lock, 0);
+}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+ pv_queued_spin_lock_slowpath(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ pv_queued_spin_unlock(lock);
+}
+#else
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+#endif
+
+#define virt_queued_spin_lock virt_queued_spin_lock
+
+static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+{
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ return false;
+
+ while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
+ cpu_relax();
+
+ return true;
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_X86_QSPINLOCK_H */
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
new file mode 100644
index 0000000..b002e71
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_QSPINLOCK_PARAVIRT_H
+#define __ASM_QSPINLOCK_PARAVIRT_H
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+
+#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 5a9856e..7d5a192 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -231,11 +231,21 @@
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
#ifdef __KERNEL__
+
+/*
+ * early_idt_handler_array is an array of entry points referenced in the
+ * early IDT. For simplicity, it's a real array with one entry point
+ * every nine bytes. That leaves room for an optional 'push $0' if the
+ * vector has no error code (two bytes), a 'push $vector_number' (two
+ * bytes), and a jump to the common entry code (up to five bytes).
+ */
+#define EARLY_IDT_HANDLER_SIZE 9
+
#ifndef __ASSEMBLY__
-extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
+extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
#ifdef CONFIG_TRACING
-# define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handler_array early_idt_handler_array
#endif
/*
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index aeb4666e..2270e41 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -215,6 +215,44 @@ static inline void clwb(volatile void *__p)
: [pax] "a" (p));
}
+/**
+ * pcommit_sfence() - persistent commit and fence
+ *
+ * The PCOMMIT instruction ensures that data that has been flushed from the
+ * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
+ * memory and is durable on the DIMM. The primary use case for this is
+ * persistent memory.
+ *
+ * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
+ * with appropriate fencing.
+ *
+ * Example:
+ * void flush_and_commit_buffer(void *vaddr, unsigned int size)
+ * {
+ * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
+ * void *vend = vaddr + size;
+ * void *p;
+ *
+ * for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+ * p < vend; p += boot_cpu_data.x86_clflush_size)
+ * clwb(p);
+ *
+ * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
+ * // MFENCE via mb() also works
+ * wmb();
+ *
+ * // PCOMMIT and the required SFENCE for ordering
+ * pcommit_sfence();
+ * }
+ *
+ * After this function completes the data pointed to by 'vaddr' has been
+ * accepted to memory and will be durable if the 'vaddr' points to persistent
+ * memory.
+ *
+ * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
+ * things we include both the PCOMMIT and the required SFENCE in the
+ * alternatives generated by pcommit_sfence().
+ */
static inline void pcommit_sfence(void)
{
alternative(ASM_NOP7,
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index cf87de3..be0a059 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -42,6 +42,10 @@
extern struct static_key paravirt_ticketlocks_enabled;
static __always_inline bool static_key_false(struct static_key *key);
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
@@ -169,7 +173,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
struct __raw_tickets tmp = READ_ONCE(lock->tickets);
tmp.head &= ~TICKET_SLOWPATH_FLAG;
- return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
+ return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
}
#define arch_spin_is_contended arch_spin_is_contended
@@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
cpu_relax();
}
}
+#endif /* CONFIG_QUEUED_SPINLOCKS */
/*
* Read-write spinlocks, allowing multiple readers
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 5f9d757..65c3e37 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -23,6 +23,9 @@ typedef u32 __ticketpair_t;
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
typedef struct arch_spinlock {
union {
__ticketpair_t head_tail;
@@ -33,6 +36,7 @@ typedef struct arch_spinlock {
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
+#endif /* CONFIG_QUEUED_SPINLOCKS */
#include <asm-generic/qrwlock_types.h>
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index b4bdec3..225ee54 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -177,8 +177,6 @@ struct thread_info {
*/
#ifndef __ASSEMBLY__
-DECLARE_PER_CPU(unsigned long, kernel_stack);
-
static inline struct thread_info *current_thread_info(void)
{
return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
@@ -197,9 +195,13 @@ static inline unsigned long current_stack_pointer(void)
#else /* !__ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+#endif
+
/* Load thread_info address into "reg" */
#define GET_THREAD_INFO(reg) \
- _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
+ _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
_ASM_SUB $(THREAD_SIZE),reg ;
/*
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 0e8f04f..8d717fa 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -26,7 +26,7 @@
#define _ASM_X86_TOPOLOGY_H
#ifdef CONFIG_X86_32
-# ifdef CONFIG_X86_HT
+# ifdef CONFIG_SMP
# define ENABLE_TOPO_DEFINES
# endif
#else
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 4cab890..38a09a1 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -101,6 +101,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single);
DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
/*
+ * deferred_error_apic - called when entering/exiting a deferred apic interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
+
+/*
* thermal_apic - called when entering/exiting a thermal apic interrupt
* vector handler
*/
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 4e49d7d..c5380be 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
#ifndef CONFIG_X86_32
asmlinkage void smp_thermal_interrupt(void);
-asmlinkage void mce_threshold_interrupt(void);
+asmlinkage void smp_threshold_interrupt(void);
+asmlinkage void smp_deferred_error_interrupt(void);
#endif
extern enum ctx_state ist_enter(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 3c03a5d..0ed5504 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -59,6 +59,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
__put_user_size(*(u32 *)from, (u32 __user *)to,
4, ret, 4);
return ret;
+ case 8:
+ __put_user_size(*(u64 *)from, (u64 __user *)to,
+ 8, ret, 8);
+ return ret;
}
}
return __copy_to_user_ll(to, from, n);
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index f58a9c7..48d34d2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -171,38 +171,17 @@ struct x86_platform_ops {
};
struct pci_dev;
-struct msi_msg;
struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
- void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
- unsigned int dest, struct msi_msg *msg,
- u8 hpet_id);
void (*teardown_msi_irq)(unsigned int irq);
void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev);
- int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
};
-struct IO_APIC_route_entry;
-struct io_apic_irq_attr;
-struct irq_data;
-struct cpumask;
-
struct x86_io_apic_ops {
- void (*init) (void);
unsigned int (*read) (unsigned int apic, unsigned int reg);
- void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
- void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
void (*disable)(void);
- void (*print_entries)(unsigned int apic, unsigned int nr_entries);
- int (*set_affinity)(struct irq_data *data,
- const struct cpumask *mask,
- bool force);
- int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr);
- void (*eoi_ioapic_pin)(int apic, int pin, int vector);
};
extern struct x86_init_ops x86_init;
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 358dcd3..c44a5d5 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
return false;
}
+static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+ return __get_free_pages(__GFP_NOWARN, order);
+}
+
#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h
index 155e510..c41f4fe 100644
--- a/arch/x86/include/uapi/asm/msr.h
+++ b/arch/x86/include/uapi/asm/msr.h
@@ -1,8 +1,6 @@
#ifndef _UAPI_ASM_X86_MSR_H
#define _UAPI_ASM_X86_MSR_H
-#include <asm/msr-index.h>
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h
index d0acb65..7528dcf 100644
--- a/arch/x86/include/uapi/asm/mtrr.h
+++ b/arch/x86/include/uapi/asm/mtrr.h
@@ -103,7 +103,7 @@ struct mtrr_state_type {
#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry)
-/* These are the region types */
+/* MTRR memory types, which are defined in SDM */
#define MTRR_TYPE_UNCACHABLE 0
#define MTRR_TYPE_WRCOMB 1
/*#define MTRR_TYPE_ 2*/
@@ -113,5 +113,11 @@ struct mtrr_state_type {
#define MTRR_TYPE_WRBACK 6
#define MTRR_NUM_TYPES 7
+/*
+ * Invalid MTRR memory type. mtrr_type_lookup() returns this value when
+ * MTRRs are disabled. Note, this value is allocated from the reserved
+ * values (0x7-0xff) of the MTRR memory types.
+ */
+#define MTRR_TYPE_INVALID 0xff
#endif /* _UAPI_ASM_X86_MTRR_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9bcd0b5..01663ee 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,7 +22,7 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
CFLAGS_irq.o := -I$(src)/../include/asm/trace
-obj-y := process_$(BITS).o signal.o entry_$(BITS).o
+obj-y := process_$(BITS).o signal.o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
@@ -31,9 +31,6 @@ obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += mcount_64.o
-obj-y += syscall_$(BITS).o vsyscall_gtod.o
-obj-$(CONFIG_IA32_EMULATION) += syscall_32.o
-obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-y += bootflag.o e820.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index dbe76a1..e49ee24 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -31,12 +31,12 @@
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/irq.h>
-#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/pci.h>
+#include <asm/irqdomain.h>
#include <asm/pci_x86.h>
#include <asm/pgtable.h>
#include <asm/io_apic.h>
@@ -400,57 +400,13 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
return 0;
}
-static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
- int polarity)
-{
- int irq, node;
-
- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
- return gsi;
-
- trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
- polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
- node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
- if (mp_set_gsi_attr(gsi, trigger, polarity, node)) {
- pr_warn("Failed to set pin attr for GSI%d\n", gsi);
- return -1;
- }
-
- irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
- if (irq < 0)
- return irq;
-
- /* Don't set up the ACPI SCI because it's already set up */
- if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi)
- mp_config_acpi_gsi(dev, gsi, trigger, polarity);
-
- return irq;
-}
-
-static void mp_unregister_gsi(u32 gsi)
-{
- int irq;
-
- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
- return;
-
- irq = mp_map_gsi_to_irq(gsi, 0);
- if (irq > 0)
- mp_unmap_irq(irq);
-}
-
-static struct irq_domain_ops acpi_irqdomain_ops = {
- .map = mp_irqdomain_map,
- .unmap = mp_irqdomain_unmap,
-};
-
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_io_apic *ioapic = NULL;
struct ioapic_domain_cfg cfg = {
.type = IOAPIC_DOMAIN_DYNAMIC,
- .ops = &acpi_irqdomain_ops,
+ .ops = &mp_ioapic_irqdomain_ops,
};
ioapic = (struct acpi_madt_io_apic *)header;
@@ -652,7 +608,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
* Make sure all (legacy) PCI IRQs are set as level-triggered.
*/
if (trigger == ACPI_LEVEL_SENSITIVE)
- eisa_set_level_irq(gsi);
+ elcr_set_level_irq(gsi);
#endif
return gsi;
@@ -663,10 +619,21 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int trigger, int polarity)
{
int irq = gsi;
-
#ifdef CONFIG_X86_IO_APIC
+ int node;
+ struct irq_alloc_info info;
+
+ node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
+ trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
+ polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
+ ioapic_set_alloc_attr(&info, node, trigger, polarity);
+
mutex_lock(&acpi_ioapic_lock);
- irq = mp_register_gsi(dev, gsi, trigger, polarity);
+ irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
+ /* Don't set up the ACPI SCI because it's already set up */
+ if (irq >= 0 && enable_update_mptable &&
+ acpi_gbl_FADT.sci_interrupt != gsi)
+ mp_config_acpi_gsi(dev, gsi, trigger, polarity);
mutex_unlock(&acpi_ioapic_lock);
#endif
@@ -676,8 +643,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
static void acpi_unregister_gsi_ioapic(u32 gsi)
{
#ifdef CONFIG_X86_IO_APIC
+ int irq;
+
mutex_lock(&acpi_ioapic_lock);
- mp_unregister_gsi(gsi);
+ irq = mp_map_gsi_to_irq(gsi, 0, NULL);
+ if (irq > 0)
+ mp_unmap_irq(irq);
mutex_unlock(&acpi_ioapic_lock);
#endif
}
@@ -786,7 +757,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
u64 addr;
struct ioapic_domain_cfg cfg = {
.type = IOAPIC_DOMAIN_DYNAMIC,
- .ops = &acpi_irqdomain_ops,
+ .ops = &mp_ioapic_irqdomain_ops,
};
ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr);
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index ae693b5..8c35df4 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -62,7 +62,7 @@ ENTRY(do_suspend_lowlevel)
pushfq
popq pt_regs_flags(%rax)
- movq $resume_point, saved_rip(%rip)
+ movq $.Lresume_point, saved_rip(%rip)
movq %rsp, saved_rsp
movq %rbp, saved_rbp
@@ -75,10 +75,10 @@ ENTRY(do_suspend_lowlevel)
xorl %eax, %eax
call x86_acpi_enter_sleep_state
/* in case something went wrong, restore the machine status and go on */
- jmp resume_point
+ jmp .Lresume_point
.align 4
-resume_point:
+.Lresume_point:
/* We don't restore %rax, it must be 0 anyway */
movq $saved_context, %rax
movq saved_context_cr4(%rax), %rbx
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index aef6531..b0932c4 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -227,6 +227,15 @@ void __init arch_init_ideal_nops(void)
#endif
}
break;
+
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 > 0xf) {
+ ideal_nops = p6_nops;
+ return;
+ }
+
+ /* fall through */
+
default:
#ifdef CONFIG_X86_64
ideal_nops = k8_nops;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 6a7c23f..ede92c3 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -171,10 +171,6 @@ static int __init apbt_clockevent_register(void)
static void apbt_setup_irq(struct apbt_dev *adev)
{
- /* timer0 irq has been setup early */
- if (adev->irq == 0)
- return;
-
irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
}
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
index 816f36e..ae50d34 100644
--- a/arch/x86/kernel/apic/htirq.c
+++ b/arch/x86/kernel/apic/htirq.c
@@ -3,6 +3,8 @@
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
+ * Jiang Liu <jiang.liu@linux.intel.com>
+ * Add support of hierarchical irqdomain
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -14,78 +16,112 @@
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/htirq.h>
+#include <asm/irqdomain.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/hypertransport.h>
+static struct irq_domain *htirq_domain;
+
/*
* Hypertransport interrupt support
*/
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
- struct ht_irq_msg msg;
-
- fetch_ht_irq_msg(irq, &msg);
-
- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
- msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
- write_ht_irq_msg(irq, &msg);
-}
-
static int
ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
- struct irq_cfg *cfg = irqd_cfg(data);
- unsigned int dest;
+ struct irq_data *parent = data->parent_data;
int ret;
- ret = apic_set_affinity(data, mask, &dest);
- if (ret)
- return ret;
-
- target_ht_irq(data->irq, dest, cfg->vector);
- return IRQ_SET_MASK_OK_NOCOPY;
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret >= 0) {
+ struct ht_irq_msg msg;
+ struct irq_cfg *cfg = irqd_cfg(data);
+
+ fetch_ht_irq_msg(data->irq, &msg);
+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
+ HT_IRQ_LOW_DEST_ID_MASK);
+ msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
+ HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
+ write_ht_irq_msg(data->irq, &msg);
+ }
+
+ return ret;
}
static struct irq_chip ht_irq_chip = {
.name = "PCI-HT",
.irq_mask = mask_ht_irq,
.irq_unmask = unmask_ht_irq,
- .irq_ack = apic_ack_edge,
+ .irq_ack = irq_chip_ack_parent,
.irq_set_affinity = ht_set_affinity,
- .irq_retrigger = apic_retrigger_irq,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
{
- struct irq_cfg *cfg;
- struct ht_irq_msg msg;
- unsigned dest;
- int err;
+ struct ht_irq_cfg *ht_cfg;
+ struct irq_alloc_info *info = arg;
+ struct pci_dev *dev;
+ irq_hw_number_t hwirq;
+ int ret;
- if (disable_apic)
- return -ENXIO;
+ if (nr_irqs > 1 || !info)
+ return -EINVAL;
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (err)
- return err;
+ dev = info->ht_dev;
+ hwirq = (info->ht_idx & 0xFF) |
+ PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
+ (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
+ if (irq_find_mapping(domain, hwirq) > 0)
+ return -EEXIST;
- err = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus(), &dest);
- if (err)
- return err;
+ ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
+ if (!ht_cfg)
+ return -ENOMEM;
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
+ if (ret < 0) {
+ kfree(ht_cfg);
+ return ret;
+ }
+
+ /* Initialize msg to a value that will never match the first write. */
+ ht_cfg->msg.address_lo = 0xffffffff;
+ ht_cfg->msg.address_hi = 0xffffffff;
+ ht_cfg->dev = info->ht_dev;
+ ht_cfg->update = info->ht_update;
+ ht_cfg->pos = info->ht_pos;
+ ht_cfg->idx = 0x10 + (info->ht_idx * 2);
+ irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
+ handle_edge_irq, ht_cfg, "edge");
+
+ return 0;
+}
+
+static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
+
+ BUG_ON(nr_irqs != 1);
+ kfree(irq_data->chip_data);
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
+}
+static void htirq_domain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ struct ht_irq_msg msg;
+ struct irq_cfg *cfg = irqd_cfg(irq_data);
+
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
msg.address_lo =
HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(dest) |
+ HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
HT_IRQ_LOW_VECTOR(cfg->vector) |
((apic->irq_dest_mode == 0) ?
HT_IRQ_LOW_DM_PHYSICAL :
@@ -95,13 +131,56 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
HT_IRQ_LOW_MT_FIXED :
HT_IRQ_LOW_MT_ARBITRATED) |
HT_IRQ_LOW_IRQ_MASKED;
+ write_ht_irq_msg(irq_data->irq, &msg);
+}
- write_ht_irq_msg(irq, &msg);
+static void htirq_domain_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ struct ht_irq_msg msg;
- irq_set_chip_and_handler_name(irq, &ht_irq_chip,
- handle_edge_irq, "edge");
+ memset(&msg, 0, sizeof(msg));
+ write_ht_irq_msg(irq_data->irq, &msg);
+}
- dev_dbg(&dev->dev, "irq %d for HT\n", irq);
+static const struct irq_domain_ops htirq_domain_ops = {
+ .alloc = htirq_domain_alloc,
+ .free = htirq_domain_free,
+ .activate = htirq_domain_activate,
+ .deactivate = htirq_domain_deactivate,
+};
- return 0;
+void arch_init_htirq_domain(struct irq_domain *parent)
+{
+ if (disable_apic)
+ return;
+
+ htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL);
+ if (!htirq_domain)
+ pr_warn("failed to initialize irqdomain for HTIRQ.\n");
+ else
+ htirq_domain->parent = parent;
+}
+
+int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
+ ht_irq_update_t *update)
+{
+ struct irq_alloc_info info;
+
+ if (!htirq_domain)
+ return -ENOSYS;
+
+ init_irq_alloc_info(&info, NULL);
+ info.ht_idx = idx;
+ info.ht_pos = pos;
+ info.ht_dev = dev;
+ info.ht_update = update;
+
+ return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
+ &info);
+}
+
+void arch_teardown_ht_irq(unsigned int irq)
+{
+ irq_domain_free_irqs(irq, 1);
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f4dc246..845dc0d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -18,6 +18,16 @@
* and Rolf G. Tews
* for testing these extensively
* Paul Diefenbaugh : Added full ACPI support
+ *
+ * Historical information which is worth to be preserved:
+ *
+ * - SiS APIC rmw bug:
+ *
+ * We used to have a workaround for a bug in SiS chips which
+ * required to rewrite the index register for a read-modify-write
+ * operation as the chip lost the index information which was
+ * setup for the read already. We cache the data now, so that
+ * workaround has been removed.
*/
#include <linux/mm.h>
@@ -31,13 +41,13 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/syscore_ops.h>
-#include <linux/irqdomain.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/jiffies.h> /* time_after() */
#include <linux/slab.h>
#include <linux/bootmem.h>
+#include <asm/irqdomain.h>
#include <asm/idle.h>
#include <asm/io.h>
#include <asm/smp.h>
@@ -63,27 +73,31 @@
#define for_each_ioapic_pin(idx, pin) \
for_each_ioapic((idx)) \
for_each_pin((idx), (pin))
-
#define for_each_irq_pin(entry, head) \
list_for_each_entry(entry, &head, list)
-/*
- * Is the SiS APIC rmw bug present ?
- * -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
static DEFINE_RAW_SPINLOCK(ioapic_lock);
static DEFINE_MUTEX(ioapic_mutex);
static unsigned int ioapic_dynirq_base;
static int ioapic_initialized;
-struct mp_pin_info {
+struct irq_pin_list {
+ struct list_head list;
+ int apic, pin;
+};
+
+struct mp_chip_data {
+ struct list_head irq_2_pin;
+ struct IO_APIC_route_entry entry;
int trigger;
int polarity;
- int node;
- int set;
u32 count;
+ bool isa_irq;
+};
+
+struct mp_ioapic_gsi {
+ u32 gsi_base;
+ u32 gsi_end;
};
static struct ioapic {
@@ -101,7 +115,6 @@ static struct ioapic {
struct mp_ioapic_gsi gsi_config;
struct ioapic_domain_cfg irqdomain_cfg;
struct irq_domain *irqdomain;
- struct mp_pin_info *pin_info;
struct resource *iomem_res;
} ioapics[MAX_IO_APICS];
@@ -117,7 +130,7 @@ unsigned int mpc_ioapic_addr(int ioapic_idx)
return ioapics[ioapic_idx].mp_config.apicaddr;
}
-struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
+static inline struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
{
return &ioapics[ioapic_idx].gsi_config;
}
@@ -129,11 +142,16 @@ static inline int mp_ioapic_pin_count(int ioapic)
return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
}
-u32 mp_pin_to_gsi(int ioapic, int pin)
+static inline u32 mp_pin_to_gsi(int ioapic, int pin)
{
return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin;
}
+static inline bool mp_is_legacy_irq(int irq)
+{
+ return irq >= 0 && irq < nr_legacy_irqs();
+}
+
/*
* Initialize all legacy IRQs and all pins on the first IOAPIC
* if we have legacy interrupt controller. Kernel boot option "pirq="
@@ -144,12 +162,7 @@ static inline int mp_init_irq_at_boot(int ioapic, int irq)
if (!nr_legacy_irqs())
return 0;
- return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs());
-}
-
-static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin)
-{
- return ioapics[ioapic_idx].pin_info + pin;
+ return ioapic == 0 || mp_is_legacy_irq(irq);
}
static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
@@ -216,16 +229,6 @@ void mp_save_irq(struct mpc_intsrc *m)
panic("Max # of irq sources exceeded!!\n");
}
-struct irq_pin_list {
- struct list_head list;
- int apic, pin;
-};
-
-static struct irq_pin_list *alloc_irq_pin_list(int node)
-{
- return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
-}
-
static void alloc_ioapic_saved_registers(int idx)
{
size_t size;
@@ -247,8 +250,7 @@ static void free_ioapic_saved_registers(int idx)
int __init arch_early_ioapic_init(void)
{
- struct irq_cfg *cfg;
- int i, node = cpu_to_node(0);
+ int i;
if (!nr_legacy_irqs())
io_apic_irqs = ~0UL;
@@ -256,16 +258,6 @@ int __init arch_early_ioapic_init(void)
for_each_ioapic(i)
alloc_ioapic_saved_registers(i);
- /*
- * For legacy IRQ's, start with assigning irq0 to irq15 to
- * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
- */
- for (i = 0; i < nr_legacy_irqs(); i++) {
- cfg = alloc_irq_and_cfg_at(i, node);
- cfg->vector = IRQ0_VECTOR + i;
- cpumask_setall(cfg->domain);
- }
-
return 0;
}
@@ -283,7 +275,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+ (mpc_ioapic_addr(idx) & ~PAGE_MASK);
}
-void io_apic_eoi(unsigned int apic, unsigned int vector)
+static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(vector, &io_apic->eoi);
@@ -296,7 +288,8 @@ unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
return readl(&io_apic->data);
}
-void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+static void io_apic_write(unsigned int apic, unsigned int reg,
+ unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -304,21 +297,6 @@ void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int valu
writel(value, &io_apic->data);
}
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APIC requires we rewrite the index register
- */
-void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
-
- if (sis_apic_bug)
- writel(reg, &io_apic->index);
- writel(value, &io_apic->data);
-}
-
union entry_union {
struct { u32 w1, w2; };
struct IO_APIC_route_entry entry;
@@ -378,7 +356,7 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
static void ioapic_mask_entry(int apic, int pin)
{
unsigned long flags;
- union entry_union eu = { .entry.mask = 1 };
+ union entry_union eu = { .entry.mask = IOAPIC_MASKED };
raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -391,16 +369,17 @@ static void ioapic_mask_entry(int apic, int pin)
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static int __add_pin_to_irq_node(struct mp_chip_data *data,
+ int node, int apic, int pin)
{
struct irq_pin_list *entry;
/* don't allow duplicates */
- for_each_irq_pin(entry, cfg->irq_2_pin)
+ for_each_irq_pin(entry, data->irq_2_pin)
if (entry->apic == apic && entry->pin == pin)
return 0;
- entry = alloc_irq_pin_list(node);
+ entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);
if (!entry) {
pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
node, apic, pin);
@@ -408,16 +387,16 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
}
entry->apic = apic;
entry->pin = pin;
+ list_add_tail(&entry->list, &data->irq_2_pin);
- list_add_tail(&entry->list, &cfg->irq_2_pin);
return 0;
}
-static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
+static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)
{
struct irq_pin_list *tmp, *entry;
- list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list)
+ list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list)
if (entry->apic == apic && entry->pin == pin) {
list_del(&entry->list);
kfree(entry);
@@ -425,22 +404,23 @@ static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
}
}
-static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static void add_pin_to_irq_node(struct mp_chip_data *data,
+ int node, int apic, int pin)
{
- if (__add_pin_to_irq_node(cfg, node, apic, pin))
+ if (__add_pin_to_irq_node(data, node, apic, pin))
panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
}
/*
* Reroute an IRQ to a different pin.
*/
-static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
+static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
int oldapic, int oldpin,
int newapic, int newpin)
{
struct irq_pin_list *entry;
- for_each_irq_pin(entry, cfg->irq_2_pin) {
+ for_each_irq_pin(entry, data->irq_2_pin) {
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
@@ -450,32 +430,26 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
}
/* old apic/pin didn't exist, so just add new ones */
- add_pin_to_irq_node(cfg, node, newapic, newpin);
-}
-
-static void __io_apic_modify_irq(struct irq_pin_list *entry,
- int mask_and, int mask_or,
- void (*final)(struct irq_pin_list *entry))
-{
- unsigned int reg, pin;
-
- pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin * 2);
- reg &= mask_and;
- reg |= mask_or;
- io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
- if (final)
- final(entry);
+ add_pin_to_irq_node(data, node, newapic, newpin);
}
-static void io_apic_modify_irq(struct irq_cfg *cfg,
+static void io_apic_modify_irq(struct mp_chip_data *data,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
+ union entry_union eu;
struct irq_pin_list *entry;
- for_each_irq_pin(entry, cfg->irq_2_pin)
- __io_apic_modify_irq(entry, mask_and, mask_or, final);
+ eu.entry = data->entry;
+ eu.w1 &= mask_and;
+ eu.w1 |= mask_or;
+ data->entry = eu.entry;
+
+ for_each_irq_pin(entry, data->irq_2_pin) {
+ io_apic_write(entry->apic, 0x10 + 2 * entry->pin, eu.w1);
+ if (final)
+ final(entry);
+ }
}
static void io_apic_sync(struct irq_pin_list *entry)
@@ -490,39 +464,31 @@ static void io_apic_sync(struct irq_pin_list *entry)
readl(&io_apic->data);
}
-static void mask_ioapic(struct irq_cfg *cfg)
+static void mask_ioapic_irq(struct irq_data *irq_data)
{
+ struct mp_chip_data *data = irq_data->chip_data;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void mask_ioapic_irq(struct irq_data *data)
+static void __unmask_ioapic(struct mp_chip_data *data)
{
- mask_ioapic(irqd_cfg(data));
+ io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
-static void __unmask_ioapic(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
-}
-
-static void unmask_ioapic(struct irq_cfg *cfg)
+static void unmask_ioapic_irq(struct irq_data *irq_data)
{
+ struct mp_chip_data *data = irq_data->chip_data;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_ioapic(cfg);
+ __unmask_ioapic(data);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void unmask_ioapic_irq(struct irq_data *data)
-{
- unmask_ioapic(irqd_cfg(data));
-}
-
/*
* IO-APIC versions below 0x20 don't support EOI register.
* For the record, here is the information about various versions:
@@ -539,7 +505,7 @@ static void unmask_ioapic_irq(struct irq_data *data)
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
-void native_eoi_ioapic_pin(int apic, int pin, int vector)
+static void __eoi_ioapic_pin(int apic, int pin, int vector)
{
if (mpc_ioapic_ver(apic) >= 0x20) {
io_apic_eoi(apic, vector);
@@ -551,7 +517,7 @@ void native_eoi_ioapic_pin(int apic, int pin, int vector)
/*
* Mask the entry and change the trigger mode to edge.
*/
- entry1.mask = 1;
+ entry1.mask = IOAPIC_MASKED;
entry1.trigger = IOAPIC_EDGE;
__ioapic_write_entry(apic, pin, entry1);
@@ -563,15 +529,14 @@ void native_eoi_ioapic_pin(int apic, int pin, int vector)
}
}
-void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+void eoi_ioapic_pin(int vector, struct mp_chip_data *data)
{
- struct irq_pin_list *entry;
unsigned long flags;
+ struct irq_pin_list *entry;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- for_each_irq_pin(entry, cfg->irq_2_pin)
- x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
- cfg->vector);
+ for_each_irq_pin(entry, data->irq_2_pin)
+ __eoi_ioapic_pin(entry->apic, entry->pin, vector);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -588,8 +553,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
* Make sure the entry is masked and re-read the contents to check
* if it is a level triggered pin and if the remote-IRR is set.
*/
- if (!entry.mask) {
- entry.mask = 1;
+ if (entry.mask == IOAPIC_UNMASKED) {
+ entry.mask = IOAPIC_MASKED;
ioapic_write_entry(apic, pin, entry);
entry = ioapic_read_entry(apic, pin);
}
@@ -602,13 +567,12 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
* doesn't clear the remote-IRR if the trigger mode is not
* set to level.
*/
- if (!entry.trigger) {
+ if (entry.trigger == IOAPIC_EDGE) {
entry.trigger = IOAPIC_LEVEL;
ioapic_write_entry(apic, pin, entry);
}
-
raw_spin_lock_irqsave(&ioapic_lock, flags);
- x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
+ __eoi_ioapic_pin(apic, pin, entry.vector);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -706,8 +670,8 @@ void mask_ioapic_entries(void)
struct IO_APIC_route_entry entry;
entry = ioapics[apic].saved_registers[pin];
- if (!entry.mask) {
- entry.mask = 1;
+ if (entry.mask == IOAPIC_UNMASKED) {
+ entry.mask = IOAPIC_MASKED;
ioapic_write_entry(apic, pin, entry);
}
}
@@ -809,11 +773,11 @@ static int EISA_ELCR(unsigned int irq)
#endif
-/* ISA interrupts are always polarity zero edge triggered,
+/* ISA interrupts are always active high edge triggered,
* when listed as conforming in the MP table. */
-#define default_ISA_trigger(idx) (0)
-#define default_ISA_polarity(idx) (0)
+#define default_ISA_trigger(idx) (IOAPIC_EDGE)
+#define default_ISA_polarity(idx) (IOAPIC_POL_HIGH)
/* EISA interrupts are always polarity zero and can be edge or level
* trigger depending on the ELCR value. If an interrupt is listed as
@@ -823,53 +787,55 @@ static int EISA_ELCR(unsigned int irq)
#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
#define default_EISA_polarity(idx) default_ISA_polarity(idx)
-/* PCI interrupts are always polarity one level triggered,
+/* PCI interrupts are always active low level triggered,
* when listed as conforming in the MP table. */
-#define default_PCI_trigger(idx) (1)
-#define default_PCI_polarity(idx) (1)
+#define default_PCI_trigger(idx) (IOAPIC_LEVEL)
+#define default_PCI_polarity(idx) (IOAPIC_POL_LOW)
static int irq_polarity(int idx)
{
int bus = mp_irqs[idx].srcbus;
- int polarity;
/*
* Determine IRQ line polarity (high active or low active):
*/
- switch (mp_irqs[idx].irqflag & 3)
- {
- case 0: /* conforms, ie. bus-type dependent polarity */
- if (test_bit(bus, mp_bus_not_pci))
- polarity = default_ISA_polarity(idx);
- else
- polarity = default_PCI_polarity(idx);
- break;
- case 1: /* high active */
- {
- polarity = 0;
- break;
- }
- case 2: /* reserved */
- {
- pr_warn("broken BIOS!!\n");
- polarity = 1;
- break;
- }
- case 3: /* low active */
- {
- polarity = 1;
- break;
- }
- default: /* invalid */
- {
- pr_warn("broken BIOS!!\n");
- polarity = 1;
- break;
- }
+ switch (mp_irqs[idx].irqflag & 0x03) {
+ case 0:
+ /* conforms to spec, ie. bus-type dependent polarity */
+ if (test_bit(bus, mp_bus_not_pci))
+ return default_ISA_polarity(idx);
+ else
+ return default_PCI_polarity(idx);
+ case 1:
+ return IOAPIC_POL_HIGH;
+ case 2:
+ pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
+ case 3:
+ default: /* Pointless default required due to do gcc stupidity */
+ return IOAPIC_POL_LOW;
+ }
+}
+
+#ifdef CONFIG_EISA
+static int eisa_irq_trigger(int idx, int bus, int trigger)
+{
+ switch (mp_bus_id_to_type[bus]) {
+ case MP_BUS_PCI:
+ case MP_BUS_ISA:
+ return trigger;
+ case MP_BUS_EISA:
+ return default_EISA_trigger(idx);
}
- return polarity;
+ pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus);
+ return IOAPIC_LEVEL;
}
+#else
+static inline int eisa_irq_trigger(int idx, int bus, int trigger)
+{
+ return trigger;
+}
+#endif
static int irq_trigger(int idx)
{
@@ -879,153 +845,227 @@ static int irq_trigger(int idx)
/*
* Determine IRQ trigger mode (edge or level sensitive):
*/
- switch ((mp_irqs[idx].irqflag>>2) & 3)
- {
- case 0: /* conforms, ie. bus-type dependent */
- if (test_bit(bus, mp_bus_not_pci))
- trigger = default_ISA_trigger(idx);
- else
- trigger = default_PCI_trigger(idx);
-#ifdef CONFIG_EISA
- switch (mp_bus_id_to_type[bus]) {
- case MP_BUS_ISA: /* ISA pin */
- {
- /* set before the switch */
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- trigger = default_EISA_trigger(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /* set before the switch */
- break;
- }
- default:
- {
- pr_warn("broken BIOS!!\n");
- trigger = 1;
- break;
- }
- }
+ switch ((mp_irqs[idx].irqflag >> 2) & 0x03) {
+ case 0:
+ /* conforms to spec, ie. bus-type dependent trigger mode */
+ if (test_bit(bus, mp_bus_not_pci))
+ trigger = default_ISA_trigger(idx);
+ else
+ trigger = default_PCI_trigger(idx);
+ /* Take EISA into account */
+ return eisa_irq_trigger(idx, bus, trigger);
+ case 1:
+ return IOAPIC_EDGE;
+ case 2:
+ pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
+ case 3:
+ default: /* Pointless default required due to do gcc stupidity */
+ return IOAPIC_LEVEL;
+ }
+}
+
+void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node,
+ int trigger, int polarity)
+{
+ init_irq_alloc_info(info, NULL);
+ info->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
+ info->ioapic_node = node;
+ info->ioapic_trigger = trigger;
+ info->ioapic_polarity = polarity;
+ info->ioapic_valid = 1;
+}
+
+#ifndef CONFIG_ACPI
+int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity);
#endif
- break;
- case 1: /* edge */
- {
- trigger = 0;
- break;
- }
- case 2: /* reserved */
- {
- pr_warn("broken BIOS!!\n");
- trigger = 1;
- break;
- }
- case 3: /* level */
- {
- trigger = 1;
- break;
- }
- default: /* invalid */
- {
- pr_warn("broken BIOS!!\n");
- trigger = 0;
- break;
+
+static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst,
+ struct irq_alloc_info *src,
+ u32 gsi, int ioapic_idx, int pin)
+{
+ int trigger, polarity;
+
+ copy_irq_alloc_info(dst, src);
+ dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
+ dst->ioapic_id = mpc_ioapic_id(ioapic_idx);
+ dst->ioapic_pin = pin;
+ dst->ioapic_valid = 1;
+ if (src && src->ioapic_valid) {
+ dst->ioapic_node = src->ioapic_node;
+ dst->ioapic_trigger = src->ioapic_trigger;
+ dst->ioapic_polarity = src->ioapic_polarity;
+ } else {
+ dst->ioapic_node = NUMA_NO_NODE;
+ if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) {
+ dst->ioapic_trigger = trigger;
+ dst->ioapic_polarity = polarity;
+ } else {
+ /*
+ * PCI interrupts are always active low level
+ * triggered.
+ */
+ dst->ioapic_trigger = IOAPIC_LEVEL;
+ dst->ioapic_polarity = IOAPIC_POL_LOW;
}
}
- return trigger;
}
-static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin)
+static int ioapic_alloc_attr_node(struct irq_alloc_info *info)
+{
+ return (info && info->ioapic_valid) ? info->ioapic_node : NUMA_NO_NODE;
+}
+
+static void mp_register_handler(unsigned int irq, unsigned long trigger)
+{
+ irq_flow_handler_t hdl;
+ bool fasteoi;
+
+ if (trigger) {
+ irq_set_status_flags(irq, IRQ_LEVEL);
+ fasteoi = true;
+ } else {
+ irq_clear_status_flags(irq, IRQ_LEVEL);
+ fasteoi = false;
+ }
+
+ hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
+ __irq_set_handler(irq, hdl, 0, fasteoi ? "fasteoi" : "edge");
+}
+
+static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
{
+ struct mp_chip_data *data = irq_get_chip_data(irq);
+
+ /*
+ * setup_IO_APIC_irqs() programs all legacy IRQs with default trigger
+ * and polarity attirbutes. So allow the first user to reprogram the
+ * pin with real trigger and polarity attributes.
+ */
+ if (irq < nr_legacy_irqs() && data->count == 1) {
+ if (info->ioapic_trigger != data->trigger)
+ mp_register_handler(irq, data->trigger);
+ data->entry.trigger = data->trigger = info->ioapic_trigger;
+ data->entry.polarity = data->polarity = info->ioapic_polarity;
+ }
+
+ return data->trigger == info->ioapic_trigger &&
+ data->polarity == info->ioapic_polarity;
+}
+
+static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
+ struct irq_alloc_info *info)
+{
+ bool legacy = false;
int irq = -1;
- int ioapic = (int)(long)domain->host_data;
int type = ioapics[ioapic].irqdomain_cfg.type;
switch (type) {
case IOAPIC_DOMAIN_LEGACY:
/*
- * Dynamically allocate IRQ number for non-ISA IRQs in the first 16
- * GSIs on some weird platforms.
+ * Dynamically allocate IRQ number for non-ISA IRQs in the first
+ * 16 GSIs on some weird platforms.
*/
- if (gsi < nr_legacy_irqs())
- irq = irq_create_mapping(domain, pin);
- else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+ if (!ioapic_initialized || gsi >= nr_legacy_irqs())
irq = gsi;
+ legacy = mp_is_legacy_irq(irq);
break;
case IOAPIC_DOMAIN_STRICT:
- if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
- irq = gsi;
+ irq = gsi;
break;
case IOAPIC_DOMAIN_DYNAMIC:
- irq = irq_create_mapping(domain, pin);
break;
default:
WARN(1, "ioapic: unknown irqdomain type %d\n", type);
- break;
+ return -1;
+ }
+
+ return __irq_domain_alloc_irqs(domain, irq, 1,
+ ioapic_alloc_attr_node(info),
+ info, legacy);
+}
+
+/*
+ * Need special handling for ISA IRQs because there may be multiple IOAPIC pins
+ * sharing the same ISA IRQ number and irqdomain only supports 1:1 mapping
+ * between IOAPIC pin and IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are
+ * used for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
+ * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are available, and
+ * some BIOSes may use MP Interrupt Source records to override IRQ numbers for
+ * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be
+ * multiple pins sharing the same legacy IRQ number when ACPI is disabled.
+ */
+static int alloc_isa_irq_from_domain(struct irq_domain *domain,
+ int irq, int ioapic, int pin,
+ struct irq_alloc_info *info)
+{
+ struct mp_chip_data *data;
+ struct irq_data *irq_data = irq_get_irq_data(irq);
+ int node = ioapic_alloc_attr_node(info);
+
+ /*
+ * Legacy ISA IRQ has already been allocated, just add pin to
+ * the pin list assoicated with this IRQ and program the IOAPIC
+ * entry. The IOAPIC entry
+ */
+ if (irq_data && irq_data->parent_data) {
+ if (!mp_check_pin_attr(irq, info))
+ return -EBUSY;
+ if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic,
+ info->ioapic_pin))
+ return -ENOMEM;
+ } else {
+ irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true);
+ if (irq >= 0) {
+ irq_data = irq_domain_get_irq_data(domain, irq);
+ data = irq_data->chip_data;
+ data->isa_irq = true;
+ }
}
- return irq > 0 ? irq : -1;
+ return irq;
}
static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
- unsigned int flags)
+ unsigned int flags, struct irq_alloc_info *info)
{
int irq;
+ bool legacy = false;
+ struct irq_alloc_info tmp;
+ struct mp_chip_data *data;
struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
- struct mp_pin_info *info = mp_pin_info(ioapic, pin);
if (!domain)
- return -1;
+ return -ENOSYS;
- mutex_lock(&ioapic_mutex);
-
- /*
- * Don't use irqdomain to manage ISA IRQs because there may be
- * multiple IOAPIC pins sharing the same ISA IRQ number and
- * irqdomain only supports 1:1 mapping between IOAPIC pin and
- * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used
- * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
- * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are
- * available, and some BIOSes may use MP Interrupt Source records
- * to override IRQ numbers for PIRQs instead of reprogramming
- * the interrupt routing logic. Thus there may be multiple pins
- * sharing the same legacy IRQ number when ACPI is disabled.
- */
if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
irq = mp_irqs[idx].srcbusirq;
- if (flags & IOAPIC_MAP_ALLOC) {
- if (info->count == 0 &&
- mp_irqdomain_map(domain, irq, pin) != 0)
- irq = -1;
+ legacy = mp_is_legacy_irq(irq);
+ }
- /* special handling for timer IRQ0 */
+ mutex_lock(&ioapic_mutex);
+ if (!(flags & IOAPIC_MAP_ALLOC)) {
+ if (!legacy) {
+ irq = irq_find_mapping(domain, pin);
if (irq == 0)
- info->count++;
+ irq = -ENOENT;
}
} else {
- irq = irq_find_mapping(domain, pin);
- if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC))
- irq = alloc_irq_from_domain(domain, gsi, pin);
- }
-
- if (flags & IOAPIC_MAP_ALLOC) {
- /* special handling for legacy IRQs */
- if (irq < nr_legacy_irqs() && info->count == 1 &&
- mp_irqdomain_map(domain, irq, pin) != 0)
- irq = -1;
-
- if (irq > 0)
- info->count++;
- else if (info->count == 0)
- info->set = 0;
+ ioapic_copy_alloc_attr(&tmp, info, gsi, ioapic, pin);
+ if (legacy)
+ irq = alloc_isa_irq_from_domain(domain, irq,
+ ioapic, pin, &tmp);
+ else if ((irq = irq_find_mapping(domain, pin)) == 0)
+ irq = alloc_irq_from_domain(domain, ioapic, gsi, &tmp);
+ else if (!mp_check_pin_attr(irq, &tmp))
+ irq = -EBUSY;
+ if (irq >= 0) {
+ data = irq_get_chip_data(irq);
+ data->count++;
+ }
}
-
mutex_unlock(&ioapic_mutex);
- return irq > 0 ? irq : -1;
+ return irq;
}
static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
@@ -1058,10 +1098,10 @@ static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
}
#endif
- return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL);
}
-int mp_map_gsi_to_irq(u32 gsi, unsigned int flags)
+int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info)
{
int ioapic, pin, idx;
@@ -1074,31 +1114,24 @@ int mp_map_gsi_to_irq(u32 gsi, unsigned int flags)
if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
return -1;
- return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info);
}
void mp_unmap_irq(int irq)
{
- struct irq_data *data = irq_get_irq_data(irq);
- struct mp_pin_info *info;
- int ioapic, pin;
+ struct irq_data *irq_data = irq_get_irq_data(irq);
+ struct mp_chip_data *data;
- if (!data || !data->domain)
+ if (!irq_data || !irq_data->domain)
return;
- ioapic = (int)(long)data->domain->host_data;
- pin = (int)data->hwirq;
- info = mp_pin_info(ioapic, pin);
+ data = irq_data->chip_data;
+ if (!data || data->isa_irq)
+ return;
mutex_lock(&ioapic_mutex);
- if (--info->count == 0) {
- info->set = 0;
- if (irq < nr_legacy_irqs() &&
- ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY)
- mp_irqdomain_unmap(data->domain, irq);
- else
- irq_dispose_mapping(irq);
- }
+ if (--data->count == 0)
+ irq_domain_free_irqs(irq, 1);
mutex_unlock(&ioapic_mutex);
}
@@ -1165,7 +1198,7 @@ out:
}
EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-static struct irq_chip ioapic_chip;
+static struct irq_chip ioapic_chip, ioapic_ir_chip;
#ifdef CONFIG_X86_32
static inline int IO_APIC_irq_trigger(int irq)
@@ -1189,96 +1222,6 @@ static inline int IO_APIC_irq_trigger(int irq)
}
#endif
-static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
- unsigned long trigger)
-{
- struct irq_chip *chip = &ioapic_chip;
- irq_flow_handler_t hdl;
- bool fasteoi;
-
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL) {
- irq_set_status_flags(irq, IRQ_LEVEL);
- fasteoi = true;
- } else {
- irq_clear_status_flags(irq, IRQ_LEVEL);
- fasteoi = false;
- }
-
- if (setup_remapped_irq(irq, cfg, chip))
- fasteoi = trigger != 0;
-
- hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
- irq_set_chip_and_handler_name(irq, chip, hdl,
- fasteoi ? "fasteoi" : "edge");
-}
-
-int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
-{
- memset(entry, 0, sizeof(*entry));
-
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->dest = destination;
- entry->vector = vector;
- entry->mask = 0; /* enable IRQ */
- entry->trigger = attr->trigger;
- entry->polarity = attr->polarity;
-
- /*
- * Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
- */
- if (attr->trigger)
- entry->mask = 1;
-
- return 0;
-}
-
-static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
- struct io_apic_irq_attr *attr)
-{
- struct IO_APIC_route_entry entry;
- unsigned int dest;
-
- if (!IO_APIC_IRQ(irq))
- return;
-
- if (assign_irq_vector(irq, cfg, apic->target_cpus()))
- return;
-
- if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(),
- &dest)) {
- pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
- mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
- clear_irq_vector(irq, cfg);
-
- return;
- }
-
- apic_printk(APIC_VERBOSE,KERN_DEBUG
- "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
- "IRQ %d Mode:%i Active:%i Dest:%d)\n",
- attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
- cfg->vector, irq, attr->trigger, attr->polarity, dest);
-
- if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
- pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
- mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
- clear_irq_vector(irq, cfg);
-
- return;
- }
-
- ioapic_register_intr(irq, cfg, attr->trigger);
- if (irq < nr_legacy_irqs())
- legacy_pic->mask(irq);
-
- ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
-}
-
static void __init setup_IO_APIC_irqs(void)
{
unsigned int ioapic, pin;
@@ -1298,106 +1241,41 @@ static void __init setup_IO_APIC_irqs(void)
}
}
-/*
- * Set up the timer pin, possibly with the 8259A-master behind.
- */
-static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
- unsigned int pin, int vector)
-{
- struct IO_APIC_route_entry entry;
- unsigned int dest;
-
- memset(&entry, 0, sizeof(entry));
-
- /*
- * We use logical delivery to get the timer IRQ
- * to the first CPU.
- */
- if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(),
- apic->target_cpus(), &dest)))
- dest = BAD_APICID;
-
- entry.dest_mode = apic->irq_dest_mode;
- entry.mask = 0; /* don't mask IRQ for edge */
- entry.dest = dest;
- entry.delivery_mode = apic->irq_delivery_mode;
- entry.polarity = 0;
- entry.trigger = 0;
- entry.vector = vector;
-
- /*
- * The timer IRQ doesn't have to know that behind the
- * scene we may have a 8259A-master in AEOI mode ...
- */
- irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
- "edge");
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- ioapic_write_entry(ioapic_idx, pin, entry);
-}
-
-void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
+void ioapic_zap_locks(void)
{
- int i;
-
- pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
-
- for (i = 0; i <= nr_entries; i++) {
- struct IO_APIC_route_entry entry;
-
- entry = ioapic_read_entry(apic, i);
-
- pr_debug(" %02x %02X ", i, entry.dest);
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector);
- }
+ raw_spin_lock_init(&ioapic_lock);
}
-void intel_ir_io_apic_print_entries(unsigned int apic,
- unsigned int nr_entries)
+static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
{
int i;
+ char buf[256];
+ struct IO_APIC_route_entry entry;
+ struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry;
- pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
-
+ printk(KERN_DEBUG "IOAPIC %d:\n", apic);
for (i = 0; i <= nr_entries; i++) {
- struct IR_IO_APIC_route_entry *ir_entry;
- struct IO_APIC_route_entry entry;
-
entry = ioapic_read_entry(apic, i);
-
- ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
-
- pr_debug(" %02x %04X ", i, ir_entry->index);
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %X %02X\n",
- ir_entry->format,
- ir_entry->mask,
- ir_entry->trigger,
- ir_entry->irr,
- ir_entry->polarity,
- ir_entry->delivery_status,
- ir_entry->index2,
- ir_entry->zero,
- ir_entry->vector);
+ snprintf(buf, sizeof(buf),
+ " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)",
+ i,
+ entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ",
+ entry.trigger == IOAPIC_LEVEL ? "level" : "edge ",
+ entry.polarity == IOAPIC_POL_LOW ? "low " : "high",
+ entry.vector, entry.irr, entry.delivery_status);
+ if (ir_entry->format)
+ printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n",
+ buf, (ir_entry->index << 15) | ir_entry->index,
+ ir_entry->zero);
+ else
+ printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n",
+ buf,
+ entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ?
+ "logical " : "physical",
+ entry.dest, entry.delivery_mode);
}
}
-void ioapic_zap_locks(void)
-{
- raw_spin_lock_init(&ioapic_lock);
-}
-
static void __init print_IO_APIC(int ioapic_idx)
{
union IO_APIC_reg_00 reg_00;
@@ -1451,16 +1329,13 @@ static void __init print_IO_APIC(int ioapic_idx)
}
printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
- x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
+ io_apic_print_entries(ioapic_idx, reg_01.bits.entries);
}
void __init print_IO_APICs(void)
{
int ioapic_idx;
- struct irq_cfg *cfg;
unsigned int irq;
- struct irq_chip *chip;
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for_each_ioapic(ioapic_idx)
@@ -1480,18 +1355,20 @@ void __init print_IO_APICs(void)
printk(KERN_DEBUG "IRQ to pin mappings:\n");
for_each_active_irq(irq) {
struct irq_pin_list *entry;
+ struct irq_chip *chip;
+ struct mp_chip_data *data;
chip = irq_get_chip(irq);
- if (chip != &ioapic_chip)
+ if (chip != &ioapic_chip && chip != &ioapic_ir_chip)
continue;
-
- cfg = irq_cfg(irq);
- if (!cfg)
+ data = irq_get_chip_data(irq);
+ if (!data)
continue;
- if (list_empty(&cfg->irq_2_pin))
+ if (list_empty(&data->irq_2_pin))
continue;
+
printk(KERN_DEBUG "IRQ%d ", irq);
- for_each_irq_pin(entry, cfg->irq_2_pin)
+ for_each_irq_pin(entry, data->irq_2_pin)
pr_cont("-> %d:%d", entry->apic, entry->pin);
pr_cont("\n");
}
@@ -1564,15 +1441,12 @@ void native_disable_io_apic(void)
struct IO_APIC_route_entry entry;
memset(&entry, 0, sizeof(entry));
- entry.mask = 0; /* Enabled */
- entry.trigger = 0; /* Edge */
- entry.irr = 0;
- entry.polarity = 0; /* High */
- entry.delivery_status = 0;
- entry.dest_mode = 0; /* Physical */
- entry.delivery_mode = dest_ExtINT; /* ExtInt */
- entry.vector = 0;
- entry.dest = read_apic_id();
+ entry.mask = IOAPIC_UNMASKED;
+ entry.trigger = IOAPIC_EDGE;
+ entry.polarity = IOAPIC_POL_HIGH;
+ entry.dest_mode = IOAPIC_DEST_MODE_PHYSICAL;
+ entry.delivery_mode = dest_ExtINT;
+ entry.dest = read_apic_id();
/*
* Add it to the IO-APIC irq-routing table:
@@ -1582,7 +1456,6 @@ void native_disable_io_apic(void)
if (cpu_has_apic || apic_from_smp_config())
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-
}
/*
@@ -1792,7 +1665,6 @@ static int __init timer_irq_works(void)
* This is not complete - we should be able to fake
* an edge even if it isn't on the 8259A...
*/
-
static unsigned int startup_ioapic_irq(struct irq_data *data)
{
int was_pending = 0, irq = data->irq;
@@ -1804,74 +1676,22 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
if (legacy_pic->irq_pending(irq))
was_pending = 1;
}
- __unmask_ioapic(irqd_cfg(data));
+ __unmask_ioapic(data->chip_data);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return was_pending;
}
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
- u8 vector = cfg->vector;
-
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
-
- apic = entry->apic;
- pin = entry->pin;
-
- io_apic_write(apic, 0x11 + pin*2, dest);
- reg = io_apic_read(apic, 0x10 + pin*2);
- reg &= ~IO_APIC_REDIR_VECTOR_MASK;
- reg |= vector;
- io_apic_modify(apic, 0x10 + pin*2, reg);
- }
-}
-
-int native_ioapic_set_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force)
-{
- unsigned int dest, irq = data->irq;
- unsigned long flags;
- int ret;
-
- if (!config_enabled(CONFIG_SMP))
- return -EPERM;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- ret = apic_set_affinity(data, mask, &dest);
- if (!ret) {
- /* Only the high 8 bits are valid. */
- dest = SET_APIC_LOGICAL_ID(dest);
- __target_IO_APIC_irq(irq, dest, irqd_cfg(data));
- ret = IRQ_SET_MASK_OK_NOCOPY;
- }
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- return ret;
-}
-
atomic_t irq_mis_count;
#ifdef CONFIG_GENERIC_PENDING_IRQ
-static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+static bool io_apic_level_ack_pending(struct mp_chip_data *data)
{
struct irq_pin_list *entry;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- for_each_irq_pin(entry, cfg->irq_2_pin) {
+ for_each_irq_pin(entry, data->irq_2_pin) {
unsigned int reg;
int pin;
@@ -1888,18 +1708,17 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
return false;
}
-static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+static inline bool ioapic_irqd_mask(struct irq_data *data)
{
/* If we are moving the irq we need to mask it */
if (unlikely(irqd_is_setaffinity_pending(data))) {
- mask_ioapic(cfg);
+ mask_ioapic_irq(data);
return true;
}
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data,
- struct irq_cfg *cfg, bool masked)
+static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
{
if (unlikely(masked)) {
/* Only migrate the irq if the ack has been received.
@@ -1928,31 +1747,30 @@ static inline void ioapic_irqd_unmask(struct irq_data *data,
* accurate and is causing problems then it is a hardware bug
* and you can go talk to the chipset vendor about it.
*/
- if (!io_apic_level_ack_pending(cfg))
+ if (!io_apic_level_ack_pending(data->chip_data))
irq_move_masked_irq(data);
- unmask_ioapic(cfg);
+ unmask_ioapic_irq(data);
}
}
#else
-static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+static inline bool ioapic_irqd_mask(struct irq_data *data)
{
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data,
- struct irq_cfg *cfg, bool masked)
+static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
{
}
#endif
-static void ack_ioapic_level(struct irq_data *data)
+static void ioapic_ack_level(struct irq_data *irq_data)
{
- struct irq_cfg *cfg = irqd_cfg(data);
- int i, irq = data->irq;
+ struct irq_cfg *cfg = irqd_cfg(irq_data);
unsigned long v;
bool masked;
+ int i;
irq_complete_move(cfg);
- masked = ioapic_irqd_mask(data, cfg);
+ masked = ioapic_irqd_mask(irq_data);
/*
* It appears there is an erratum which affects at least version 0x11
@@ -2004,11 +1822,49 @@ static void ack_ioapic_level(struct irq_data *data)
*/
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
+ eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
+ }
+
+ ioapic_irqd_unmask(irq_data, masked);
+}
+
+static void ioapic_ir_ack_level(struct irq_data *irq_data)
+{
+ struct mp_chip_data *data = irq_data->chip_data;
+
+ /*
+ * Intr-remapping uses pin number as the virtual vector
+ * in the RTE. Actual vector is programmed in
+ * intr-remapping table entry. Hence for the io-apic
+ * EOI we use the pin number.
+ */
+ ack_APIC_irq();
+ eoi_ioapic_pin(data->entry.vector, data);
+}
- eoi_ioapic_irq(irq, cfg);
+static int ioapic_set_affinity(struct irq_data *irq_data,
+ const struct cpumask *mask, bool force)
+{
+ struct irq_data *parent = irq_data->parent_data;
+ struct mp_chip_data *data = irq_data->chip_data;
+ struct irq_pin_list *entry;
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ int ret;
+
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
+ cfg = irqd_cfg(irq_data);
+ data->entry.dest = cfg->dest_apicid;
+ data->entry.vector = cfg->vector;
+ for_each_irq_pin(entry, data->irq_2_pin)
+ __ioapic_write_entry(entry->apic, entry->pin,
+ data->entry);
}
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- ioapic_irqd_unmask(data, cfg, masked);
+ return ret;
}
static struct irq_chip ioapic_chip __read_mostly = {
@@ -2016,10 +1872,20 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_startup = startup_ioapic_irq,
.irq_mask = mask_ioapic_irq,
.irq_unmask = unmask_ioapic_irq,
- .irq_ack = apic_ack_edge,
- .irq_eoi = ack_ioapic_level,
- .irq_set_affinity = native_ioapic_set_affinity,
- .irq_retrigger = apic_retrigger_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_eoi = ioapic_ack_level,
+ .irq_set_affinity = ioapic_set_affinity,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+static struct irq_chip ioapic_ir_chip __read_mostly = {
+ .name = "IR-IO-APIC",
+ .irq_startup = startup_ioapic_irq,
+ .irq_mask = mask_ioapic_irq,
+ .irq_unmask = unmask_ioapic_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_eoi = ioapic_ir_ack_level,
+ .irq_set_affinity = ioapic_set_affinity,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -2113,12 +1979,12 @@ static inline void __init unlock_ExtINT_logic(void)
memset(&entry1, 0, sizeof(entry1));
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL;
+ entry1.mask = IOAPIC_UNMASKED;
entry1.dest = hard_smp_processor_id();
entry1.delivery_mode = dest_ExtINT;
entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
+ entry1.trigger = IOAPIC_EDGE;
entry1.vector = 0;
ioapic_write_entry(apic, pin, entry1);
@@ -2152,6 +2018,25 @@ static int __init disable_timer_pin_setup(char *arg)
}
early_param("disable_timer_pin_1", disable_timer_pin_setup);
+static int mp_alloc_timer_irq(int ioapic, int pin)
+{
+ int irq = -1;
+ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+
+ if (domain) {
+ struct irq_alloc_info info;
+
+ ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);
+ info.ioapic_id = mpc_ioapic_id(ioapic);
+ info.ioapic_pin = pin;
+ mutex_lock(&ioapic_mutex);
+ irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info);
+ mutex_unlock(&ioapic_mutex);
+ }
+
+ return irq;
+}
+
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -2162,7 +2047,9 @@ early_param("disable_timer_pin_1", disable_timer_pin_setup);
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = irq_cfg(0);
+ struct irq_data *irq_data = irq_get_irq_data(0);
+ struct mp_chip_data *data = irq_data->chip_data;
+ struct irq_cfg *cfg = irqd_cfg(irq_data);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
@@ -2174,7 +2061,6 @@ static inline void __init check_timer(void)
* get/set the timer IRQ vector:
*/
legacy_pic->mask(0);
- assign_irq_vector(0, cfg, apic->target_cpus());
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2215,23 +2101,21 @@ static inline void __init check_timer(void)
}
if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
+ /* Ok, does IRQ0 through the IOAPIC work? */
if (no_pin1) {
- add_pin_to_irq_node(cfg, node, apic1, pin1);
- setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+ mp_alloc_timer_irq(apic1, pin1);
} else {
- /* for edge trigger, setup_ioapic_irq already
- * leave it unmasked.
+ /*
+ * for edge trigger, it's already unmasked,
* so only need to unmask if it is level-trigger
* do we really have level trigger timer?
*/
int idx;
idx = find_irq_entry(apic1, pin1, mp_INT);
if (idx != -1 && irq_trigger(idx))
- unmask_ioapic(cfg);
+ unmask_ioapic_irq(irq_get_chip_data(0));
}
+ irq_domain_activate_irq(irq_data);
if (timer_irq_works()) {
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
@@ -2251,8 +2135,8 @@ static inline void __init check_timer(void)
/*
* legacy devices should be connected to IO APIC #0
*/
- replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
- setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
+ irq_domain_activate_irq(irq_data);
legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2329,36 +2213,35 @@ out:
static int mp_irqdomain_create(int ioapic)
{
- size_t size;
+ struct irq_alloc_info info;
+ struct irq_domain *parent;
int hwirqs = mp_ioapic_pin_count(ioapic);
struct ioapic *ip = &ioapics[ioapic];
struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
- size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic);
- ip->pin_info = kzalloc(size, GFP_KERNEL);
- if (!ip->pin_info)
- return -ENOMEM;
-
if (cfg->type == IOAPIC_DOMAIN_INVALID)
return 0;
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_IOAPIC;
+ info.ioapic_id = mpc_ioapic_id(ioapic);
+ parent = irq_remapping_get_ir_irq_domain(&info);
+ if (!parent)
+ parent = x86_vector_domain;
+
ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops,
(void *)(long)ioapic);
- if(!ip->irqdomain) {
- kfree(ip->pin_info);
- ip->pin_info = NULL;
+ if (!ip->irqdomain)
return -ENOMEM;
- }
+
+ ip->irqdomain->parent = parent;
if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
cfg->type == IOAPIC_DOMAIN_STRICT)
ioapic_dynirq_base = max(ioapic_dynirq_base,
gsi_cfg->gsi_end + 1);
- if (gsi_cfg->gsi_base == 0)
- irq_set_default_host(ip->irqdomain);
-
return 0;
}
@@ -2368,8 +2251,6 @@ static void ioapic_destroy_irqdomain(int idx)
irq_domain_remove(ioapics[idx].irqdomain);
ioapics[idx].irqdomain = NULL;
}
- kfree(ioapics[idx].pin_info);
- ioapics[idx].pin_info = NULL;
}
void __init setup_IO_APIC(void)
@@ -2399,20 +2280,6 @@ void __init setup_IO_APIC(void)
ioapic_initialized = 1;
}
-/*
- * Called after all the initialization is done. If we didn't find any
- * APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
- if (sis_apic_bug == -1)
- sis_apic_bug = 0;
- return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
static void resume_ioapic_id(int ioapic_idx)
{
unsigned long flags;
@@ -2451,20 +2318,6 @@ static int __init ioapic_init_ops(void)
device_initcall(ioapic_init_ops);
-static int
-io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
-{
- struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
- int ret;
-
- if (!cfg)
- return -EINVAL;
- ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
- if (!ret)
- setup_ioapic_irq(irq, cfg, attr);
- return ret;
-}
-
static int io_apic_get_redir_entries(int ioapic)
{
union IO_APIC_reg_01 reg_01;
@@ -2692,7 +2545,7 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();
- x86_io_apic_ops.set_affinity(idata, mask, false);
+ irq_set_affinity(irq, mask);
}
}
@@ -2737,7 +2590,7 @@ static struct resource * __init ioapic_setup_resources(void)
return res;
}
-void __init native_io_apic_init_mappings(void)
+void __init io_apic_init_mappings(void)
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
struct resource *ioapic_res;
@@ -2962,7 +2815,6 @@ int mp_unregister_ioapic(u32 gsi_base)
{
int ioapic, pin;
int found = 0;
- struct mp_pin_info *pin_info;
for_each_ioapic(ioapic)
if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
@@ -2975,11 +2827,17 @@ int mp_unregister_ioapic(u32 gsi_base)
}
for_each_pin(ioapic, pin) {
- pin_info = mp_pin_info(ioapic, pin);
- if (pin_info->count) {
- pr_warn("pin%d on IOAPIC%d is still in use.\n",
- pin, ioapic);
- return -EBUSY;
+ u32 gsi = mp_pin_to_gsi(ioapic, pin);
+ int irq = mp_map_gsi_to_irq(gsi, 0, NULL);
+ struct mp_chip_data *data;
+
+ if (irq >= 0) {
+ data = irq_get_chip_data(irq);
+ if (data && data->count) {
+ pr_warn("pin%d on IOAPIC%d is still in use.\n",
+ pin, ioapic);
+ return -EBUSY;
+ }
}
}
@@ -3006,108 +2864,141 @@ int mp_ioapic_registered(u32 gsi_base)
return 0;
}
-static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
- int ioapic, int ioapic_pin,
- int trigger, int polarity)
+static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data,
+ struct irq_alloc_info *info)
{
- irq_attr->ioapic = ioapic;
- irq_attr->ioapic_pin = ioapic_pin;
- irq_attr->trigger = trigger;
- irq_attr->polarity = polarity;
+ if (info && info->ioapic_valid) {
+ data->trigger = info->ioapic_trigger;
+ data->polarity = info->ioapic_polarity;
+ } else if (acpi_get_override_irq(gsi, &data->trigger,
+ &data->polarity) < 0) {
+ /* PCI interrupts are always active low level triggered. */
+ data->trigger = IOAPIC_LEVEL;
+ data->polarity = IOAPIC_POL_LOW;
+ }
}
-int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq)
+static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data,
+ struct IO_APIC_route_entry *entry)
{
- int ioapic = (int)(long)domain->host_data;
- struct mp_pin_info *info = mp_pin_info(ioapic, hwirq);
- struct io_apic_irq_attr attr;
+ memset(entry, 0, sizeof(*entry));
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->dest = cfg->dest_apicid;
+ entry->vector = cfg->vector;
+ entry->trigger = data->trigger;
+ entry->polarity = data->polarity;
+ /*
+ * Mask level triggered irqs. Edge triggered irqs are masked
+ * by the irq core code in case they fire.
+ */
+ if (data->trigger == IOAPIC_LEVEL)
+ entry->mask = IOAPIC_MASKED;
+ else
+ entry->mask = IOAPIC_UNMASKED;
+}
- /* Get default attribute if not set by caller yet */
- if (!info->set) {
- u32 gsi = mp_pin_to_gsi(ioapic, hwirq);
+int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ int ret, ioapic, pin;
+ struct irq_cfg *cfg;
+ struct irq_data *irq_data;
+ struct mp_chip_data *data;
+ struct irq_alloc_info *info = arg;
- if (acpi_get_override_irq(gsi, &info->trigger,
- &info->polarity) < 0) {
- /*
- * PCI interrupts are always polarity one level
- * triggered.
- */
- info->trigger = 1;
- info->polarity = 1;
- }
- info->node = NUMA_NO_NODE;
+ if (!info || nr_irqs > 1)
+ return -EINVAL;
+ irq_data = irq_domain_get_irq_data(domain, virq);
+ if (!irq_data)
+ return -EINVAL;
- /*
- * setup_IO_APIC_irqs() programs all legacy IRQs with default
- * trigger and polarity attributes. Don't set the flag for that
- * case so the first legacy IRQ user could reprogram the pin
- * with real trigger and polarity attributes.
- */
- if (virq >= nr_legacy_irqs() || info->count)
- info->set = 1;
- }
- set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger,
- info->polarity);
+ ioapic = mp_irqdomain_ioapic_idx(domain);
+ pin = info->ioapic_pin;
+ if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0)
+ return -EEXIST;
- return io_apic_setup_irq_pin(virq, info->node, &attr);
-}
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
-void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq)
-{
- struct irq_data *data = irq_get_irq_data(virq);
- struct irq_cfg *cfg = irq_cfg(virq);
- int ioapic = (int)(long)domain->host_data;
- int pin = (int)data->hwirq;
+ info->ioapic_entry = &data->entry;
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
+ if (ret < 0) {
+ kfree(data);
+ return ret;
+ }
+
+ INIT_LIST_HEAD(&data->irq_2_pin);
+ irq_data->hwirq = info->ioapic_pin;
+ irq_data->chip = (domain->parent == x86_vector_domain) ?
+ &ioapic_chip : &ioapic_ir_chip;
+ irq_data->chip_data = data;
+ mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info);
+
+ cfg = irqd_cfg(irq_data);
+ add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+ if (info->ioapic_entry)
+ mp_setup_entry(cfg, data, info->ioapic_entry);
+ mp_register_handler(virq, data->trigger);
+ if (virq < nr_legacy_irqs())
+ legacy_pic->mask(virq);
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n",
+ ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector,
+ virq, data->trigger, data->polarity, cfg->dest_apicid);
- ioapic_mask_entry(ioapic, pin);
- __remove_pin_from_irq(cfg, ioapic, pin);
- WARN_ON(!list_empty(&cfg->irq_2_pin));
- arch_teardown_hwirq(virq);
+ return 0;
}
-int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
+void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
{
- int ret = 0;
- int ioapic, pin;
- struct mp_pin_info *info;
+ struct irq_data *irq_data;
+ struct mp_chip_data *data;
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return -ENODEV;
-
- pin = mp_find_ioapic_pin(ioapic, gsi);
- info = mp_pin_info(ioapic, pin);
- trigger = trigger ? 1 : 0;
- polarity = polarity ? 1 : 0;
-
- mutex_lock(&ioapic_mutex);
- if (!info->set) {
- info->trigger = trigger;
- info->polarity = polarity;
- info->node = node;
- info->set = 1;
- } else if (info->trigger != trigger || info->polarity != polarity) {
- ret = -EBUSY;
+ BUG_ON(nr_irqs != 1);
+ irq_data = irq_domain_get_irq_data(domain, virq);
+ if (irq_data && irq_data->chip_data) {
+ data = irq_data->chip_data;
+ __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain),
+ (int)irq_data->hwirq);
+ WARN_ON(!list_empty(&data->irq_2_pin));
+ kfree(irq_data->chip_data);
}
- mutex_unlock(&ioapic_mutex);
-
- return ret;
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
-/* Enable IOAPIC early just for system timer */
-void __init pre_init_apic_IRQ0(void)
+void mp_irqdomain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data)
{
- struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
+ unsigned long flags;
+ struct irq_pin_list *entry;
+ struct mp_chip_data *data = irq_data->chip_data;
- printk(KERN_INFO "Early APIC setup for system timer0\n");
-#ifndef CONFIG_SMP
- physid_set_mask_of_physid(boot_cpu_physical_apicid,
- &phys_cpu_present_map);
-#endif
- setup_local_APIC();
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ for_each_irq_pin(entry, data->irq_2_pin)
+ __ioapic_write_entry(entry->apic, entry->pin, data->entry);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
- io_apic_setup_irq_pin(0, 0, &attr);
- irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
- "edge");
+void mp_irqdomain_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ /* It won't be called for IRQ with multiple IOAPIC pins associated */
+ ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain),
+ (int)irq_data->hwirq);
+}
+
+int mp_irqdomain_ioapic_idx(struct irq_domain *domain)
+{
+ return (int)(long)domain->host_data;
}
+
+const struct irq_domain_ops mp_ioapic_irqdomain_ops = {
+ .alloc = mp_irqdomain_alloc,
+ .free = mp_irqdomain_free,
+ .activate = mp_irqdomain_activate,
+ .deactivate = mp_irqdomain_deactivate,
+};
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index d6ba2d6..1a9d735 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -3,6 +3,8 @@
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
+ * Jiang Liu <jiang.liu@linux.intel.com>
+ * Convert to hierarchical irqdomain
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -14,22 +16,23 @@
#include <linux/dmar.h>
#include <linux/hpet.h>
#include <linux/msi.h>
+#include <asm/irqdomain.h>
#include <asm/msidef.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
-void native_compose_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id)
+static struct irq_domain *msi_default_domain;
+
+static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
- struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_cfg *cfg = irqd_cfg(data);
msg->address_hi = MSI_ADDR_BASE_HI;
if (x2apic_enabled())
- msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
+ msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
msg->address_lo =
MSI_ADDR_BASE_LO |
@@ -39,7 +42,7 @@ void native_compose_msi_msg(struct pci_dev *pdev,
((apic->irq_delivery_mode != dest_LowestPrio) ?
MSI_ADDR_REDIRECTION_CPU :
MSI_ADDR_REDIRECTION_LOWPRI) |
- MSI_ADDR_DEST_ID(dest);
+ MSI_ADDR_DEST_ID(cfg->dest_apicid);
msg->data =
MSI_DATA_TRIGGER_EDGE |
@@ -50,180 +53,201 @@ void native_compose_msi_msg(struct pci_dev *pdev,
MSI_DATA_VECTOR(cfg->vector);
}
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
- struct msi_msg *msg, u8 hpet_id)
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip pci_msi_controller = {
+ .name = "PCI-MSI",
+ .irq_unmask = pci_msi_unmask_irq,
+ .irq_mask = pci_msi_mask_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_compose_msi_msg = irq_msi_compose_msg,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- struct irq_cfg *cfg;
- int err;
- unsigned dest;
+ struct irq_domain *domain;
+ struct irq_alloc_info info;
- if (disable_apic)
- return -ENXIO;
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_MSI;
+ info.msi_dev = dev;
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (err)
- return err;
+ domain = irq_remapping_get_irq_domain(&info);
+ if (domain == NULL)
+ domain = msi_default_domain;
+ if (domain == NULL)
+ return -ENOSYS;
- err = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus(), &dest);
- if (err)
- return err;
+ return pci_msi_domain_alloc_irqs(domain, dev, nvec, type);
+}
- x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+void native_teardown_msi_irq(unsigned int irq)
+{
+ irq_domain_free_irqs(irq, 1);
+}
- return 0;
+static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info,
+ msi_alloc_info_t *arg)
+{
+ return arg->msi_hwirq;
}
-static int
-msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+static int pci_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg)
{
- struct irq_cfg *cfg = irqd_cfg(data);
- struct msi_msg msg;
- unsigned int dest;
- int ret;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct msi_desc *desc = first_pci_msi_entry(pdev);
+
+ init_irq_alloc_info(arg, NULL);
+ arg->msi_dev = pdev;
+ if (desc->msi_attrib.is_msix) {
+ arg->type = X86_IRQ_ALLOC_TYPE_MSIX;
+ } else {
+ arg->type = X86_IRQ_ALLOC_TYPE_MSI;
+ arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
+ }
- ret = apic_set_affinity(data, mask, &dest);
- if (ret)
- return ret;
+ return 0;
+}
- __get_cached_msi_msg(data->msi_desc, &msg);
+static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
+{
+ arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc);
+}
+
+static struct msi_domain_ops pci_msi_domain_ops = {
+ .get_hwirq = pci_msi_get_hwirq,
+ .msi_prepare = pci_msi_prepare,
+ .set_desc = pci_msi_set_desc,
+};
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+static struct msi_domain_info pci_msi_domain_info = {
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSIX,
+ .ops = &pci_msi_domain_ops,
+ .chip = &pci_msi_controller,
+ .handler = handle_edge_irq,
+ .handler_name = "edge",
+};
- __pci_write_msi_msg(data->msi_desc, &msg);
+void arch_init_msi_domain(struct irq_domain *parent)
+{
+ if (disable_apic)
+ return;
- return IRQ_SET_MASK_OK_NOCOPY;
+ msi_default_domain = pci_msi_create_irq_domain(NULL,
+ &pci_msi_domain_info, parent);
+ if (!msi_default_domain)
+ pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
}
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
- .name = "PCI-MSI",
+#ifdef CONFIG_IRQ_REMAP
+static struct irq_chip pci_msi_ir_controller = {
+ .name = "IR-PCI-MSI",
.irq_unmask = pci_msi_unmask_irq,
.irq_mask = pci_msi_mask_irq,
- .irq_ack = apic_ack_edge,
- .irq_set_affinity = msi_set_affinity,
- .irq_retrigger = apic_retrigger_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- unsigned int irq_base, unsigned int irq_offset)
-{
- struct irq_chip *chip = &msi_chip;
- struct msi_msg msg;
- unsigned int irq = irq_base + irq_offset;
- int ret;
-
- ret = msi_compose_msg(dev, irq, &msg, -1);
- if (ret < 0)
- return ret;
-
- irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
-
- /*
- * MSI-X message is written per-IRQ, the offset is always 0.
- * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
- */
- if (!irq_offset)
- pci_write_msi_msg(irq, &msg);
+static struct msi_domain_info pci_msi_ir_domain_info = {
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
+ .ops = &pci_msi_domain_ops,
+ .chip = &pci_msi_ir_controller,
+ .handler = handle_edge_irq,
+ .handler_name = "edge",
+};
- setup_remapped_irq(irq, irq_cfg(irq), chip);
+struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent)
+{
+ return pci_msi_create_irq_domain(NULL, &pci_msi_ir_domain_info, parent);
+}
+#endif
- irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+#ifdef CONFIG_DMAR_TABLE
+static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ dmar_msi_write(data->irq, msg);
+}
- dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq);
+static struct irq_chip dmar_msi_controller = {
+ .name = "DMAR-MSI",
+ .irq_unmask = dmar_msi_unmask,
+ .irq_mask = dmar_msi_mask,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_set_affinity = msi_domain_set_affinity,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_compose_msi_msg = irq_msi_compose_msg,
+ .irq_write_msi_msg = dmar_msi_write_msg,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
- return 0;
+static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info,
+ msi_alloc_info_t *arg)
+{
+ return arg->dmar_id;
}
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+static int dmar_msi_init(struct irq_domain *domain,
+ struct msi_domain_info *info, unsigned int virq,
+ irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
- struct msi_desc *msidesc;
- unsigned int irq;
- int node, ret;
+ irq_domain_set_info(domain, virq, arg->dmar_id, info->chip, NULL,
+ handle_edge_irq, arg->dmar_data, "edge");
- /* Multiple MSI vectors only supported with interrupt remapping */
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
+ return 0;
+}
- node = dev_to_node(&dev->dev);
+static struct msi_domain_ops dmar_msi_domain_ops = {
+ .get_hwirq = dmar_msi_get_hwirq,
+ .msi_init = dmar_msi_init,
+};
- list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = irq_alloc_hwirq(node);
- if (!irq)
- return -ENOSPC;
+static struct msi_domain_info dmar_msi_domain_info = {
+ .ops = &dmar_msi_domain_ops,
+ .chip = &dmar_msi_controller,
+};
- ret = setup_msi_irq(dev, msidesc, irq, 0);
- if (ret < 0) {
- irq_free_hwirq(irq);
- return ret;
- }
+static struct irq_domain *dmar_get_irq_domain(void)
+{
+ static struct irq_domain *dmar_domain;
+ static DEFINE_MUTEX(dmar_lock);
- }
- return 0;
-}
+ mutex_lock(&dmar_lock);
+ if (dmar_domain == NULL)
+ dmar_domain = msi_create_irq_domain(NULL, &dmar_msi_domain_info,
+ x86_vector_domain);
+ mutex_unlock(&dmar_lock);
-void native_teardown_msi_irq(unsigned int irq)
-{
- irq_free_hwirq(irq);
+ return dmar_domain;
}
-#ifdef CONFIG_DMAR_TABLE
-static int
-dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+int dmar_alloc_hwirq(int id, int node, void *arg)
{
- struct irq_cfg *cfg = irqd_cfg(data);
- unsigned int dest, irq = data->irq;
- struct msi_msg msg;
- int ret;
-
- ret = apic_set_affinity(data, mask, &dest);
- if (ret)
- return ret;
+ struct irq_domain *domain = dmar_get_irq_domain();
+ struct irq_alloc_info info;
- dmar_msi_read(irq, &msg);
+ if (!domain)
+ return -1;
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_DMAR;
+ info.dmar_id = id;
+ info.dmar_data = arg;
- dmar_msi_write(irq, &msg);
-
- return IRQ_SET_MASK_OK_NOCOPY;
+ return irq_domain_alloc_irqs(domain, 1, node, &info);
}
-static struct irq_chip dmar_msi_type = {
- .name = "DMAR_MSI",
- .irq_unmask = dmar_msi_unmask,
- .irq_mask = dmar_msi_mask,
- .irq_ack = apic_ack_edge,
- .irq_set_affinity = dmar_msi_set_affinity,
- .irq_retrigger = apic_retrigger_irq,
- .flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
+void dmar_free_hwirq(int irq)
{
- int ret;
- struct msi_msg msg;
-
- ret = msi_compose_msg(NULL, irq, &msg, -1);
- if (ret < 0)
- return ret;
- dmar_msi_write(irq, &msg);
- irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
- "edge");
- return 0;
+ irq_domain_free_irqs(irq, 1);
}
#endif
@@ -231,56 +255,103 @@ int arch_setup_dmar_msi(unsigned int irq)
* MSI message composition
*/
#ifdef CONFIG_HPET_TIMER
+static inline int hpet_dev_id(struct irq_domain *domain)
+{
+ struct msi_domain_info *info = msi_get_domain_info(domain);
+
+ return (int)(long)info->data;
+}
-static int hpet_msi_set_affinity(struct irq_data *data,
- const struct cpumask *mask, bool force)
+static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
- struct irq_cfg *cfg = irqd_cfg(data);
- struct msi_msg msg;
- unsigned int dest;
- int ret;
+ hpet_msi_write(data->handler_data, msg);
+}
- ret = apic_set_affinity(data, mask, &dest);
- if (ret)
- return ret;
+static struct irq_chip hpet_msi_controller = {
+ .name = "HPET-MSI",
+ .irq_unmask = hpet_msi_unmask,
+ .irq_mask = hpet_msi_mask,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_set_affinity = msi_domain_set_affinity,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_compose_msi_msg = irq_msi_compose_msg,
+ .irq_write_msi_msg = hpet_msi_write_msg,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
- hpet_msi_read(data->handler_data, &msg);
+static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info,
+ msi_alloc_info_t *arg)
+{
+ return arg->hpet_index;
+}
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+static int hpet_msi_init(struct irq_domain *domain,
+ struct msi_domain_info *info, unsigned int virq,
+ irq_hw_number_t hwirq, msi_alloc_info_t *arg)
+{
+ irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
+ irq_domain_set_info(domain, virq, arg->hpet_index, info->chip, NULL,
+ handle_edge_irq, arg->hpet_data, "edge");
- hpet_msi_write(data->handler_data, &msg);
+ return 0;
+}
- return IRQ_SET_MASK_OK_NOCOPY;
+static void hpet_msi_free(struct irq_domain *domain,
+ struct msi_domain_info *info, unsigned int virq)
+{
+ irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
}
-static struct irq_chip hpet_msi_type = {
- .name = "HPET_MSI",
- .irq_unmask = hpet_msi_unmask,
- .irq_mask = hpet_msi_mask,
- .irq_ack = apic_ack_edge,
- .irq_set_affinity = hpet_msi_set_affinity,
- .irq_retrigger = apic_retrigger_irq,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+static struct msi_domain_ops hpet_msi_domain_ops = {
+ .get_hwirq = hpet_msi_get_hwirq,
+ .msi_init = hpet_msi_init,
+ .msi_free = hpet_msi_free,
+};
+
+static struct msi_domain_info hpet_msi_domain_info = {
+ .ops = &hpet_msi_domain_ops,
+ .chip = &hpet_msi_controller,
};
-int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+struct irq_domain *hpet_create_irq_domain(int hpet_id)
{
- struct irq_chip *chip = &hpet_msi_type;
- struct msi_msg msg;
- int ret;
+ struct irq_domain *parent;
+ struct irq_alloc_info info;
+ struct msi_domain_info *domain_info;
+
+ if (x86_vector_domain == NULL)
+ return NULL;
+
+ domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL);
+ if (!domain_info)
+ return NULL;
+
+ *domain_info = hpet_msi_domain_info;
+ domain_info->data = (void *)(long)hpet_id;
+
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_HPET;
+ info.hpet_id = hpet_id;
+ parent = irq_remapping_get_ir_irq_domain(&info);
+ if (parent == NULL)
+ parent = x86_vector_domain;
+ else
+ hpet_msi_controller.name = "IR-HPET-MSI";
+
+ return msi_create_irq_domain(NULL, domain_info, parent);
+}
- ret = msi_compose_msg(NULL, irq, &msg, id);
- if (ret < 0)
- return ret;
+int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
+ int dev_num)
+{
+ struct irq_alloc_info info;
- hpet_msi_write(irq_get_handler_data(irq), &msg);
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- setup_remapped_irq(irq, irq_cfg(irq), chip);
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_HPET;
+ info.hpet_data = dev;
+ info.hpet_id = hpet_dev_id(domain);
+ info.hpet_index = dev_num;
- irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
- return 0;
+ return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
}
#endif
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6cedd79..28eba2d 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -3,6 +3,8 @@
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
+ * Jiang Liu <jiang.liu@linux.intel.com>
+ * Enable support of hierarchical irqdomains
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -11,15 +13,28 @@
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
-#include <linux/irqdomain.h>
#include <linux/slab.h>
+#include <asm/irqdomain.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/i8259.h>
#include <asm/desc.h>
#include <asm/irq_remapping.h>
+struct apic_chip_data {
+ struct irq_cfg cfg;
+ cpumask_var_t domain;
+ cpumask_var_t old_domain;
+ u8 move_in_progress : 1;
+};
+
+struct irq_domain *x86_vector_domain;
static DEFINE_RAW_SPINLOCK(vector_lock);
+static cpumask_var_t vector_cpumask;
+static struct irq_chip lapic_controller;
+#ifdef CONFIG_X86_IO_APIC
+static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
+#endif
void lock_vector_lock(void)
{
@@ -34,71 +49,59 @@ void unlock_vector_lock(void)
raw_spin_unlock(&vector_lock);
}
-struct irq_cfg *irq_cfg(unsigned int irq)
+static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data)
{
- return irq_get_chip_data(irq);
+ if (!irq_data)
+ return NULL;
+
+ while (irq_data->parent_data)
+ irq_data = irq_data->parent_data;
+
+ return irq_data->chip_data;
}
struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
{
- return irq_data->chip_data;
+ struct apic_chip_data *data = apic_chip_data(irq_data);
+
+ return data ? &data->cfg : NULL;
}
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
- struct irq_cfg *cfg;
+ return irqd_cfg(irq_get_irq_data(irq));
+}
- cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
- if (!cfg)
+static struct apic_chip_data *alloc_apic_chip_data(int node)
+{
+ struct apic_chip_data *data;
+
+ data = kzalloc_node(sizeof(*data), GFP_KERNEL, node);
+ if (!data)
return NULL;
- if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
- goto out_cfg;
- if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+ if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node))
+ goto out_data;
+ if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node))
goto out_domain;
-#ifdef CONFIG_X86_IO_APIC
- INIT_LIST_HEAD(&cfg->irq_2_pin);
-#endif
- return cfg;
+ return data;
out_domain:
- free_cpumask_var(cfg->domain);
-out_cfg:
- kfree(cfg);
+ free_cpumask_var(data->domain);
+out_data:
+ kfree(data);
return NULL;
}
-struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
+static void free_apic_chip_data(struct apic_chip_data *data)
{
- int res = irq_alloc_desc_at(at, node);
- struct irq_cfg *cfg;
-
- if (res < 0) {
- if (res != -EEXIST)
- return NULL;
- cfg = irq_cfg(at);
- if (cfg)
- return cfg;
+ if (data) {
+ free_cpumask_var(data->domain);
+ free_cpumask_var(data->old_domain);
+ kfree(data);
}
-
- cfg = alloc_irq_cfg(at, node);
- if (cfg)
- irq_set_chip_data(at, cfg);
- else
- irq_free_desc(at);
- return cfg;
-}
-
-static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
-{
- if (!cfg)
- return;
- irq_set_chip_data(at, NULL);
- free_cpumask_var(cfg->domain);
- free_cpumask_var(cfg->old_domain);
- kfree(cfg);
}
-static int
-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+static int __assign_irq_vector(int irq, struct apic_chip_data *d,
+ const struct cpumask *mask)
{
/*
* NOTE! The local APIC isn't very good at handling
@@ -114,36 +117,33 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
static int current_offset = VECTOR_OFFSET_START % 16;
int cpu, err;
- cpumask_var_t tmp_mask;
- if (cfg->move_in_progress)
+ if (d->move_in_progress)
return -EBUSY;
- if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
- return -ENOMEM;
-
/* Only try and allocate irqs on cpus that are present */
err = -ENOSPC;
- cpumask_clear(cfg->old_domain);
+ cpumask_clear(d->old_domain);
cpu = cpumask_first_and(mask, cpu_online_mask);
while (cpu < nr_cpu_ids) {
int new_cpu, vector, offset;
- apic->vector_allocation_domain(cpu, tmp_mask, mask);
+ apic->vector_allocation_domain(cpu, vector_cpumask, mask);
- if (cpumask_subset(tmp_mask, cfg->domain)) {
+ if (cpumask_subset(vector_cpumask, d->domain)) {
err = 0;
- if (cpumask_equal(tmp_mask, cfg->domain))
+ if (cpumask_equal(vector_cpumask, d->domain))
break;
/*
* New cpumask using the vector is a proper subset of
* the current in use mask. So cleanup the vector
* allocation for the members that are not used anymore.
*/
- cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
- cfg->move_in_progress =
- cpumask_intersects(cfg->old_domain, cpu_online_mask);
- cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+ cpumask_andnot(d->old_domain, d->domain,
+ vector_cpumask);
+ d->move_in_progress =
+ cpumask_intersects(d->old_domain, cpu_online_mask);
+ cpumask_and(d->domain, d->domain, vector_cpumask);
break;
}
@@ -157,16 +157,18 @@ next:
}
if (unlikely(current_vector == vector)) {
- cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
- cpumask_andnot(tmp_mask, mask, cfg->old_domain);
- cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
+ cpumask_or(d->old_domain, d->old_domain,
+ vector_cpumask);
+ cpumask_andnot(vector_cpumask, mask, d->old_domain);
+ cpu = cpumask_first_and(vector_cpumask,
+ cpu_online_mask);
continue;
}
if (test_bit(vector, used_vectors))
goto next;
- for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
+ for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
if (per_cpu(vector_irq, new_cpu)[vector] >
VECTOR_UNDEFINED)
goto next;
@@ -174,55 +176,73 @@ next:
/* Found one! */
current_vector = vector;
current_offset = offset;
- if (cfg->vector) {
- cpumask_copy(cfg->old_domain, cfg->domain);
- cfg->move_in_progress =
- cpumask_intersects(cfg->old_domain, cpu_online_mask);
+ if (d->cfg.vector) {
+ cpumask_copy(d->old_domain, d->domain);
+ d->move_in_progress =
+ cpumask_intersects(d->old_domain, cpu_online_mask);
}
- for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+ for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
- cfg->vector = vector;
- cpumask_copy(cfg->domain, tmp_mask);
+ d->cfg.vector = vector;
+ cpumask_copy(d->domain, vector_cpumask);
err = 0;
break;
}
- free_cpumask_var(tmp_mask);
+
+ if (!err) {
+ /* cache destination APIC IDs into cfg->dest_apicid */
+ err = apic->cpu_mask_to_apicid_and(mask, d->domain,
+ &d->cfg.dest_apicid);
+ }
return err;
}
-int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+static int assign_irq_vector(int irq, struct apic_chip_data *data,
+ const struct cpumask *mask)
{
int err;
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, cfg, mask);
+ err = __assign_irq_vector(irq, data, mask);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
-void clear_irq_vector(int irq, struct irq_cfg *cfg)
+static int assign_irq_vector_policy(int irq, int node,
+ struct apic_chip_data *data,
+ struct irq_alloc_info *info)
+{
+ if (info && info->mask)
+ return assign_irq_vector(irq, data, info->mask);
+ if (node != NUMA_NO_NODE &&
+ assign_irq_vector(irq, data, cpumask_of_node(node)) == 0)
+ return 0;
+ return assign_irq_vector(irq, data, apic->target_cpus());
+}
+
+static void clear_irq_vector(int irq, struct apic_chip_data *data)
{
int cpu, vector;
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
- BUG_ON(!cfg->vector);
+ BUG_ON(!data->cfg.vector);
- vector = cfg->vector;
- for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+ vector = data->cfg.vector;
+ for_each_cpu_and(cpu, data->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
- cfg->vector = 0;
- cpumask_clear(cfg->domain);
+ data->cfg.vector = 0;
+ cpumask_clear(data->domain);
- if (likely(!cfg->move_in_progress)) {
+ if (likely(!data->move_in_progress)) {
raw_spin_unlock_irqrestore(&vector_lock, flags);
return;
}
- for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+ for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -231,10 +251,95 @@ void clear_irq_vector(int irq, struct irq_cfg *cfg)
break;
}
}
- cfg->move_in_progress = 0;
+ data->move_in_progress = 0;
raw_spin_unlock_irqrestore(&vector_lock, flags);
}
+void init_irq_alloc_info(struct irq_alloc_info *info,
+ const struct cpumask *mask)
+{
+ memset(info, 0, sizeof(*info));
+ info->mask = mask;
+}
+
+void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
+{
+ if (src)
+ *dst = *src;
+ else
+ memset(dst, 0, sizeof(*dst));
+}
+
+static void x86_vector_free_irqs(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct irq_data *irq_data;
+ int i;
+
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
+ if (irq_data && irq_data->chip_data) {
+ clear_irq_vector(virq + i, irq_data->chip_data);
+ free_apic_chip_data(irq_data->chip_data);
+#ifdef CONFIG_X86_IO_APIC
+ if (virq + i < nr_legacy_irqs())
+ legacy_irq_data[virq + i] = NULL;
+#endif
+ irq_domain_reset_irq_data(irq_data);
+ }
+ }
+}
+
+static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct irq_alloc_info *info = arg;
+ struct apic_chip_data *data;
+ struct irq_data *irq_data;
+ int i, err;
+
+ if (disable_apic)
+ return -ENXIO;
+
+ /* Currently vector allocator can't guarantee contiguous allocations */
+ if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1)
+ return -ENOSYS;
+
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ BUG_ON(!irq_data);
+#ifdef CONFIG_X86_IO_APIC
+ if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i])
+ data = legacy_irq_data[virq + i];
+ else
+#endif
+ data = alloc_apic_chip_data(irq_data->node);
+ if (!data) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ irq_data->chip = &lapic_controller;
+ irq_data->chip_data = data;
+ irq_data->hwirq = virq + i;
+ err = assign_irq_vector_policy(virq, irq_data->node, data,
+ info);
+ if (err)
+ goto error;
+ }
+
+ return 0;
+
+error:
+ x86_vector_free_irqs(domain, virq, i + 1);
+ return err;
+}
+
+static const struct irq_domain_ops x86_vector_domain_ops = {
+ .alloc = x86_vector_alloc_irqs,
+ .free = x86_vector_free_irqs,
+};
+
int __init arch_probe_nr_irqs(void)
{
int nr;
@@ -258,8 +363,43 @@ int __init arch_probe_nr_irqs(void)
return nr_legacy_irqs();
}
+#ifdef CONFIG_X86_IO_APIC
+static void init_legacy_irqs(void)
+{
+ int i, node = cpu_to_node(0);
+ struct apic_chip_data *data;
+
+ /*
+ * For legacy IRQ's, start with assigning irq0 to irq15 to
+ * ISA_IRQ_VECTOR(i) for all cpu's.
+ */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ data = legacy_irq_data[i] = alloc_apic_chip_data(node);
+ BUG_ON(!data);
+
+ data->cfg.vector = ISA_IRQ_VECTOR(i);
+ cpumask_setall(data->domain);
+ irq_set_chip_data(i, data);
+ }
+}
+#else
+static void init_legacy_irqs(void) { }
+#endif
+
int __init arch_early_irq_init(void)
{
+ init_legacy_irqs();
+
+ x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops,
+ NULL);
+ BUG_ON(x86_vector_domain == NULL);
+ irq_set_default_host(x86_vector_domain);
+
+ arch_init_msi_domain(x86_vector_domain);
+ arch_init_htirq_domain(x86_vector_domain);
+
+ BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+
return arch_early_ioapic_init();
}
@@ -267,7 +407,7 @@ static void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
int irq, vector;
- struct irq_cfg *cfg;
+ struct apic_chip_data *data;
/*
* vector_lock will make sure that we don't run into irq vector
@@ -277,13 +417,13 @@ static void __setup_vector_irq(int cpu)
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_active_irq(irq) {
- cfg = irq_cfg(irq);
- if (!cfg)
+ data = apic_chip_data(irq_get_irq_data(irq));
+ if (!data)
continue;
- if (!cpumask_test_cpu(cpu, cfg->domain))
+ if (!cpumask_test_cpu(cpu, data->domain))
continue;
- vector = cfg->vector;
+ vector = data->cfg.vector;
per_cpu(vector_irq, cpu)[vector] = irq;
}
/* Mark the free vectors */
@@ -292,8 +432,8 @@ static void __setup_vector_irq(int cpu)
if (irq <= VECTOR_UNDEFINED)
continue;
- cfg = irq_cfg(irq);
- if (!cpumask_test_cpu(cpu, cfg->domain))
+ data = apic_chip_data(irq_get_irq_data(irq));
+ if (!cpumask_test_cpu(cpu, data->domain))
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
}
raw_spin_unlock(&vector_lock);
@@ -314,20 +454,20 @@ void setup_vector_irq(int cpu)
* legacy vector to irq mapping:
*/
for (irq = 0; irq < nr_legacy_irqs(); irq++)
- per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
+ per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq;
__setup_vector_irq(cpu);
}
-int apic_retrigger_irq(struct irq_data *data)
+static int apic_retrigger_irq(struct irq_data *irq_data)
{
- struct irq_cfg *cfg = irqd_cfg(data);
+ struct apic_chip_data *data = apic_chip_data(irq_data);
unsigned long flags;
int cpu;
raw_spin_lock_irqsave(&vector_lock, flags);
- cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
- apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
+ cpu = cpumask_first_and(data->domain, cpu_online_mask);
+ apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return 1;
@@ -340,73 +480,76 @@ void apic_ack_edge(struct irq_data *data)
ack_APIC_irq();
}
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- unsigned int *dest_id)
+static int apic_set_affinity(struct irq_data *irq_data,
+ const struct cpumask *dest, bool force)
{
- struct irq_cfg *cfg = irqd_cfg(data);
- unsigned int irq = data->irq;
- int err;
+ struct apic_chip_data *data = irq_data->chip_data;
+ int err, irq = irq_data->irq;
if (!config_enabled(CONFIG_SMP))
return -EPERM;
- if (!cpumask_intersects(mask, cpu_online_mask))
+ if (!cpumask_intersects(dest, cpu_online_mask))
return -EINVAL;
- err = assign_irq_vector(irq, cfg, mask);
- if (err)
- return err;
-
- err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+ err = assign_irq_vector(irq, data, dest);
if (err) {
- if (assign_irq_vector(irq, cfg, data->affinity))
+ struct irq_data *top = irq_get_irq_data(irq);
+
+ if (assign_irq_vector(irq, data, top->affinity))
pr_err("Failed to recover vector for irq %d\n", irq);
return err;
}
- cpumask_copy(data->affinity, mask);
-
- return 0;
+ return IRQ_SET_MASK_OK;
}
+static struct irq_chip lapic_controller = {
+ .irq_ack = apic_ack_edge,
+ .irq_set_affinity = apic_set_affinity,
+ .irq_retrigger = apic_retrigger_irq,
+};
+
#ifdef CONFIG_SMP
-void send_cleanup_vector(struct irq_cfg *cfg)
+static void __send_cleanup_vector(struct apic_chip_data *data)
{
cpumask_var_t cleanup_mask;
if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
unsigned int i;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ for_each_cpu_and(i, data->old_domain, cpu_online_mask)
apic->send_IPI_mask(cpumask_of(i),
IRQ_MOVE_CLEANUP_VECTOR);
} else {
- cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+ cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
free_cpumask_var(cleanup_mask);
}
- cfg->move_in_progress = 0;
+ data->move_in_progress = 0;
+}
+
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ struct apic_chip_data *data;
+
+ data = container_of(cfg, struct apic_chip_data, cfg);
+ if (data->move_in_progress)
+ __send_cleanup_vector(data);
}
asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
- ack_APIC_irq();
- irq_enter();
- exit_idle();
+ entering_ack_irq();
me = smp_processor_id();
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
int irq;
unsigned int irr;
struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct apic_chip_data *data;
irq = __this_cpu_read(vector_irq[vector]);
@@ -417,8 +560,8 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
if (!desc)
continue;
- cfg = irq_cfg(irq);
- if (!cfg)
+ data = apic_chip_data(&desc->irq_data);
+ if (!data)
continue;
raw_spin_lock(&desc->lock);
@@ -427,10 +570,11 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
* Check if the irq migration is in progress. If so, we
* haven't received the cleanup request yet for this irq.
*/
- if (cfg->move_in_progress)
+ if (data->move_in_progress)
goto unlock;
- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+ if (vector == data->cfg.vector &&
+ cpumask_test_cpu(me, data->domain))
goto unlock;
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
@@ -450,20 +594,21 @@ unlock:
raw_spin_unlock(&desc->lock);
}
- irq_exit();
+ exiting_irq();
}
static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
{
unsigned me;
+ struct apic_chip_data *data;
- if (likely(!cfg->move_in_progress))
+ data = container_of(cfg, struct apic_chip_data, cfg);
+ if (likely(!data->move_in_progress))
return;
me = smp_processor_id();
-
- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
- send_cleanup_vector(cfg);
+ if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain))
+ __send_cleanup_vector(data);
}
void irq_complete_move(struct irq_cfg *cfg)
@@ -475,46 +620,11 @@ void irq_force_complete_move(int irq)
{
struct irq_cfg *cfg = irq_cfg(irq);
- if (!cfg)
- return;
-
- __irq_complete_move(cfg, cfg->vector);
+ if (cfg)
+ __irq_complete_move(cfg, cfg->vector);
}
#endif
-/*
- * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
- */
-int arch_setup_hwirq(unsigned int irq, int node)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- int ret;
-
- cfg = alloc_irq_cfg(irq, node);
- if (!cfg)
- return -ENOMEM;
-
- raw_spin_lock_irqsave(&vector_lock, flags);
- ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
- raw_spin_unlock_irqrestore(&vector_lock, flags);
-
- if (!ret)
- irq_set_chip_data(irq, cfg);
- else
- free_irq_cfg(irq, cfg);
- return ret;
-}
-
-void arch_teardown_hwirq(unsigned int irq)
-{
- struct irq_cfg *cfg = irq_cfg(irq);
-
- free_remapped_irq(irq);
- clear_irq_vector(irq, cfg);
- free_irq_cfg(irq, cfg);
-}
-
static void __init print_APIC_field(int base)
{
int i;
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 6fae733..3ffd925 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -21,11 +21,13 @@ early_param("x2apic_phys", set_x2apic_phys_mode);
static bool x2apic_fadt_phys(void)
{
+#ifdef CONFIG_ACPI
if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "System requires x2apic physical mode\n");
return true;
}
+#endif
return false;
}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 9f6b934..8e3d22a1 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -41,6 +41,25 @@ void common(void) {
OFFSET(pbe_orig_address, pbe, orig_address);
OFFSET(pbe_next, pbe, next);
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+ BLANK();
+ OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax);
+ OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx);
+ OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx);
+ OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx);
+ OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si);
+ OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di);
+ OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp);
+ OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp);
+ OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip);
+
+ BLANK();
+ OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+
+ BLANK();
+ OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
+#endif
+
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
@@ -49,7 +68,9 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+#ifdef CONFIG_X86_32
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+#endif
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 47703ae..6ce3902 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -17,17 +17,6 @@ void foo(void);
void foo(void)
{
- OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
- OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx);
- OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx);
- OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx);
- OFFSET(IA32_SIGCONTEXT_si, sigcontext, si);
- OFFSET(IA32_SIGCONTEXT_di, sigcontext, di);
- OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp);
- OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp);
- OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip);
- BLANK();
-
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
@@ -37,10 +26,6 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
BLANK();
- OFFSET(TI_sysenter_return, thread_info, sysenter_return);
- OFFSET(TI_cpu, thread_info, cpu);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
@@ -60,9 +45,6 @@ void foo(void)
OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
- OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
- BLANK();
-
OFFSET(saved_context_gdt_desc, saved_context, gdt_desc);
BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 5ce6f2d..d8f42f9 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -29,27 +29,6 @@ int main(void)
BLANK();
#endif
-#ifdef CONFIG_IA32_EMULATION
- OFFSET(TI_sysenter_return, thread_info, sysenter_return);
- BLANK();
-
-#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
- ENTRY(ax);
- ENTRY(bx);
- ENTRY(cx);
- ENTRY(dx);
- ENTRY(si);
- ENTRY(di);
- ENTRY(bp);
- ENTRY(sp);
- ENTRY(ip);
- BLANK();
-#undef ENTRY
-
- OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
- BLANK();
-#endif
-
#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(cx);
@@ -87,7 +66,7 @@ int main(void)
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
DEFINE(NR_syscalls, sizeof(syscalls_64));
- DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1);
+ DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e4cf633..eb4f012 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -288,7 +288,7 @@ static int nearby_node(int apicid)
* Assumption: Number of cores in each internal node is the same.
* (2) AMD processors supporting compute units
*/
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
static void amd_get_topology(struct cpuinfo_x86 *c)
{
u32 nodes, cores_per_cu = 1;
@@ -341,7 +341,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
*/
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
unsigned bits;
int cpu = smp_processor_id();
@@ -420,7 +420,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
static void early_init_amd_mc(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
unsigned bits, ecx;
/* Multi core CPU? */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a62cf04..cc7f753 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -508,7 +508,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
void detect_ht(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
static bool printed;
@@ -844,7 +844,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
if (c->cpuid_level >= 0x00000001) {
c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
#ifdef CONFIG_X86_32
-# ifdef CONFIG_X86_HT
+# ifdef CONFIG_SMP
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
# else
c->apicid = c->initial_apicid;
@@ -1026,7 +1026,7 @@ void enable_sep_cpu(void)
(unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
0);
- wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
+ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
out:
put_cpu();
@@ -1155,10 +1155,6 @@ static __init int setup_disablecpuid(char *arg)
}
__setup("clearcpuid=", setup_disablecpuid);
-DEFINE_PER_CPU(unsigned long, kernel_stack) =
- (unsigned long)&init_thread_union + THREAD_SIZE;
-EXPORT_PER_CPU_SYMBOL(kernel_stack);
-
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
@@ -1208,10 +1204,10 @@ void syscall_init(void)
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
*/
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
- wrmsrl(MSR_LSTAR, system_call);
+ wrmsrl(MSR_LSTAR, entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
- wrmsrl(MSR_CSTAR, ia32_cstar_target);
+ wrmsrl(MSR_CSTAR, entry_SYSCALL_compat);
/*
* This only works on Intel CPUs.
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
@@ -1220,7 +1216,7 @@ void syscall_init(void)
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
- wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
#else
wrmsrl(MSR_CSTAR, ignore_sysret);
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 36ce402..d820d8e 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -27,8 +27,8 @@
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
-#ifdef CONFIG_XEN_PVHVM
- &x86_hyper_xen_hvm,
+#ifdef CONFIG_XEN
+ &x86_hyper_xen,
#endif
&x86_hyper_vmware,
&x86_hyper_ms_hyperv,
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index edcb0e2..be4febc 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -654,7 +654,7 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
unsigned int cpu = c->cpu_index;
#endif
@@ -773,19 +773,19 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
if (new_l2) {
l2 = new_l2;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
}
if (new_l3) {
l3 = new_l3;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
}
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
/*
* If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
* turns means that the only possibility is SMT (as indicated in
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e535533..5b974c9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
struct pt_regs *regs)
{
int i, ret = 0;
+ char *tmp;
for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
@@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
}
- if (mce_severity(m, mca_cfg.tolerant, msg, true) >=
- MCE_PANIC_SEVERITY)
+
+ if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ *msg = tmp;
ret = 1;
+ }
}
return ret;
}
@@ -1047,6 +1050,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
char *msg = "Unknown";
u64 recover_paddr = ~0ull;
int flags = MF_ACTION_REQUIRED;
+ int lmce = 0;
prev_state = ist_enter(regs);
@@ -1074,11 +1078,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
kill_it = 1;
/*
- * Go through all the banks in exclusion of the other CPUs.
- * This way we don't report duplicated events on shared banks
- * because the first one to see it will clear it.
+ * Check if this MCE is signaled to only this logical processor
*/
- order = mce_start(&no_way_out);
+ if (m.mcgstatus & MCG_STATUS_LMCES)
+ lmce = 1;
+ else {
+ /*
+ * Go through all the banks in exclusion of the other CPUs.
+ * This way we don't report duplicated events on shared banks
+ * because the first one to see it will clear it.
+ * If this is a Local MCE, then no need to perform rendezvous.
+ */
+ order = mce_start(&no_way_out);
+ }
+
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
@@ -1155,8 +1168,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* Do most of the synchronization with other CPUs.
* When there's any problem use only local no_way_out state.
*/
- if (mce_end(order) < 0)
- no_way_out = worst >= MCE_PANIC_SEVERITY;
+ if (!lmce) {
+ if (mce_end(order) < 0)
+ no_way_out = worst >= MCE_PANIC_SEVERITY;
+ } else {
+ /*
+ * Local MCE skipped calling mce_reign()
+ * If we found a fatal error, we need to panic here.
+ */
+ if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+ mce_panic("Machine check from unknown source",
+ NULL, NULL);
+ }
/*
* At insane "tolerant" levels we take no action. Otherwise
@@ -1637,10 +1660,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_intel_feature_init(c);
mce_adjust_timer = cmci_intel_adjust_timer;
break;
- case X86_VENDOR_AMD:
+
+ case X86_VENDOR_AMD: {
+ u32 ebx = cpuid_ebx(0x80000007);
+
mce_amd_feature_init(c);
- mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
+ mce_flags.overflow_recov = !!(ebx & BIT(0));
+ mce_flags.succor = !!(ebx & BIT(1));
break;
+ }
+
default:
break;
}
@@ -1976,6 +2005,7 @@ void mce_disable_bank(int bank)
/*
* mce=off Disables machine check
* mce=no_cmci Disables CMCI
+ * mce=no_lmce Disables LMCE
* mce=dont_log_ce Clears corrected events silently, no log created for CEs.
* mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
* mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@@ -1999,6 +2029,8 @@ static int __init mcheck_enable(char *str)
cfg->disabled = true;
else if (!strcmp(str, "no_cmci"))
cfg->cmci_disabled = true;
+ else if (!strcmp(str, "no_lmce"))
+ cfg->lmce_disabled = true;
else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
@@ -2008,11 +2040,8 @@ static int __init mcheck_enable(char *str)
else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true;
else if (isdigit(str[0])) {
- get_option(&str, &(cfg->tolerant));
- if (*str == ',') {
- ++str;
+ if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
- }
} else {
pr_info("mce argument %s ignored. Please use /sys\n", str);
return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 55ad9b3..e99b150 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,19 +1,13 @@
/*
- * (c) 2005-2012 Advanced Micro Devices, Inc.
+ * (c) 2005-2015 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
*
* Written by Jacob Shin - AMD, Inc.
- *
* Maintained by: Borislav Petkov <bp@alien8.de>
*
- * April 2006
- * - added support for AMD Family 0x10 processors
- * May 2012
- * - major scrubbing
- *
- * All MC4_MISCi registers are shared between multi-cores
+ * All MC4_MISCi registers are shared between cores on a node.
*/
#include <linux/interrupt.h>
#include <linux/notifier.h>
@@ -32,6 +26,7 @@
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
#define NR_BLOCKS 9
#define THRESHOLD_MAX 0xFFF
@@ -47,6 +42,13 @@
#define MASK_BLKPTR_LO 0xFF000000
#define MCG_XBLK_ADDR 0xC0000400
+/* Deferred error settings */
+#define MSR_CU_DEF_ERR 0xC0000410
+#define MASK_DEF_LVTOFF 0x000000F0
+#define MASK_DEF_INT_TYPE 0x00000006
+#define DEF_LVT_OFF 0x2
+#define DEF_INT_TYPE_APIC 0x2
+
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -60,6 +62,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
static void amd_threshold_interrupt(void);
+static void amd_deferred_error_interrupt(void);
+
+static void default_deferred_error_interrupt(void)
+{
+ pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
+}
+void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
/*
* CPU Initialization
@@ -196,7 +205,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
threshold_restart_bank(&tr);
};
-static int setup_APIC_mce(int reserved, int new)
+static int setup_APIC_mce_threshold(int reserved, int new)
{
if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
APIC_EILVT_MSG_FIX, 0))
@@ -205,6 +214,39 @@ static int setup_APIC_mce(int reserved, int new)
return reserved;
}
+static int setup_APIC_deferred_error(int reserved, int new)
+{
+ if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
+ APIC_EILVT_MSG_FIX, 0))
+ return new;
+
+ return reserved;
+}
+
+static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
+{
+ u32 low = 0, high = 0;
+ int def_offset = -1, def_new;
+
+ if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
+ return;
+
+ def_new = (low & MASK_DEF_LVTOFF) >> 4;
+ if (!(low & MASK_DEF_LVTOFF)) {
+ pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
+ def_new = DEF_LVT_OFF;
+ low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
+ }
+
+ def_offset = setup_APIC_deferred_error(def_offset, def_new);
+ if ((def_offset == def_new) &&
+ (deferred_error_int_vector != amd_deferred_error_interrupt))
+ deferred_error_int_vector = amd_deferred_error_interrupt;
+
+ low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+ wrmsr(MSR_CU_DEF_ERR, low, high);
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -252,7 +294,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
b.interrupt_enable = 1;
new = (high & MASK_LVTOFF_HI) >> 20;
- offset = setup_APIC_mce(offset, new);
+ offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) &&
(mce_threshold_vector != amd_threshold_interrupt))
@@ -262,6 +304,73 @@ init:
mce_threshold_block_init(&b, offset);
}
}
+
+ if (mce_flags.succor)
+ deferred_error_interrupt_enable(c);
+}
+
+static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+{
+ struct mce m;
+ u64 status;
+
+ rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+ if (!(status & MCI_STATUS_VAL))
+ return;
+
+ mce_setup(&m);
+
+ m.status = status;
+ m.bank = bank;
+
+ if (threshold_err)
+ m.misc = misc;
+
+ if (m.status & MCI_STATUS_ADDRV)
+ rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+
+ mce_log(&m);
+ wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+}
+
+static inline void __smp_deferred_error_interrupt(void)
+{
+ inc_irq_stat(irq_deferred_error_count);
+ deferred_error_int_vector();
+}
+
+asmlinkage __visible void smp_deferred_error_interrupt(void)
+{
+ entering_irq();
+ __smp_deferred_error_interrupt();
+ exiting_ack_irq();
+}
+
+asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+{
+ entering_irq();
+ trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
+ __smp_deferred_error_interrupt();
+ trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
+ exiting_ack_irq();
+}
+
+/* APIC interrupt handler for deferred errors */
+static void amd_deferred_error_interrupt(void)
+{
+ u64 status;
+ unsigned int bank;
+
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+
+ if (!(status & MCI_STATUS_VAL) ||
+ !(status & MCI_STATUS_DEFERRED))
+ continue;
+
+ __log_error(bank, false, 0);
+ break;
+ }
}
/*
@@ -273,12 +382,12 @@ init:
* the interrupt goes off when error_count reaches threshold_limit.
* the handler will simply log mcelog w/ software defined bank number.
*/
+
static void amd_threshold_interrupt(void)
{
u32 low = 0, high = 0, address = 0;
int cpu = smp_processor_id();
unsigned int bank, block;
- struct mce m;
/* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -321,15 +430,7 @@ static void amd_threshold_interrupt(void)
return;
log:
- mce_setup(&m);
- rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
- if (!(m.status & MCI_STATUS_VAL))
- return;
- m.misc = ((u64)high << 32) | low;
- m.bank = bank;
- mce_log(&m);
-
- wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+ __log_error(bank, true, ((u64)high << 32) | low);
}
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b4a41cf..844f56c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -91,6 +91,36 @@ static int cmci_supported(int *banks)
return !!(cap & MCG_CMCI_P);
}
+static bool lmce_supported(void)
+{
+ u64 tmp;
+
+ if (mca_cfg.lmce_disabled)
+ return false;
+
+ rdmsrl(MSR_IA32_MCG_CAP, tmp);
+
+ /*
+ * LMCE depends on recovery support in the processor. Hence both
+ * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
+ */
+ if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
+ (MCG_SER_P | MCG_LMCE_P))
+ return false;
+
+ /*
+ * BIOS should indicate support for LMCE by setting bit 20 in
+ * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
+ * generate a #GP fault.
+ */
+ rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
+ if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
+ (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
+ return true;
+
+ return false;
+}
+
bool mce_intel_cmci_poll(void)
{
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@@ -405,8 +435,22 @@ static void intel_init_cmci(void)
cmci_recheck();
}
+void intel_init_lmce(void)
+{
+ u64 val;
+
+ if (!lmce_supported())
+ return;
+
+ rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+
+ if (!(val & MCG_EXT_CTL_LMCE_EN))
+ wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_thermal(c);
intel_init_cmci();
+ intel_init_lmce();
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 939155f..aad4bd8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -39,14 +39,12 @@ void hyperv_vector_handler(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- irq_enter();
- exit_idle();
-
+ entering_irq();
inc_irq_stat(irq_hv_callback_count);
if (vmbus_handler)
vmbus_handler();
- irq_exit();
+ exiting_irq();
set_irq_regs(old_regs);
}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 5f90b85..70d7c93 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -98,7 +98,8 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
continue;
base = range_state[i].base_pfn;
if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
- (mtrr_state.enabled & 1)) {
+ (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
+ (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
/* Var MTRR contains UC entry below 1M? Skip it: */
printk(BIOS_BUG_MSG, i);
if (base + size <= (1<<(20-PAGE_SHIFT)))
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7d74f7b..3b533cf 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -102,59 +102,76 @@ static int check_type_overlap(u8 *prev, u8 *curr)
return 0;
}
-/*
- * Error/Semi-error returns:
- * 0xFF - when MTRR is not enabled
- * *repeat == 1 implies [start:end] spanned across MTRR range and type returned
- * corresponds only to [start:*partial_end].
- * Caller has to lookup again for [*partial_end:end].
+/**
+ * mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries
+ *
+ * Return the MTRR fixed memory type of 'start'.
+ *
+ * MTRR fixed entries are divided into the following ways:
+ * 0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges
+ * 0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges
+ * 0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges
+ *
+ * Return Values:
+ * MTRR_TYPE_(type) - Matched memory type
+ * MTRR_TYPE_INVALID - Unmatched
+ */
+static u8 mtrr_type_lookup_fixed(u64 start, u64 end)
+{
+ int idx;
+
+ if (start >= 0x100000)
+ return MTRR_TYPE_INVALID;
+
+ /* 0x0 - 0x7FFFF */
+ if (start < 0x80000) {
+ idx = 0;
+ idx += (start >> 16);
+ return mtrr_state.fixed_ranges[idx];
+ /* 0x80000 - 0xBFFFF */
+ } else if (start < 0xC0000) {
+ idx = 1 * 8;
+ idx += ((start - 0x80000) >> 14);
+ return mtrr_state.fixed_ranges[idx];
+ }
+
+ /* 0xC0000 - 0xFFFFF */
+ idx = 3 * 8;
+ idx += ((start - 0xC0000) >> 12);
+ return mtrr_state.fixed_ranges[idx];
+}
+
+/**
+ * mtrr_type_lookup_variable - look up memory type in MTRR variable entries
+ *
+ * Return Value:
+ * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched)
+ *
+ * Output Arguments:
+ * repeat - Set to 1 when [start:end] spanned across MTRR range and type
+ * returned corresponds only to [start:*partial_end]. Caller has
+ * to lookup again for [*partial_end:end].
+ *
+ * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
+ * region is fully covered by a single MTRR entry or the default
+ * type.
*/
-static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
+static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
+ int *repeat, u8 *uniform)
{
int i;
u64 base, mask;
u8 prev_match, curr_match;
*repeat = 0;
- if (!mtrr_state_set)
- return 0xFF;
-
- if (!mtrr_state.enabled)
- return 0xFF;
+ *uniform = 1;
- /* Make end inclusive end, instead of exclusive */
+ /* Make end inclusive instead of exclusive */
end--;
- /* Look in fixed ranges. Just return the type as per start */
- if (mtrr_state.have_fixed && (start < 0x100000)) {
- int idx;
-
- if (start < 0x80000) {
- idx = 0;
- idx += (start >> 16);
- return mtrr_state.fixed_ranges[idx];
- } else if (start < 0xC0000) {
- idx = 1 * 8;
- idx += ((start - 0x80000) >> 14);
- return mtrr_state.fixed_ranges[idx];
- } else if (start < 0x1000000) {
- idx = 3 * 8;
- idx += ((start - 0xC0000) >> 12);
- return mtrr_state.fixed_ranges[idx];
- }
- }
-
- /*
- * Look in variable ranges
- * Look of multiple ranges matching this address and pick type
- * as per MTRR precedence
- */
- if (!(mtrr_state.enabled & 2))
- return mtrr_state.def_type;
-
- prev_match = 0xFF;
+ prev_match = MTRR_TYPE_INVALID;
for (i = 0; i < num_var_ranges; ++i) {
- unsigned short start_state, end_state;
+ unsigned short start_state, end_state, inclusive;
if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
continue;
@@ -166,20 +183,29 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
start_state = ((start & mask) == (base & mask));
end_state = ((end & mask) == (base & mask));
+ inclusive = ((start < base) && (end > base));
- if (start_state != end_state) {
+ if ((start_state != end_state) || inclusive) {
/*
* We have start:end spanning across an MTRR.
- * We split the region into
- * either
- * (start:mtrr_end) (mtrr_end:end)
- * or
- * (start:mtrr_start) (mtrr_start:end)
+ * We split the region into either
+ *
+ * - start_state:1
+ * (start:mtrr_end)(mtrr_end:end)
+ * - end_state:1
+ * (start:mtrr_start)(mtrr_start:end)
+ * - inclusive:1
+ * (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end)
+ *
* depending on kind of overlap.
- * Return the type for first region and a pointer to
- * the start of second region so that caller will
- * lookup again on the second region.
- * Note: This way we handle multiple overlaps as well.
+ *
+ * Return the type of the first region and a pointer
+ * to the start of next region so that caller will be
+ * advised to lookup again after having adjusted start
+ * and end.
+ *
+ * Note: This way we handle overlaps with multiple
+ * entries and the default type properly.
*/
if (start_state)
*partial_end = base + get_mtrr_size(mask);
@@ -193,59 +219,94 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
end = *partial_end - 1; /* end is inclusive */
*repeat = 1;
+ *uniform = 0;
}
if ((start & mask) != (base & mask))
continue;
curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
- if (prev_match == 0xFF) {
+ if (prev_match == MTRR_TYPE_INVALID) {
prev_match = curr_match;
continue;
}
+ *uniform = 0;
if (check_type_overlap(&prev_match, &curr_match))
return curr_match;
}
- if (mtrr_tom2) {
- if (start >= (1ULL<<32) && (end < mtrr_tom2))
- return MTRR_TYPE_WRBACK;
- }
-
- if (prev_match != 0xFF)
+ if (prev_match != MTRR_TYPE_INVALID)
return prev_match;
return mtrr_state.def_type;
}
-/*
- * Returns the effective MTRR type for the region
- * Error return:
- * 0xFF - when MTRR is not enabled
+/**
+ * mtrr_type_lookup - look up memory type in MTRR
+ *
+ * Return Values:
+ * MTRR_TYPE_(type) - The effective MTRR type for the region
+ * MTRR_TYPE_INVALID - MTRR is disabled
+ *
+ * Output Argument:
+ * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
+ * region is fully covered by a single MTRR entry or the default
+ * type.
*/
-u8 mtrr_type_lookup(u64 start, u64 end)
+u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
{
- u8 type, prev_type;
+ u8 type, prev_type, is_uniform = 1, dummy;
int repeat;
u64 partial_end;
- type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
+ if (!mtrr_state_set)
+ return MTRR_TYPE_INVALID;
+
+ if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED))
+ return MTRR_TYPE_INVALID;
+
+ /*
+ * Look up the fixed ranges first, which take priority over
+ * the variable ranges.
+ */
+ if ((start < 0x100000) &&
+ (mtrr_state.have_fixed) &&
+ (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
+ is_uniform = 0;
+ type = mtrr_type_lookup_fixed(start, end);
+ goto out;
+ }
+
+ /*
+ * Look up the variable ranges. Look of multiple ranges matching
+ * this address and pick type as per MTRR precedence.
+ */
+ type = mtrr_type_lookup_variable(start, end, &partial_end,
+ &repeat, &is_uniform);
/*
* Common path is with repeat = 0.
* However, we can have cases where [start:end] spans across some
- * MTRR range. Do repeated lookups for that case here.
+ * MTRR ranges and/or the default type. Do repeated lookups for
+ * that case here.
*/
while (repeat) {
prev_type = type;
start = partial_end;
- type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
+ is_uniform = 0;
+ type = mtrr_type_lookup_variable(start, end, &partial_end,
+ &repeat, &dummy);
if (check_type_overlap(&prev_type, &type))
- return type;
+ goto out;
}
+ if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2))
+ type = MTRR_TYPE_WRBACK;
+
+out:
+ *uniform = is_uniform;
return type;
}
@@ -347,7 +408,9 @@ static void __init print_mtrr_state(void)
mtrr_attrib_to_str(mtrr_state.def_type));
if (mtrr_state.have_fixed) {
pr_debug("MTRR fixed ranges %sabled:\n",
- mtrr_state.enabled & 1 ? "en" : "dis");
+ ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
+ (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ?
+ "en" : "dis");
print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
for (i = 0; i < 2; ++i)
print_fixed(0x80000 + i * 0x20000, 0x04000,
@@ -360,7 +423,7 @@ static void __init print_mtrr_state(void)
print_fixed_last();
}
pr_debug("MTRR variable ranges %sabled:\n",
- mtrr_state.enabled & 2 ? "en" : "dis");
+ mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis");
high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
@@ -382,7 +445,7 @@ static void __init print_mtrr_state(void)
}
/* Grab all of the MTRR state for this CPU into *state */
-void __init get_mtrr_state(void)
+bool __init get_mtrr_state(void)
{
struct mtrr_var_range *vrs;
unsigned long flags;
@@ -426,6 +489,8 @@ void __init get_mtrr_state(void)
post_set();
local_irq_restore(flags);
+
+ return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
}
/* Some BIOS's are messed up and don't set all MTRRs the same! */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ea5f363..e7ed0d8 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,6 +59,12 @@
#define MTRR_TO_PHYS_WC_OFFSET 1000
u32 num_var_ranges;
+static bool __mtrr_enabled;
+
+static bool mtrr_enabled(void)
+{
+ return __mtrr_enabled;
+}
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
@@ -286,7 +292,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
int i, replace, error;
mtrr_type ltype;
- if (!mtrr_if)
+ if (!mtrr_enabled())
return -ENXIO;
error = mtrr_if->validate_add_page(base, size, type);
@@ -435,6 +441,8 @@ static int mtrr_check(unsigned long base, unsigned long size)
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
bool increment)
{
+ if (!mtrr_enabled())
+ return -ENODEV;
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
@@ -463,8 +471,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
unsigned long lbase, lsize;
int error = -EINVAL;
- if (!mtrr_if)
- return -ENXIO;
+ if (!mtrr_enabled())
+ return -ENODEV;
max = num_var_ranges;
/* No CPU hotplug when we change MTRR entries */
@@ -523,6 +531,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
*/
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
+ if (!mtrr_enabled())
+ return -ENODEV;
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
@@ -538,6 +548,9 @@ EXPORT_SYMBOL(mtrr_del);
* attempts to add a WC MTRR covering size bytes starting at base and
* logs an error if this fails.
*
+ * The called should provide a power of two size on an equivalent
+ * power of two boundary.
+ *
* Drivers must store the return value to pass to mtrr_del_wc_if_needed,
* but drivers should not try to interpret that return value.
*/
@@ -545,7 +558,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size)
{
int ret;
- if (pat_enabled)
+ if (pat_enabled() || !mtrr_enabled())
return 0; /* Success! (We don't need to do anything.) */
ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
@@ -577,7 +590,7 @@ void arch_phys_wc_del(int handle)
EXPORT_SYMBOL(arch_phys_wc_del);
/*
- * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
+ * arch_phys_wc_index - translates arch_phys_wc_add's return value
* @handle: Return value from arch_phys_wc_add
*
* This will turn the return value from arch_phys_wc_add into an mtrr
@@ -587,14 +600,14 @@ EXPORT_SYMBOL(arch_phys_wc_del);
* in printk line. Alas there is an illegitimate use in some ancient
* drm ioctls.
*/
-int phys_wc_to_mtrr_index(int handle)
+int arch_phys_wc_index(int handle)
{
if (handle < MTRR_TO_PHYS_WC_OFFSET)
return -1;
else
return handle - MTRR_TO_PHYS_WC_OFFSET;
}
-EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
+EXPORT_SYMBOL_GPL(arch_phys_wc_index);
/*
* HACK ALERT!
@@ -734,10 +747,12 @@ void __init mtrr_bp_init(void)
}
if (mtrr_if) {
+ __mtrr_enabled = true;
set_num_var_ranges();
init_table();
if (use_intel()) {
- get_mtrr_state();
+ /* BIOS may override */
+ __mtrr_enabled = get_mtrr_state();
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
@@ -745,10 +760,16 @@ void __init mtrr_bp_init(void)
}
}
}
+
+ if (!mtrr_enabled())
+ pr_info("MTRR: Disabled\n");
}
void mtrr_ap_init(void)
{
+ if (!mtrr_enabled())
+ return;
+
if (!use_intel() || mtrr_aps_delayed_init)
return;
/*
@@ -774,6 +795,9 @@ void mtrr_save_state(void)
{
int first_cpu;
+ if (!mtrr_enabled())
+ return;
+
get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
@@ -782,6 +806,8 @@ void mtrr_save_state(void)
void set_mtrr_aps_delayed_init(void)
{
+ if (!mtrr_enabled())
+ return;
if (!use_intel())
return;
@@ -793,7 +819,7 @@ void set_mtrr_aps_delayed_init(void)
*/
void mtrr_aps_init(void)
{
- if (!use_intel())
+ if (!use_intel() || !mtrr_enabled())
return;
/*
@@ -810,7 +836,7 @@ void mtrr_aps_init(void)
void mtrr_bp_restore(void)
{
- if (!use_intel())
+ if (!use_intel() || !mtrr_enabled())
return;
mtrr_if->set_all();
@@ -818,7 +844,7 @@ void mtrr_bp_restore(void)
static int __init mtrr_init_finialize(void)
{
- if (!mtrr_if)
+ if (!mtrr_enabled())
return 0;
if (use_intel()) {
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index df5e41f..951884d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -51,7 +51,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
-void get_mtrr_state(void);
+bool get_mtrr_state(void);
extern void set_mtrr_ops(const struct mtrr_ops *ops);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 219d3fb..3998131 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
- [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS,
+ [ C(RESULT_MISS) ] = 0,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
@@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids
[ C(OP_READ) ] = {
/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0,
},
[ C(OP_WRITE) ] = {
/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
@@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
- [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
+ [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -2533,34 +2532,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
return x86_event_sysfs_show(page, config, event);
}
-static __initconst const struct x86_pmu core_pmu = {
- .name = "core",
- .handle_irq = x86_pmu_handle_irq,
- .disable_all = x86_pmu_disable_all,
- .enable_all = core_pmu_enable_all,
- .enable = core_pmu_enable_event,
- .disable = x86_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
- .schedule_events = x86_schedule_events,
- .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .event_map = intel_pmu_event_map,
- .max_events = ARRAY_SIZE(intel_perfmon_event_map),
- .apic = 1,
- /*
- * Intel PMCs cannot be accessed sanely above 32 bit width,
- * so we install an artificial 1<<31 period regardless of
- * the generic event period:
- */
- .max_period = (1ULL << 31) - 1,
- .get_event_constraints = intel_get_event_constraints,
- .put_event_constraints = intel_put_event_constraints,
- .event_constraints = intel_core_event_constraints,
- .guest_get_msrs = core_guest_get_msrs,
- .format_attrs = intel_arch_formats_attr,
- .events_sysfs_show = intel_event_sysfs_show,
-};
-
struct intel_shared_regs *allocate_shared_regs(int cpu)
{
struct intel_shared_regs *regs;
@@ -2743,6 +2714,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
NULL,
};
+static __initconst const struct x86_pmu core_pmu = {
+ .name = "core",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = core_pmu_enable_all,
+ .enable = core_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .event_map = intel_pmu_event_map,
+ .max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ .apic = 1,
+ /*
+ * Intel PMCs cannot be accessed sanely above 32-bit width,
+ * so we install an artificial 1<<31 period regardless of
+ * the generic event period:
+ */
+ .max_period = (1ULL<<31) - 1,
+ .get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
+ .event_constraints = intel_core_event_constraints,
+ .guest_get_msrs = core_guest_get_msrs,
+ .format_attrs = intel_arch_formats_attr,
+ .events_sysfs_show = intel_event_sysfs_show,
+
+ /*
+ * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+ * together with PMU version 1 and thus be using core_pmu with
+ * shared_regs. We need following callbacks here to allocate
+ * it properly.
+ */
+ .cpu_prepare = intel_pmu_cpu_prepare,
+ .cpu_starting = intel_pmu_cpu_starting,
+ .cpu_dying = intel_pmu_cpu_dying,
+};
+
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 999289b9..358c54a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void)
break;
case 60: /* Haswell */
case 69: /* Haswell-Celeron */
+ case 61: /* Broadwell */
rapl_cntr_mask = RAPL_IDX_HSW;
rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 3001015..4562e9e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -1,6 +1,13 @@
/* Nehalem/SandBridge/Haswell uncore support */
#include "perf_event_intel_uncore.h"
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
+
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
@@ -472,6 +479,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ },
};
@@ -502,6 +513,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */
IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
+ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
{ /* end marker */ }
};
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c76d3e3..e068d66 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -22,6 +22,7 @@
#include <linux/elfcore.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 6367a78..5ee7718 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -4,7 +4,6 @@
#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/io.h>
-#include <linux/irqdomain.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/of.h>
@@ -17,6 +16,7 @@
#include <linux/of_pci.h>
#include <linux/initrd.h>
+#include <asm/irqdomain.h>
#include <asm/hpet.h>
#include <asm/apic.h>
#include <asm/pci_x86.h>
@@ -196,38 +196,31 @@ static struct of_ioapic_type of_ioapic_type[] =
},
};
-static int ioapic_xlate(struct irq_domain *domain,
- struct device_node *controller,
- const u32 *intspec, u32 intsize,
- irq_hw_number_t *out_hwirq, u32 *out_type)
+static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
{
+ struct of_phandle_args *irq_data = (void *)arg;
struct of_ioapic_type *it;
- u32 line, idx, gsi;
+ struct irq_alloc_info tmp;
- if (WARN_ON(intsize < 2))
+ if (WARN_ON(irq_data->args_count < 2))
return -EINVAL;
-
- line = intspec[0];
-
- if (intspec[1] >= ARRAY_SIZE(of_ioapic_type))
+ if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type))
return -EINVAL;
- it = &of_ioapic_type[intspec[1]];
+ it = &of_ioapic_type[irq_data->args[1]];
+ ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity);
+ tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain));
+ tmp.ioapic_pin = irq_data->args[0];
- idx = (u32)(long)domain->host_data;
- gsi = mp_pin_to_gsi(idx, line);
- if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0)))
- return -EBUSY;
-
- *out_hwirq = line;
- *out_type = it->out_type;
- return 0;
+ return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp);
}
-const struct irq_domain_ops ioapic_irq_domain_ops = {
- .map = mp_irqdomain_map,
- .unmap = mp_irqdomain_unmap,
- .xlate = ioapic_xlate,
+static const struct irq_domain_ops ioapic_irq_domain_ops = {
+ .alloc = dt_irqdomain_alloc,
+ .free = mp_irqdomain_free,
+ .activate = mp_irqdomain_activate,
+ .deactivate = mp_irqdomain_deactivate,
};
static void __init dtb_add_ioapic(struct device_node *dn)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
deleted file mode 100644
index 1c30976..0000000
--- a/arch/x86/kernel/entry_32.S
+++ /dev/null
@@ -1,1401 +0,0 @@
-/*
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- */
-
-/*
- * entry.S contains the system-call and fault low-level handling routines.
- * This also contains the timer-interrupt handler, as well as all interrupts
- * and faults that can result in a task-switch.
- *
- * NOTE: This code handles signal-recognition, which happens every time
- * after a timer-interrupt and after each system call.
- *
- * I changed all the .align's to 4 (16 byte alignment), as that's faster
- * on a 486.
- *
- * Stack layout in 'syscall_exit':
- * ptrace needs to have all regs on the stack.
- * if the order here is changed, it needs to be
- * updated in fork.c:copy_process, signal.c:do_signal,
- * ptrace.c and ptrace.h
- *
- * 0(%esp) - %ebx
- * 4(%esp) - %ecx
- * 8(%esp) - %edx
- * C(%esp) - %esi
- * 10(%esp) - %edi
- * 14(%esp) - %ebp
- * 18(%esp) - %eax
- * 1C(%esp) - %ds
- * 20(%esp) - %es
- * 24(%esp) - %fs
- * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
- * 2C(%esp) - orig_eax
- * 30(%esp) - %eip
- * 34(%esp) - %cs
- * 38(%esp) - %eflags
- * 3C(%esp) - %oldesp
- * 40(%esp) - %oldss
- *
- * "current" is in register %ebx during any slow entries.
- */
-
-#include <linux/linkage.h>
-#include <linux/err.h>
-#include <asm/thread_info.h>
-#include <asm/irqflags.h>
-#include <asm/errno.h>
-#include <asm/segment.h>
-#include <asm/smp.h>
-#include <asm/page_types.h>
-#include <asm/percpu.h>
-#include <asm/dwarf2.h>
-#include <asm/processor-flags.h>
-#include <asm/ftrace.h>
-#include <asm/irq_vectors.h>
-#include <asm/cpufeature.h>
-#include <asm/alternative-asm.h>
-#include <asm/asm.h>
-#include <asm/smap.h>
-
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_LE 0x40000000
-
-#ifndef CONFIG_AUDITSYSCALL
-#define sysenter_audit syscall_trace_entry
-#define sysexit_audit syscall_exit_work
-#endif
-
- .section .entry.text, "ax"
-
-/*
- * We use macros for low-level operations which need to be overridden
- * for paravirtualization. The following will never clobber any registers:
- * INTERRUPT_RETURN (aka. "iret")
- * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
- *
- * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
- * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
- * Allowing a register to be clobbered can shrink the paravirt replacement
- * enough to patch inline, increasing performance.
- */
-
-#ifdef CONFIG_PREEMPT
-#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
-#else
-#define preempt_stop(clobbers)
-#define resume_kernel restore_all
-#endif
-
-.macro TRACE_IRQS_IRET
-#ifdef CONFIG_TRACE_IRQFLAGS
- testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off?
- jz 1f
- TRACE_IRQS_ON
-1:
-#endif
-.endm
-
-/*
- * User gs save/restore
- *
- * %gs is used for userland TLS and kernel only uses it for stack
- * canary which is required to be at %gs:20 by gcc. Read the comment
- * at the top of stackprotector.h for more info.
- *
- * Local labels 98 and 99 are used.
- */
-#ifdef CONFIG_X86_32_LAZY_GS
-
- /* unfortunately push/pop can't be no-op */
-.macro PUSH_GS
- pushl_cfi $0
-.endm
-.macro POP_GS pop=0
- addl $(4 + \pop), %esp
- CFI_ADJUST_CFA_OFFSET -(4 + \pop)
-.endm
-.macro POP_GS_EX
-.endm
-
- /* all the rest are no-op */
-.macro PTGS_TO_GS
-.endm
-.macro PTGS_TO_GS_EX
-.endm
-.macro GS_TO_REG reg
-.endm
-.macro REG_TO_PTGS reg
-.endm
-.macro SET_KERNEL_GS reg
-.endm
-
-#else /* CONFIG_X86_32_LAZY_GS */
-
-.macro PUSH_GS
- pushl_cfi %gs
- /*CFI_REL_OFFSET gs, 0*/
-.endm
-
-.macro POP_GS pop=0
-98: popl_cfi %gs
- /*CFI_RESTORE gs*/
- .if \pop <> 0
- add $\pop, %esp
- CFI_ADJUST_CFA_OFFSET -\pop
- .endif
-.endm
-.macro POP_GS_EX
-.pushsection .fixup, "ax"
-99: movl $0, (%esp)
- jmp 98b
-.popsection
- _ASM_EXTABLE(98b,99b)
-.endm
-
-.macro PTGS_TO_GS
-98: mov PT_GS(%esp), %gs
-.endm
-.macro PTGS_TO_GS_EX
-.pushsection .fixup, "ax"
-99: movl $0, PT_GS(%esp)
- jmp 98b
-.popsection
- _ASM_EXTABLE(98b,99b)
-.endm
-
-.macro GS_TO_REG reg
- movl %gs, \reg
- /*CFI_REGISTER gs, \reg*/
-.endm
-.macro REG_TO_PTGS reg
- movl \reg, PT_GS(%esp)
- /*CFI_REL_OFFSET gs, PT_GS*/
-.endm
-.macro SET_KERNEL_GS reg
- movl $(__KERNEL_STACK_CANARY), \reg
- movl \reg, %gs
-.endm
-
-#endif /* CONFIG_X86_32_LAZY_GS */
-
-.macro SAVE_ALL
- cld
- PUSH_GS
- pushl_cfi %fs
- /*CFI_REL_OFFSET fs, 0;*/
- pushl_cfi %es
- /*CFI_REL_OFFSET es, 0;*/
- pushl_cfi %ds
- /*CFI_REL_OFFSET ds, 0;*/
- pushl_cfi %eax
- CFI_REL_OFFSET eax, 0
- pushl_cfi %ebp
- CFI_REL_OFFSET ebp, 0
- pushl_cfi %edi
- CFI_REL_OFFSET edi, 0
- pushl_cfi %esi
- CFI_REL_OFFSET esi, 0
- pushl_cfi %edx
- CFI_REL_OFFSET edx, 0
- pushl_cfi %ecx
- CFI_REL_OFFSET ecx, 0
- pushl_cfi %ebx
- CFI_REL_OFFSET ebx, 0
- movl $(__USER_DS), %edx
- movl %edx, %ds
- movl %edx, %es
- movl $(__KERNEL_PERCPU), %edx
- movl %edx, %fs
- SET_KERNEL_GS %edx
-.endm
-
-.macro RESTORE_INT_REGS
- popl_cfi %ebx
- CFI_RESTORE ebx
- popl_cfi %ecx
- CFI_RESTORE ecx
- popl_cfi %edx
- CFI_RESTORE edx
- popl_cfi %esi
- CFI_RESTORE esi
- popl_cfi %edi
- CFI_RESTORE edi
- popl_cfi %ebp
- CFI_RESTORE ebp
- popl_cfi %eax
- CFI_RESTORE eax
-.endm
-
-.macro RESTORE_REGS pop=0
- RESTORE_INT_REGS
-1: popl_cfi %ds
- /*CFI_RESTORE ds;*/
-2: popl_cfi %es
- /*CFI_RESTORE es;*/
-3: popl_cfi %fs
- /*CFI_RESTORE fs;*/
- POP_GS \pop
-.pushsection .fixup, "ax"
-4: movl $0, (%esp)
- jmp 1b
-5: movl $0, (%esp)
- jmp 2b
-6: movl $0, (%esp)
- jmp 3b
-.popsection
- _ASM_EXTABLE(1b,4b)
- _ASM_EXTABLE(2b,5b)
- _ASM_EXTABLE(3b,6b)
- POP_GS_EX
-.endm
-
-.macro RING0_INT_FRAME
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA esp, 3*4
- /*CFI_OFFSET cs, -2*4;*/
- CFI_OFFSET eip, -3*4
-.endm
-
-.macro RING0_EC_FRAME
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA esp, 4*4
- /*CFI_OFFSET cs, -2*4;*/
- CFI_OFFSET eip, -3*4
-.endm
-
-.macro RING0_PTREGS_FRAME
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
- /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
- CFI_OFFSET eip, PT_EIP-PT_OLDESP
- /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
- /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
- CFI_OFFSET eax, PT_EAX-PT_OLDESP
- CFI_OFFSET ebp, PT_EBP-PT_OLDESP
- CFI_OFFSET edi, PT_EDI-PT_OLDESP
- CFI_OFFSET esi, PT_ESI-PT_OLDESP
- CFI_OFFSET edx, PT_EDX-PT_OLDESP
- CFI_OFFSET ecx, PT_ECX-PT_OLDESP
- CFI_OFFSET ebx, PT_EBX-PT_OLDESP
-.endm
-
-ENTRY(ret_from_fork)
- CFI_STARTPROC
- pushl_cfi %eax
- call schedule_tail
- GET_THREAD_INFO(%ebp)
- popl_cfi %eax
- pushl_cfi $0x0202 # Reset kernel eflags
- popfl_cfi
- jmp syscall_exit
- CFI_ENDPROC
-END(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
- CFI_STARTPROC
- pushl_cfi %eax
- call schedule_tail
- GET_THREAD_INFO(%ebp)
- popl_cfi %eax
- pushl_cfi $0x0202 # Reset kernel eflags
- popfl_cfi
- movl PT_EBP(%esp),%eax
- call *PT_EBX(%esp)
- movl $0,PT_EAX(%esp)
- jmp syscall_exit
- CFI_ENDPROC
-ENDPROC(ret_from_kernel_thread)
-
-/*
- * Return to user mode is not as complex as all this looks,
- * but we want the default path for a system call return to
- * go as quickly as possible which is why some of this is
- * less clear than it otherwise should be.
- */
-
- # userspace resumption stub bypassing syscall exit tracing
- ALIGN
- RING0_PTREGS_FRAME
-ret_from_exception:
- preempt_stop(CLBR_ANY)
-ret_from_intr:
- GET_THREAD_INFO(%ebp)
-#ifdef CONFIG_VM86
- movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
- movb PT_CS(%esp), %al
- andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
-#else
- /*
- * We can be coming here from child spawned by kernel_thread().
- */
- movl PT_CS(%esp), %eax
- andl $SEGMENT_RPL_MASK, %eax
-#endif
- cmpl $USER_RPL, %eax
- jb resume_kernel # not returning to v8086 or userspace
-
-ENTRY(resume_userspace)
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
- # int/exception return?
- jne work_pending
- jmp restore_all
-END(ret_from_exception)
-
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
- DISABLE_INTERRUPTS(CLBR_ANY)
-need_resched:
- cmpl $0,PER_CPU_VAR(__preempt_count)
- jnz restore_all
- testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all
- call preempt_schedule_irq
- jmp need_resched
-END(resume_kernel)
-#endif
- CFI_ENDPROC
-
-/* SYSENTER_RETURN points to after the "sysenter" instruction in
- the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
-
- # sysenter call handler stub
-ENTRY(ia32_sysenter_target)
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA esp, 0
- CFI_REGISTER esp, ebp
- movl TSS_sysenter_sp0(%esp),%esp
-sysenter_past_esp:
- /*
- * Interrupts are disabled here, but we can't trace it until
- * enough kernel state to call TRACE_IRQS_OFF can be called - but
- * we immediately enable interrupts at that point anyway.
- */
- pushl_cfi $__USER_DS
- /*CFI_REL_OFFSET ss, 0*/
- pushl_cfi %ebp
- CFI_REL_OFFSET esp, 0
- pushfl_cfi
- orl $X86_EFLAGS_IF, (%esp)
- pushl_cfi $__USER_CS
- /*CFI_REL_OFFSET cs, 0*/
- /*
- * Push current_thread_info()->sysenter_return to the stack.
- * A tiny bit of offset fixup is necessary: TI_sysenter_return
- * is relative to thread_info, which is at the bottom of the
- * kernel stack page. 4*4 means the 4 words pushed above;
- * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
- * and THREAD_SIZE takes us to the bottom.
- */
- pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
- CFI_REL_OFFSET eip, 0
-
- pushl_cfi %eax
- SAVE_ALL
- ENABLE_INTERRUPTS(CLBR_NONE)
-
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
- cmpl $__PAGE_OFFSET-3,%ebp
- jae syscall_fault
- ASM_STAC
-1: movl (%ebp),%ebp
- ASM_CLAC
- movl %ebp,PT_EBP(%esp)
- _ASM_EXTABLE(1b,syscall_fault)
-
- GET_THREAD_INFO(%ebp)
-
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
- jnz sysenter_audit
-sysenter_do_call:
- cmpl $(NR_syscalls), %eax
- jae sysenter_badsys
- call *sys_call_table(,%eax,4)
-sysenter_after_call:
- movl %eax,PT_EAX(%esp)
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- testl $_TIF_ALLWORK_MASK, %ecx
- jnz sysexit_audit
-sysenter_exit:
-/* if something modifies registers it must also disable sysexit */
- movl PT_EIP(%esp), %edx
- movl PT_OLDESP(%esp), %ecx
- xorl %ebp,%ebp
- TRACE_IRQS_ON
-1: mov PT_FS(%esp), %fs
- PTGS_TO_GS
- ENABLE_INTERRUPTS_SYSEXIT
-
-#ifdef CONFIG_AUDITSYSCALL
-sysenter_audit:
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
- jnz syscall_trace_entry
- /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
- movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
- /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
- pushl_cfi PT_ESI(%esp) /* a3: 5th arg */
- pushl_cfi PT_EDX+4(%esp) /* a2: 4th arg */
- call __audit_syscall_entry
- popl_cfi %ecx /* get that remapped edx off the stack */
- popl_cfi %ecx /* get that remapped esi off the stack */
- movl PT_EAX(%esp),%eax /* reload syscall number */
- jmp sysenter_do_call
-
-sysexit_audit:
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
- jnz syscall_exit_work
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY)
- movl %eax,%edx /* second arg, syscall return value */
- cmpl $-MAX_ERRNO,%eax /* is it an error ? */
- setbe %al /* 1 if so, 0 if not */
- movzbl %al,%eax /* zero-extend that */
- call __audit_syscall_exit
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
- jnz syscall_exit_work
- movl PT_EAX(%esp),%eax /* reload syscall return value */
- jmp sysenter_exit
-#endif
-
- CFI_ENDPROC
-.pushsection .fixup,"ax"
-2: movl $0,PT_FS(%esp)
- jmp 1b
-.popsection
- _ASM_EXTABLE(1b,2b)
- PTGS_TO_GS_EX
-ENDPROC(ia32_sysenter_target)
-
- # system call handler stub
-ENTRY(system_call)
- RING0_INT_FRAME # can't unwind into user space anyway
- ASM_CLAC
- pushl_cfi %eax # save orig_eax
- SAVE_ALL
- GET_THREAD_INFO(%ebp)
- # system call tracing in operation / emulation
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
- jnz syscall_trace_entry
- cmpl $(NR_syscalls), %eax
- jae syscall_badsys
-syscall_call:
- call *sys_call_table(,%eax,4)
-syscall_after_call:
- movl %eax,PT_EAX(%esp) # store the return value
-syscall_exit:
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- testl $_TIF_ALLWORK_MASK, %ecx # current->work
- jnz syscall_exit_work
-
-restore_all:
- TRACE_IRQS_IRET
-restore_all_notrace:
-#ifdef CONFIG_X86_ESPFIX32
- movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
- # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
- # are returning to the kernel.
- # See comments in process.c:copy_thread() for details.
- movb PT_OLDSS(%esp), %ah
- movb PT_CS(%esp), %al
- andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
- cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
- CFI_REMEMBER_STATE
- je ldt_ss # returning to user-space with LDT SS
-#endif
-restore_nocheck:
- RESTORE_REGS 4 # skip orig_eax/error_code
-irq_return:
- INTERRUPT_RETURN
-.section .fixup,"ax"
-ENTRY(iret_exc)
- pushl $0 # no error code
- pushl $do_iret_error
- jmp error_code
-.previous
- _ASM_EXTABLE(irq_return,iret_exc)
-
-#ifdef CONFIG_X86_ESPFIX32
- CFI_RESTORE_STATE
-ldt_ss:
-#ifdef CONFIG_PARAVIRT
- /*
- * The kernel can't run on a non-flat stack if paravirt mode
- * is active. Rather than try to fixup the high bits of
- * ESP, bypass this code entirely. This may break DOSemu
- * and/or Wine support in a paravirt VM, although the option
- * is still available to implement the setting of the high
- * 16-bits in the INTERRUPT_RETURN paravirt-op.
- */
- cmpl $0, pv_info+PARAVIRT_enabled
- jne restore_nocheck
-#endif
-
-/*
- * Setup and switch to ESPFIX stack
- *
- * We're returning to userspace with a 16 bit stack. The CPU will not
- * restore the high word of ESP for us on executing iret... This is an
- * "official" bug of all the x86-compatible CPUs, which we can work
- * around to make dosemu and wine happy. We do this by preloading the
- * high word of ESP with the high word of the userspace ESP while
- * compensating for the offset by changing to the ESPFIX segment with
- * a base address that matches for the difference.
- */
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
- mov %esp, %edx /* load kernel esp */
- mov PT_OLDESP(%esp), %eax /* load userspace esp */
- mov %dx, %ax /* eax: new kernel esp */
- sub %eax, %edx /* offset (low word is 0) */
- shr $16, %edx
- mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
- mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
- pushl_cfi $__ESPFIX_SS
- pushl_cfi %eax /* new kernel esp */
- /* Disable interrupts, but do not irqtrace this section: we
- * will soon execute iret and the tracer was already set to
- * the irqstate after the iret */
- DISABLE_INTERRUPTS(CLBR_EAX)
- lss (%esp), %esp /* switch to espfix segment */
- CFI_ADJUST_CFA_OFFSET -8
- jmp restore_nocheck
-#endif
- CFI_ENDPROC
-ENDPROC(system_call)
-
- # perform work that needs to be done immediately before resumption
- ALIGN
- RING0_PTREGS_FRAME # can't unwind into user space anyway
-work_pending:
- testb $_TIF_NEED_RESCHED, %cl
- jz work_notifysig
-work_resched:
- call schedule
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
- # than syscall tracing?
- jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
- jnz work_resched
-
-work_notifysig: # deal with pending signals and
- # notify-resume requests
-#ifdef CONFIG_VM86
- testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
- movl %esp, %eax
- jnz work_notifysig_v86 # returning to kernel-space or
- # vm86-space
-1:
-#else
- movl %esp, %eax
-#endif
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- movb PT_CS(%esp), %bl
- andb $SEGMENT_RPL_MASK, %bl
- cmpb $USER_RPL, %bl
- jb resume_kernel
- xorl %edx, %edx
- call do_notify_resume
- jmp resume_userspace
-
-#ifdef CONFIG_VM86
- ALIGN
-work_notifysig_v86:
- pushl_cfi %ecx # save ti_flags for do_notify_resume
- call save_v86_state # %eax contains pt_regs pointer
- popl_cfi %ecx
- movl %eax, %esp
- jmp 1b
-#endif
-END(work_pending)
-
- # perform syscall exit tracing
- ALIGN
-syscall_trace_entry:
- movl $-ENOSYS,PT_EAX(%esp)
- movl %esp, %eax
- call syscall_trace_enter
- /* What it returned is what we'll actually use. */
- cmpl $(NR_syscalls), %eax
- jnae syscall_call
- jmp syscall_exit
-END(syscall_trace_entry)
-
- # perform syscall exit tracing
- ALIGN
-syscall_exit_work:
- testl $_TIF_WORK_SYSCALL_EXIT, %ecx
- jz work_pending
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
- # schedule() instead
- movl %esp, %eax
- call syscall_trace_leave
- jmp resume_userspace
-END(syscall_exit_work)
- CFI_ENDPROC
-
- RING0_INT_FRAME # can't unwind into user space anyway
-syscall_fault:
- ASM_CLAC
- GET_THREAD_INFO(%ebp)
- movl $-EFAULT,PT_EAX(%esp)
- jmp resume_userspace
-END(syscall_fault)
-
-syscall_badsys:
- movl $-ENOSYS,%eax
- jmp syscall_after_call
-END(syscall_badsys)
-
-sysenter_badsys:
- movl $-ENOSYS,%eax
- jmp sysenter_after_call
-END(sysenter_badsys)
- CFI_ENDPROC
-
-.macro FIXUP_ESPFIX_STACK
-/*
- * Switch back for ESPFIX stack to the normal zerobased stack
- *
- * We can't call C functions using the ESPFIX stack. This code reads
- * the high word of the segment base from the GDT and swiches to the
- * normal stack and adjusts ESP with the matching offset.
- */
-#ifdef CONFIG_X86_ESPFIX32
- /* fixup the stack */
- mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
- mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
- shl $16, %eax
- addl %esp, %eax /* the adjusted stack pointer */
- pushl_cfi $__KERNEL_DS
- pushl_cfi %eax
- lss (%esp), %esp /* switch to the normal stack segment */
- CFI_ADJUST_CFA_OFFSET -8
-#endif
-.endm
-.macro UNWIND_ESPFIX_STACK
-#ifdef CONFIG_X86_ESPFIX32
- movl %ss, %eax
- /* see if on espfix stack */
- cmpw $__ESPFIX_SS, %ax
- jne 27f
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %es
- /* switch to normal stack */
- FIXUP_ESPFIX_STACK
-27:
-#endif
-.endm
-
-/*
- * Build the entry stubs with some assembler magic.
- * We pack 1 stub into every 8-byte block.
- */
- .align 8
-ENTRY(irq_entries_start)
- RING0_INT_FRAME
- vector=FIRST_EXTERNAL_VECTOR
- .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
- pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
- vector=vector+1
- jmp common_interrupt
- CFI_ADJUST_CFA_OFFSET -4
- .align 8
- .endr
-END(irq_entries_start)
-
-/*
- * the CPU automatically disables interrupts when executing an IRQ vector,
- * so IRQ-flags tracing has to follow that:
- */
- .p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
- ASM_CLAC
- addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
- SAVE_ALL
- TRACE_IRQS_OFF
- movl %esp,%eax
- call do_IRQ
- jmp ret_from_intr
-ENDPROC(common_interrupt)
- CFI_ENDPROC
-
-#define BUILD_INTERRUPT3(name, nr, fn) \
-ENTRY(name) \
- RING0_INT_FRAME; \
- ASM_CLAC; \
- pushl_cfi $~(nr); \
- SAVE_ALL; \
- TRACE_IRQS_OFF \
- movl %esp,%eax; \
- call fn; \
- jmp ret_from_intr; \
- CFI_ENDPROC; \
-ENDPROC(name)
-
-
-#ifdef CONFIG_TRACING
-#define TRACE_BUILD_INTERRUPT(name, nr) \
- BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
-#else
-#define TRACE_BUILD_INTERRUPT(name, nr)
-#endif
-
-#define BUILD_INTERRUPT(name, nr) \
- BUILD_INTERRUPT3(name, nr, smp_##name); \
- TRACE_BUILD_INTERRUPT(name, nr)
-
-/* The include is where all of the SMP etc. interrupts come from */
-#include <asm/entry_arch.h>
-
-ENTRY(coprocessor_error)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0
- pushl_cfi $do_coprocessor_error
- jmp error_code
- CFI_ENDPROC
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0
-#ifdef CONFIG_X86_INVD_BUG
- /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
- ALTERNATIVE "pushl_cfi $do_general_protection", \
- "pushl $do_simd_coprocessor_error", \
- X86_FEATURE_XMM
-#else
- pushl_cfi $do_simd_coprocessor_error
-#endif
- jmp error_code
- CFI_ENDPROC
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $-1 # mark this as an int
- pushl_cfi $do_device_not_available
- jmp error_code
- CFI_ENDPROC
-END(device_not_available)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
- iret
- _ASM_EXTABLE(native_iret, iret_exc)
-END(native_iret)
-
-ENTRY(native_irq_enable_sysexit)
- sti
- sysexit
-END(native_irq_enable_sysexit)
-#endif
-
-ENTRY(overflow)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0
- pushl_cfi $do_overflow
- jmp error_code
- CFI_ENDPROC
-END(overflow)
-
-ENTRY(bounds)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0
- pushl_cfi $do_bounds
- jmp error_code
- CFI_ENDPROC
-END(bounds)
-
-ENTRY(invalid_op)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0
- pushl_cfi $do_invalid_op
- jmp error_code
- CFI_ENDPROC
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0
- pushl_cfi $do_coprocessor_segment_overrun
- jmp error_code
- CFI_ENDPROC
-END(coprocessor_segment_overrun)
-
-ENTRY(invalid_TSS)
- RING0_EC_FRAME
- ASM_CLAC
- pushl_cfi $do_invalid_TSS
- jmp error_code
- CFI_ENDPROC
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
- RING0_EC_FRAME
- ASM_CLAC
- pushl_cfi $do_segment_not_present
- jmp error_code
- CFI_ENDPROC
-END(segment_not_present)
-
-ENTRY(stack_segment)
- RING0_EC_FRAME
- ASM_CLAC
- pushl_cfi $do_stack_segment
- jmp error_code
- CFI_ENDPROC
-END(stack_segment)
-
-ENTRY(alignment_check)
- RING0_EC_FRAME
- ASM_CLAC
- pushl_cfi $do_alignment_check
- jmp error_code
- CFI_ENDPROC
-END(alignment_check)
-
-ENTRY(divide_error)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0 # no error code
- pushl_cfi $do_divide_error
- jmp error_code
- CFI_ENDPROC
-END(divide_error)
-
-#ifdef CONFIG_X86_MCE
-ENTRY(machine_check)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0
- pushl_cfi machine_check_vector
- jmp error_code
- CFI_ENDPROC
-END(machine_check)
-#endif
-
-ENTRY(spurious_interrupt_bug)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $0
- pushl_cfi $do_spurious_interrupt_bug
- jmp error_code
- CFI_ENDPROC
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_XEN
-/* Xen doesn't set %esp to be precisely what the normal sysenter
- entrypoint expects, so fix it up before using the normal path. */
-ENTRY(xen_sysenter_target)
- RING0_INT_FRAME
- addl $5*4, %esp /* remove xen-provided frame */
- CFI_ADJUST_CFA_OFFSET -5*4
- jmp sysenter_past_esp
- CFI_ENDPROC
-
-ENTRY(xen_hypervisor_callback)
- CFI_STARTPROC
- pushl_cfi $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- TRACE_IRQS_OFF
-
- /* Check to see if we got the event in the critical
- region in xen_iret_direct, after we've reenabled
- events and checked for pending events. This simulates
- iret instruction's behaviour where it delivers a
- pending interrupt when enabling interrupts. */
- movl PT_EIP(%esp),%eax
- cmpl $xen_iret_start_crit,%eax
- jb 1f
- cmpl $xen_iret_end_crit,%eax
- jae 1f
-
- jmp xen_iret_crit_fixup
-
-ENTRY(xen_do_upcall)
-1: mov %esp, %eax
- call xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPT
- call xen_maybe_preempt_hcall
-#endif
- jmp ret_from_intr
- CFI_ENDPROC
-ENDPROC(xen_hypervisor_callback)
-
-# Hypervisor uses this for application faults while it executes.
-# We get here for two reasons:
-# 1. Fault while reloading DS, ES, FS or GS
-# 2. Fault while executing IRET
-# Category 1 we fix up by reattempting the load, and zeroing the segment
-# register if the load fails.
-# Category 2 we fix up by jumping to do_iret_error. We cannot use the
-# normal Linux return path in this case because if we use the IRET hypercall
-# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
-# We distinguish between categories by maintaining a status value in EAX.
-ENTRY(xen_failsafe_callback)
- CFI_STARTPROC
- pushl_cfi %eax
- movl $1,%eax
-1: mov 4(%esp),%ds
-2: mov 8(%esp),%es
-3: mov 12(%esp),%fs
-4: mov 16(%esp),%gs
- /* EAX == 0 => Category 1 (Bad segment)
- EAX != 0 => Category 2 (Bad IRET) */
- testl %eax,%eax
- popl_cfi %eax
- lea 16(%esp),%esp
- CFI_ADJUST_CFA_OFFSET -16
- jz 5f
- jmp iret_exc
-5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- jmp ret_from_exception
- CFI_ENDPROC
-
-.section .fixup,"ax"
-6: xorl %eax,%eax
- movl %eax,4(%esp)
- jmp 1b
-7: xorl %eax,%eax
- movl %eax,8(%esp)
- jmp 2b
-8: xorl %eax,%eax
- movl %eax,12(%esp)
- jmp 3b
-9: xorl %eax,%eax
- movl %eax,16(%esp)
- jmp 4b
-.previous
- _ASM_EXTABLE(1b,6b)
- _ASM_EXTABLE(2b,7b)
- _ASM_EXTABLE(3b,8b)
- _ASM_EXTABLE(4b,9b)
-ENDPROC(xen_failsafe_callback)
-
-BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- xen_evtchn_do_upcall)
-
-#endif /* CONFIG_XEN */
-
-#if IS_ENABLED(CONFIG_HYPERV)
-
-BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- hyperv_vector_handler)
-
-#endif /* CONFIG_HYPERV */
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-ENTRY(mcount)
- ret
-END(mcount)
-
-ENTRY(ftrace_caller)
- pushl %eax
- pushl %ecx
- pushl %edx
- pushl $0 /* Pass NULL as regs pointer */
- movl 4*4(%esp), %eax
- movl 0x4(%ebp), %edx
- movl function_trace_op, %ecx
- subl $MCOUNT_INSN_SIZE, %eax
-
-.globl ftrace_call
-ftrace_call:
- call ftrace_stub
-
- addl $4,%esp /* skip NULL pointer */
- popl %edx
- popl %ecx
- popl %eax
-ftrace_ret:
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- jmp ftrace_stub
-#endif
-
-.globl ftrace_stub
-ftrace_stub:
- ret
-END(ftrace_caller)
-
-ENTRY(ftrace_regs_caller)
- pushf /* push flags before compare (in cs location) */
-
- /*
- * i386 does not save SS and ESP when coming from kernel.
- * Instead, to get sp, &regs->sp is used (see ptrace.h).
- * Unfortunately, that means eflags must be at the same location
- * as the current return ip is. We move the return ip into the
- * ip location, and move flags into the return ip location.
- */
- pushl 4(%esp) /* save return ip into ip slot */
-
- pushl $0 /* Load 0 into orig_ax */
- pushl %gs
- pushl %fs
- pushl %es
- pushl %ds
- pushl %eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
-
- movl 13*4(%esp), %eax /* Get the saved flags */
- movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
- /* clobbering return ip */
- movl $__KERNEL_CS,13*4(%esp)
-
- movl 12*4(%esp), %eax /* Load ip (1st parameter) */
- subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
- movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
- movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
- pushl %esp /* Save pt_regs as 4th parameter */
-
-GLOBAL(ftrace_regs_call)
- call ftrace_stub
-
- addl $4, %esp /* Skip pt_regs */
- movl 14*4(%esp), %eax /* Move flags back into cs */
- movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
- movl 12*4(%esp), %eax /* Get return ip from regs->ip */
- movl %eax, 14*4(%esp) /* Put return ip back for ret */
-
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- popl %ds
- popl %es
- popl %fs
- popl %gs
- addl $8, %esp /* Skip orig_ax and ip */
- popf /* Pop flags at end (no addl to corrupt flags) */
- jmp ftrace_ret
-
- popf
- jmp ftrace_stub
-#else /* ! CONFIG_DYNAMIC_FTRACE */
-
-ENTRY(mcount)
- cmpl $__PAGE_OFFSET, %esp
- jb ftrace_stub /* Paging not enabled yet? */
-
- cmpl $ftrace_stub, ftrace_trace_function
- jnz trace
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- cmpl $ftrace_stub, ftrace_graph_return
- jnz ftrace_graph_caller
-
- cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
- jnz ftrace_graph_caller
-#endif
-.globl ftrace_stub
-ftrace_stub:
- ret
-
- /* taken from glibc */
-trace:
- pushl %eax
- pushl %ecx
- pushl %edx
- movl 0xc(%esp), %eax
- movl 0x4(%ebp), %edx
- subl $MCOUNT_INSN_SIZE, %eax
-
- call *ftrace_trace_function
-
- popl %edx
- popl %ecx
- popl %eax
- jmp ftrace_stub
-END(mcount)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_TRACER */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
- pushl %eax
- pushl %ecx
- pushl %edx
- movl 0xc(%esp), %eax
- lea 0x4(%ebp), %edx
- movl (%ebp), %ecx
- subl $MCOUNT_INSN_SIZE, %eax
- call prepare_ftrace_return
- popl %edx
- popl %ecx
- popl %eax
- ret
-END(ftrace_graph_caller)
-
-.globl return_to_handler
-return_to_handler:
- pushl %eax
- pushl %edx
- movl %ebp, %eax
- call ftrace_return_to_handler
- movl %eax, %ecx
- popl %edx
- popl %eax
- jmp *%ecx
-#endif
-
-#ifdef CONFIG_TRACING
-ENTRY(trace_page_fault)
- RING0_EC_FRAME
- ASM_CLAC
- pushl_cfi $trace_do_page_fault
- jmp error_code
- CFI_ENDPROC
-END(trace_page_fault)
-#endif
-
-ENTRY(page_fault)
- RING0_EC_FRAME
- ASM_CLAC
- pushl_cfi $do_page_fault
- ALIGN
-error_code:
- /* the function address is in %gs's slot on the stack */
- pushl_cfi %fs
- /*CFI_REL_OFFSET fs, 0*/
- pushl_cfi %es
- /*CFI_REL_OFFSET es, 0*/
- pushl_cfi %ds
- /*CFI_REL_OFFSET ds, 0*/
- pushl_cfi_reg eax
- pushl_cfi_reg ebp
- pushl_cfi_reg edi
- pushl_cfi_reg esi
- pushl_cfi_reg edx
- pushl_cfi_reg ecx
- pushl_cfi_reg ebx
- cld
- movl $(__KERNEL_PERCPU), %ecx
- movl %ecx, %fs
- UNWIND_ESPFIX_STACK
- GS_TO_REG %ecx
- movl PT_GS(%esp), %edi # get the function address
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
- REG_TO_PTGS %ecx
- SET_KERNEL_GS %ecx
- movl $(__USER_DS), %ecx
- movl %ecx, %ds
- movl %ecx, %es
- TRACE_IRQS_OFF
- movl %esp,%eax # pt_regs pointer
- call *%edi
- jmp ret_from_exception
- CFI_ENDPROC
-END(page_fault)
-
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-.macro FIX_STACK offset ok label
- cmpw $__KERNEL_CS, 4(%esp)
- jne \ok
-\label:
- movl TSS_sysenter_sp0 + \offset(%esp), %esp
- CFI_DEF_CFA esp, 0
- CFI_UNDEFINED eip
- pushfl_cfi
- pushl_cfi $__KERNEL_CS
- pushl_cfi $sysenter_past_esp
- CFI_REL_OFFSET eip, 0
-.endm
-
-ENTRY(debug)
- RING0_INT_FRAME
- ASM_CLAC
- cmpl $ia32_sysenter_target,(%esp)
- jne debug_stack_correct
- FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
-debug_stack_correct:
- pushl_cfi $-1 # mark this as an int
- SAVE_ALL
- TRACE_IRQS_OFF
- xorl %edx,%edx # error code 0
- movl %esp,%eax # pt_regs pointer
- call do_debug
- jmp ret_from_exception
- CFI_ENDPROC
-END(debug)
-
-/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
- */
-ENTRY(nmi)
- RING0_INT_FRAME
- ASM_CLAC
-#ifdef CONFIG_X86_ESPFIX32
- pushl_cfi %eax
- movl %ss, %eax
- cmpw $__ESPFIX_SS, %ax
- popl_cfi %eax
- je nmi_espfix_stack
-#endif
- cmpl $ia32_sysenter_target,(%esp)
- je nmi_stack_fixup
- pushl_cfi %eax
- movl %esp,%eax
- /* Do not access memory above the end of our stack page,
- * it might not exist.
- */
- andl $(THREAD_SIZE-1),%eax
- cmpl $(THREAD_SIZE-20),%eax
- popl_cfi %eax
- jae nmi_stack_correct
- cmpl $ia32_sysenter_target,12(%esp)
- je nmi_debug_stack_check
-nmi_stack_correct:
- /* We have a RING0_INT_FRAME here */
- pushl_cfi %eax
- SAVE_ALL
- xorl %edx,%edx # zero error code
- movl %esp,%eax # pt_regs pointer
- call do_nmi
- jmp restore_all_notrace
- CFI_ENDPROC
-
-nmi_stack_fixup:
- RING0_INT_FRAME
- FIX_STACK 12, nmi_stack_correct, 1
- jmp nmi_stack_correct
-
-nmi_debug_stack_check:
- /* We have a RING0_INT_FRAME here */
- cmpw $__KERNEL_CS,16(%esp)
- jne nmi_stack_correct
- cmpl $debug,(%esp)
- jb nmi_stack_correct
- cmpl $debug_esp_fix_insn,(%esp)
- ja nmi_stack_correct
- FIX_STACK 24, nmi_stack_correct, 1
- jmp nmi_stack_correct
-
-#ifdef CONFIG_X86_ESPFIX32
-nmi_espfix_stack:
- /* We have a RING0_INT_FRAME here.
- *
- * create the pointer to lss back
- */
- pushl_cfi %ss
- pushl_cfi %esp
- addl $4, (%esp)
- /* copy the iret frame of 12 bytes */
- .rept 3
- pushl_cfi 16(%esp)
- .endr
- pushl_cfi %eax
- SAVE_ALL
- FIXUP_ESPFIX_STACK # %eax == %esp
- xorl %edx,%edx # zero error code
- call do_nmi
- RESTORE_REGS
- lss 12+4(%esp), %esp # back to espfix stack
- CFI_ADJUST_CFA_OFFSET -24
- jmp irq_return
-#endif
- CFI_ENDPROC
-END(nmi)
-
-ENTRY(int3)
- RING0_INT_FRAME
- ASM_CLAC
- pushl_cfi $-1 # mark this as an int
- SAVE_ALL
- TRACE_IRQS_OFF
- xorl %edx,%edx # zero error code
- movl %esp,%eax # pt_regs pointer
- call do_int3
- jmp ret_from_exception
- CFI_ENDPROC
-END(int3)
-
-ENTRY(general_protection)
- RING0_EC_FRAME
- pushl_cfi $do_general_protection
- jmp error_code
- CFI_ENDPROC
-END(general_protection)
-
-#ifdef CONFIG_KVM_GUEST
-ENTRY(async_page_fault)
- RING0_EC_FRAME
- ASM_CLAC
- pushl_cfi $do_async_page_fault
- jmp error_code
- CFI_ENDPROC
-END(async_page_fault)
-#endif
-
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2b55ee6..5a46681 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
clear_bss();
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
- set_intr_gate(i, early_idt_handlers[i]);
+ set_intr_gate(i, early_idt_handler_array[i]);
load_idt((const struct desc_ptr *)&idt_descr);
copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d031bad..544dec4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -478,21 +478,22 @@ is486:
__INIT
setup_once:
/*
- * Set up a idt with 256 entries pointing to ignore_int,
- * interrupt gates. It doesn't actually load idt - that needs
- * to be done on each CPU. Interrupts are enabled elsewhere,
- * when we can be relatively sure everything is ok.
+ * Set up a idt with 256 interrupt gates that push zero if there
+ * is no error code and then jump to early_idt_handler_common.
+ * It doesn't actually load the idt - that needs to be done on
+ * each CPU. Interrupts are enabled elsewhere, when we can be
+ * relatively sure everything is ok.
*/
movl $idt_table,%edi
- movl $early_idt_handlers,%eax
+ movl $early_idt_handler_array,%eax
movl $NUM_EXCEPTION_VECTORS,%ecx
1:
movl %eax,(%edi)
movl %eax,4(%edi)
/* interrupt gate, dpl=0, present */
movl $(0x8E000000 + __KERNEL_CS),2(%edi)
- addl $9,%eax
+ addl $EARLY_IDT_HANDLER_SIZE,%eax
addl $8,%edi
loop 1b
@@ -524,30 +525,32 @@ setup_once:
andl $0,setup_once_ref /* Once is enough, thanks */
ret
-ENTRY(early_idt_handlers)
+ENTRY(early_idt_handler_array)
# 36(%esp) %eflags
# 32(%esp) %cs
# 28(%esp) %eip
# 24(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .if (EXCEPTION_ERRCODE_MASK >> i) & 1
- ASM_NOP2
- .else
+ .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
pushl $0 # Dummy error code, to make stack frame uniform
.endif
pushl $i # 20(%esp) Vector number
- jmp early_idt_handler
+ jmp early_idt_handler_common
i = i + 1
+ .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
-ENDPROC(early_idt_handlers)
+ENDPROC(early_idt_handler_array)
- /* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+ /*
+ * The stack is the hardware frame, an error code or zero, and the
+ * vector number.
+ */
cld
cmpl $2,(%esp) # X86_TRAP_NMI
- je is_nmi # Ignore NMI
+ je .Lis_nmi # Ignore NMI
cmpl $2,%ss:early_recursion_flag
je hlt_loop
@@ -600,10 +603,10 @@ ex_entry:
pop %ecx
pop %eax
decl %ss:early_recursion_flag
-is_nmi:
+.Lis_nmi:
addl $8,%esp /* drop vector number and error code */
iret
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
/* This is the default interrupt "handler" :-) */
ALIGN
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ae6588b..e5c27f7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -321,30 +321,32 @@ bad_address:
jmp bad_address
__INIT
- .globl early_idt_handlers
-early_idt_handlers:
+ENTRY(early_idt_handler_array)
# 104(%rsp) %rflags
# 96(%rsp) %cs
# 88(%rsp) %rip
# 80(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .if (EXCEPTION_ERRCODE_MASK >> i) & 1
- ASM_NOP2
- .else
+ .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
pushq $0 # Dummy error code, to make stack frame uniform
.endif
pushq $i # 72(%rsp) Vector number
- jmp early_idt_handler
+ jmp early_idt_handler_common
i = i + 1
+ .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
+ENDPROC(early_idt_handler_array)
-/* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+ /*
+ * The stack is the hardware frame, an error code or zero, and the
+ * vector number.
+ */
cld
cmpl $2,(%rsp) # X86_TRAP_NMI
- je is_nmi # Ignore NMI
+ je .Lis_nmi # Ignore NMI
cmpl $2,early_recursion_flag(%rip)
jz 1f
@@ -409,10 +411,10 @@ ENTRY(early_idt_handler)
popq %rcx
popq %rax
decl early_recursion_flag(%rip)
-is_nmi:
+.Lis_nmi:
addq $16,%rsp # drop vector number and error code
INTERRUPT_RETURN
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
__INITDATA
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3acbff4..10757d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,6 +12,7 @@
#include <linux/pm.h>
#include <linux/io.h>
+#include <asm/irqdomain.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
#include <asm/time.h>
@@ -305,8 +306,6 @@ static void hpet_legacy_clockevent_register(void)
printk(KERN_DEBUG "hpet clockevent registered\n");
}
-static int hpet_setup_msi_irq(unsigned int irq);
-
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
@@ -357,7 +356,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
hpet_enable_legacy_int();
} else {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
- hpet_setup_msi_irq(hdev->irq);
+ irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
disable_irq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
@@ -423,6 +422,7 @@ static int hpet_legacy_next_event(unsigned long delta,
static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
static struct hpet_dev *hpet_devs;
+static struct irq_domain *hpet_domain;
void hpet_msi_unmask(struct irq_data *data)
{
@@ -473,31 +473,6 @@ static int hpet_msi_next_event(unsigned long delta,
return hpet_next_event(delta, evt, hdev->num);
}
-static int hpet_setup_msi_irq(unsigned int irq)
-{
- if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
- irq_free_hwirq(irq);
- return -EINVAL;
- }
- return 0;
-}
-
-static int hpet_assign_irq(struct hpet_dev *dev)
-{
- unsigned int irq = irq_alloc_hwirq(-1);
-
- if (!irq)
- return -EINVAL;
-
- irq_set_handler_data(irq, dev);
-
- if (hpet_setup_msi_irq(irq))
- return -EINVAL;
-
- dev->irq = irq;
- return 0;
-}
-
static irqreturn_t hpet_interrupt_handler(int irq, void *data)
{
struct hpet_dev *dev = (struct hpet_dev *)data;
@@ -540,9 +515,6 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
if (!(hdev->flags & HPET_DEV_VALID))
return;
- if (hpet_setup_msi_irq(hdev->irq))
- return;
-
hdev->cpu = cpu;
per_cpu(cpu_hpet_dev, cpu) = hdev;
evt->name = hdev->name;
@@ -574,7 +546,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
unsigned int id;
unsigned int num_timers;
unsigned int num_timers_used = 0;
- int i;
+ int i, irq;
if (hpet_msi_disable)
return;
@@ -587,6 +559,10 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
num_timers++; /* Value read out starts from 0 */
hpet_print_config();
+ hpet_domain = hpet_create_irq_domain(hpet_blockid);
+ if (!hpet_domain)
+ return;
+
hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
if (!hpet_devs)
return;
@@ -604,12 +580,14 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
hdev->flags = 0;
if (cfg & HPET_TN_PERIODIC_CAP)
hdev->flags |= HPET_DEV_PERI_CAP;
+ sprintf(hdev->name, "hpet%d", i);
hdev->num = i;
- sprintf(hdev->name, "hpet%d", i);
- if (hpet_assign_irq(hdev))
+ irq = hpet_assign_irq(hpet_domain, hdev, hdev->num);
+ if (irq <= 0)
continue;
+ hdev->irq = irq;
hdev->flags |= HPET_DEV_FSB_CAP;
hdev->flags |= HPET_DEV_VALID;
num_timers_used++;
@@ -709,10 +687,6 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
}
#else
-static int hpet_setup_msi_irq(unsigned int irq)
-{
- return 0;
-}
static void hpet_msi_capability_lookup(unsigned int start_timer)
{
return;
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 00918327..6185d31 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -173,6 +173,21 @@ static void init_thread_xstate(void)
xstate_size = sizeof(struct i387_fxsave_struct);
else
xstate_size = sizeof(struct i387_fsave_struct);
+
+ /*
+ * Quirk: we don't yet handle the XSAVES* instructions
+ * correctly, as we don't correctly convert between
+ * standard and compacted format when interfacing
+ * with user-space - so disable it for now.
+ *
+ * The difference is small: with recent CPUs the
+ * compacted format is only marginally smaller than
+ * the standard FPU state format.
+ *
+ * ( This is easy to backport while we are fixing
+ * XSAVES* support. )
+ */
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
}
/*
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index e7cc537..16cb827 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -329,8 +329,8 @@ static void init_8259A(int auto_eoi)
*/
outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
- /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
- outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
+ /* ICW2: 8259A-1 IR0-7 mapped to ISA_IRQ_VECTOR(0) */
+ outb_pic(ISA_IRQ_VECTOR(0), PIC_MASTER_IMR);
/* 8259A-1 (the master) has a slave on IR2 */
outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);
@@ -342,8 +342,8 @@ static void init_8259A(int auto_eoi)
outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
- /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */
- outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
+ /* ICW2: 8259A-2 IR0-7 mapped to ISA_IRQ_VECTOR(8) */
+ outb_pic(ISA_IRQ_VECTOR(8), PIC_SLAVE_IMR);
/* 8259A-2 is a slave on master's IR2 */
outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
/* (slave's support for AEOI in flat mode is to be investigated) */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index e5952c2..88b36648 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -22,6 +22,12 @@
#define CREATE_TRACE_POINTS
#include <asm/trace/irq_vectors.h>
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+DEFINE_PER_CPU(struct pt_regs *, irq_regs);
+EXPORT_PER_CPU_SYMBOL(irq_regs);
+
atomic_t irq_err_count;
/* Function pointer for generic interrupt vector handling */
@@ -116,6 +122,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_puts(p, " Threshold APIC interrupts\n");
#endif
+#ifdef CONFIG_X86_MCE_AMD
+ seq_printf(p, "%*s: ", prec, "DFR");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
+ seq_puts(p, " Deferred Error APIC interrupts\n");
+#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
for_each_online_cpu(j)
@@ -136,6 +148,18 @@ int arch_show_interrupts(struct seq_file *p, int prec)
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
#endif
+#ifdef CONFIG_HAVE_KVM
+ seq_printf(p, "%*s: ", prec, "PIN");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
+ seq_puts(p, " Posted-interrupt notification event\n");
+
+ seq_printf(p, "%*s: ", prec, "PIW");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ irq_stats(j)->kvm_posted_intr_wakeup_ipis);
+ seq_puts(p, " Posted-interrupt wakeup event\n");
+#endif
return 0;
}
@@ -192,8 +216,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
unsigned vector = ~regs->orig_ax;
unsigned irq;
- irq_enter();
- exit_idle();
+ entering_irq();
irq = __this_cpu_read(vector_irq[vector]);
@@ -209,7 +232,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
}
}
- irq_exit();
+ exiting_irq();
set_irq_regs(old_regs);
return 1;
@@ -237,6 +260,18 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs)
}
#ifdef CONFIG_HAVE_KVM
+static void dummy_handler(void) {}
+static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;
+
+void kvm_set_posted_intr_wakeup_handler(void (*handler)(void))
+{
+ if (handler)
+ kvm_posted_intr_wakeup_handler = handler;
+ else
+ kvm_posted_intr_wakeup_handler = dummy_handler;
+}
+EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler);
+
/*
* Handler for POSTED_INTERRUPT_VECTOR.
*/
@@ -244,16 +279,23 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- ack_APIC_irq();
-
- irq_enter();
-
- exit_idle();
-
+ entering_ack_irq();
inc_irq_stat(kvm_posted_intr_ipis);
+ exiting_irq();
+ set_irq_regs(old_regs);
+}
- irq_exit();
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ entering_ack_irq();
+ inc_irq_stat(kvm_posted_intr_wakeup_ipis);
+ kvm_posted_intr_wakeup_handler();
+ exiting_irq();
set_irq_regs(old_regs);
}
#endif
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index f9fd86a..cd74f59 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -21,12 +21,6 @@
#include <asm/apic.h>
-DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
-EXPORT_PER_CPU_SYMBOL(irq_stat);
-
-DEFINE_PER_CPU(struct pt_regs *, irq_regs);
-EXPORT_PER_CPU_SYMBOL(irq_regs);
-
#ifdef CONFIG_DEBUG_STACKOVERFLOW
int sysctl_panic_on_stackoverflow __read_mostly;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 394e643..bc4604e 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -20,12 +20,6 @@
#include <asm/idle.h>
#include <asm/apic.h>
-DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
-EXPORT_PER_CPU_SYMBOL(irq_stat);
-
-DEFINE_PER_CPU(struct pt_regs *, irq_regs);
-EXPORT_PER_CPU_SYMBOL(irq_regs);
-
int sysctl_panic_on_stackoverflow;
/*
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 15d741d..dc5fa6a 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -10,12 +10,6 @@
#include <asm/apic.h>
#include <asm/trace/irq_vectors.h>
-static inline void irq_work_entering_irq(void)
-{
- irq_enter();
- ack_APIC_irq();
-}
-
static inline void __smp_irq_work_interrupt(void)
{
inc_irq_stat(apic_irq_work_irqs);
@@ -24,14 +18,14 @@ static inline void __smp_irq_work_interrupt(void)
__visible void smp_irq_work_interrupt(struct pt_regs *regs)
{
- irq_work_entering_irq();
+ ipi_entering_ack_irq();
__smp_irq_work_interrupt();
exiting_irq();
}
__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
{
- irq_work_entering_irq();
+ ipi_entering_ack_irq();
trace_irq_work_entry(IRQ_WORK_VECTOR);
__smp_irq_work_interrupt();
trace_irq_work_exit(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index cd10a64..a3a5e15 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -86,7 +86,7 @@ void __init init_IRQ(void)
int i;
/*
- * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
+ * On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
* these IRQ's are handled by more mordern controllers like IO-APIC,
@@ -94,7 +94,7 @@ void __init init_IRQ(void)
* irq's migrate etc.
*/
for (i = 0; i < nr_legacy_irqs(); i++)
- per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
+ per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i;
x86_init.irqs.intr_init();
}
@@ -135,6 +135,10 @@ static void __init apic_intr_init(void)
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif
+#ifdef CONFIG_X86_MCE_AMD
+ alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
+#endif
+
#ifdef CONFIG_X86_LOCAL_APIC
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
@@ -144,6 +148,8 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_HAVE_KVM
/* IPI for KVM to deliver posted interrupt */
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+ /* IPI for KVM to deliver interrupt to wake up tasks */
+ alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
#endif
/* IPI vectors for APIC spurious and error interrupts */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9435620..1681504 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void kvm_wait(u8 *ptr, u8 val)
+{
+ unsigned long flags;
+
+ if (in_nmi())
+ return;
+
+ local_irq_save(flags);
+
+ if (READ_ONCE(*ptr) != val)
+ goto out;
+
+ /*
+ * halt until it's our turn and kicked. Note that we do safe halt
+ * for irq enabled case to avoid hang when lock info is overwritten
+ * in irq spinlock slowpath and no spurious interrupt occur to save us.
+ */
+ if (arch_irqs_disabled_flags(flags))
+ halt();
+ else
+ safe_halt();
+
+out:
+ local_irq_restore(flags);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
enum kvm_contention_stat {
TAKEN_SLOW,
TAKEN_SLOW_PICKUP,
@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
}
}
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+
/*
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
*/
@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ __pv_init_lock_hash();
+ pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_lock_ops.wait = kvm_wait;
+ pv_lock_ops.kick = kvm_kick_cpu;
+#else /* !CONFIG_QUEUED_SPINLOCKS */
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
pv_lock_ops.unlock_kick = kvm_unlock_kick;
+#endif
}
static __init int kvm_spinlock_init_jump(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 415480d..11546b4 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -17,6 +17,7 @@
#include <linux/ftrace.h>
#include <linux/io.h>
#include <linux/suspend.h>
+#include <linux/vmalloc.h>
#include <asm/init.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 2d2a237..30ca760 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -19,8 +19,8 @@
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/pci.h>
-#include <linux/irqdomain.h>
+#include <asm/irqdomain.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/pgalloc.h>
@@ -113,11 +113,6 @@ static void __init MP_bus_info(struct mpc_bus *m)
pr_warn("Unknown bustype %s - ignoring\n", str);
}
-static struct irq_domain_ops mp_ioapic_irqdomain_ops = {
- .map = mp_irqdomain_map,
- .unmap = mp_irqdomain_unmap,
-};
-
static void __init MP_ioapic_info(struct mpc_ioapic *m)
{
struct ioapic_domain_cfg cfg = {
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index bbb6c73..33ee3e0 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,11 +8,33 @@
#include <asm/paravirt.h>
+#ifdef CONFIG_QUEUED_SPINLOCKS
+__visible void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+
+PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
+
+bool pv_is_native_spin_unlock(void)
+{
+ return pv_lock_ops.queued_spin_unlock.func ==
+ __raw_callee_save___native_queued_spin_unlock;
+}
+#endif
+
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+ .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
+ .wait = paravirt_nop,
+ .kick = paravirt_nop,
+#else /* !CONFIG_QUEUED_SPINLOCKS */
.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
.unlock_kick = paravirt_nop,
-#endif
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+#endif /* SMP */
};
EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c614dd4..58bcfb6 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
+#ifdef CONFIG_X86_32
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
+#endif
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
@@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0,
-#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+#if defined(CONFIG_X86_32)
.irq_enable_sysexit = native_irq_enable_sysexit,
#endif
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6..e1b0136 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
+#endif
+
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{
/* arg in %eax, return in %eax */
@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
return 0;
}
+extern bool pv_is_native_spin_unlock(void);
+
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_cpu_ops, read_tsc);
-
- patch_site:
- ret = paravirt_patch_insns(ibuf, len, start, end);
- break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+ if (pv_is_native_spin_unlock()) {
+ start = start_pv_lock_ops_queued_spin_unlock;
+ end = end_pv_lock_ops_queued_spin_unlock;
+ goto patch_site;
+ }
+#endif
default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
+
+patch_site:
+ ret = paravirt_patch_insns(ibuf, len, start, end);
+ break;
}
#undef PATCH_SITE
return ret;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index a1da673..8aa0558 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax");
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
+#endif
+
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{
return paravirt_patch_insns(insnbuf, len,
@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
start__mov64, end__mov64);
}
+extern bool pv_is_native_spin_unlock(void);
+
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
@@ -49,7 +55,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
- PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
PATCH_SITE(pv_cpu_ops, swapgs);
@@ -59,14 +64,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
-
- patch_site:
- ret = paravirt_patch_insns(ibuf, len, start, end);
- break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+ if (pv_is_native_spin_unlock()) {
+ start = start_pv_lock_ops_queued_spin_unlock;
+ end = end_pv_lock_ops_queued_spin_unlock;
+ goto patch_site;
+ }
+#endif
default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
+
+patch_site:
+ ret = paravirt_patch_insns(ibuf, len, start, end);
+ break;
}
#undef PATCH_SITE
return ret;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8213da6..6e338e3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
};
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss);
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -156,11 +156,13 @@ void flush_thread(void)
/* FPU state will be reallocated lazily at the first use. */
drop_fpu(tsk);
free_thread_xstate(tsk);
- } else if (!used_math()) {
- /* kthread execs. TODO: cleanup this horror. */
- if (WARN_ON(init_fpu(tsk)))
- force_sig(SIGKILL, tsk);
- user_fpu_begin();
+ } else {
+ if (!tsk_used_math(tsk)) {
+ /* kthread execs. TODO: cleanup this horror. */
+ if (WARN_ON(init_fpu(tsk)))
+ force_sig(SIGKILL, tsk);
+ user_fpu_begin();
+ }
restore_init_xstate();
}
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8ed2106..a99900c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -302,13 +302,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
arch_end_context_switch(next_p);
/*
- * Reload esp0, kernel_stack, and current_top_of_stack. This changes
+ * Reload esp0 and cpu_current_top_of_stack. This changes
* current_thread_info().
*/
load_sp0(tss, next);
- this_cpu_write(kernel_stack,
- (unsigned long)task_stack_page(next_p) +
- THREAD_SIZE);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ddfdbf7..8213450 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -409,9 +409,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Reload esp0 and ss1. This changes current_thread_info(). */
load_sp0(tss, next);
- this_cpu_write(kernel_stack,
- (unsigned long)task_stack_page(next_p) + THREAD_SIZE);
-
/*
* Now maybe reload the debug registers and handle I/O bitmaps
*/
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d74ac33..8d04a75 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1222,8 +1222,7 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
init_apic_mappings();
- if (x86_io_apic_ops.init)
- x86_io_apic_ops.init();
+ io_apic_init_mappings();
kvm_guest_init();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index be8e1bd..15aaa69 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -170,8 +170,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
asmlinkage __visible void smp_reboot_interrupt(void)
{
- ack_APIC_irq();
- irq_enter();
+ ipi_entering_ack_irq();
stop_this_cpu(NULL);
irq_exit();
}
@@ -265,12 +264,6 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs)
*/
}
-static inline void smp_entering_irq(void)
-{
- ack_APIC_irq();
- irq_enter();
-}
-
__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
{
/*
@@ -279,7 +272,7 @@ __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
* scheduler_ipi(). This is OK, since those functions are allowed
* to nest.
*/
- smp_entering_irq();
+ ipi_entering_ack_irq();
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
@@ -297,14 +290,14 @@ static inline void __smp_call_function_interrupt(void)
__visible void smp_call_function_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ ipi_entering_ack_irq();
__smp_call_function_interrupt();
exiting_irq();
}
__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ ipi_entering_ack_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
@@ -319,14 +312,14 @@ static inline void __smp_call_function_single_interrupt(void)
__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ ipi_entering_ack_irq();
__smp_call_function_single_interrupt();
exiting_irq();
}
__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ ipi_entering_ack_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 50e547e..fd6291c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -514,6 +514,40 @@ void __inquire_remote_apic(int apicid)
}
/*
+ * The Multiprocessor Specification 1.4 (1997) example code suggests
+ * that there should be a 10ms delay between the BSP asserting INIT
+ * and de-asserting INIT, when starting a remote processor.
+ * But that slows boot and resume on modern processors, which include
+ * many cores and don't require that delay.
+ *
+ * Cmdline "init_cpu_udelay=" is available to over-ride this delay.
+ * Modern processor families are quirked to remove the delay entirely.
+ */
+#define UDELAY_10MS_DEFAULT 10000
+
+static unsigned int init_udelay = UDELAY_10MS_DEFAULT;
+
+static int __init cpu_init_udelay(char *str)
+{
+ get_option(&str, &init_udelay);
+
+ return 0;
+}
+early_param("cpu_init_udelay", cpu_init_udelay);
+
+static void __init smp_quirk_init_udelay(void)
+{
+ /* if cmdline changed it from default, leave it alone */
+ if (init_udelay != UDELAY_10MS_DEFAULT)
+ return;
+
+ /* if modern processor, use no delay */
+ if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
+ ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
+ init_udelay = 0;
+}
+
+/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
@@ -555,7 +589,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
static int
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
- unsigned long send_status, accept_status = 0;
+ unsigned long send_status = 0, accept_status = 0;
int maxlvt, num_starts, j;
maxlvt = lapic_get_maxlvt();
@@ -583,7 +617,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
- mdelay(10);
+ udelay(init_udelay);
pr_debug("Deasserting INIT\n");
@@ -651,6 +685,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
+
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
accept_status = (apic_read(APIC_ESR) & 0xEF);
@@ -792,8 +827,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
- per_cpu(kernel_stack, cpu) =
- (unsigned long)task_stack_page(idle) + THREAD_SIZE;
}
/*
@@ -1176,6 +1209,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
uv_system_init();
set_mtrr_aps_delayed_init();
+
+ smp_quirk_init_udelay();
}
void arch_enable_nonboot_cpus_begin(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 324ab52..de37936 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,8 +72,7 @@ gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
-
-asmlinkage int system_call(void);
+#include <asm/proto.h>
#endif
/* Must be page-aligned because the real IDT is used in a fixmap. */
@@ -813,18 +812,6 @@ dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
conditional_sti(regs);
-#if 0
- /* No need to warn about this any longer. */
- pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
-#endif
-}
-
-asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void)
-{
-}
-
-asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
-{
}
/*
@@ -992,13 +979,13 @@ void __init trap_init(void)
set_bit(i, used_vectors);
#ifdef CONFIG_IA32_EMULATION
- set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+ set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
#ifdef CONFIG_X86_32
- set_system_trap_gate(SYSCALL_VECTOR, &system_call);
- set_bit(SYSCALL_VECTOR, used_vectors);
+ set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+ set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
/*
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 234b072..3cee10a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -111,11 +111,9 @@ EXPORT_SYMBOL_GPL(x86_platform);
#if defined(CONFIG_PCI_MSI)
struct x86_msi_ops x86_msi = {
.setup_msi_irqs = native_setup_msi_irqs,
- .compose_msi_msg = native_compose_msi_msg,
.teardown_msi_irq = native_teardown_msi_irq,
.teardown_msi_irqs = default_teardown_msi_irqs,
.restore_msi_irqs = default_restore_msi_irqs,
- .setup_hpet_msi = default_setup_hpet_msi,
};
/* MSI arch specific hooks */
@@ -141,13 +139,6 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
#endif
struct x86_io_apic_ops x86_io_apic_ops = {
- .init = native_io_apic_init_mappings,
.read = native_io_apic_read,
- .write = native_io_apic_write,
- .modify = native_io_apic_modify,
.disable = native_disable_io_apic,
- .print_entries = native_io_apic_print_entries,
- .set_affinity = native_ioapic_set_affinity,
- .setup_entry = native_setup_ioapic_entry,
- .eoi_ioapic_pin = native_eoi_ioapic_pin,
};
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 59b69f6..1d08ad3 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,6 +16,8 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
+#include <asm/i387.h> /* For use_eager_fpu. Ugh! */
+#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */
#include <asm/user.h>
#include <asm/xsave.h>
#include "cpuid.h"
@@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+ vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+
/*
* The existing code assumes virtual address is 48-bit in the canonical
* address checks; exit if it is ever changed.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c3b1ad9..496b369 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
+
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d43867c..44a7d25 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
}
}
-void update_permission_bitmask(struct kvm_vcpu *vcpu,
- struct kvm_mmu *mmu, bool ept)
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu, bool ept)
{
unsigned bit, byte, pfec;
u8 map;
@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
{
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
struct kvm_mmu *context = &vcpu->arch.mmu;
MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
context->base_role.cr0_wp = is_write_protection(vcpu);
context->base_role.smep_andnot_wp
= smep && !is_write_protection(vcpu);
+ context->base_role.smap_andnot_wp
+ = smap && !is_write_protection(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
- union kvm_mmu_page_role mask = { .word = 0 };
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
u64 entry, gentry, *spte;
int npte;
bool remote_flush, local_flush, zap_page;
+ union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
+ .cr0_wp = 1,
+ .cr4_pae = 1,
+ .nxe = 1,
+ .smep_andnot_wp = 1,
+ .smap_andnot_wp = 1,
+ };
/*
* If we don't have indirect shadow pages, it means no page is
@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
- mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index c7d6563..0ada65e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -71,8 +71,6 @@ enum {
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
-void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
@@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+ WARN_ON(pfec & PFERR_RSVD_MASK);
+
return (mmu->permissions[index] >> pte_access) & 1;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index fd49c86..6e6d115 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
mmu_is_nested(vcpu));
if (likely(r != RET_MMIO_PF_INVALID))
return r;
+
+ /*
+ * page fault with PFEC.RSVD = 1 is caused by shadow
+ * page fault, should not be used to walk guest page
+ * table.
+ */
+ error_code &= ~PFERR_RSVD_MASK;
};
r = mmu_topup_memory_caches(vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ce741b8..9afa233 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
+ .fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b6168..2d73807 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
+ .fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c73efcd..ea306ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long old_cr4 = kvm_read_cr4(vcpu);
- unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
- X86_CR4_PAE | X86_CR4_SMEP;
+ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+ X86_CR4_SMEP | X86_CR4_SMAP;
+
if (cr4 & CR4_RESERVED_BITS)
return 1;
@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
- if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
- update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
-
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu);
@@ -6197,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
return;
page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (is_error_page(page))
+ return;
kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
/*
@@ -7060,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
fpu_save_init(&vcpu->arch.guest_fpu);
__kernel_fpu_end();
++vcpu->stat.fpu_reload;
- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+ if (!vcpu->arch.eager_fpu)
+ kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+
trace_kvm_fpu(0);
}
@@ -7076,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
+ struct kvm_vcpu *vcpu;
+
if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
- return kvm_x86_ops->vcpu_create(kvm, id);
+
+ vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+
+ /*
+ * Activate fpu unconditionally in case the guest needs eager FPU. It will be
+ * deactivated soon if it doesn't.
+ */
+ kvm_x86_ops->fpu_activate(vcpu);
+ return vcpu;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 8f9a133..cab9aaa 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -90,7 +90,7 @@ struct lguest_data lguest_data = {
.noirq_iret = (u32)lguest_noirq_iret,
.kernel_address = PAGE_OFFSET,
.blocked_interrupts = { 1 }, /* Block timer interrupts */
- .syscall_vec = SYSCALL_VECTOR,
+ .syscall_vec = IA32_SYSCALL_VECTOR,
};
/*G:037
@@ -866,7 +866,7 @@ static void __init lguest_init_IRQ(void)
for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
/* Some systems map "vectors" to interrupts weirdly. Not us! */
__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
- if (i != SYSCALL_VECTOR)
+ if (i != IA32_SYSCALL_VECTOR)
set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 1530afb..f258788 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -17,7 +17,6 @@ clean-files := inat-tables.c
obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o
-lib-y += thunk_$(BITS).o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
@@ -40,6 +39,6 @@ else
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
lib-y += clear_page_64.o copy_page_64.o
lib-y += memmove_64.o memset_64.o
- lib-y += copy_user_64.o copy_user_nocache_64.o
+ lib-y += copy_user_64.o
lib-y += cmpxchg16b_emu.o
endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 00933d5..9b0ca8f 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -11,26 +11,23 @@
#include <linux/linkage.h>
#include <asm/alternative-asm.h>
-#include <asm/dwarf2.h>
/* if you want SMP support, implement these with real spinlocks */
.macro LOCK reg
- pushfl_cfi
+ pushfl
cli
.endm
.macro UNLOCK reg
- popfl_cfi
+ popfl
.endm
#define BEGIN(op) \
.macro endp; \
- CFI_ENDPROC; \
ENDPROC(atomic64_##op##_386); \
.purgem endp; \
.endm; \
ENTRY(atomic64_##op##_386); \
- CFI_STARTPROC; \
LOCK v;
#define ENDP endp
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 082a851..db3ae854 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -11,7 +11,6 @@
#include <linux/linkage.h>
#include <asm/alternative-asm.h>
-#include <asm/dwarf2.h>
.macro read64 reg
movl %ebx, %eax
@@ -22,16 +21,11 @@
.endm
ENTRY(atomic64_read_cx8)
- CFI_STARTPROC
-
read64 %ecx
ret
- CFI_ENDPROC
ENDPROC(atomic64_read_cx8)
ENTRY(atomic64_set_cx8)
- CFI_STARTPROC
-
1:
/* we don't need LOCK_PREFIX since aligned 64-bit writes
* are atomic on 586 and newer */
@@ -39,28 +33,23 @@ ENTRY(atomic64_set_cx8)
jne 1b
ret
- CFI_ENDPROC
ENDPROC(atomic64_set_cx8)
ENTRY(atomic64_xchg_cx8)
- CFI_STARTPROC
-
1:
LOCK_PREFIX
cmpxchg8b (%esi)
jne 1b
ret
- CFI_ENDPROC
ENDPROC(atomic64_xchg_cx8)
.macro addsub_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
- CFI_STARTPROC
- pushl_cfi_reg ebp
- pushl_cfi_reg ebx
- pushl_cfi_reg esi
- pushl_cfi_reg edi
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
movl %eax, %esi
movl %edx, %edi
@@ -79,12 +68,11 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
- popl_cfi_reg edi
- popl_cfi_reg esi
- popl_cfi_reg ebx
- popl_cfi_reg ebp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
ret
- CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
.endm
@@ -93,8 +81,7 @@ addsub_return sub sub sbb
.macro incdec_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
- CFI_STARTPROC
- pushl_cfi_reg ebx
+ pushl %ebx
read64 %esi
1:
@@ -109,9 +96,8 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
- popl_cfi_reg ebx
+ popl %ebx
ret
- CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
.endm
@@ -119,8 +105,7 @@ incdec_return inc add adc
incdec_return dec sub sbb
ENTRY(atomic64_dec_if_positive_cx8)
- CFI_STARTPROC
- pushl_cfi_reg ebx
+ pushl %ebx
read64 %esi
1:
@@ -136,18 +121,16 @@ ENTRY(atomic64_dec_if_positive_cx8)
2:
movl %ebx, %eax
movl %ecx, %edx
- popl_cfi_reg ebx
+ popl %ebx
ret
- CFI_ENDPROC
ENDPROC(atomic64_dec_if_positive_cx8)
ENTRY(atomic64_add_unless_cx8)
- CFI_STARTPROC
- pushl_cfi_reg ebp
- pushl_cfi_reg ebx
+ pushl %ebp
+ pushl %ebx
/* these just push these two parameters on the stack */
- pushl_cfi_reg edi
- pushl_cfi_reg ecx
+ pushl %edi
+ pushl %ecx
movl %eax, %ebp
movl %edx, %edi
@@ -168,21 +151,18 @@ ENTRY(atomic64_add_unless_cx8)
movl $1, %eax
3:
addl $8, %esp
- CFI_ADJUST_CFA_OFFSET -8
- popl_cfi_reg ebx
- popl_cfi_reg ebp
+ popl %ebx
+ popl %ebp
ret
4:
cmpl %edx, 4(%esp)
jne 2b
xorl %eax, %eax
jmp 3b
- CFI_ENDPROC
ENDPROC(atomic64_add_unless_cx8)
ENTRY(atomic64_inc_not_zero_cx8)
- CFI_STARTPROC
- pushl_cfi_reg ebx
+ pushl %ebx
read64 %esi
1:
@@ -199,7 +179,6 @@ ENTRY(atomic64_inc_not_zero_cx8)
movl $1, %eax
3:
- popl_cfi_reg ebx
+ popl %ebx
ret
- CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 9bc944a..c1e6232 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -26,7 +26,6 @@
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/errno.h>
#include <asm/asm.h>
@@ -50,9 +49,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
* alignment for the unrolled loop.
*/
ENTRY(csum_partial)
- CFI_STARTPROC
- pushl_cfi_reg esi
- pushl_cfi_reg ebx
+ pushl %esi
+ pushl %ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: unsigned char *buff
@@ -129,10 +127,9 @@ ENTRY(csum_partial)
jz 8f
roll $8, %eax
8:
- popl_cfi_reg ebx
- popl_cfi_reg esi
+ popl %ebx
+ popl %esi
ret
- CFI_ENDPROC
ENDPROC(csum_partial)
#else
@@ -140,9 +137,8 @@ ENDPROC(csum_partial)
/* Version for PentiumII/PPro */
ENTRY(csum_partial)
- CFI_STARTPROC
- pushl_cfi_reg esi
- pushl_cfi_reg ebx
+ pushl %esi
+ pushl %ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf
@@ -249,10 +245,9 @@ ENTRY(csum_partial)
jz 90f
roll $8, %eax
90:
- popl_cfi_reg ebx
- popl_cfi_reg esi
+ popl %ebx
+ popl %esi
ret
- CFI_ENDPROC
ENDPROC(csum_partial)
#endif
@@ -287,12 +282,10 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
#define FP 12
ENTRY(csum_partial_copy_generic)
- CFI_STARTPROC
subl $4,%esp
- CFI_ADJUST_CFA_OFFSET 4
- pushl_cfi_reg edi
- pushl_cfi_reg esi
- pushl_cfi_reg ebx
+ pushl %edi
+ pushl %esi
+ pushl %ebx
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
@@ -401,12 +394,11 @@ DST( movb %cl, (%edi) )
.previous
- popl_cfi_reg ebx
- popl_cfi_reg esi
- popl_cfi_reg edi
- popl_cfi %ecx # equivalent to addl $4,%esp
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ecx # equivalent to addl $4,%esp
ret
- CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
#else
@@ -426,10 +418,9 @@ ENDPROC(csum_partial_copy_generic)
#define ARGBASE 12
ENTRY(csum_partial_copy_generic)
- CFI_STARTPROC
- pushl_cfi_reg ebx
- pushl_cfi_reg edi
- pushl_cfi_reg esi
+ pushl %ebx
+ pushl %edi
+ pushl %esi
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
@@ -489,11 +480,10 @@ DST( movb %dl, (%edi) )
jmp 7b
.previous
- popl_cfi_reg esi
- popl_cfi_reg edi
- popl_cfi_reg ebx
+ popl %esi
+ popl %edi
+ popl %ebx
ret
- CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
#undef ROUND
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index e67e579..a2fe51b 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,4 @@
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
@@ -15,7 +14,6 @@
* %rdi - page
*/
ENTRY(clear_page)
- CFI_STARTPROC
ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
"jmp clear_page_c_e", X86_FEATURE_ERMS
@@ -24,11 +22,9 @@ ENTRY(clear_page)
xorl %eax,%eax
rep stosq
ret
- CFI_ENDPROC
ENDPROC(clear_page)
ENTRY(clear_page_orig)
- CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
@@ -48,14 +44,11 @@ ENTRY(clear_page_orig)
jnz .Lloop
nop
ret
- CFI_ENDPROC
ENDPROC(clear_page_orig)
ENTRY(clear_page_c_e)
- CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
- CFI_ENDPROC
ENDPROC(clear_page_c_e)
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 40a1725..9b33024 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -6,7 +6,6 @@
*
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/percpu.h>
.text
@@ -21,7 +20,6 @@
* %al : Operation successful
*/
ENTRY(this_cpu_cmpxchg16b_emu)
-CFI_STARTPROC
#
# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
@@ -32,7 +30,7 @@ CFI_STARTPROC
# *atomic* on a single cpu (as provided by the this_cpu_xx class of
# macros).
#
- pushfq_cfi
+ pushfq
cli
cmpq PER_CPU_VAR((%rsi)), %rax
@@ -43,17 +41,13 @@ CFI_STARTPROC
movq %rbx, PER_CPU_VAR((%rsi))
movq %rcx, PER_CPU_VAR(8(%rsi))
- CFI_REMEMBER_STATE
- popfq_cfi
+ popfq
mov $1, %al
ret
- CFI_RESTORE_STATE
.Lnot_same:
- popfq_cfi
+ popfq
xor %al,%al
ret
-CFI_ENDPROC
-
ENDPROC(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index b4807fce..ad53497 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -7,7 +7,6 @@
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
.text
@@ -20,14 +19,13 @@
* %ecx : high 32 bits of new value
*/
ENTRY(cmpxchg8b_emu)
-CFI_STARTPROC
#
# Emulate 'cmpxchg8b (%esi)' on UP except we don't
# set the whole ZF thing (caller will just compare
# eax:edx with the expected value)
#
- pushfl_cfi
+ pushfl
cli
cmpl (%esi), %eax
@@ -38,18 +36,15 @@ CFI_STARTPROC
movl %ebx, (%esi)
movl %ecx, 4(%esi)
- CFI_REMEMBER_STATE
- popfl_cfi
+ popfl
ret
- CFI_RESTORE_STATE
.Lnot_same:
movl (%esi), %eax
.Lhalf_same:
movl 4(%esi), %edx
- popfl_cfi
+ popfl
ret
-CFI_ENDPROC
ENDPROC(cmpxchg8b_emu)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 8239dbc..009f982 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,7 +1,6 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
@@ -13,22 +12,16 @@
*/
ALIGN
ENTRY(copy_page)
- CFI_STARTPROC
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
ret
- CFI_ENDPROC
ENDPROC(copy_page)
ENTRY(copy_page_regs)
- CFI_STARTPROC
subq $2*8, %rsp
- CFI_ADJUST_CFA_OFFSET 2*8
movq %rbx, (%rsp)
- CFI_REL_OFFSET rbx, 0
movq %r12, 1*8(%rsp)
- CFI_REL_OFFSET r12, 1*8
movl $(4096/64)-5, %ecx
.p2align 4
@@ -87,11 +80,7 @@ ENTRY(copy_page_regs)
jnz .Loop2
movq (%rsp), %rbx
- CFI_RESTORE rbx
movq 1*8(%rsp), %r12
- CFI_RESTORE r12
addq $2*8, %rsp
- CFI_ADJUST_CFA_OFFSET -2*8
ret
- CFI_ENDPROC
ENDPROC(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index fa997df..982ce34 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -7,7 +7,6 @@
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
@@ -16,33 +15,8 @@
#include <asm/asm.h>
#include <asm/smap.h>
- .macro ALIGN_DESTINATION
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jz 102f /* already aligned */
- subl $8,%ecx
- negl %ecx
- subl %ecx,%edx
-100: movb (%rsi),%al
-101: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 100b
-102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE(100b,103b)
- _ASM_EXTABLE(101b,103b)
- .endm
-
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
- CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rdi,%rcx
addq %rdx,%rcx
@@ -54,12 +28,10 @@ ENTRY(_copy_to_user)
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
- CFI_ENDPROC
ENDPROC(_copy_to_user)
/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
- CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rsi,%rcx
addq %rdx,%rcx
@@ -71,14 +43,12 @@ ENTRY(_copy_from_user)
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
- CFI_ENDPROC
ENDPROC(_copy_from_user)
.section .fixup,"ax"
/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
- CFI_STARTPROC
movl %edx,%ecx
xorl %eax,%eax
rep
@@ -86,7 +56,6 @@ bad_from_user:
bad_to_user:
movl %edx,%eax
ret
- CFI_ENDPROC
ENDPROC(bad_from_user)
.previous
@@ -104,7 +73,6 @@ ENDPROC(bad_from_user)
* eax uncopied bytes or 0 if successful.
*/
ENTRY(copy_user_generic_unrolled)
- CFI_STARTPROC
ASM_STAC
cmpl $8,%edx
jb 20f /* less then 8 bytes, go to byte copy loop */
@@ -186,7 +154,6 @@ ENTRY(copy_user_generic_unrolled)
_ASM_EXTABLE(19b,40b)
_ASM_EXTABLE(21b,50b)
_ASM_EXTABLE(22b,50b)
- CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
/* Some CPUs run faster using the string copy instructions.
@@ -208,7 +175,6 @@ ENDPROC(copy_user_generic_unrolled)
* eax uncopied bytes or 0 if successful.
*/
ENTRY(copy_user_generic_string)
- CFI_STARTPROC
ASM_STAC
cmpl $8,%edx
jb 2f /* less than 8 bytes, go to byte copy loop */
@@ -233,7 +199,6 @@ ENTRY(copy_user_generic_string)
_ASM_EXTABLE(1b,11b)
_ASM_EXTABLE(3b,12b)
- CFI_ENDPROC
ENDPROC(copy_user_generic_string)
/*
@@ -249,7 +214,6 @@ ENDPROC(copy_user_generic_string)
* eax uncopied bytes or 0 if successful.
*/
ENTRY(copy_user_enhanced_fast_string)
- CFI_STARTPROC
ASM_STAC
movl %edx,%ecx
1: rep
@@ -264,5 +228,94 @@ ENTRY(copy_user_enhanced_fast_string)
.previous
_ASM_EXTABLE(1b,12b)
- CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)
+
+/*
+ * copy_user_nocache - Uncached memory copy with exception handling
+ * This will force destination/source out of cache for more performance.
+ */
+ENTRY(__copy_user_nocache)
+ ASM_STAC
+ cmpl $8,%edx
+ jb 20f /* less then 8 bytes, go to byte copy loop */
+ ALIGN_DESTINATION
+ movl %edx,%ecx
+ andl $63,%edx
+ shrl $6,%ecx
+ jz 17f
+1: movq (%rsi),%r8
+2: movq 1*8(%rsi),%r9
+3: movq 2*8(%rsi),%r10
+4: movq 3*8(%rsi),%r11
+5: movnti %r8,(%rdi)
+6: movnti %r9,1*8(%rdi)
+7: movnti %r10,2*8(%rdi)
+8: movnti %r11,3*8(%rdi)
+9: movq 4*8(%rsi),%r8
+10: movq 5*8(%rsi),%r9
+11: movq 6*8(%rsi),%r10
+12: movq 7*8(%rsi),%r11
+13: movnti %r8,4*8(%rdi)
+14: movnti %r9,5*8(%rdi)
+15: movnti %r10,6*8(%rdi)
+16: movnti %r11,7*8(%rdi)
+ leaq 64(%rsi),%rsi
+ leaq 64(%rdi),%rdi
+ decl %ecx
+ jnz 1b
+17: movl %edx,%ecx
+ andl $7,%edx
+ shrl $3,%ecx
+ jz 20f
+18: movq (%rsi),%r8
+19: movnti %r8,(%rdi)
+ leaq 8(%rsi),%rsi
+ leaq 8(%rdi),%rdi
+ decl %ecx
+ jnz 18b
+20: andl %edx,%edx
+ jz 23f
+ movl %edx,%ecx
+21: movb (%rsi),%al
+22: movb %al,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz 21b
+23: xorl %eax,%eax
+ ASM_CLAC
+ sfence
+ ret
+
+ .section .fixup,"ax"
+30: shll $6,%ecx
+ addl %ecx,%edx
+ jmp 60f
+40: lea (%rdx,%rcx,8),%rdx
+ jmp 60f
+50: movl %ecx,%edx
+60: sfence
+ jmp copy_user_handle_tail
+ .previous
+
+ _ASM_EXTABLE(1b,30b)
+ _ASM_EXTABLE(2b,30b)
+ _ASM_EXTABLE(3b,30b)
+ _ASM_EXTABLE(4b,30b)
+ _ASM_EXTABLE(5b,30b)
+ _ASM_EXTABLE(6b,30b)
+ _ASM_EXTABLE(7b,30b)
+ _ASM_EXTABLE(8b,30b)
+ _ASM_EXTABLE(9b,30b)
+ _ASM_EXTABLE(10b,30b)
+ _ASM_EXTABLE(11b,30b)
+ _ASM_EXTABLE(12b,30b)
+ _ASM_EXTABLE(13b,30b)
+ _ASM_EXTABLE(14b,30b)
+ _ASM_EXTABLE(15b,30b)
+ _ASM_EXTABLE(16b,30b)
+ _ASM_EXTABLE(18b,40b)
+ _ASM_EXTABLE(19b,40b)
+ _ASM_EXTABLE(21b,50b)
+ _ASM_EXTABLE(22b,50b)
+ENDPROC(__copy_user_nocache)
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
deleted file mode 100644
index 6a4f43c..0000000
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v2.
- *
- * Functions to copy from and to user space.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
-#include <asm/current.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/asm.h>
-#include <asm/smap.h>
-
- .macro ALIGN_DESTINATION
-#ifdef FIX_ALIGNMENT
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jz 102f /* already aligned */
- subl $8,%ecx
- negl %ecx
- subl %ecx,%edx
-100: movb (%rsi),%al
-101: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 100b
-102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE(100b,103b)
- _ASM_EXTABLE(101b,103b)
-#endif
- .endm
-
-/*
- * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
- */
-ENTRY(__copy_user_nocache)
- CFI_STARTPROC
- ASM_STAC
- cmpl $8,%edx
- jb 20f /* less then 8 bytes, go to byte copy loop */
- ALIGN_DESTINATION
- movl %edx,%ecx
- andl $63,%edx
- shrl $6,%ecx
- jz 17f
-1: movq (%rsi),%r8
-2: movq 1*8(%rsi),%r9
-3: movq 2*8(%rsi),%r10
-4: movq 3*8(%rsi),%r11
-5: movnti %r8,(%rdi)
-6: movnti %r9,1*8(%rdi)
-7: movnti %r10,2*8(%rdi)
-8: movnti %r11,3*8(%rdi)
-9: movq 4*8(%rsi),%r8
-10: movq 5*8(%rsi),%r9
-11: movq 6*8(%rsi),%r10
-12: movq 7*8(%rsi),%r11
-13: movnti %r8,4*8(%rdi)
-14: movnti %r9,5*8(%rdi)
-15: movnti %r10,6*8(%rdi)
-16: movnti %r11,7*8(%rdi)
- leaq 64(%rsi),%rsi
- leaq 64(%rdi),%rdi
- decl %ecx
- jnz 1b
-17: movl %edx,%ecx
- andl $7,%edx
- shrl $3,%ecx
- jz 20f
-18: movq (%rsi),%r8
-19: movnti %r8,(%rdi)
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
- decl %ecx
- jnz 18b
-20: andl %edx,%edx
- jz 23f
- movl %edx,%ecx
-21: movb (%rsi),%al
-22: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 21b
-23: xorl %eax,%eax
- ASM_CLAC
- sfence
- ret
-
- .section .fixup,"ax"
-30: shll $6,%ecx
- addl %ecx,%edx
- jmp 60f
-40: lea (%rdx,%rcx,8),%rdx
- jmp 60f
-50: movl %ecx,%edx
-60: sfence
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE(1b,30b)
- _ASM_EXTABLE(2b,30b)
- _ASM_EXTABLE(3b,30b)
- _ASM_EXTABLE(4b,30b)
- _ASM_EXTABLE(5b,30b)
- _ASM_EXTABLE(6b,30b)
- _ASM_EXTABLE(7b,30b)
- _ASM_EXTABLE(8b,30b)
- _ASM_EXTABLE(9b,30b)
- _ASM_EXTABLE(10b,30b)
- _ASM_EXTABLE(11b,30b)
- _ASM_EXTABLE(12b,30b)
- _ASM_EXTABLE(13b,30b)
- _ASM_EXTABLE(14b,30b)
- _ASM_EXTABLE(15b,30b)
- _ASM_EXTABLE(16b,30b)
- _ASM_EXTABLE(18b,40b)
- _ASM_EXTABLE(19b,40b)
- _ASM_EXTABLE(21b,50b)
- _ASM_EXTABLE(22b,50b)
- CFI_ENDPROC
-ENDPROC(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 9734182..7e48807 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -6,7 +6,6 @@
* for more details. No warranty for anything given at all.
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/errno.h>
#include <asm/asm.h>
@@ -47,23 +46,16 @@
ENTRY(csum_partial_copy_generic)
- CFI_STARTPROC
cmpl $3*64, %edx
jle .Lignore
.Lignore:
subq $7*8, %rsp
- CFI_ADJUST_CFA_OFFSET 7*8
movq %rbx, 2*8(%rsp)
- CFI_REL_OFFSET rbx, 2*8
movq %r12, 3*8(%rsp)
- CFI_REL_OFFSET r12, 3*8
movq %r14, 4*8(%rsp)
- CFI_REL_OFFSET r14, 4*8
movq %r13, 5*8(%rsp)
- CFI_REL_OFFSET r13, 5*8
movq %rbp, 6*8(%rsp)
- CFI_REL_OFFSET rbp, 6*8
movq %r8, (%rsp)
movq %r9, 1*8(%rsp)
@@ -206,22 +198,14 @@ ENTRY(csum_partial_copy_generic)
addl %ebx, %eax
adcl %r9d, %eax /* carry */
- CFI_REMEMBER_STATE
.Lende:
movq 2*8(%rsp), %rbx
- CFI_RESTORE rbx
movq 3*8(%rsp), %r12
- CFI_RESTORE r12
movq 4*8(%rsp), %r14
- CFI_RESTORE r14
movq 5*8(%rsp), %r13
- CFI_RESTORE r13
movq 6*8(%rsp), %rbp
- CFI_RESTORE rbp
addq $7*8, %rsp
- CFI_ADJUST_CFA_OFFSET -7*8
ret
- CFI_RESTORE_STATE
/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
@@ -237,5 +221,4 @@ ENTRY(csum_partial_copy_generic)
jz .Lende
movl $-EFAULT, (%rax)
jmp .Lende
- CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index a451235..46668cd 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -26,7 +26,6 @@
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/page_types.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
@@ -36,7 +35,6 @@
.text
ENTRY(__get_user_1)
- CFI_STARTPROC
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
@@ -45,11 +43,9 @@ ENTRY(__get_user_1)
xor %eax,%eax
ASM_CLAC
ret
- CFI_ENDPROC
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
- CFI_STARTPROC
add $1,%_ASM_AX
jc bad_get_user
GET_THREAD_INFO(%_ASM_DX)
@@ -60,11 +56,9 @@ ENTRY(__get_user_2)
xor %eax,%eax
ASM_CLAC
ret
- CFI_ENDPROC
ENDPROC(__get_user_2)
ENTRY(__get_user_4)
- CFI_STARTPROC
add $3,%_ASM_AX
jc bad_get_user
GET_THREAD_INFO(%_ASM_DX)
@@ -75,11 +69,9 @@ ENTRY(__get_user_4)
xor %eax,%eax
ASM_CLAC
ret
- CFI_ENDPROC
ENDPROC(__get_user_4)
ENTRY(__get_user_8)
- CFI_STARTPROC
#ifdef CONFIG_X86_64
add $7,%_ASM_AX
jc bad_get_user
@@ -104,28 +96,23 @@ ENTRY(__get_user_8)
ASM_CLAC
ret
#endif
- CFI_ENDPROC
ENDPROC(__get_user_8)
bad_get_user:
- CFI_STARTPROC
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
ASM_CLAC
ret
- CFI_ENDPROC
END(bad_get_user)
#ifdef CONFIG_X86_32
bad_get_user_8:
- CFI_STARTPROC
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
ASM_CLAC
ret
- CFI_ENDPROC
END(bad_get_user_8)
#endif
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index 05a95e7..33147fe 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -16,15 +16,12 @@
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
/*
* override generic version in lib/iomap_copy.c
*/
ENTRY(__iowrite32_copy)
- CFI_STARTPROC
movl %edx,%ecx
rep movsd
ret
- CFI_ENDPROC
ENDPROC(__iowrite32_copy)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index b046664..16698bb 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -2,7 +2,6 @@
#include <linux/linkage.h>
#include <asm/cpufeature.h>
-#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>
/*
@@ -53,7 +52,6 @@ ENTRY(memcpy_erms)
ENDPROC(memcpy_erms)
ENTRY(memcpy_orig)
- CFI_STARTPROC
movq %rdi, %rax
cmpq $0x20, %rdx
@@ -178,5 +176,4 @@ ENTRY(memcpy_orig)
.Lend:
retq
- CFI_ENDPROC
ENDPROC(memcpy_orig)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0f8a0d0..ca2afdd 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -6,7 +6,6 @@
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
@@ -27,7 +26,6 @@
ENTRY(memmove)
ENTRY(__memmove)
- CFI_STARTPROC
/* Handle more 32 bytes in loop */
mov %rdi, %rax
@@ -207,6 +205,5 @@ ENTRY(__memmove)
movb %r11b, (%rdi)
13:
retq
- CFI_ENDPROC
ENDPROC(__memmove)
ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 93118fb..2661fad 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -1,7 +1,6 @@
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
@@ -66,7 +65,6 @@ ENTRY(memset_erms)
ENDPROC(memset_erms)
ENTRY(memset_orig)
- CFI_STARTPROC
movq %rdi,%r10
/* expand byte value */
@@ -78,7 +76,6 @@ ENTRY(memset_orig)
movl %edi,%r9d
andl $7,%r9d
jnz .Lbad_alignment
- CFI_REMEMBER_STATE
.Lafter_bad_alignment:
movq %rdx,%rcx
@@ -128,7 +125,6 @@ ENTRY(memset_orig)
movq %r10,%rax
ret
- CFI_RESTORE_STATE
.Lbad_alignment:
cmpq $7,%rdx
jbe .Lhandle_7
@@ -139,5 +135,4 @@ ENTRY(memset_orig)
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
- CFI_ENDPROC
ENDPROC(memset_orig)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index 3ca5218..c815564 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -1,6 +1,5 @@
#include <linux/linkage.h>
#include <linux/errno.h>
-#include <asm/dwarf2.h>
#include <asm/asm.h>
#include <asm/msr.h>
@@ -13,9 +12,8 @@
*/
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
- CFI_STARTPROC
- pushq_cfi_reg rbx
- pushq_cfi_reg rbp
+ pushq %rbx
+ pushq %rbp
movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax
@@ -25,7 +23,6 @@ ENTRY(\op\()_safe_regs)
movl 20(%rdi), %ebp
movl 24(%rdi), %esi
movl 28(%rdi), %edi
- CFI_REMEMBER_STATE
1: \op
2: movl %eax, (%r10)
movl %r11d, %eax /* Return value */
@@ -35,16 +32,14 @@ ENTRY(\op\()_safe_regs)
movl %ebp, 20(%r10)
movl %esi, 24(%r10)
movl %edi, 28(%r10)
- popq_cfi_reg rbp
- popq_cfi_reg rbx
+ popq %rbp
+ popq %rbx
ret
3:
- CFI_RESTORE_STATE
movl $-EIO, %r11d
jmp 2b
_ASM_EXTABLE(1b, 3b)
- CFI_ENDPROC
ENDPROC(\op\()_safe_regs)
.endm
@@ -52,13 +47,12 @@ ENDPROC(\op\()_safe_regs)
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
- CFI_STARTPROC
- pushl_cfi_reg ebx
- pushl_cfi_reg ebp
- pushl_cfi_reg esi
- pushl_cfi_reg edi
- pushl_cfi $0 /* Return value */
- pushl_cfi %eax
+ pushl %ebx
+ pushl %ebp
+ pushl %esi
+ pushl %edi
+ pushl $0 /* Return value */
+ pushl %eax
movl 4(%eax), %ecx
movl 8(%eax), %edx
movl 12(%eax), %ebx
@@ -66,32 +60,28 @@ ENTRY(\op\()_safe_regs)
movl 24(%eax), %esi
movl 28(%eax), %edi
movl (%eax), %eax
- CFI_REMEMBER_STATE
1: \op
-2: pushl_cfi %eax
+2: pushl %eax
movl 4(%esp), %eax
- popl_cfi (%eax)
+ popl (%eax)
addl $4, %esp
- CFI_ADJUST_CFA_OFFSET -4
movl %ecx, 4(%eax)
movl %edx, 8(%eax)
movl %ebx, 12(%eax)
movl %ebp, 20(%eax)
movl %esi, 24(%eax)
movl %edi, 28(%eax)
- popl_cfi %eax
- popl_cfi_reg edi
- popl_cfi_reg esi
- popl_cfi_reg ebp
- popl_cfi_reg ebx
+ popl %eax
+ popl %edi
+ popl %esi
+ popl %ebp
+ popl %ebx
ret
3:
- CFI_RESTORE_STATE
movl $-EIO, 4(%esp)
jmp 2b
_ASM_EXTABLE(1b, 3b)
- CFI_ENDPROC
ENDPROC(\op\()_safe_regs)
.endm
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index fc6ba17..e0817a1 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -11,7 +11,6 @@
* return value.
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
#include <asm/thread_info.h>
#include <asm/errno.h>
#include <asm/asm.h>
@@ -30,11 +29,9 @@
* as they get called from within inline assembly.
*/
-#define ENTER CFI_STARTPROC ; \
- GET_THREAD_INFO(%_ASM_BX)
+#define ENTER GET_THREAD_INFO(%_ASM_BX)
#define EXIT ASM_CLAC ; \
- ret ; \
- CFI_ENDPROC
+ ret
.text
ENTRY(__put_user_1)
@@ -87,7 +84,6 @@ ENTRY(__put_user_8)
ENDPROC(__put_user_8)
bad_put_user:
- CFI_STARTPROC
movl $-EFAULT,%eax
EXIT
END(bad_put_user)
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 2322abe..40027db 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -15,7 +15,6 @@
#include <linux/linkage.h>
#include <asm/alternative-asm.h>
-#include <asm/dwarf2.h>
#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg)
#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l)
@@ -34,10 +33,10 @@
*/
#define save_common_regs \
- pushl_cfi_reg ecx
+ pushl %ecx
#define restore_common_regs \
- popl_cfi_reg ecx
+ popl %ecx
/* Avoid uglifying the argument copying x86-64 needs to do. */
.macro movq src, dst
@@ -64,50 +63,45 @@
*/
#define save_common_regs \
- pushq_cfi_reg rdi; \
- pushq_cfi_reg rsi; \
- pushq_cfi_reg rcx; \
- pushq_cfi_reg r8; \
- pushq_cfi_reg r9; \
- pushq_cfi_reg r10; \
- pushq_cfi_reg r11
+ pushq %rdi; \
+ pushq %rsi; \
+ pushq %rcx; \
+ pushq %r8; \
+ pushq %r9; \
+ pushq %r10; \
+ pushq %r11
#define restore_common_regs \
- popq_cfi_reg r11; \
- popq_cfi_reg r10; \
- popq_cfi_reg r9; \
- popq_cfi_reg r8; \
- popq_cfi_reg rcx; \
- popq_cfi_reg rsi; \
- popq_cfi_reg rdi
+ popq %r11; \
+ popq %r10; \
+ popq %r9; \
+ popq %r8; \
+ popq %rcx; \
+ popq %rsi; \
+ popq %rdi
#endif
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
- CFI_STARTPROC
save_common_regs
- __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
+ __ASM_SIZE(push,) %__ASM_REG(dx)
movq %rax,%rdi
call rwsem_down_read_failed
- __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
+ __ASM_SIZE(pop,) %__ASM_REG(dx)
restore_common_regs
ret
- CFI_ENDPROC
ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
- CFI_STARTPROC
save_common_regs
movq %rax,%rdi
call rwsem_down_write_failed
restore_common_regs
ret
- CFI_ENDPROC
ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_wake)
- CFI_STARTPROC
/* do nothing if still outstanding active readers */
__ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx)
jnz 1f
@@ -116,17 +110,14 @@ ENTRY(call_rwsem_wake)
call rwsem_wake
restore_common_regs
1: ret
- CFI_ENDPROC
ENDPROC(call_rwsem_wake)
ENTRY(call_rwsem_downgrade_wake)
- CFI_STARTPROC
save_common_regs
- __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
+ __ASM_SIZE(push,) %__ASM_REG(dx)
movq %rax,%rdi
call rwsem_downgrade_wake
- __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
+ __ASM_SIZE(pop,) %__ASM_REG(dx)
restore_common_regs
ret
- CFI_ENDPROC
ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1d55318..8533b46 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -40,7 +40,7 @@
*/
uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
[_PAGE_CACHE_MODE_WB ] = 0 | 0 ,
- [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 ,
+ [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD,
[_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD,
[_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD,
[_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD,
@@ -50,11 +50,11 @@ EXPORT_SYMBOL(__cachemode2pte_tbl);
uint8_t __pte2cachemode_tbl[8] = {
[__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB,
- [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC,
+ [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC,
[__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
- [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+ [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
};
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 9ca35fc..a9dc7a3 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -77,13 +77,13 @@ void __iomem *
iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
{
/*
- * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
- * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
- * MTRR is UC or WC. UC_MINUS gets the real intention, of the
- * user, which is "WC if the MTRR is WC, UC if you can't do that."
+ * For non-PAT systems, translate non-WB request to UC- just in
+ * case the caller set the PWT bit to prot directly without using
+ * pgprot_writecombine(). UC- translates to uncached if the MTRR
+ * is UC or WC. UC- gets the real intention, of the user, which is
+ * "WC if the MTRR is WC, UC if you can't do that."
*/
- if (!pat_enabled && pgprot_val(prot) ==
- (__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC)))
+ if (!pat_enabled() && pgprot2cachemode(prot) != _PAGE_CACHE_MODE_WB)
prot = __pgprot(__PAGE_KERNEL |
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 5ead4d6c..8405c0c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -42,6 +42,9 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
case _PAGE_CACHE_MODE_WC:
err = _set_memory_wc(vaddr, nrpages);
break;
+ case _PAGE_CACHE_MODE_WT:
+ err = _set_memory_wt(vaddr, nrpages);
+ break;
case _PAGE_CACHE_MODE_WB:
err = _set_memory_wb(vaddr, nrpages);
break;
@@ -172,6 +175,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
prot = __pgprot(pgprot_val(prot) |
cachemode2protval(_PAGE_CACHE_MODE_WC));
break;
+ case _PAGE_CACHE_MODE_WT:
+ prot = __pgprot(pgprot_val(prot) |
+ cachemode2protval(_PAGE_CACHE_MODE_WT));
+ break;
case _PAGE_CACHE_MODE_WB:
break;
}
@@ -234,10 +241,11 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
/*
* Ideally, this should be:
- * pat_enabled ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
+ * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
*
* Till we fix all X drivers to use ioremap_wc(), we will use
- * UC MINUS.
+ * UC MINUS. Drivers that are certain they need or can already
+ * be converted over to strong UC can use ioremap_uc().
*/
enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
@@ -247,6 +255,39 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
EXPORT_SYMBOL(ioremap_nocache);
/**
+ * ioremap_uc - map bus memory into CPU space as strongly uncachable
+ * @phys_addr: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap_uc performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked with a strong
+ * preference as completely uncachable on the CPU when possible. For non-PAT
+ * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
+ * systems this will set the PAT entry for the pages as strong UC. This call
+ * will honor existing caching rules from things like the PCI bus. Note that
+ * there are other caches and buffers on many busses. In particular driver
+ * authors should read up on PCI writes.
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ *
+ * Must be freed with iounmap.
+ */
+void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
+{
+ enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
+
+ return __ioremap_caller(phys_addr, size, pcm,
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL_GPL(ioremap_uc);
+
+/**
* ioremap_wc - map memory into CPU space write combined
* @phys_addr: bus address of the memory
* @size: size of the resource to map
@@ -258,14 +299,28 @@ EXPORT_SYMBOL(ioremap_nocache);
*/
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
- if (pat_enabled)
- return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
__builtin_return_address(0));
- else
- return ioremap_nocache(phys_addr, size);
}
EXPORT_SYMBOL(ioremap_wc);
+/**
+ * ioremap_wt - map memory into CPU space write through
+ * @phys_addr: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * This version of ioremap ensures that the memory is marked write through.
+ * Write through stores data into memory while keeping the cache up-to-date.
+ *
+ * Must be freed with iounmap.
+ */
+void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
+{
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
@@ -331,7 +386,7 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int arch_ioremap_pud_supported(void)
+int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
return cpu_has_gbpages;
@@ -340,7 +395,7 @@ int arch_ioremap_pud_supported(void)
#endif
}
-int arch_ioremap_pmd_supported(void)
+int __init arch_ioremap_pmd_supported(void)
{
return cpu_has_pse;
}
@@ -351,18 +406,20 @@ int arch_ioremap_pmd_supported(void)
*/
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
- void *addr;
- unsigned long start = phys & PAGE_MASK;
+ unsigned long start = phys & PAGE_MASK;
+ unsigned long offset = phys & ~PAGE_MASK;
+ unsigned long vaddr;
/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
if (page_is_ram(start >> PAGE_SHIFT))
return __va(phys);
- addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
- if (addr)
- addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+ vaddr = (unsigned long)ioremap_cache(start, PAGE_SIZE);
+ /* Only add the offset on success and return NULL if the ioremap() failed: */
+ if (vaddr)
+ vaddr += offset;
- return addr;
+ return (void *)vaddr;
}
void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 6629f39..8ff686a 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -9,6 +9,7 @@
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 89af288..727158c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/pci.h>
+#include <linux/vmalloc.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -129,16 +130,15 @@ within(unsigned long addr, unsigned long start, unsigned long end)
*/
void clflush_cache_range(void *vaddr, unsigned int size)
{
- void *vend = vaddr + size - 1;
+ unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
+ void *vend = vaddr + size;
+ void *p;
mb();
- for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
- clflushopt(vaddr);
- /*
- * Flush any possible final partial cacheline:
- */
- clflushopt(vend);
+ for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+ p < vend; p += boot_cpu_data.x86_clflush_size)
+ clflushopt(p);
mb();
}
@@ -418,13 +418,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
phys_addr_t phys_addr;
unsigned long offset;
enum pg_level level;
- unsigned long psize;
unsigned long pmask;
pte_t *pte;
pte = lookup_address(virt_addr, &level);
BUG_ON(!pte);
- psize = page_level_size(level);
pmask = page_level_mask(level);
offset = virt_addr & ~pmask;
phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
@@ -1468,6 +1466,9 @@ int _set_memory_uc(unsigned long addr, int numpages)
{
/*
* for now UC MINUS. see comments in ioremap_nocache()
+ * If you really need strong UC use ioremap_uc(), but note
+ * that you cannot override IO areas with set_memory_*() as
+ * these helpers cannot work with IO memory.
*/
return change_page_attr_set(&addr, numpages,
cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
@@ -1502,12 +1503,10 @@ EXPORT_SYMBOL(set_memory_uc);
static int _set_memory_array(unsigned long *addr, int addrinarray,
enum page_cache_mode new_type)
{
+ enum page_cache_mode set_type;
int i, j;
int ret;
- /*
- * for now UC MINUS. see comments in ioremap_nocache()
- */
for (i = 0; i < addrinarray; i++) {
ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
new_type, NULL);
@@ -1515,9 +1514,12 @@ static int _set_memory_array(unsigned long *addr, int addrinarray,
goto out_free;
}
+ /* If WC, set to UC- first and then WC */
+ set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
+ _PAGE_CACHE_MODE_UC_MINUS : new_type;
+
ret = change_page_attr_set(addr, addrinarray,
- cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
- 1);
+ cachemode2pgprot(set_type), 1);
if (!ret && new_type == _PAGE_CACHE_MODE_WC)
ret = change_page_attr_set_clr(addr, addrinarray,
@@ -1549,6 +1551,12 @@ int set_memory_array_wc(unsigned long *addr, int addrinarray)
}
EXPORT_SYMBOL(set_memory_array_wc);
+int set_memory_array_wt(unsigned long *addr, int addrinarray)
+{
+ return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT);
+}
+EXPORT_SYMBOL_GPL(set_memory_array_wt);
+
int _set_memory_wc(unsigned long addr, int numpages)
{
int ret;
@@ -1571,27 +1579,42 @@ int set_memory_wc(unsigned long addr, int numpages)
{
int ret;
- if (!pat_enabled)
- return set_memory_uc(addr, numpages);
-
ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_MODE_WC, NULL);
if (ret)
- goto out_err;
+ return ret;
ret = _set_memory_wc(addr, numpages);
if (ret)
- goto out_free;
-
- return 0;
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
-out_free:
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
-out_err:
return ret;
}
EXPORT_SYMBOL(set_memory_wc);
+int _set_memory_wt(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(&addr, numpages,
+ cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
+}
+
+int set_memory_wt(unsigned long addr, int numpages)
+{
+ int ret;
+
+ ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ _PAGE_CACHE_MODE_WT, NULL);
+ if (ret)
+ return ret;
+
+ ret = _set_memory_wt(addr, numpages);
+ if (ret)
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(set_memory_wt);
+
int _set_memory_wb(unsigned long addr, int numpages)
{
/* WB cache mode is hard wired to all cache attribute bits being 0 */
@@ -1682,6 +1705,7 @@ static int _set_pages_array(struct page **pages, int addrinarray,
{
unsigned long start;
unsigned long end;
+ enum page_cache_mode set_type;
int i;
int free_idx;
int ret;
@@ -1695,8 +1719,12 @@ static int _set_pages_array(struct page **pages, int addrinarray,
goto err_out;
}
+ /* If WC, set to UC- first and then WC */
+ set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
+ _PAGE_CACHE_MODE_UC_MINUS : new_type;
+
ret = cpa_set_pages_array(pages, addrinarray,
- cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS));
+ cachemode2pgprot(set_type));
if (!ret && new_type == _PAGE_CACHE_MODE_WC)
ret = change_page_attr_set_clr(NULL, addrinarray,
cachemode2pgprot(
@@ -1730,6 +1758,12 @@ int set_pages_array_wc(struct page **pages, int addrinarray)
}
EXPORT_SYMBOL(set_pages_array_wc);
+int set_pages_array_wt(struct page **pages, int addrinarray)
+{
+ return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT);
+}
+EXPORT_SYMBOL_GPL(set_pages_array_wt);
+
int set_pages_wb(struct page *page, int numpages)
{
unsigned long addr = (unsigned long)page_address(page);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 35af677..188e3e0 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -33,13 +33,17 @@
#include "pat_internal.h"
#include "mm_internal.h"
-#ifdef CONFIG_X86_PAT
-int __read_mostly pat_enabled = 1;
+#undef pr_fmt
+#define pr_fmt(fmt) "" fmt
+
+static bool boot_cpu_done;
+
+static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
static inline void pat_disable(const char *reason)
{
- pat_enabled = 0;
- printk(KERN_INFO "%s\n", reason);
+ __pat_enabled = 0;
+ pr_info("x86/PAT: %s\n", reason);
}
static int __init nopat(char *str)
@@ -48,13 +52,12 @@ static int __init nopat(char *str)
return 0;
}
early_param("nopat", nopat);
-#else
-static inline void pat_disable(const char *reason)
+
+bool pat_enabled(void)
{
- (void)reason;
+ return !!__pat_enabled;
}
-#endif
-
+EXPORT_SYMBOL_GPL(pat_enabled);
int pat_debug_enable;
@@ -65,22 +68,24 @@ static int __init pat_debug_setup(char *str)
}
__setup("debugpat", pat_debug_setup);
-static u64 __read_mostly boot_pat_state;
-
#ifdef CONFIG_X86_PAT
/*
- * X86 PAT uses page flags WC and Uncached together to keep track of
- * memory type of pages that have backing page struct. X86 PAT supports 3
- * different memory types, _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC and
- * _PAGE_CACHE_MODE_UC_MINUS and fourth state where page's memory type has not
- * been changed from its default (value of -1 used to denote this).
- * Note we do not support _PAGE_CACHE_MODE_UC here.
+ * X86 PAT uses page flags arch_1 and uncached together to keep track of
+ * memory type of pages that have backing page struct.
+ *
+ * X86 PAT supports 4 different memory types:
+ * - _PAGE_CACHE_MODE_WB
+ * - _PAGE_CACHE_MODE_WC
+ * - _PAGE_CACHE_MODE_UC_MINUS
+ * - _PAGE_CACHE_MODE_WT
+ *
+ * _PAGE_CACHE_MODE_WB is the default type.
*/
-#define _PGMT_DEFAULT 0
+#define _PGMT_WB 0
#define _PGMT_WC (1UL << PG_arch_1)
#define _PGMT_UC_MINUS (1UL << PG_uncached)
-#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_WT (1UL << PG_uncached | 1UL << PG_arch_1)
#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1)
#define _PGMT_CLEAR_MASK (~_PGMT_MASK)
@@ -88,14 +93,14 @@ static inline enum page_cache_mode get_page_memtype(struct page *pg)
{
unsigned long pg_flags = pg->flags & _PGMT_MASK;
- if (pg_flags == _PGMT_DEFAULT)
- return -1;
+ if (pg_flags == _PGMT_WB)
+ return _PAGE_CACHE_MODE_WB;
else if (pg_flags == _PGMT_WC)
return _PAGE_CACHE_MODE_WC;
else if (pg_flags == _PGMT_UC_MINUS)
return _PAGE_CACHE_MODE_UC_MINUS;
else
- return _PAGE_CACHE_MODE_WB;
+ return _PAGE_CACHE_MODE_WT;
}
static inline void set_page_memtype(struct page *pg,
@@ -112,11 +117,12 @@ static inline void set_page_memtype(struct page *pg,
case _PAGE_CACHE_MODE_UC_MINUS:
memtype_flags = _PGMT_UC_MINUS;
break;
- case _PAGE_CACHE_MODE_WB:
- memtype_flags = _PGMT_WB;
+ case _PAGE_CACHE_MODE_WT:
+ memtype_flags = _PGMT_WT;
break;
+ case _PAGE_CACHE_MODE_WB:
default:
- memtype_flags = _PGMT_DEFAULT;
+ memtype_flags = _PGMT_WB;
break;
}
@@ -174,78 +180,154 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
* configuration.
* Using lower indices is preferred, so we start with highest index.
*/
-void pat_init_cache_modes(void)
+void pat_init_cache_modes(u64 pat)
{
- int i;
enum page_cache_mode cache;
char pat_msg[33];
- u64 pat;
+ int i;
- rdmsrl(MSR_IA32_CR_PAT, pat);
pat_msg[32] = 0;
for (i = 7; i >= 0; i--) {
cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
pat_msg + 4 * i);
update_cache_mode_entry(i, cache);
}
- pr_info("PAT configuration [0-7]: %s\n", pat_msg);
+ pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
}
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
-void pat_init(void)
+static void pat_bsp_init(u64 pat)
{
- u64 pat;
- bool boot_cpu = !boot_pat_state;
+ u64 tmp_pat;
- if (!pat_enabled)
+ if (!cpu_has_pat) {
+ pat_disable("PAT not supported by CPU.");
return;
+ }
- if (!cpu_has_pat) {
- if (!boot_pat_state) {
- pat_disable("PAT not supported by CPU.");
- return;
- } else {
- /*
- * If this happens we are on a secondary CPU, but
- * switched to PAT on the boot CPU. We have no way to
- * undo PAT.
- */
- printk(KERN_ERR "PAT enabled, "
- "but not supported by secondary CPU\n");
- BUG();
- }
+ if (!pat_enabled())
+ goto done;
+
+ rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
+ if (!tmp_pat) {
+ pat_disable("PAT MSR is 0, disabled.");
+ return;
}
- /* Set PWT to Write-Combining. All other bits stay the same */
- /*
- * PTE encoding used in Linux:
- * PAT
- * |PCD
- * ||PWT
- * |||
- * 000 WB _PAGE_CACHE_WB
- * 001 WC _PAGE_CACHE_WC
- * 010 UC- _PAGE_CACHE_UC_MINUS
- * 011 UC _PAGE_CACHE_UC
- * PAT bit unused
- */
- pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
- PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
-
- /* Boot CPU check */
- if (!boot_pat_state) {
- rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
- if (!boot_pat_state) {
- pat_disable("PAT read returns always zero, disabled.");
- return;
- }
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+
+done:
+ pat_init_cache_modes(pat);
+}
+
+static void pat_ap_init(u64 pat)
+{
+ if (!pat_enabled())
+ return;
+
+ if (!cpu_has_pat) {
+ /*
+ * If this happens we are on a secondary CPU, but switched to
+ * PAT on the boot CPU. We have no way to undo PAT.
+ */
+ panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
}
wrmsrl(MSR_IA32_CR_PAT, pat);
+}
+
+void pat_init(void)
+{
+ u64 pat;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (!pat_enabled()) {
+ /*
+ * No PAT. Emulate the PAT table that corresponds to the two
+ * cache bits, PWT (Write Through) and PCD (Cache Disable). This
+ * setup is the same as the BIOS default setup when the system
+ * has PAT but the "nopat" boot option has been specified. This
+ * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
+ *
+ * PTE encoding:
+ *
+ * PCD
+ * |PWT PAT
+ * || slot
+ * 00 0 WB : _PAGE_CACHE_MODE_WB
+ * 01 1 WT : _PAGE_CACHE_MODE_WT
+ * 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
+ * 11 3 UC : _PAGE_CACHE_MODE_UC
+ *
+ * NOTE: When WC or WP is used, it is redirected to UC- per
+ * the default setup in __cachemode2pte_tbl[].
+ */
+ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
- if (boot_cpu)
- pat_init_cache_modes();
+ } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+ (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+ ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ /*
+ * PAT support with the lower four entries. Intel Pentium 2,
+ * 3, M, and 4 are affected by PAT errata, which makes the
+ * upper four entries unusable. To be on the safe side, we don't
+ * use those.
+ *
+ * PTE encoding:
+ * PAT
+ * |PCD
+ * ||PWT PAT
+ * ||| slot
+ * 000 0 WB : _PAGE_CACHE_MODE_WB
+ * 001 1 WC : _PAGE_CACHE_MODE_WC
+ * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
+ * 011 3 UC : _PAGE_CACHE_MODE_UC
+ * PAT bit unused
+ *
+ * NOTE: When WT or WP is used, it is redirected to UC- per
+ * the default setup in __cachemode2pte_tbl[].
+ */
+ pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
+ } else {
+ /*
+ * Full PAT support. We put WT in slot 7 to improve
+ * robustness in the presence of errata that might cause
+ * the high PAT bit to be ignored. This way, a buggy slot 7
+ * access will hit slot 3, and slot 3 is UC, so at worst
+ * we lose performance without causing a correctness issue.
+ * Pentium 4 erratum N46 is an example for such an erratum,
+ * although we try not to use PAT at all on affected CPUs.
+ *
+ * PTE encoding:
+ * PAT
+ * |PCD
+ * ||PWT PAT
+ * ||| slot
+ * 000 0 WB : _PAGE_CACHE_MODE_WB
+ * 001 1 WC : _PAGE_CACHE_MODE_WC
+ * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
+ * 011 3 UC : _PAGE_CACHE_MODE_UC
+ * 100 4 WB : Reserved
+ * 101 5 WC : Reserved
+ * 110 6 UC-: Reserved
+ * 111 7 WT : _PAGE_CACHE_MODE_WT
+ *
+ * The reserved slots are unused, but mapped to their
+ * corresponding types in the presence of PAT errata.
+ */
+ pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);
+ }
+
+ if (!boot_cpu_done) {
+ pat_bsp_init(pat);
+ boot_cpu_done = true;
+ } else {
+ pat_ap_init(pat);
+ }
}
#undef PAT
@@ -267,9 +349,9 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end,
* request is for WB.
*/
if (req_type == _PAGE_CACHE_MODE_WB) {
- u8 mtrr_type;
+ u8 mtrr_type, uniform;
- mtrr_type = mtrr_type_lookup(start, end);
+ mtrr_type = mtrr_type_lookup(start, end, &uniform);
if (mtrr_type != MTRR_TYPE_WRBACK)
return _PAGE_CACHE_MODE_UC_MINUS;
@@ -324,9 +406,14 @@ static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
/*
* For RAM pages, we use page flags to mark the pages with appropriate type.
- * Here we do two pass:
- * - Find the memtype of all the pages in the range, look for any conflicts
- * - In case of no conflicts, set the new memtype for pages in the range
+ * The page flags are limited to four types, WB (default), WC, WT and UC-.
+ * WP request fails with -EINVAL, and UC gets redirected to UC-. Setting
+ * a new memory type is only allowed for a page mapped with the default WB
+ * type.
+ *
+ * Here we do two passes:
+ * - Find the memtype of all the pages in the range, look for any conflicts.
+ * - In case of no conflicts, set the new memtype for pages in the range.
*/
static int reserve_ram_pages_type(u64 start, u64 end,
enum page_cache_mode req_type,
@@ -335,6 +422,12 @@ static int reserve_ram_pages_type(u64 start, u64 end,
struct page *page;
u64 pfn;
+ if (req_type == _PAGE_CACHE_MODE_WP) {
+ if (new_type)
+ *new_type = _PAGE_CACHE_MODE_UC_MINUS;
+ return -EINVAL;
+ }
+
if (req_type == _PAGE_CACHE_MODE_UC) {
/* We do not support strong UC */
WARN_ON_ONCE(1);
@@ -346,8 +439,8 @@ static int reserve_ram_pages_type(u64 start, u64 end,
page = pfn_to_page(pfn);
type = get_page_memtype(page);
- if (type != -1) {
- pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
+ if (type != _PAGE_CACHE_MODE_WB) {
+ pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
start, end - 1, type, req_type);
if (new_type)
*new_type = type;
@@ -373,7 +466,7 @@ static int free_ram_pages_type(u64 start, u64 end)
for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
page = pfn_to_page(pfn);
- set_page_memtype(page, -1);
+ set_page_memtype(page, _PAGE_CACHE_MODE_WB);
}
return 0;
}
@@ -384,6 +477,7 @@ static int free_ram_pages_type(u64 start, u64 end)
* - _PAGE_CACHE_MODE_WC
* - _PAGE_CACHE_MODE_UC_MINUS
* - _PAGE_CACHE_MODE_UC
+ * - _PAGE_CACHE_MODE_WT
*
* If new_type is NULL, function will return an error if it cannot reserve the
* region with req_type. If new_type is non-NULL, function will return
@@ -400,14 +494,10 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
BUG_ON(start >= end); /* end is exclusive */
- if (!pat_enabled) {
+ if (!pat_enabled()) {
/* This is identical to page table setting without PAT */
- if (new_type) {
- if (req_type == _PAGE_CACHE_MODE_WC)
- *new_type = _PAGE_CACHE_MODE_UC_MINUS;
- else
- *new_type = req_type;
- }
+ if (new_type)
+ *new_type = req_type;
return 0;
}
@@ -451,9 +541,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
err = rbt_memtype_check_insert(new, new_type);
if (err) {
- printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
- start, end - 1,
- cattr_name(new->type), cattr_name(req_type));
+ pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+ start, end - 1,
+ cattr_name(new->type), cattr_name(req_type));
kfree(new);
spin_unlock(&memtype_lock);
@@ -475,7 +565,7 @@ int free_memtype(u64 start, u64 end)
int is_range_ram;
struct memtype *entry;
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/* Low ISA region is always mapped WB. No need to track */
@@ -497,8 +587,8 @@ int free_memtype(u64 start, u64 end)
spin_unlock(&memtype_lock);
if (!entry) {
- printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
- current->comm, current->pid, start, end - 1);
+ pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
+ current->comm, current->pid, start, end - 1);
return -EINVAL;
}
@@ -517,7 +607,7 @@ int free_memtype(u64 start, u64 end)
* Only to be called when PAT is enabled
*
* Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS
- * or _PAGE_CACHE_MODE_UC
+ * or _PAGE_CACHE_MODE_WT.
*/
static enum page_cache_mode lookup_memtype(u64 paddr)
{
@@ -529,16 +619,9 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
struct page *page;
- page = pfn_to_page(paddr >> PAGE_SHIFT);
- rettype = get_page_memtype(page);
- /*
- * -1 from get_page_memtype() implies RAM page is in its
- * default state and not reserved, and hence of type WB
- */
- if (rettype == -1)
- rettype = _PAGE_CACHE_MODE_WB;
- return rettype;
+ page = pfn_to_page(paddr >> PAGE_SHIFT);
+ return get_page_memtype(page);
}
spin_lock(&memtype_lock);
@@ -623,13 +706,13 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
u64 to = from + size;
u64 cursor = from;
- if (!pat_enabled)
+ if (!pat_enabled())
return 1;
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
- printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
- current->comm, from, to - 1);
+ pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+ current->comm, from, to - 1);
return 0;
}
cursor += PAGE_SIZE;
@@ -659,7 +742,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
* caching for the high addresses through the KEN pin, but
* we maintain the tradition of paranoia in this code.
*/
- if (!pat_enabled &&
+ if (!pat_enabled() &&
!(boot_cpu_has(X86_FEATURE_MTRR) ||
boot_cpu_has(X86_FEATURE_K6_MTRR) ||
boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
@@ -698,8 +781,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
- printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
- "for [mem %#010Lx-%#010Lx]\n",
+ pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid,
cattr_name(pcm),
base, (unsigned long long)(base + size-1));
@@ -729,12 +811,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
* the type requested matches the type of first page in the range.
*/
if (is_ram) {
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
pcm = lookup_memtype(paddr);
if (want_pcm != pcm) {
- printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
+ pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
(unsigned long long)paddr,
@@ -755,13 +837,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
if (strict_prot ||
!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
free_memtype(paddr, paddr + size);
- printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
- " for [mem %#010Lx-%#010Lx], got %s\n",
- current->comm, current->pid,
- cattr_name(want_pcm),
- (unsigned long long)paddr,
- (unsigned long long)(paddr + size - 1),
- cattr_name(pcm));
+ pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
+ current->comm, current->pid,
+ cattr_name(want_pcm),
+ (unsigned long long)paddr,
+ (unsigned long long)(paddr + size - 1),
+ cattr_name(pcm));
return -EINVAL;
}
/*
@@ -844,7 +925,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return ret;
}
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/*
@@ -872,7 +953,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
{
enum page_cache_mode pcm;
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/* Set prot based on lookup */
@@ -913,14 +994,18 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
pgprot_t pgprot_writecombine(pgprot_t prot)
{
- if (pat_enabled)
- return __pgprot(pgprot_val(prot) |
+ return __pgprot(pgprot_val(prot) |
cachemode2protval(_PAGE_CACHE_MODE_WC));
- else
- return pgprot_noncached(prot);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
+pgprot_t pgprot_writethrough(pgprot_t prot)
+{
+ return __pgprot(pgprot_val(prot) |
+ cachemode2protval(_PAGE_CACHE_MODE_WT));
+}
+EXPORT_SYMBOL_GPL(pgprot_writethrough);
+
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
static struct memtype *memtype_get_idx(loff_t pos)
@@ -996,7 +1081,7 @@ static const struct file_operations memtype_fops = {
static int __init pat_memtype_list_init(void)
{
- if (pat_enabled) {
+ if (pat_enabled()) {
debugfs_create_file("pat_memtype_list", S_IRUSR,
arch_debugfs_dir, NULL, &memtype_fops);
}
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
index f641162..a739bfc 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat_internal.h
@@ -4,7 +4,7 @@
extern int pat_debug_enable;
#define dprintk(fmt, arg...) \
- do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
+ do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0)
struct memtype {
u64 start;
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 6582adc..6393108 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -160,9 +160,9 @@ success:
return 0;
failure:
- printk(KERN_INFO "%s:%d conflicting memory types "
- "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start,
- end, cattr_name(found_type), cattr_name(match->type));
+ pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid, start, end,
+ cattr_name(found_type), cattr_name(match->type));
return -EBUSY;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 0b97d2c..fb0a9dd 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -563,16 +563,31 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
}
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+/**
+ * pud_set_huge - setup kernel PUD mapping
+ *
+ * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
+ * function sets up a huge page only if any of the following conditions are met:
+ *
+ * - MTRRs are disabled, or
+ *
+ * - MTRRs are enabled and the range is completely covered by a single MTRR, or
+ *
+ * - MTRRs are enabled and the corresponding MTRR memory type is WB, which
+ * has no effect on the requested PAT memory type.
+ *
+ * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
+ * page mapping attempt fails.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
- u8 mtrr;
+ u8 mtrr, uniform;
- /*
- * Do not use a huge page when the range is covered by non-WB type
- * of MTRRs.
- */
- mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE);
- if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF))
+ mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
+ if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
+ (mtrr != MTRR_TYPE_WRBACK))
return 0;
prot = pgprot_4k_2_large(prot);
@@ -584,17 +599,24 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
return 1;
}
+/**
+ * pmd_set_huge - setup kernel PMD mapping
+ *
+ * See text over pud_set_huge() above.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
- u8 mtrr;
+ u8 mtrr, uniform;
- /*
- * Do not use a huge page when the range is covered by non-WB type
- * of MTRRs.
- */
- mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE);
- if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF))
+ mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
+ if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
+ (mtrr != MTRR_TYPE_WRBACK)) {
+ pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
+ __func__, addr, addr + PMD_SIZE);
return 0;
+ }
prot = pgprot_4k_2_large(prot);
@@ -605,6 +627,11 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
return 1;
}
+/**
+ * pud_clear_huge - clear kernel PUD mapping when it is set
+ *
+ * Returns 1 on success and 0 on failure (no PUD map is found).
+ */
int pud_clear_huge(pud_t *pud)
{
if (pud_large(*pud)) {
@@ -615,6 +642,11 @@ int pud_clear_huge(pud_t *pud)
return 0;
}
+/**
+ * pmd_clear_huge - clear kernel PMD mapping when it is set
+ *
+ * Returns 1 on success and 0 on failure (no PMD map is found).
+ */
int pmd_clear_huge(pmd_t *pmd)
{
if (pmd_large(*pmd)) {
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6440221..4093216 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -8,7 +8,6 @@
* of the License.
*/
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
/*
* Calling convention :
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 9875143..ddeff48 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -559,6 +559,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
if (is_ereg(dst_reg))
EMIT1(0x41);
EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
+
+ /* emit 'movzwl eax, ax' */
+ if (is_ereg(dst_reg))
+ EMIT3(0x45, 0x0F, 0xB7);
+ else
+ EMIT2(0x0F, 0xB7);
+ EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
break;
case 32:
/* emit 'bswap eax' to swap lower 4 bytes */
@@ -577,6 +584,27 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
case BPF_ALU | BPF_END | BPF_FROM_LE:
+ switch (imm32) {
+ case 16:
+ /* emit 'movzwl eax, ax' to zero extend 16-bit
+ * into 64 bit
+ */
+ if (is_ereg(dst_reg))
+ EMIT3(0x45, 0x0F, 0xB7);
+ else
+ EMIT2(0x0F, 0xB7);
+ EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
+ break;
+ case 32:
+ /* emit 'mov eax, eax' to clear upper 32-bits */
+ if (is_ereg(dst_reg))
+ EMIT1(0x45);
+ EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
+ break;
+ case 64:
+ /* nop */
+ break;
+ }
break;
/* ST: *(u8*)(dst_reg + off) = imm */
@@ -938,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
}
ctx.cleanup_addr = proglen;
- for (pass = 0; pass < 10; pass++) {
+ /* JITed image shrinks with every pass and the loop iterates
+ * until the image stops shrinking. Very large bpf programs
+ * may converge on the last pass. In such case do one more
+ * pass to emit the final image
+ */
+ for (pass = 0; pass < 10 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
if (proglen <= 0) {
image = NULL;
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index e469598..14a63ed 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge)
kfree(info);
}
+/*
+ * An IO port or MMIO resource assigned to a PCI host bridge may be
+ * consumed by the host bridge itself or available to its child
+ * bus/devices. The ACPI specification defines a bit (Producer/Consumer)
+ * to tell whether the resource is consumed by the host bridge itself,
+ * but firmware hasn't used that bit consistently, so we can't rely on it.
+ *
+ * On x86 and IA64 platforms, all IO port and MMIO resources are assumed
+ * to be available to child bus/devices except one special case:
+ * IO port [0xCF8-0xCFF] is consumed by the host bridge itself
+ * to access PCI configuration space.
+ *
+ * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF].
+ */
+static bool resource_is_pcicfg_ioport(struct resource *res)
+{
+ return (res->flags & IORESOURCE_IO) &&
+ res->start == 0xCF8 && res->end == 0xCFF;
+}
+
static void probe_pci_root_info(struct pci_root_info *info,
struct acpi_device *device,
int busnum, int domain,
@@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info,
"no IO and memory resources present in _CRS\n");
else
resource_list_for_each_entry_safe(entry, tmp, list) {
- if ((entry->res->flags & IORESOURCE_WINDOW) == 0 ||
- (entry->res->flags & IORESOURCE_DISABLED))
+ if ((entry->res->flags & IORESOURCE_DISABLED) ||
+ resource_is_pcicfg_ioport(entry->res))
resource_list_destroy_entry(entry);
else
entry->res->name = info->name;
@@ -462,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
- struct pci_sysdata *sd = bridge->bus->sysdata;
-
- ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+ /*
+ * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+ * here, pci_create_root_bus() has been called by someone else and
+ * sysdata is likely to be different from what we expect. Let it go in
+ * that case.
+ */
+ if (!bridge->dev.parent) {
+ struct pci_sysdata *sd = bridge->bus->sysdata;
+ ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+ }
return 0;
}
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 349c0d3..0a9f2ca 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -429,12 +429,12 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
* Caller can followup with UC MINUS request and add a WC mtrr if there
* is a free mtrr slot.
*/
- if (!pat_enabled && write_combine)
+ if (!pat_enabled() && write_combine)
return -EINVAL;
- if (pat_enabled && write_combine)
+ if (pat_enabled() && write_combine)
prot |= cachemode2protval(_PAGE_CACHE_MODE_WC);
- else if (pat_enabled || boot_cpu_data.x86 > 3)
+ else if (pat_enabled() || boot_cpu_data.x86 > 3)
/*
* ioremap() and ioremap_nocache() defaults to UC MINUS for now.
* To avoid attribute conflicts, request UC MINUS here
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 852aa4c..2706230 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -208,6 +208,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
static int intel_mid_pci_irq_enable(struct pci_dev *dev)
{
+ struct irq_alloc_info info;
int polarity;
if (dev->irq_managed && dev->irq > 0)
@@ -217,14 +218,13 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
polarity = 0; /* active high */
else
polarity = 1; /* active low */
+ ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity);
/*
* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to
* IOAPIC RTE entries, so we just enable RTE for the device.
*/
- if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev)))
- return -EBUSY;
- if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0)
+ if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC, &info) < 0)
return -EBUSY;
dev->irq_managed = 1;
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 5dc6ca5..9bd1154 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -146,19 +146,20 @@ static void __init pirq_peer_trick(void)
/*
* Code for querying and setting of IRQ routes on various interrupt routers.
+ * PIC Edge/Level Control Registers (ELCR) 0x4d0 & 0x4d1.
*/
-void eisa_set_level_irq(unsigned int irq)
+void elcr_set_level_irq(unsigned int irq)
{
unsigned char mask = 1 << (irq & 7);
unsigned int port = 0x4d0 + (irq >> 3);
unsigned char val;
- static u16 eisa_irq_mask;
+ static u16 elcr_irq_mask;
- if (irq >= 16 || (1 << irq) & eisa_irq_mask)
+ if (irq >= 16 || (1 << irq) & elcr_irq_mask)
return;
- eisa_irq_mask |= (1 << irq);
+ elcr_irq_mask |= (1 << irq);
printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq);
val = inb(port);
if (!(val & mask)) {
@@ -965,11 +966,11 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
} else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
msg = "found";
- eisa_set_level_irq(irq);
+ elcr_set_level_irq(irq);
} else if (newirq && r->set &&
(dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
if (r->set(pirq_router_dev, dev, pirq, newirq)) {
- eisa_set_level_irq(newirq);
+ elcr_set_level_irq(newirq);
msg = "assigned";
irq = newirq;
}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
index 0b283d4..de73413 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
@@ -27,6 +27,7 @@ static struct platform_device wdt_dev = {
static int tangier_probe(struct platform_device *pdev)
{
int gsi;
+ struct irq_alloc_info info;
struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
if (!pdata)
@@ -34,8 +35,8 @@ static int tangier_probe(struct platform_device *pdev)
/* IOAPIC builds identity mapping between GSI and IRQ on MID */
gsi = pdata->irq;
- if (mp_set_gsi_attr(gsi, 1, 0, cpu_to_node(0)) ||
- mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) <= 0) {
+ ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0);
+ if (mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) {
dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n",
gsi);
return -EINVAL;
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 3005f0c..01d54ea 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -81,26 +81,34 @@ static unsigned long __init intel_mid_calibrate_tsc(void)
return 0;
}
+static void __init intel_mid_setup_bp_timer(void)
+{
+ apbt_time_init();
+ setup_boot_APIC_clock();
+}
+
static void __init intel_mid_time_init(void)
{
sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
+
switch (intel_mid_timer_options) {
case INTEL_MID_TIMER_APBT_ONLY:
break;
case INTEL_MID_TIMER_LAPIC_APBT:
- x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
+ /* Use apbt and local apic */
+ x86_init.timers.setup_percpu_clockev = intel_mid_setup_bp_timer;
x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
- break;
+ return;
default:
if (!boot_cpu_has(X86_FEATURE_ARAT))
break;
+ /* Lapic only, no apbt */
x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
return;
}
- /* we need at least one APB timer */
- pre_init_apic_IRQ0();
- apbt_time_init();
+
+ x86_init.timers.setup_percpu_clockev = apbt_time_init;
}
static void intel_mid_arch_setup(void)
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index c14ad34..ce992e8 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -95,18 +95,16 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table)
pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz, irq = %d\n",
totallen, (u32)pentry->phys_addr,
pentry->freq_hz, pentry->irq);
- if (!pentry->irq)
- continue;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
-/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
- mp_irq.irqflag = 5;
- mp_irq.srcbus = MP_BUS_ISA;
- mp_irq.srcbusirq = pentry->irq; /* IRQ */
- mp_irq.dstapic = MP_APIC_ALL;
- mp_irq.dstirq = pentry->irq;
- mp_save_irq(&mp_irq);
- mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC);
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ /* triggering mode edge bit 2-3, active high polarity bit 0-1 */
+ mp_irq.irqflag = 5;
+ mp_irq.srcbus = MP_BUS_ISA;
+ mp_irq.srcbusirq = pentry->irq; /* IRQ */
+ mp_irq.dstapic = MP_APIC_ALL;
+ mp_irq.dstirq = pentry->irq;
+ mp_save_irq(&mp_irq);
+ mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL);
}
return 0;
@@ -177,7 +175,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
mp_irq.dstapic = MP_APIC_ALL;
mp_irq.dstirq = pentry->irq;
mp_save_irq(&mp_irq);
- mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC);
+ mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL);
}
return 0;
}
@@ -436,6 +434,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
struct devs_id *dev = NULL;
int num, i, ret;
int polarity;
+ struct irq_alloc_info info;
sb = (struct sfi_table_simple *)table;
num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
@@ -469,9 +468,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
polarity = 1;
}
- ret = mp_set_gsi_attr(irq, 1, polarity, NUMA_NO_NODE);
- if (ret == 0)
- ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC);
+ ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, polarity);
+ ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC, &info);
WARN_ON(ret < 0);
}
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index 2a8a74f..6c7111b 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -25,8 +25,8 @@
#include <linux/init.h>
#include <linux/sfi.h>
#include <linux/io.h>
-#include <linux/irqdomain.h>
+#include <asm/irqdomain.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -71,9 +71,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table)
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
-static struct irq_domain_ops sfi_ioapic_irqdomain_ops = {
- .map = mp_irqdomain_map,
-};
static int __init sfi_parse_ioapic(struct sfi_table_header *table)
{
@@ -82,7 +79,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table)
int i, num;
struct ioapic_domain_cfg cfg = {
.type = IOAPIC_DOMAIN_STRICT,
- .ops = &sfi_ioapic_irqdomain_ops,
+ .ops = &mp_ioapic_irqdomain_ops,
};
sb = (struct sfi_table_simple *)table;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 0ce6736..8570abe 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -13,22 +13,37 @@
#include <linux/slab.h>
#include <linux/irq.h>
+#include <asm/irqdomain.h>
#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
#include <asm/uv/uv_hub.h>
/* MMR offset and pnode of hub sourcing interrupts for a given irq */
-struct uv_irq_2_mmr_pnode{
- struct rb_node list;
+struct uv_irq_2_mmr_pnode {
unsigned long offset;
int pnode;
- int irq;
};
-static DEFINE_SPINLOCK(uv_irq_lock);
-static struct rb_root uv_irq_root;
+static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info)
+{
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = cfg->dest_apicid;
-static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
+ uv_write_global_mmr64(info->pnode, info->offset, mmr_value);
+}
static void uv_noop(struct irq_data *data) { }
@@ -37,6 +52,23 @@ static void uv_ack_apic(struct irq_data *data)
ack_APIC_irq();
}
+static int
+uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
+{
+ struct irq_data *parent = data->parent_data;
+ struct irq_cfg *cfg = irqd_cfg(data);
+ int ret;
+
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret >= 0) {
+ uv_program_mmr(cfg, data->chip_data);
+ send_cleanup_vector(cfg);
+ }
+
+ return ret;
+}
+
static struct irq_chip uv_irq_chip = {
.name = "UV-CORE",
.irq_mask = uv_noop,
@@ -45,189 +77,99 @@ static struct irq_chip uv_irq_chip = {
.irq_set_affinity = uv_set_irq_affinity,
};
-/*
- * Add offset and pnode information of the hub sourcing interrupts to the
- * rb tree for a specific irq.
- */
-static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
{
- struct rb_node **link = &uv_irq_root.rb_node;
- struct rb_node *parent = NULL;
- struct uv_irq_2_mmr_pnode *n;
- struct uv_irq_2_mmr_pnode *e;
- unsigned long irqflags;
-
- n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
- uv_blade_to_memory_nid(blade));
- if (!n)
+ struct uv_irq_2_mmr_pnode *chip_data;
+ struct irq_alloc_info *info = arg;
+ struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
+ int ret;
+
+ if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV)
+ return -EINVAL;
+
+ chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL,
+ irq_data->node);
+ if (!chip_data)
return -ENOMEM;
- n->irq = irq;
- n->offset = offset;
- n->pnode = uv_blade_to_pnode(blade);
- spin_lock_irqsave(&uv_irq_lock, irqflags);
- /* Find the right place in the rbtree: */
- while (*link) {
- parent = *link;
- e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
-
- if (unlikely(irq == e->irq)) {
- /* irq entry exists */
- e->pnode = uv_blade_to_pnode(blade);
- e->offset = offset;
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- kfree(n);
- return 0;
- }
-
- if (irq < e->irq)
- link = &(*link)->rb_left;
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret >= 0) {
+ if (info->uv_limit == UV_AFFINITY_CPU)
+ irq_set_status_flags(virq, IRQ_NO_BALANCING);
else
- link = &(*link)->rb_right;
+ irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
+
+ chip_data->pnode = uv_blade_to_pnode(info->uv_blade);
+ chip_data->offset = info->uv_offset;
+ irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data,
+ handle_percpu_irq, NULL, info->uv_name);
+ } else {
+ kfree(chip_data);
}
- /* Insert the node into the rbtree. */
- rb_link_node(&n->list, parent, link);
- rb_insert_color(&n->list, &uv_irq_root);
-
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- return 0;
+ return ret;
}
-/* Retrieve offset and pnode information from the rb tree for a specific irq */
-int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
{
- struct uv_irq_2_mmr_pnode *e;
- struct rb_node *n;
- unsigned long irqflags;
-
- spin_lock_irqsave(&uv_irq_lock, irqflags);
- n = uv_irq_root.rb_node;
- while (n) {
- e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
-
- if (e->irq == irq) {
- *offset = e->offset;
- *pnode = e->pnode;
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- return 0;
- }
-
- if (irq < e->irq)
- n = n->rb_left;
- else
- n = n->rb_right;
- }
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- return -1;
+ struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
+
+ BUG_ON(nr_irqs != 1);
+ kfree(irq_data->chip_data);
+ irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
+ irq_clear_status_flags(virq, IRQ_NO_BALANCING);
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
-static int
-arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset, int limit)
+static void uv_domain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data)
{
- const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_cfg *cfg = irq_cfg(irq);
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode, err;
- unsigned int dest;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
- sizeof(unsigned long));
-
- err = assign_irq_vector(irq, cfg, eligible_cpu);
- if (err != 0)
- return err;
-
- err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest);
- if (err != 0)
- return err;
-
- if (limit == UV_AFFINITY_CPU)
- irq_set_status_flags(irq, IRQ_NO_BALANCING);
- else
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-
- irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
- irq_name);
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = dest;
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return irq;
+ uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
}
/*
* Disable the specified MMR located on the specified blade so that MSIs are
* longer allowed to be sent.
*/
-static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+static void uv_domain_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
- sizeof(unsigned long));
-
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->mask = 1;
-
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+ uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
}
-static int
-uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
-{
- struct irq_cfg *cfg = irqd_cfg(data);
- unsigned int dest;
- unsigned long mmr_value, mmr_offset;
- struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode;
-
- if (apic_set_affinity(data, mask, &dest))
- return -1;
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = dest;
-
- /* Get previously stored MMR and pnode of hub sourcing interrupts */
- if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
- return -1;
-
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+static const struct irq_domain_ops uv_domain_ops = {
+ .alloc = uv_domain_alloc,
+ .free = uv_domain_free,
+ .activate = uv_domain_activate,
+ .deactivate = uv_domain_deactivate,
+};
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+static struct irq_domain *uv_get_irq_domain(void)
+{
+ static struct irq_domain *uv_domain;
+ static DEFINE_MUTEX(uv_lock);
+
+ mutex_lock(&uv_lock);
+ if (uv_domain == NULL) {
+ uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL);
+ if (uv_domain)
+ uv_domain->parent = x86_vector_domain;
+ }
+ mutex_unlock(&uv_lock);
- return IRQ_SET_MASK_OK_NOCOPY;
+ return uv_domain;
}
/*
@@ -238,19 +180,21 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
unsigned long mmr_offset, int limit)
{
- int ret, irq = irq_alloc_hwirq(uv_blade_to_memory_nid(mmr_blade));
+ struct irq_alloc_info info;
+ struct irq_domain *domain = uv_get_irq_domain();
- if (!irq)
- return -EBUSY;
+ if (!domain)
+ return -ENOMEM;
- ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
- limit);
- if (ret == irq)
- uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
- else
- irq_free_hwirq(irq);
+ init_irq_alloc_info(&info, cpumask_of(cpu));
+ info.type = X86_IRQ_ALLOC_TYPE_UV;
+ info.uv_limit = limit;
+ info.uv_blade = mmr_blade;
+ info.uv_offset = mmr_offset;
+ info.uv_name = irq_name;
- return ret;
+ return irq_domain_alloc_irqs(domain, 1,
+ uv_blade_to_memory_nid(mmr_blade), &info);
}
EXPORT_SYMBOL_GPL(uv_setup_irq);
@@ -263,26 +207,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
*/
void uv_teardown_irq(unsigned int irq)
{
- struct uv_irq_2_mmr_pnode *e;
- struct rb_node *n;
- unsigned long irqflags;
-
- spin_lock_irqsave(&uv_irq_lock, irqflags);
- n = uv_irq_root.rb_node;
- while (n) {
- e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
- if (e->irq == irq) {
- arch_disable_uv_irq(e->pnode, e->offset);
- rb_erase(n, &uv_irq_root);
- kfree(e);
- break;
- }
- if (irq < e->irq)
- n = n->rb_left;
- else
- n = n->rb_right;
- }
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- irq_free_hwirq(irq);
+ irq_domain_free_irqs(irq, 1);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 3c4469a..e2386cb 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -78,9 +78,9 @@ ENTRY(restore_image)
/* code below has been relocated to a safe page */
ENTRY(core_restore_code)
-loop:
+.Lloop:
testq %rdx, %rdx
- jz done
+ jz .Ldone
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
@@ -91,8 +91,8 @@ loop:
/* progress to the next pbe */
movq pbe_next(%rdx), %rdx
- jmp loop
-done:
+ jmp .Lloop
+.Ldone:
/* jump to the restore_registers address from the image header */
jmpq *%rax
/*
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index acb384d..a8fecc2 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -26,7 +26,7 @@ else
obj-y += syscalls_64.o vdso/
-subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \
+subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \
../lib/rwsem.o
endif
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 7e8a1a6..b9531d3 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -39,7 +39,8 @@
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 94578ef..a8f57a9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1181,10 +1181,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_tscp = native_read_tscp,
.iret = xen_iret,
- .irq_enable_sysexit = xen_sysexit,
#ifdef CONFIG_X86_64
.usergs_sysret32 = xen_sysret32,
.usergs_sysret64 = xen_sysret64,
+#else
+ .irq_enable_sysexit = xen_sysexit,
#endif
.load_tr_desc = paravirt_nop,
@@ -1467,6 +1468,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
{
struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
+ u64 pat;
int rc;
if (!xen_start_info)
@@ -1574,8 +1576,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
* Modify the cache mode translation tables to match Xen's PAT
* configuration.
*/
-
- pat_init_cache_modes();
+ rdmsrl(MSR_IA32_CR_PAT, pat);
+ pat_init_cache_modes(pat);
/* keep using Xen gdt for now; no urgent need to change it */
@@ -1760,6 +1762,9 @@ static struct notifier_block xen_hvm_cpu_notifier = {
static void __init xen_hvm_guest_init(void)
{
+ if (xen_pv_domain())
+ return;
+
init_hvm_pv_info();
xen_hvm_init_shared_info();
@@ -1775,6 +1780,7 @@ static void __init xen_hvm_guest_init(void)
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
}
+#endif
static bool xen_nopv = false;
static __init int xen_parse_nopv(char *arg)
@@ -1784,14 +1790,11 @@ static __init int xen_parse_nopv(char *arg)
}
early_param("xen_nopv", xen_parse_nopv);
-static uint32_t __init xen_hvm_platform(void)
+static uint32_t __init xen_platform(void)
{
if (xen_nopv)
return 0;
- if (xen_pv_domain())
- return 0;
-
return xen_cpuid_base();
}
@@ -1809,11 +1812,19 @@ bool xen_hvm_need_lapic(void)
}
EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
-const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
- .name = "Xen HVM",
- .detect = xen_hvm_platform,
+static void xen_set_cpu_features(struct cpuinfo_x86 *c)
+{
+ if (xen_pv_domain())
+ clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+}
+
+const struct hypervisor_x86 x86_hyper_xen = {
+ .name = "Xen",
+ .detect = xen_platform,
+#ifdef CONFIG_XEN_PVHVM
.init_platform = xen_hvm_guest_init,
+#endif
.x2apic_available = xen_x2apic_para_available,
+ .set_cpu_features = xen_set_cpu_features,
};
-EXPORT_SYMBOL(x86_hyper_xen_hvm);
-#endif
+EXPORT_SYMBOL(x86_hyper_xen);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index b47124d..8b7f18e 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -67,6 +67,7 @@
#include <linux/seq_file.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <asm/cache.h>
#include <asm/setup.h>
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 956374c..9e2ba5c 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,6 +17,56 @@
#include "xen-ops.h"
#include "debugfs.h"
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(char *, irq_name);
+static bool xen_pvspin = true;
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void xen_qlock_kick(int cpu)
+{
+ xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+}
+
+/*
+ * Halt the current CPU & release it back to the host
+ */
+static void xen_qlock_wait(u8 *byte, u8 val)
+{
+ int irq = __this_cpu_read(lock_kicker_irq);
+
+ /* If kicker interrupts not initialized yet, just spin */
+ if (irq == -1)
+ return;
+
+ /* clear pending */
+ xen_clear_irq_pending(irq);
+ barrier();
+
+ /*
+ * We check the byte value after clearing pending IRQ to make sure
+ * that we won't miss a wakeup event because of the clearing.
+ *
+ * The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
+ * So it is effectively a memory barrier for x86.
+ */
+ if (READ_ONCE(*byte) != val)
+ return;
+
+ /*
+ * If an interrupt happens here, it will leave the wakeup irq
+ * pending, which will cause xen_poll_irq() to return
+ * immediately.
+ */
+
+ /* Block until irq becomes pending (or perhaps a spurious wakeup) */
+ xen_poll_irq(irq);
+}
+
+#else /* CONFIG_QUEUED_SPINLOCKS */
+
enum xen_contention_stat {
TAKEN_SLOW,
TAKEN_SLOW_PICKUP,
@@ -100,12 +150,9 @@ struct xen_lock_waiting {
__ticket_t want;
};
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(char *, irq_name);
static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
static cpumask_t waiting_cpus;
-static bool xen_pvspin = true;
__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
{
int irq = __this_cpu_read(lock_kicker_irq);
@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
}
}
}
+#endif /* CONFIG_QUEUED_SPINLOCKS */
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
return;
}
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ __pv_init_lock_hash();
+ pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_lock_ops.wait = xen_qlock_wait;
+ pv_lock_ops.kick = xen_qlock_kick;
+#else
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
+#endif
}
/*
@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
}
early_param("xen_nopvspin", xen_parse_nopvspin);
-#ifdef CONFIG_XEN_DEBUG_FS
+#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
static struct dentry *d_spin_debug;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d949769..53b4c08 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data)
tick_resume_local();
}
+static void xen_vcpu_notify_suspend(void *data)
+{
+ tick_suspend_local();
+}
+
void xen_arch_resume(void)
{
on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
}
+
+void xen_arch_suspend(void)
+{
+ on_each_cpu(xen_vcpu_notify_suspend, NULL, 1);
+}
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 985fc3e..f22667a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,8 @@
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
#include <xen/interface/xen.h>
@@ -47,29 +49,13 @@ ENTRY(xen_iret)
ENDPATCH(xen_iret)
RELOC(xen_iret, 1b+1)
-/*
- * sysexit is not used for 64-bit processes, so it's only ever used to
- * return to 32-bit compat userspace.
- */
-ENTRY(xen_sysexit)
- pushq $__USER32_DS
- pushq %rcx
- pushq $X86_EFLAGS_IF
- pushq $__USER32_CS
- pushq %rdx
-
- pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_sysexit)
-RELOC(xen_sysexit, 1b+1)
-
ENTRY(xen_sysret64)
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(kernel_stack), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER_DS
pushq PER_CPU_VAR(rsp_scratch)
@@ -88,7 +74,7 @@ ENTRY(xen_sysret32)
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(kernel_stack), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER32_DS
pushq PER_CPU_VAR(rsp_scratch)
@@ -128,7 +114,7 @@ RELOC(xen_sysret32, 1b+1)
/* Normal 64-bit system call target */
ENTRY(xen_syscall_target)
undo_xen_syscall
- jmp system_call_after_swapgs
+ jmp entry_SYSCALL_64_after_swapgs
ENDPROC(xen_syscall_target)
#ifdef CONFIG_IA32_EMULATION
@@ -136,13 +122,13 @@ ENDPROC(xen_syscall_target)
/* 32-bit compat syscall target */
ENTRY(xen_syscall32_target)
undo_xen_syscall
- jmp ia32_cstar_target
+ jmp entry_SYSCALL_compat
ENDPROC(xen_syscall32_target)
/* 32-bit compat sysenter target */
ENTRY(xen_sysenter_target)
undo_xen_syscall
- jmp ia32_sysenter_target
+ jmp entry_SYSENTER_compat
ENDPROC(xen_sysenter_target)
#else /* !CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9e195c6..c20fe29 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
+#ifdef CONFIG_X86_32
__visible void xen_sysexit(void);
+#endif
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
__visible void xen_adjust_exception_frame(void);
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
index 172a02a..ba78ccf 100644
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ b/arch/xtensa/include/asm/dma-mapping.h
@@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt,
return -EINVAL;
}
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ return NULL;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+}
+
#endif /* _XTENSA_DMA_MAPPING_H */
diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h
index fe1600a..c39bb6e 100644
--- a/arch/xtensa/include/asm/io.h
+++ b/arch/xtensa/include/asm/io.h
@@ -59,6 +59,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset,
}
#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
{
OpenPOWER on IntegriCloud