summary refs log tree commit diff stats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig32
-rw-r--r--arch/alpha/kernel/process.c6
-rw-r--r--arch/alpha/kernel/smp.c1
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/Kconfig.debug6
-rw-r--r--arch/arm/Makefile4
-rw-r--r--arch/arm/boot/compressed/head.S5
-rw-r--r--arch/arm/boot/dts/at91sam9260.dtsi3
-rw-r--r--arch/arm/boot/dts/at91sam9263.dtsi5
-rw-r--r--arch/arm/boot/dts/at91sam9g25ek.dts2
-rw-r--r--arch/arm/boot/dts/at91sam9g45.dtsi5
-rw-r--r--arch/arm/boot/dts/at91sam9n12.dtsi4
-rw-r--r--arch/arm/boot/dts/at91sam9x5.dtsi4
-rw-r--r--arch/arm/configs/armadillo800eva_defconfig2
-rw-r--r--arch/arm/include/asm/assembler.h8
-rw-r--r--arch/arm/include/asm/dma-mapping.h7
-rw-r--r--arch/arm/include/asm/memory.h3
-rw-r--r--arch/arm/include/asm/tlb.h4
-rw-r--r--arch/arm/include/asm/uaccess.h58
-rw-r--r--arch/arm/include/asm/unistd.h2
-rw-r--r--arch/arm/kernel/calls.S1
-rw-r--r--arch/arm/kernel/hw_breakpoint.c62
-rw-r--r--arch/arm/kernel/smp_twd.c48
-rw-r--r--arch/arm/kernel/traps.c11
-rw-r--r--arch/arm/lib/delay.c1
-rw-r--r--arch/arm/lib/getuser.S23
-rw-r--r--arch/arm/lib/putuser.S6
-rw-r--r--arch/arm/mach-at91/at91rm9200_time.c2
-rw-r--r--arch/arm/mach-at91/at91sam9260_devices.c6
-rw-r--r--arch/arm/mach-at91/at91sam9261_devices.c6
-rw-r--r--arch/arm/mach-at91/at91sam9263_devices.c10
-rw-r--r--arch/arm/mach-at91/at91sam9g45_devices.c6
-rw-r--r--arch/arm/mach-at91/at91sam9rl_devices.c6
-rw-r--r--arch/arm/mach-at91/clock.c12
-rw-r--r--arch/arm/mach-gemini/irq.c1
-rw-r--r--arch/arm/mach-imx/clk-imx25.c8
-rw-r--r--arch/arm/mach-imx/clk-imx35.c6
-rw-r--r--arch/arm/mach-imx/mach-armadillo5x0.c3
-rw-r--r--arch/arm/mach-kirkwood/common.c7
-rw-r--r--arch/arm/mach-kirkwood/db88f6281-bp-setup.c1
-rw-r--r--arch/arm/mach-mxs/mach-mxs.c2
-rw-r--r--arch/arm/mach-omap2/Kconfig3
-rw-r--r--arch/arm/mach-omap2/Makefile2
-rw-r--r--arch/arm/mach-omap2/clock33xx_data.c14
-rw-r--r--arch/arm/mach-omap2/clockdomain2xxx_3xxx.c50
-rw-r--r--arch/arm/mach-omap2/cm-regbits-34xx.h1
-rw-r--r--arch/arm/mach-omap2/omap-wakeupgen.c2
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c1
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_3xxx_data.c15
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_44xx_data.c12
-rw-r--r--arch/arm/mach-omap2/timer.c7
-rw-r--r--arch/arm/mach-orion5x/common.c7
-rw-r--r--arch/arm/mach-shmobile/board-armadillo800eva.c13
-rw-r--r--arch/arm/mach-shmobile/board-kzm9g.c4
-rw-r--r--arch/arm/mach-shmobile/board-mackerel.c3
-rw-r--r--arch/arm/mach-shmobile/board-marzen.c2
-rw-r--r--arch/arm/mach-shmobile/intc-sh73a0.c4
-rw-r--r--arch/arm/mach-tegra/board-harmony-power.c12
-rw-r--r--arch/arm/mm/context.c7
-rw-r--r--arch/arm/mm/dma-mapping.c116
-rw-r--r--arch/arm/mm/mm.h3
-rw-r--r--arch/arm/mm/mmu.c8
-rw-r--r--arch/arm/plat-mxc/include/mach/mx25.h1
-rw-r--r--arch/arm/plat-omap/sram.c11
-rw-r--r--arch/arm/plat-samsung/clock.c10
-rw-r--r--arch/blackfin/Kconfig1
-rw-r--r--arch/blackfin/Makefile1
-rw-r--r--arch/blackfin/include/asm/smp.h2
-rw-r--r--arch/blackfin/mach-common/smp.c223
-rw-r--r--arch/c6x/include/asm/Kbuild1
-rw-r--r--arch/c6x/include/asm/barrier.h27
-rw-r--r--arch/cris/kernel/process.c3
-rw-r--r--arch/frv/kernel/process.c3
-rw-r--r--arch/h8300/kernel/process.c3
-rw-r--r--arch/ia64/Kconfig12
-rw-r--r--arch/ia64/include/asm/switch_to.h8
-rw-r--r--arch/ia64/kernel/process.c3
-rw-r--r--arch/ia64/kernel/time.c66
-rw-r--r--arch/m32r/kernel/process.c3
-rw-r--r--arch/m68k/kernel/process.c3
-rw-r--r--arch/m68k/platform/coldfire/clk.c6
-rw-r--r--arch/mips/kernel/smp-cmp.c2
-rw-r--r--arch/mips/mm/gup.c2
-rw-r--r--arch/mips/mti-malta/malta-int.c9
-rw-r--r--arch/mips/mti-malta/malta-platform.c5
-rw-r--r--arch/mn10300/kernel/process.c3
-rw-r--r--arch/parisc/kernel/process.c3
-rw-r--r--arch/powerpc/boot/.gitignore4
-rw-r--r--arch/powerpc/include/asm/processor.h1
-rw-r--r--arch/powerpc/include/asm/time.h6
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/dbell.c2
-rw-r--r--arch/powerpc/kernel/entry_64.S23
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S3
-rw-r--r--arch/powerpc/kernel/idle_power7.S2
-rw-r--r--arch/powerpc/kernel/process.c15
-rw-r--r--arch/powerpc/kernel/smp.c11
-rw-r--r--arch/powerpc/kernel/sysfs.c10
-rw-r--r--arch/powerpc/kernel/time.c64
-rw-r--r--arch/powerpc/kernel/traps.c3
-rw-r--r--arch/powerpc/lib/code-patching.c2
-rw-r--r--arch/powerpc/mm/numa.c7
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype16
-rw-r--r--arch/powerpc/platforms/powernv/smp.c10
-rw-r--r--arch/powerpc/sysdev/xics/icp-hv.c6
-rw-r--r--arch/s390/Kbuild1
-rw-r--r--arch/s390/Kconfig421
-rw-r--r--arch/s390/boot/compressed/Makefile1
-rw-r--r--arch/s390/boot/compressed/misc.c45
-rw-r--r--arch/s390/defconfig14
-rw-r--r--arch/s390/include/asm/appldata.h2
-rw-r--r--arch/s390/include/asm/chsc.h28
-rw-r--r--arch/s390/include/asm/cio.h28
-rw-r--r--arch/s390/include/asm/cmpxchg.h61
-rw-r--r--arch/s390/include/asm/cpu_mf.h4
-rw-r--r--arch/s390/include/asm/cputime.h3
-rw-r--r--arch/s390/include/asm/css_chars.h39
-rw-r--r--arch/s390/include/asm/eadm.h124
-rw-r--r--arch/s390/include/asm/elf.h3
-rw-r--r--arch/s390/include/asm/etr.h8
-rw-r--r--arch/s390/include/asm/hugetlb.h24
-rw-r--r--arch/s390/include/asm/irq.h2
-rw-r--r--arch/s390/include/asm/isc.h1
-rw-r--r--arch/s390/include/asm/lowcore.h6
-rw-r--r--arch/s390/include/asm/mmu_context.h2
-rw-r--r--arch/s390/include/asm/percpu.h50
-rw-r--r--arch/s390/include/asm/processor.h59
-rw-r--r--arch/s390/include/asm/ptrace.h12
-rw-r--r--arch/s390/include/asm/runtime_instr.h98
-rw-r--r--arch/s390/include/asm/scsw.h38
-rw-r--r--arch/s390/include/asm/setup.h5
-rw-r--r--arch/s390/include/asm/smp.h4
-rw-r--r--arch/s390/include/asm/string.h8
-rw-r--r--arch/s390/include/asm/switch_to.h6
-rw-r--r--arch/s390/include/asm/sysinfo.h39
-rw-r--r--arch/s390/include/asm/tlbflush.h2
-rw-r--r--arch/s390/include/asm/topology.h20
-rw-r--r--arch/s390/include/asm/uaccess.h15
-rw-r--r--arch/s390/include/asm/unistd.h4
-rw-r--r--arch/s390/kernel/Makefile12
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/cache.c385
-rw-r--r--arch/s390/kernel/compat_wrapper.S13
-rw-r--r--arch/s390/kernel/crash.c14
-rw-r--r--arch/s390/kernel/crash_dump.c3
-rw-r--r--arch/s390/kernel/dis.c58
-rw-r--r--arch/s390/kernel/early.c38
-rw-r--r--arch/s390/kernel/entry64.S17
-rw-r--r--arch/s390/kernel/irq.c56
-rw-r--r--arch/s390/kernel/kprobes.c2
-rw-r--r--arch/s390/kernel/lgr.c29
-rw-r--r--arch/s390/kernel/machine_kexec.c9
-rw-r--r--arch/s390/kernel/process.c8
-rw-r--r--arch/s390/kernel/processor.c7
-rw-r--r--arch/s390/kernel/ptrace.c70
-rw-r--r--arch/s390/kernel/runtime_instr.c150
-rw-r--r--arch/s390/kernel/s390_ksyms.c2
-rw-r--r--arch/s390/kernel/setup.c56
-rw-r--r--arch/s390/kernel/smp.c46
-rw-r--r--arch/s390/kernel/syscalls.S2
-rw-r--r--arch/s390/kernel/sysinfo.c351
-rw-r--r--arch/s390/kernel/time.c4
-rw-r--r--arch/s390/kernel/topology.c27
-rw-r--r--arch/s390/kernel/traps.c41
-rw-r--r--arch/s390/kernel/vdso.c8
-rw-r--r--arch/s390/kernel/vtime.c11
-rw-r--r--arch/s390/kvm/Kconfig2
-rw-r--r--arch/s390/kvm/priv.c4
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/lib/mem32.S92
-rw-r--r--arch/s390/lib/mem64.S88
-rw-r--r--arch/s390/lib/string.c56
-rw-r--r--arch/s390/lib/uaccess_pt.c142
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/extable.c81
-rw-r--r--arch/s390/mm/fault.c7
-rw-r--r--arch/s390/mm/gup.c37
-rw-r--r--arch/s390/mm/init.c4
-rw-r--r--arch/s390/mm/pgtable.c6
-rw-r--r--arch/s390/mm/vmem.c2
-rw-r--r--arch/s390/net/Makefile4
-rw-r--r--arch/s390/net/bpf_jit.S130
-rw-r--r--arch/s390/net/bpf_jit_comp.c776
-rw-r--r--arch/s390/oprofile/init.c10
-rw-r--r--arch/score/kernel/process.c4
-rw-r--r--arch/sh/kernel/cpu/sh5/entry.S2
-rw-r--r--arch/sh/kernel/entry-common.S2
-rw-r--r--arch/sparc/kernel/module.c13
-rw-r--r--arch/tile/include/asm/topology.h1
-rw-r--r--arch/tile/include/gxio/iorpc_trio.h24
-rw-r--r--arch/um/drivers/mconsole_kern.c1
-rw-r--r--arch/um/include/asm/processor-generic.h9
-rw-r--r--arch/um/include/shared/common-offsets.h10
-rw-r--r--arch/um/include/shared/user.h11
-rw-r--r--arch/um/kernel/exec.c25
-rw-r--r--arch/um/kernel/process.c8
-rw-r--r--arch/um/kernel/signal.c6
-rw-r--r--arch/um/kernel/syscall.c24
-rw-r--r--arch/um/os-Linux/time.c2
-rw-r--r--arch/um/scripts/Makefile.rules2
-rw-r--r--arch/x86/Kconfig95
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/compressed/Makefile3
-rw-r--r--arch/x86/boot/compressed/eboot.c34
-rw-r--r--arch/x86/boot/compressed/eboot.h4
-rw-r--r--arch/x86/boot/header.S4
-rw-r--r--arch/x86/configs/i386_defconfig23
-rw-r--r--arch/x86/configs/x86_64_defconfig23
-rw-r--r--arch/x86/ia32/ia32_signal.c20
-rw-r--r--arch/x86/ia32/sys_ia32.c2
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/bitops.h14
-rw-r--r--arch/x86/include/asm/calling.h48
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/fpu-internal.h390
-rw-r--r--arch/x86/include/asm/ftrace.h56
-rw-r--r--arch/x86/include/asm/hardirq.h4
-rw-r--r--arch/x86/include/asm/hpet.h2
-rw-r--r--arch/x86/include/asm/i387.h29
-rw-r--r--arch/x86/include/asm/iommu_table.h6
-rw-r--r--arch/x86/include/asm/kprobes.h1
-rw-r--r--arch/x86/include/asm/kvm.h16
-rw-r--r--arch/x86/include/asm/kvm_host.h16
-rw-r--r--arch/x86/include/asm/mce.h13
-rw-r--r--arch/x86/include/asm/microcode.h10
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/include/asm/perf_regs.h33
-rw-r--r--arch/x86/include/asm/pgtable_types.h6
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/rcu.h32
-rw-r--r--arch/x86/include/asm/signal.h4
-rw-r--r--arch/x86/include/asm/svm.h205
-rw-r--r--arch/x86/include/asm/sys_ia32.h2
-rw-r--r--arch/x86/include/asm/thread_info.h10
-rw-r--r--arch/x86/include/asm/uprobes.h3
-rw-r--r--arch/x86/include/asm/vdso.h3
-rw-r--r--arch/x86/include/asm/vmx.h127
-rw-r--r--arch/x86/include/asm/x86_init.h9
-rw-r--r--arch/x86/include/asm/xen/page.h3
-rw-r--r--arch/x86/include/asm/xor_32.h56
-rw-r--r--arch/x86/include/asm/xor_64.h61
-rw-r--r--arch/x86/include/asm/xor_avx.h54
-rw-r--r--arch/x86/include/asm/xsave.h13
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--arch/x86/kernel/alternative.c111
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c67
-rw-r--r--arch/x86/kernel/cpu/bugs.c7
-rw-r--r--arch/x86/kernel/cpu/common.c19
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h12
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c168
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c12
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c25
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c14
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c36
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h6
-rw-r--r--arch/x86/kernel/cpu/proc.c5
-rw-r--r--arch/x86/kernel/cpuid.c5
-rw-r--r--arch/x86/kernel/devicetree.c51
-rw-r--r--arch/x86/kernel/entry_32.S74
-rw-r--r--arch/x86/kernel/entry_64.S161
-rw-r--r--arch/x86/kernel/ftrace.c73
-rw-r--r--arch/x86/kernel/i387.c292
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/kprobes.c67
-rw-r--r--arch/x86/kernel/microcode_amd.c357
-rw-r--r--arch/x86/kernel/microcode_core.c70
-rw-r--r--arch/x86/kernel/microcode_intel.c3
-rw-r--r--arch/x86/kernel/msr.c5
-rw-r--r--arch/x86/kernel/perf_regs.c105
-rw-r--r--arch/x86/kernel/probe_roms.c2
-rw-r--r--arch/x86/kernel/process.c22
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c8
-rw-r--r--arch/x86/kernel/setup.c4
-rw-r--r--arch/x86/kernel/signal.c215
-rw-r--r--arch/x86/kernel/smpboot.c20
-rw-r--r--arch/x86/kernel/step.c53
-rw-r--r--arch/x86/kernel/traps.c174
-rw-r--r--arch/x86/kernel/uprobes.c52
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c6
-rw-r--r--arch/x86/kernel/x86_init.c4
-rw-r--r--arch/x86/kernel/xsave.c517
-rw-r--r--arch/x86/kvm/i8259.c2
-rw-r--r--arch/x86/kvm/trace.h89
-rw-r--r--arch/x86/kvm/vmx.c35
-rw-r--r--arch/x86/kvm/x86.c16
-rw-r--r--arch/x86/mm/fault.c13
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/init_32.c11
-rw-r--r--arch/x86/mm/tlb.c4
-rw-r--r--arch/x86/pci/mmconfig-shared.c2
-rw-r--r--arch/x86/platform/efi/Makefile1
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c76
-rw-r--r--arch/x86/platform/efi/efi.c66
-rw-r--r--arch/x86/um/Kconfig1
-rw-r--r--arch/x86/um/shared/sysdep/kernel-offsets.h3
-rw-r--r--arch/x86/um/shared/sysdep/syscalls.h2
-rw-r--r--arch/x86/um/signal.c6
-rw-r--r--arch/x86/um/sys_call_table_32.c2
-rw-r--r--arch/x86/um/syscalls_32.c27
-rw-r--r--arch/x86/um/syscalls_64.c23
-rw-r--r--arch/x86/xen/enlighten.c4
-rw-r--r--arch/x86/xen/mmu.c20
-rw-r--r--arch/x86/xen/p2m.c29
-rw-r--r--arch/x86/xen/setup.c4
-rw-r--r--arch/x86/xen/smp.c6
-rw-r--r--arch/xtensa/kernel/process.c3
317 files changed, 6934 insertions, 3382 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 72f2fa1..a62965d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -222,6 +222,19 @@ config HAVE_PERF_EVENTS_NMI
subsystem. Also has support for calculating CPU cycle events
to determine how many clock cycles in a given period.
+config HAVE_PERF_REGS
+ bool
+ help
+ Support selective register dumps for perf events. This includes
+ bit-mapping of each register and a unique architecture id.
+
+config HAVE_PERF_USER_STACK_DUMP
+ bool
+ help
+ Support user stack dumps for perf event samples. This needs
+ access to the user stack pointer which is not unified across
+ architectures.
+
config HAVE_ARCH_JUMP_LABEL
bool
@@ -281,4 +294,23 @@ config SECCOMP_FILTER
See Documentation/prctl/seccomp_filter.txt for details.
+config HAVE_RCU_USER_QS
+ bool
+ help
+ Provide kernel entry/exit hooks necessary for userspace
+ RCU extended quiescent state. Syscalls need to be wrapped inside
+ rcu_user_exit()-rcu_user_enter() through the slow path using
+ TIF_NOHZ flag. Exception handlers must be wrapped as well. Irqs
+ are already protected inside rcu_irq_enter/rcu_irq_exit() but
+ preemption or signal handling on irq exit still need to be protected.
+
+config HAVE_VIRT_CPU_ACCOUNTING
+ bool
+
+config HAVE_IRQ_TIME_ACCOUNTING
+ bool
+ help
+ Archs need to ensure they use a high enough resolution clock to
+ support irq time accounting and then call enable_sched_clock_irqtime().
+
source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index d6fde98..83638aa 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -28,6 +28,7 @@
#include <linux/tty.h>
#include <linux/console.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/reg.h>
#include <asm/uaccess.h>
@@ -54,9 +55,12 @@ cpu_idle(void)
/* FIXME -- EV6 and LCA45 know how to power down
the CPU. */
+ rcu_idle_enter();
while (!need_resched())
cpu_relax();
- schedule();
+
+ rcu_idle_exit();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 35ddc02..a41ad90 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -166,6 +166,7 @@ smp_callin(void)
DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n",
cpuid, current, current->active_mm));
+ preempt_disable();
/* Do nothing. */
cpu_idle();
}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c5f9ae5..2f88d8d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -6,7 +6,7 @@ config ARM
select HAVE_DMA_API_DEBUG
select HAVE_IDE if PCI || ISA || PCMCIA
select HAVE_DMA_ATTRS
- select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7)
+ select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_MEMBLOCK
select RTC_LIB
select SYS_SUPPORTS_APM_EMULATION
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index f15f82b..e968a52 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -356,15 +356,15 @@ choice
is nothing connected to read from the DCC.
config DEBUG_SEMIHOSTING
- bool "Kernel low-level debug output via semihosting I"
+ bool "Kernel low-level debug output via semihosting I/O"
help
Semihosting enables code running on an ARM target to use
the I/O facilities on a host debugger/emulator through a
- simple SVC calls. The host debugger or emulator must have
+ simple SVC call. The host debugger or emulator must have
semihosting enabled for the special svc call to be trapped
otherwise the kernel will crash.
- This is known to work with OpenOCD, as wellas
+ This is known to work with OpenOCD, as well as
ARM's Fast Models, or any other controlling environment
that implements semihosting.
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 30eae87..a051dfb 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -284,10 +284,10 @@ zImage Image xipImage bootpImage uImage: vmlinux
zinstall uinstall install: vmlinux
$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
-%.dtb:
+%.dtb: scripts
$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
-dtbs:
+dtbs: scripts
$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
# We use MRPROPER_FILES and CLEAN_FILES now
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index b8c64b8..bc67cbf 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -653,16 +653,21 @@ __armv7_mmu_cache_on:
mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
#endif
mrc p15, 0, r0, c1, c0, 0 @ read control reg
+ bic r0, r0, #1 << 28 @ clear SCTLR.TRE
orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement
orr r0, r0, #0x003c @ write buffer
#ifdef CONFIG_MMU
#ifdef CONFIG_CPU_ENDIAN_BE8
orr r0, r0, #1 << 25 @ big-endian page tables
#endif
+ mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg
orrne r0, r0, #1 @ MMU enabled
movne r1, #0xfffffffd @ domain 0 = client
+ bic r6, r6, #1 << 31 @ 32-bit translation system
+ bic r6, r6, #3 << 0 @ use only ttbr0
mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer
mcrne p15, 0, r1, c3, c0, 0 @ load domain access control
+ mcrne p15, 0, r6, c2, c0, 2 @ load ttb control
#endif
mcr p15, 0, r0, c7, c5, 4 @ ISB
mcr p15, 0, r0, c1, c0, 0 @ load control register
diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
index 66389c1..7c95f76 100644
--- a/arch/arm/boot/dts/at91sam9260.dtsi
+++ b/arch/arm/boot/dts/at91sam9260.dtsi
@@ -104,6 +104,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioB: gpio@fffff600 {
@@ -113,6 +114,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioC: gpio@fffff800 {
@@ -122,6 +124,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
dbgu: serial@fffff200 {
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index b460d6c..195019b 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -95,6 +95,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioB: gpio@fffff400 {
@@ -104,6 +105,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioC: gpio@fffff600 {
@@ -113,6 +115,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioD: gpio@fffff800 {
@@ -122,6 +125,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioE: gpio@fffffa00 {
@@ -131,6 +135,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
dbgu: serial@ffffee00 {
diff --git a/arch/arm/boot/dts/at91sam9g25ek.dts b/arch/arm/boot/dts/at91sam9g25ek.dts
index 7829a4d..96514c1 100644
--- a/arch/arm/boot/dts/at91sam9g25ek.dts
+++ b/arch/arm/boot/dts/at91sam9g25ek.dts
@@ -15,7 +15,7 @@
compatible = "atmel,at91sam9g25ek", "atmel,at91sam9x5ek", "atmel,at91sam9x5", "atmel,at91sam9";
chosen {
- bootargs = "128M console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=ubifs ubi.mtd=1 root=ubi0:rootfs";
+ bootargs = "console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=ubifs ubi.mtd=1 root=ubi0:rootfs";
};
ahb {
diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
index bafa880..63751b1 100644
--- a/arch/arm/boot/dts/at91sam9g45.dtsi
+++ b/arch/arm/boot/dts/at91sam9g45.dtsi
@@ -113,6 +113,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioB: gpio@fffff400 {
@@ -122,6 +123,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioC: gpio@fffff600 {
@@ -131,6 +133,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioD: gpio@fffff800 {
@@ -140,6 +143,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioE: gpio@fffffa00 {
@@ -149,6 +153,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
dbgu: serial@ffffee00 {
diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi
index bfac0df..ef9336a 100644
--- a/arch/arm/boot/dts/at91sam9n12.dtsi
+++ b/arch/arm/boot/dts/at91sam9n12.dtsi
@@ -107,6 +107,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioB: gpio@fffff600 {
@@ -116,6 +117,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioC: gpio@fffff800 {
@@ -125,6 +127,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioD: gpio@fffffa00 {
@@ -134,6 +137,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
dbgu: serial@fffff200 {
diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
index 4a18c39..8a387a8 100644
--- a/arch/arm/boot/dts/at91sam9x5.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5.dtsi
@@ -115,6 +115,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioB: gpio@fffff600 {
@@ -124,6 +125,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioC: gpio@fffff800 {
@@ -133,6 +135,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
pioD: gpio@fffffa00 {
@@ -142,6 +145,7 @@
#gpio-cells = <2>;
gpio-controller;
interrupt-controller;
+ #interrupt-cells = <2>;
};
dbgu: serial@fffff200 {
diff --git a/arch/arm/configs/armadillo800eva_defconfig b/arch/arm/configs/armadillo800eva_defconfig
index 7d87184..90610c7 100644
--- a/arch/arm/configs/armadillo800eva_defconfig
+++ b/arch/arm/configs/armadillo800eva_defconfig
@@ -33,7 +33,7 @@ CONFIG_AEABI=y
CONFIG_FORCE_MAX_ZONEORDER=13
CONFIG_ZBOOT_ROM_TEXT=0x0
CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096"
+CONFIG_CMDLINE="console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096 rw"
CONFIG_CMDLINE_FORCE=y
CONFIG_KEXEC=y
CONFIG_VFP=y
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 03fb936..5c8b3bf4 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -320,4 +320,12 @@
.size \name , . - \name
.endm
+ .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req
+#ifndef CONFIG_CPU_USE_DOMAINS
+ adds \tmp, \addr, #\size - 1
+ sbcccs \tmp, \tmp, \limit
+ bcs \bad
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 2ae842d..5c44dcb 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -203,6 +203,13 @@ static inline void dma_free_writecombine(struct device *dev, size_t size,
}
/*
+ * This can be called during early boot to increase the size of the atomic
+ * coherent DMA pool above the default value of 256KiB. It must be called
+ * before postcore_initcall.
+ */
+extern void __init init_dma_coherent_pool_size(unsigned long size);
+
+/*
* This can be called during boot to increase the size of the consistent
* DMA region above it's default value of 2MB. It must be called before the
* memory allocator is initialised, i.e. before any core_initcall.
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index e965f1b..5f6ddcc 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -187,6 +187,7 @@ static inline unsigned long __phys_to_virt(unsigned long x)
#define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET)
#endif
#endif
+#endif /* __ASSEMBLY__ */
#ifndef PHYS_OFFSET
#ifdef PLAT_PHYS_OFFSET
@@ -196,6 +197,8 @@ static inline unsigned long __phys_to_virt(unsigned long x)
#endif
#endif
+#ifndef __ASSEMBLY__
+
/*
* PFNs are used to describe any physical page; this means
* PFN 0 == physical address 0.
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 314d466..99a1951 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -199,6 +199,9 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
{
pgtable_page_dtor(pte);
+#ifdef CONFIG_ARM_LPAE
+ tlb_add_flush(tlb, addr);
+#else
/*
* With the classic ARM MMU, a pte page has two corresponding pmd
* entries, each covering 1MB.
@@ -206,6 +209,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
addr &= PMD_MASK;
tlb_add_flush(tlb, addr + SZ_1M - PAGE_SIZE);
tlb_add_flush(tlb, addr + SZ_1M);
+#endif
tlb_remove_page(tlb, pte);
}
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 479a635..77bd79f 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -101,28 +101,39 @@ extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
-#define __get_user_x(__r2,__p,__e,__s,__i...) \
+#define __GUP_CLOBBER_1 "lr", "cc"
+#ifdef CONFIG_CPU_USE_DOMAINS
+#define __GUP_CLOBBER_2 "ip", "lr", "cc"
+#else
+#define __GUP_CLOBBER_2 "lr", "cc"
+#endif
+#define __GUP_CLOBBER_4 "lr", "cc"
+
+#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
__asmeq("%0", "r0") __asmeq("%1", "r2") \
+ __asmeq("%3", "r1") \
"bl __get_user_" #__s \
: "=&r" (__e), "=r" (__r2) \
- : "0" (__p) \
- : __i, "cc")
+ : "0" (__p), "r" (__l) \
+ : __GUP_CLOBBER_##__s)
-#define get_user(x,p) \
+#define __get_user_check(x,p) \
({ \
+ unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
register unsigned long __r2 asm("r2"); \
+ register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
case 1: \
- __get_user_x(__r2, __p, __e, 1, "lr"); \
- break; \
+ __get_user_x(__r2, __p, __e, __l, 1); \
+ break; \
case 2: \
- __get_user_x(__r2, __p, __e, 2, "r3", "lr"); \
+ __get_user_x(__r2, __p, __e, __l, 2); \
break; \
case 4: \
- __get_user_x(__r2, __p, __e, 4, "lr"); \
+ __get_user_x(__r2, __p, __e, __l, 4); \
break; \
default: __e = __get_user_bad(); break; \
} \
@@ -130,42 +141,57 @@ extern int __get_user_4(void *);
__e; \
})
+#define get_user(x,p) \
+ ({ \
+ might_fault(); \
+ __get_user_check(x,p); \
+ })
+
extern int __put_user_1(void *, unsigned int);
extern int __put_user_2(void *, unsigned int);
extern int __put_user_4(void *, unsigned int);
extern int __put_user_8(void *, unsigned long long);
-#define __put_user_x(__r2,__p,__e,__s) \
+#define __put_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
__asmeq("%0", "r0") __asmeq("%2", "r2") \
+ __asmeq("%3", "r1") \
"bl __put_user_" #__s \
: "=&r" (__e) \
- : "0" (__p), "r" (__r2) \
+ : "0" (__p), "r" (__r2), "r" (__l) \
: "ip", "lr", "cc")
-#define put_user(x,p) \
+#define __put_user_check(x,p) \
({ \
+ unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __r2 asm("r2") = (x); \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
+ register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
case 1: \
- __put_user_x(__r2, __p, __e, 1); \
+ __put_user_x(__r2, __p, __e, __l, 1); \
break; \
case 2: \
- __put_user_x(__r2, __p, __e, 2); \
+ __put_user_x(__r2, __p, __e, __l, 2); \
break; \
case 4: \
- __put_user_x(__r2, __p, __e, 4); \
+ __put_user_x(__r2, __p, __e, __l, 4); \
break; \
case 8: \
- __put_user_x(__r2, __p, __e, 8); \
+ __put_user_x(__r2, __p, __e, __l, 8); \
break; \
default: __e = __put_user_bad(); break; \
} \
__e; \
})
+#define put_user(x,p) \
+ ({ \
+ might_fault(); \
+ __put_user_check(x,p); \
+ })
+
#else /* CONFIG_MMU */
/*
@@ -219,6 +245,7 @@ do { \
unsigned long __gu_addr = (unsigned long)(ptr); \
unsigned long __gu_val; \
__chk_user_ptr(ptr); \
+ might_fault(); \
switch (sizeof(*(ptr))) { \
case 1: __get_user_asm_byte(__gu_val,__gu_addr,err); break; \
case 2: __get_user_asm_half(__gu_val,__gu_addr,err); break; \
@@ -300,6 +327,7 @@ do { \
unsigned long __pu_addr = (unsigned long)(ptr); \
__typeof__(*(ptr)) __pu_val = (x); \
__chk_user_ptr(ptr); \
+ might_fault(); \
switch (sizeof(*(ptr))) { \
case 1: __put_user_asm_byte(__pu_val,__pu_addr,err); break; \
case 2: __put_user_asm_half(__pu_val,__pu_addr,err); break; \
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 0cab47d..2fde5fd 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -404,6 +404,7 @@
#define __NR_setns (__NR_SYSCALL_BASE+375)
#define __NR_process_vm_readv (__NR_SYSCALL_BASE+376)
#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377)
+ /* 378 for kcmp */
/*
* The following SWIs are ARM private.
@@ -483,6 +484,7 @@
*/
#define __IGNORE_fadvise64_64
#define __IGNORE_migrate_pages
+#define __IGNORE_kcmp
#endif /* __KERNEL__ */
#endif /* __ASM_ARM_UNISTD_H */
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 463ff4a..e337879 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -387,6 +387,7 @@
/* 375 */ CALL(sys_setns)
CALL(sys_process_vm_readv)
CALL(sys_process_vm_writev)
+ CALL(sys_ni_syscall) /* reserved for sys_kcmp */
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index ba386bd..281bf33 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -159,6 +159,12 @@ static int debug_arch_supported(void)
arch >= ARM_DEBUG_ARCH_V7_1;
}
+/* Can we determine the watchpoint access type from the fsr? */
+static int debug_exception_updates_fsr(void)
+{
+ return 0;
+}
+
/* Determine number of WRP registers available. */
static int get_num_wrp_resources(void)
{
@@ -604,13 +610,14 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
/* Aligned */
break;
case 1:
- /* Allow single byte watchpoint. */
- if (info->ctrl.len == ARM_BREAKPOINT_LEN_1)
- break;
case 2:
/* Allow halfword watchpoints and breakpoints. */
if (info->ctrl.len == ARM_BREAKPOINT_LEN_2)
break;
+ case 3:
+ /* Allow single byte watchpoint. */
+ if (info->ctrl.len == ARM_BREAKPOINT_LEN_1)
+ break;
default:
ret = -EINVAL;
goto out;
@@ -619,18 +626,35 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
info->address &= ~alignment_mask;
info->ctrl.len <<= offset;
- /*
- * Currently we rely on an overflow handler to take
- * care of single-stepping the breakpoint when it fires.
- * In the case of userspace breakpoints on a core with V7 debug,
- * we can use the mismatch feature as a poor-man's hardware
- * single-step, but this only works for per-task breakpoints.
- */
- if (!bp->overflow_handler && (arch_check_bp_in_kernelspace(bp) ||
- !core_has_mismatch_brps() || !bp->hw.bp_target)) {
- pr_warning("overflow handler required but none found\n");
- ret = -EINVAL;
+ if (!bp->overflow_handler) {
+ /*
+ * Mismatch breakpoints are required for single-stepping
+ * breakpoints.
+ */
+ if (!core_has_mismatch_brps())
+ return -EINVAL;
+
+ /* We don't allow mismatch breakpoints in kernel space. */
+ if (arch_check_bp_in_kernelspace(bp))
+ return -EPERM;
+
+ /*
+ * Per-cpu breakpoints are not supported by our stepping
+ * mechanism.
+ */
+ if (!bp->hw.bp_target)
+ return -EINVAL;
+
+ /*
+ * We only support specific access types if the fsr
+ * reports them.
+ */
+ if (!debug_exception_updates_fsr() &&
+ (info->ctrl.type == ARM_BREAKPOINT_LOAD ||
+ info->ctrl.type == ARM_BREAKPOINT_STORE))
+ return -EINVAL;
}
+
out:
return ret;
}
@@ -706,10 +730,12 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
goto unlock;
/* Check that the access type matches. */
- access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W :
- HW_BREAKPOINT_R;
- if (!(access & hw_breakpoint_type(wp)))
- goto unlock;
+ if (debug_exception_updates_fsr()) {
+ access = (fsr & ARM_FSR_ACCESS_MASK) ?
+ HW_BREAKPOINT_W : HW_BREAKPOINT_R;
+ if (!(access & hw_breakpoint_type(wp)))
+ goto unlock;
+ }
/* We have a winner. */
info->trigger = addr;
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index fef42b2..e1f9069 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -11,7 +11,6 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/clk.h>
-#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -96,7 +95,52 @@ static void twd_timer_stop(struct clock_event_device *clk)
disable_percpu_irq(clk->irq);
}
-#ifdef CONFIG_CPU_FREQ
+#ifdef CONFIG_COMMON_CLK
+
+/*
+ * Updates this cpu's clockevent frequency when the twd clock rate changes.
+ * Runs on every online cpu via on_each_cpu(), with interrupts disabled.
+ */
+static void twd_update_frequency(void *new_rate)
+{
+	twd_timer_rate = *((unsigned long *) new_rate);
+
+	clockevents_update_freq(*__this_cpu_ptr(twd_evt), twd_timer_rate);
+}
+
+static int twd_rate_change(struct notifier_block *nb,
+	unsigned long flags, void *data)
+{
+	struct clk_notifier_data *cnd = data;
+
+	/*
+	 * The twd clock events must be reprogrammed to account for the new
+	 * frequency. The timer is local to each cpu, so run the update on
+	 * every online cpu, including this one (smp_call_function skips it).
+	 */
+	if (flags == POST_RATE_CHANGE)
+		on_each_cpu(twd_update_frequency,
+				  (void *)&cnd->new_rate, 1);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block twd_clk_nb = {
+ .notifier_call = twd_rate_change,
+};
+
+static int twd_clk_init(void)
+{
+ if (twd_evt && *__this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk))
+ return clk_notifier_register(twd_clk, &twd_clk_nb);
+
+ return 0;
+}
+core_initcall(twd_clk_init);
+
+#elif defined (CONFIG_CPU_FREQ)
+
+#include <linux/cpufreq.h>
/*
* Updates clockevent frequency when the cpu frequency changes.
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index f794521..b0179b8 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -420,20 +420,23 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
#endif
instr = *(u32 *) pc;
} else if (thumb_mode(regs)) {
- get_user(instr, (u16 __user *)pc);
+ if (get_user(instr, (u16 __user *)pc))
+ goto die_sig;
if (is_wide_instruction(instr)) {
unsigned int instr2;
- get_user(instr2, (u16 __user *)pc+1);
+ if (get_user(instr2, (u16 __user *)pc+1))
+ goto die_sig;
instr <<= 16;
instr |= instr2;
}
- } else {
- get_user(instr, (u32 __user *)pc);
+ } else if (get_user(instr, (u32 __user *)pc)) {
+ goto die_sig;
}
if (call_undef_hook(regs, instr) == 0)
return;
+die_sig:
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_UNDEFINED) {
printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index d6dacc6..395d5fb 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -59,6 +59,7 @@ void __init init_current_timer_delay(unsigned long freq)
{
pr_info("Switching to timer-based delay loop\n");
lpj_fine = freq / HZ;
+ loops_per_jiffy = lpj_fine;
arm_delay_ops.delay = __timer_delay;
arm_delay_ops.const_udelay = __timer_const_udelay;
arm_delay_ops.udelay = __timer_udelay;
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 11093a7..9b06bb4 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -16,8 +16,9 @@
* __get_user_X
*
* Inputs: r0 contains the address
+ * r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2, r3 contains the zero-extended value
+ * r2 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -27,33 +28,39 @@
* Note also that it is intended that __get_user_bad is not global.
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/domain.h>
ENTRY(__get_user_1)
+ check_uaccess r0, 1, r1, r2, __get_user_bad
1: TUSER(ldrb) r2, [r0]
mov r0, #0
mov pc, lr
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
-#ifdef CONFIG_THUMB2_KERNEL
-2: TUSER(ldrb) r2, [r0]
-3: TUSER(ldrb) r3, [r0, #1]
+ check_uaccess r0, 2, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+rb .req ip
+2: ldrbt r2, [r0], #1
+3: ldrbt rb, [r0], #0
#else
-2: TUSER(ldrb) r2, [r0], #1
-3: TUSER(ldrb) r3, [r0]
+rb .req r0
+2: ldrb r2, [r0]
+3: ldrb rb, [r0, #1]
#endif
#ifndef __ARMEB__
- orr r2, r2, r3, lsl #8
+ orr r2, r2, rb, lsl #8
#else
- orr r2, r3, r2, lsl #8
+ orr r2, rb, r2, lsl #8
#endif
mov r0, #0
mov pc, lr
ENDPROC(__get_user_2)
ENTRY(__get_user_4)
+ check_uaccess r0, 4, r1, r2, __get_user_bad
4: TUSER(ldr) r2, [r0]
mov r0, #0
mov pc, lr
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 7db2599..3d73dcb 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -16,6 +16,7 @@
* __put_user_X
*
* Inputs: r0 contains the address
+ * r1 contains the address limit, which must be preserved
* r2, r3 contains the value
* Outputs: r0 is the error code
* lr corrupted
@@ -27,16 +28,19 @@
* Note also that it is intended that __put_user_bad is not global.
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/domain.h>
ENTRY(__put_user_1)
+ check_uaccess r0, 1, r1, ip, __put_user_bad
1: TUSER(strb) r2, [r0]
mov r0, #0
mov pc, lr
ENDPROC(__put_user_1)
ENTRY(__put_user_2)
+ check_uaccess r0, 2, r1, ip, __put_user_bad
mov ip, r2, lsr #8
#ifdef CONFIG_THUMB2_KERNEL
#ifndef __ARMEB__
@@ -60,12 +64,14 @@ ENTRY(__put_user_2)
ENDPROC(__put_user_2)
ENTRY(__put_user_4)
+ check_uaccess r0, 4, r1, ip, __put_user_bad
4: TUSER(str) r2, [r0]
mov r0, #0
mov pc, lr
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
+ check_uaccess r0, 8, r1, ip, __put_user_bad
#ifdef CONFIG_THUMB2_KERNEL
5: TUSER(str) r2, [r0]
6: TUSER(str) r3, [r0, #4]
diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index 104ca40..aaa443b 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -197,7 +197,7 @@ void __init at91rm9200_timer_init(void)
at91_st_read(AT91_ST_SR);
/* Make IRQs happen for the system timer */
- setup_irq(AT91_ID_SYS, &at91rm9200_timer_irq);
+ setup_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq);
/* The 32KiHz "Slow Clock" (tick every 30517.58 nanoseconds) is used
* directly for the clocksource and all clockevents, after adjusting
diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c
index 7b9c2ba..bce572a 100644
--- a/arch/arm/mach-at91/at91sam9260_devices.c
+++ b/arch/arm/mach-at91/at91sam9260_devices.c
@@ -726,6 +726,8 @@ static struct resource rtt_resources[] = {
.flags = IORESOURCE_MEM,
}, {
.flags = IORESOURCE_MEM,
+ }, {
+ .flags = IORESOURCE_IRQ,
},
};
@@ -744,10 +746,12 @@ static void __init at91_add_device_rtt_rtc(void)
* The second resource is needed:
* GPBR will serve as the storage for RTC time offset
*/
- at91sam9260_rtt_device.num_resources = 2;
+ at91sam9260_rtt_device.num_resources = 3;
rtt_resources[1].start = AT91SAM9260_BASE_GPBR +
4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
rtt_resources[1].end = rtt_resources[1].start + 3;
+ rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+ rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
}
#else
static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c
index 8df5c1bd..bc2590d 100644
--- a/arch/arm/mach-at91/at91sam9261_devices.c
+++ b/arch/arm/mach-at91/at91sam9261_devices.c
@@ -609,6 +609,8 @@ static struct resource rtt_resources[] = {
.flags = IORESOURCE_MEM,
}, {
.flags = IORESOURCE_MEM,
+ }, {
+ .flags = IORESOURCE_IRQ,
}
};
@@ -626,10 +628,12 @@ static void __init at91_add_device_rtt_rtc(void)
* The second resource is needed:
* GPBR will serve as the storage for RTC time offset
*/
- at91sam9261_rtt_device.num_resources = 2;
+ at91sam9261_rtt_device.num_resources = 3;
rtt_resources[1].start = AT91SAM9261_BASE_GPBR +
4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
rtt_resources[1].end = rtt_resources[1].start + 3;
+ rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+ rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
}
#else
static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c
index eb6bbf8..9b6ca73 100644
--- a/arch/arm/mach-at91/at91sam9263_devices.c
+++ b/arch/arm/mach-at91/at91sam9263_devices.c
@@ -990,6 +990,8 @@ static struct resource rtt0_resources[] = {
.flags = IORESOURCE_MEM,
}, {
.flags = IORESOURCE_MEM,
+ }, {
+ .flags = IORESOURCE_IRQ,
}
};
@@ -1006,6 +1008,8 @@ static struct resource rtt1_resources[] = {
.flags = IORESOURCE_MEM,
}, {
.flags = IORESOURCE_MEM,
+ }, {
+ .flags = IORESOURCE_IRQ,
}
};
@@ -1027,14 +1031,14 @@ static void __init at91_add_device_rtt_rtc(void)
* The second resource is needed only for the chosen RTT:
* GPBR will serve as the storage for RTC time offset
*/
- at91sam9263_rtt0_device.num_resources = 2;
+ at91sam9263_rtt0_device.num_resources = 3;
at91sam9263_rtt1_device.num_resources = 1;
pdev = &at91sam9263_rtt0_device;
r = rtt0_resources;
break;
case 1:
at91sam9263_rtt0_device.num_resources = 1;
- at91sam9263_rtt1_device.num_resources = 2;
+ at91sam9263_rtt1_device.num_resources = 3;
pdev = &at91sam9263_rtt1_device;
r = rtt1_resources;
break;
@@ -1047,6 +1051,8 @@ static void __init at91_add_device_rtt_rtc(void)
pdev->name = "rtc-at91sam9";
r[1].start = AT91SAM9263_BASE_GPBR + 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
r[1].end = r[1].start + 3;
+ r[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+ r[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
}
#else
static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index 0607399..1b47319 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -1293,6 +1293,8 @@ static struct resource rtt_resources[] = {
.flags = IORESOURCE_MEM,
}, {
.flags = IORESOURCE_MEM,
+ }, {
+ .flags = IORESOURCE_IRQ,
}
};
@@ -1310,10 +1312,12 @@ static void __init at91_add_device_rtt_rtc(void)
* The second resource is needed:
* GPBR will serve as the storage for RTC time offset
*/
- at91sam9g45_rtt_device.num_resources = 2;
+ at91sam9g45_rtt_device.num_resources = 3;
rtt_resources[1].start = AT91SAM9G45_BASE_GPBR +
4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
rtt_resources[1].end = rtt_resources[1].start + 3;
+ rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+ rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
}
#else
static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c
index f09fff9..b3d365d 100644
--- a/arch/arm/mach-at91/at91sam9rl_devices.c
+++ b/arch/arm/mach-at91/at91sam9rl_devices.c
@@ -688,6 +688,8 @@ static struct resource rtt_resources[] = {
.flags = IORESOURCE_MEM,
}, {
.flags = IORESOURCE_MEM,
+ }, {
+ .flags = IORESOURCE_IRQ,
}
};
@@ -705,10 +707,12 @@ static void __init at91_add_device_rtt_rtc(void)
* The second resource is needed:
* GPBR will serve as the storage for RTC time offset
*/
- at91sam9rl_rtt_device.num_resources = 2;
+ at91sam9rl_rtt_device.num_resources = 3;
rtt_resources[1].start = AT91SAM9RL_BASE_GPBR +
4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
rtt_resources[1].end = rtt_resources[1].start + 3;
+ rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+ rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
}
#else
static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c
index de2ec6b..188c829 100644
--- a/arch/arm/mach-at91/clock.c
+++ b/arch/arm/mach-at91/clock.c
@@ -63,6 +63,12 @@ EXPORT_SYMBOL_GPL(at91_pmc_base);
#define cpu_has_300M_plla() (cpu_is_at91sam9g10())
+#define cpu_has_240M_plla() (cpu_is_at91sam9261() \
+ || cpu_is_at91sam9263() \
+ || cpu_is_at91sam9rl())
+
+#define cpu_has_210M_plla() (cpu_is_at91sam9260())
+
#define cpu_has_pllb() (!(cpu_is_at91sam9rl() \
|| cpu_is_at91sam9g45() \
|| cpu_is_at91sam9x5() \
@@ -706,6 +712,12 @@ static int __init at91_pmc_init(unsigned long main_clock)
} else if (cpu_has_800M_plla()) {
if (plla.rate_hz > 800000000)
pll_overclock = true;
+ } else if (cpu_has_240M_plla()) {
+ if (plla.rate_hz > 240000000)
+ pll_overclock = true;
+ } else if (cpu_has_210M_plla()) {
+ if (plla.rate_hz > 210000000)
+ pll_overclock = true;
} else {
if (plla.rate_hz > 209000000)
pll_overclock = true;
diff --git a/arch/arm/mach-gemini/irq.c b/arch/arm/mach-gemini/irq.c
index ca70e5f..020852d 100644
--- a/arch/arm/mach-gemini/irq.c
+++ b/arch/arm/mach-gemini/irq.c
@@ -17,6 +17,7 @@
#include <linux/sched.h>
#include <asm/irq.h>
#include <asm/mach/irq.h>
+#include <asm/system_misc.h>
#include <mach/hardware.h>
#define IRQ_SOURCE(base_addr) (base_addr + 0x00)
diff --git a/arch/arm/mach-imx/clk-imx25.c b/arch/arm/mach-imx/clk-imx25.c
index fdd8cc8..d20d479 100644
--- a/arch/arm/mach-imx/clk-imx25.c
+++ b/arch/arm/mach-imx/clk-imx25.c
@@ -222,10 +222,8 @@ int __init mx25_clocks_init(void)
clk_register_clkdev(clk[lcdc_ipg], "ipg", "imx-fb.0");
clk_register_clkdev(clk[lcdc_ahb], "ahb", "imx-fb.0");
clk_register_clkdev(clk[wdt_ipg], NULL, "imx2-wdt.0");
- clk_register_clkdev(clk[ssi1_ipg_per], "per", "imx-ssi.0");
- clk_register_clkdev(clk[ssi1_ipg], "ipg", "imx-ssi.0");
- clk_register_clkdev(clk[ssi2_ipg_per], "per", "imx-ssi.1");
- clk_register_clkdev(clk[ssi2_ipg], "ipg", "imx-ssi.1");
+ clk_register_clkdev(clk[ssi1_ipg], NULL, "imx-ssi.0");
+ clk_register_clkdev(clk[ssi2_ipg], NULL, "imx-ssi.1");
clk_register_clkdev(clk[esdhc1_ipg_per], "per", "sdhci-esdhc-imx25.0");
clk_register_clkdev(clk[esdhc1_ipg], "ipg", "sdhci-esdhc-imx25.0");
clk_register_clkdev(clk[esdhc1_ahb], "ahb", "sdhci-esdhc-imx25.0");
@@ -243,6 +241,6 @@ int __init mx25_clocks_init(void)
clk_register_clkdev(clk[sdma_ahb], "ahb", "imx35-sdma");
clk_register_clkdev(clk[iim_ipg], "iim", NULL);
- mxc_timer_init(MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), 54);
+ mxc_timer_init(MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), MX25_INT_GPT1);
return 0;
}
diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c
index c6422fb..65fb8bc 100644
--- a/arch/arm/mach-imx/clk-imx35.c
+++ b/arch/arm/mach-imx/clk-imx35.c
@@ -230,10 +230,8 @@ int __init mx35_clocks_init()
clk_register_clkdev(clk[ipu_gate], NULL, "mx3_sdc_fb");
clk_register_clkdev(clk[owire_gate], NULL, "mxc_w1");
clk_register_clkdev(clk[sdma_gate], NULL, "imx35-sdma");
- clk_register_clkdev(clk[ipg], "ipg", "imx-ssi.0");
- clk_register_clkdev(clk[ssi1_div_post], "per", "imx-ssi.0");
- clk_register_clkdev(clk[ipg], "ipg", "imx-ssi.1");
- clk_register_clkdev(clk[ssi2_div_post], "per", "imx-ssi.1");
+ clk_register_clkdev(clk[ssi1_gate], NULL, "imx-ssi.0");
+ clk_register_clkdev(clk[ssi2_gate], NULL, "imx-ssi.1");
/* i.mx35 has the i.mx21 type uart */
clk_register_clkdev(clk[uart1_gate], "per", "imx21-uart.0");
clk_register_clkdev(clk[ipg], "ipg", "imx21-uart.0");
diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c
index 2c6ab32..5985ed1 100644
--- a/arch/arm/mach-imx/mach-armadillo5x0.c
+++ b/arch/arm/mach-imx/mach-armadillo5x0.c
@@ -526,7 +526,8 @@ static void __init armadillo5x0_init(void)
imx31_add_mxc_nand(&armadillo5x0_nand_board_info);
/* set NAND page size to 2k if not configured via boot mode pins */
- __raw_writel(__raw_readl(MXC_CCM_RCSR) | (1 << 30), MXC_CCM_RCSR);
+ __raw_writel(__raw_readl(mx3_ccm_base + MXC_CCM_RCSR) |
+ (1 << 30), mx3_ccm_base + MXC_CCM_RCSR);
/* RTC */
/* Get RTC IRQ and register the chip */
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 3226077..1201191 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -517,6 +517,13 @@ void __init kirkwood_wdt_init(void)
void __init kirkwood_init_early(void)
{
orion_time_set_base(TIMER_VIRT_BASE);
+
+ /*
+ * Some Kirkwood devices allocate their coherent buffers from atomic
+ * context. Increase the size of the atomic coherent pool to make sure
+ * that such allocations won't fail.
+ */
+ init_dma_coherent_pool_size(SZ_1M);
}
int kirkwood_tclk;
diff --git a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
index d933593..be90b7d 100644
--- a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
+++ b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/sizes.h>
#include <linux/platform_device.h>
#include <linux/mtd/partitions.h>
#include <linux/ata_platform.h>
diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
index 8dabfe8..ff886e0 100644
--- a/arch/arm/mach-mxs/mach-mxs.c
+++ b/arch/arm/mach-mxs/mach-mxs.c
@@ -261,7 +261,7 @@ static void __init apx4devkit_init(void)
enable_clk_enet_out();
if (IS_BUILTIN(CONFIG_PHYLIB))
- phy_register_fixup_for_uid(PHY_ID_KS8051, MICREL_PHY_ID_MASK,
+ phy_register_fixup_for_uid(PHY_ID_KSZ8051, MICREL_PHY_ID_MASK,
apx4devkit_phy_fixup);
mxsfb_pdata.mode_list = apx4devkit_video_modes;
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index fcd4e85..346fd26 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -232,10 +232,11 @@ config MACH_OMAP3_PANDORA
select OMAP_PACKAGE_CBB
select REGULATOR_FIXED_VOLTAGE if REGULATOR
-config MACH_OMAP3_TOUCHBOOK
+config MACH_TOUCHBOOK
bool "OMAP3 Touch Book"
depends on ARCH_OMAP3
default y
+ select OMAP_PACKAGE_CBB
config MACH_OMAP_3430SDP
bool "OMAP 3430 SDP board"
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index f6a24b3..34c2c7f 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -255,7 +255,7 @@ obj-$(CONFIG_MACH_OMAP_3630SDP) += board-zoom-display.o
obj-$(CONFIG_MACH_CM_T35) += board-cm-t35.o
obj-$(CONFIG_MACH_CM_T3517) += board-cm-t3517.o
obj-$(CONFIG_MACH_IGEP0020) += board-igep0020.o
-obj-$(CONFIG_MACH_OMAP3_TOUCHBOOK) += board-omap3touchbook.o
+obj-$(CONFIG_MACH_TOUCHBOOK) += board-omap3touchbook.o
obj-$(CONFIG_MACH_OMAP_4430SDP) += board-4430sdp.o
obj-$(CONFIG_MACH_OMAP4_PANDA) += board-omap4panda.o
diff --git a/arch/arm/mach-omap2/clock33xx_data.c b/arch/arm/mach-omap2/clock33xx_data.c
index 25bbcc7..ae27de8 100644
--- a/arch/arm/mach-omap2/clock33xx_data.c
+++ b/arch/arm/mach-omap2/clock33xx_data.c
@@ -1036,13 +1036,13 @@ static struct omap_clk am33xx_clks[] = {
CLK(NULL, "mmu_fck", &mmu_fck, CK_AM33XX),
CLK(NULL, "smartreflex0_fck", &smartreflex0_fck, CK_AM33XX),
CLK(NULL, "smartreflex1_fck", &smartreflex1_fck, CK_AM33XX),
- CLK(NULL, "gpt1_fck", &timer1_fck, CK_AM33XX),
- CLK(NULL, "gpt2_fck", &timer2_fck, CK_AM33XX),
- CLK(NULL, "gpt3_fck", &timer3_fck, CK_AM33XX),
- CLK(NULL, "gpt4_fck", &timer4_fck, CK_AM33XX),
- CLK(NULL, "gpt5_fck", &timer5_fck, CK_AM33XX),
- CLK(NULL, "gpt6_fck", &timer6_fck, CK_AM33XX),
- CLK(NULL, "gpt7_fck", &timer7_fck, CK_AM33XX),
+ CLK(NULL, "timer1_fck", &timer1_fck, CK_AM33XX),
+ CLK(NULL, "timer2_fck", &timer2_fck, CK_AM33XX),
+ CLK(NULL, "timer3_fck", &timer3_fck, CK_AM33XX),
+ CLK(NULL, "timer4_fck", &timer4_fck, CK_AM33XX),
+ CLK(NULL, "timer5_fck", &timer5_fck, CK_AM33XX),
+ CLK(NULL, "timer6_fck", &timer6_fck, CK_AM33XX),
+ CLK(NULL, "timer7_fck", &timer7_fck, CK_AM33XX),
CLK(NULL, "usbotg_fck", &usbotg_fck, CK_AM33XX),
CLK(NULL, "ieee5000_fck", &ieee5000_fck, CK_AM33XX),
CLK(NULL, "wdt1_fck", &wdt1_fck, CK_AM33XX),
diff --git a/arch/arm/mach-omap2/clockdomain2xxx_3xxx.c b/arch/arm/mach-omap2/clockdomain2xxx_3xxx.c
index a0d68db..f99e65c 100644
--- a/arch/arm/mach-omap2/clockdomain2xxx_3xxx.c
+++ b/arch/arm/mach-omap2/clockdomain2xxx_3xxx.c
@@ -241,6 +241,52 @@ static void omap3_clkdm_deny_idle(struct clockdomain *clkdm)
_clkdm_del_autodeps(clkdm);
}
+static int omap3xxx_clkdm_clk_enable(struct clockdomain *clkdm)
+{
+ bool hwsup = false;
+
+ if (!clkdm->clktrctrl_mask)
+ return 0;
+
+ hwsup = omap2_cm_is_clkdm_in_hwsup(clkdm->pwrdm.ptr->prcm_offs,
+ clkdm->clktrctrl_mask);
+
+ if (hwsup) {
+ /* Disable HW transitions while the autodeps are being added */
+ _disable_hwsup(clkdm);
+ _clkdm_add_autodeps(clkdm);
+ _enable_hwsup(clkdm);
+ } else {
+ if (clkdm->flags & CLKDM_CAN_FORCE_WAKEUP)
+ omap3_clkdm_wakeup(clkdm);
+ }
+
+ return 0;
+}
+
+static int omap3xxx_clkdm_clk_disable(struct clockdomain *clkdm)
+{
+ bool hwsup = false;
+
+ if (!clkdm->clktrctrl_mask)
+ return 0;
+
+ hwsup = omap2_cm_is_clkdm_in_hwsup(clkdm->pwrdm.ptr->prcm_offs,
+ clkdm->clktrctrl_mask);
+
+ if (hwsup) {
+ /* Disable HW transitions while the autodeps are being removed */
+ _disable_hwsup(clkdm);
+ _clkdm_del_autodeps(clkdm);
+ _enable_hwsup(clkdm);
+ } else {
+ if (clkdm->flags & CLKDM_CAN_FORCE_SLEEP)
+ omap3_clkdm_sleep(clkdm);
+ }
+
+ return 0;
+}
+
struct clkdm_ops omap2_clkdm_operations = {
.clkdm_add_wkdep = omap2_clkdm_add_wkdep,
.clkdm_del_wkdep = omap2_clkdm_del_wkdep,
@@ -267,6 +313,6 @@ struct clkdm_ops omap3_clkdm_operations = {
.clkdm_wakeup = omap3_clkdm_wakeup,
.clkdm_allow_idle = omap3_clkdm_allow_idle,
.clkdm_deny_idle = omap3_clkdm_deny_idle,
- .clkdm_clk_enable = omap2_clkdm_clk_enable,
- .clkdm_clk_disable = omap2_clkdm_clk_disable,
+ .clkdm_clk_enable = omap3xxx_clkdm_clk_enable,
+ .clkdm_clk_disable = omap3xxx_clkdm_clk_disable,
};
diff --git a/arch/arm/mach-omap2/cm-regbits-34xx.h b/arch/arm/mach-omap2/cm-regbits-34xx.h
index 766338f..975f6bd 100644
--- a/arch/arm/mach-omap2/cm-regbits-34xx.h
+++ b/arch/arm/mach-omap2/cm-regbits-34xx.h
@@ -67,6 +67,7 @@
#define OMAP3430_EN_IVA2_DPLL_MASK (0x7 << 0)
/* CM_IDLEST_IVA2 */
+#define OMAP3430_ST_IVA2_SHIFT 0
#define OMAP3430_ST_IVA2_MASK (1 << 0)
/* CM_IDLEST_PLL_IVA2 */
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index 05fdebf..330d4c6 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -46,7 +46,7 @@
static void __iomem *wakeupgen_base;
static void __iomem *sar_base;
static DEFINE_SPINLOCK(wakeupgen_lock);
-static unsigned int irq_target_cpu[NR_IRQS];
+static unsigned int irq_target_cpu[MAX_IRQS];
static unsigned int irq_banks = MAX_NR_REG_BANKS;
static unsigned int max_irqs = MAX_IRQS;
static unsigned int omap_secure_apis;
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 6ca8e51..37afbd1 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -1889,6 +1889,7 @@ static int _enable(struct omap_hwmod *oh)
_enable_sysc(oh);
}
} else {
+ _omap4_disable_module(oh);
_disable_clocks(oh);
pr_debug("omap_hwmod: %s: _wait_target_ready: %d\n",
oh->name, r);
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index c9e3820..ce7e606 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -100,9 +100,9 @@ static struct omap_hwmod omap3xxx_mpu_hwmod = {
/* IVA2 (IVA2) */
static struct omap_hwmod_rst_info omap3xxx_iva_resets[] = {
- { .name = "logic", .rst_shift = 0 },
- { .name = "seq0", .rst_shift = 1 },
- { .name = "seq1", .rst_shift = 2 },
+ { .name = "logic", .rst_shift = 0, .st_shift = 8 },
+ { .name = "seq0", .rst_shift = 1, .st_shift = 9 },
+ { .name = "seq1", .rst_shift = 2, .st_shift = 10 },
};
static struct omap_hwmod omap3xxx_iva_hwmod = {
@@ -112,6 +112,15 @@ static struct omap_hwmod omap3xxx_iva_hwmod = {
.rst_lines = omap3xxx_iva_resets,
.rst_lines_cnt = ARRAY_SIZE(omap3xxx_iva_resets),
.main_clk = "iva2_ck",
+ .prcm = {
+ .omap2 = {
+ .module_offs = OMAP3430_IVA2_MOD,
+ .prcm_reg_id = 1,
+ .module_bit = OMAP3430_CM_FCLKEN_IVA2_EN_IVA2_SHIFT,
+ .idlest_reg_id = 1,
+ .idlest_idle_bit = OMAP3430_ST_IVA2_SHIFT,
+ }
+ },
};
/* timer class */
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 242aee4..afb6091 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -4210,7 +4210,7 @@ static struct omap_hwmod_ocp_if omap44xx_dsp__iva = {
};
/* dsp -> sl2if */
-static struct omap_hwmod_ocp_if omap44xx_dsp__sl2if = {
+static struct omap_hwmod_ocp_if __maybe_unused omap44xx_dsp__sl2if = {
.master = &omap44xx_dsp_hwmod,
.slave = &omap44xx_sl2if_hwmod,
.clk = "dpll_iva_m5x2_ck",
@@ -4828,7 +4828,7 @@ static struct omap_hwmod_ocp_if omap44xx_l3_main_2__iss = {
};
/* iva -> sl2if */
-static struct omap_hwmod_ocp_if omap44xx_iva__sl2if = {
+static struct omap_hwmod_ocp_if __maybe_unused omap44xx_iva__sl2if = {
.master = &omap44xx_iva_hwmod,
.slave = &omap44xx_sl2if_hwmod,
.clk = "dpll_iva_m5x2_ck",
@@ -5362,7 +5362,7 @@ static struct omap_hwmod_ocp_if omap44xx_l4_wkup__scrm = {
};
/* l3_main_2 -> sl2if */
-static struct omap_hwmod_ocp_if omap44xx_l3_main_2__sl2if = {
+static struct omap_hwmod_ocp_if __maybe_unused omap44xx_l3_main_2__sl2if = {
.master = &omap44xx_l3_main_2_hwmod,
.slave = &omap44xx_sl2if_hwmod,
.clk = "l3_div_ck",
@@ -6032,7 +6032,7 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
&omap44xx_l4_abe__dmic,
&omap44xx_l4_abe__dmic_dma,
&omap44xx_dsp__iva,
- &omap44xx_dsp__sl2if,
+ /* &omap44xx_dsp__sl2if, */
&omap44xx_l4_cfg__dsp,
&omap44xx_l3_main_2__dss,
&omap44xx_l4_per__dss,
@@ -6068,7 +6068,7 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
&omap44xx_l4_per__i2c4,
&omap44xx_l3_main_2__ipu,
&omap44xx_l3_main_2__iss,
- &omap44xx_iva__sl2if,
+ /* &omap44xx_iva__sl2if, */
&omap44xx_l3_main_2__iva,
&omap44xx_l4_wkup__kbd,
&omap44xx_l4_cfg__mailbox,
@@ -6099,7 +6099,7 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
&omap44xx_l4_cfg__cm_core,
&omap44xx_l4_wkup__prm,
&omap44xx_l4_wkup__scrm,
- &omap44xx_l3_main_2__sl2if,
+ /* &omap44xx_l3_main_2__sl2if, */
&omap44xx_l4_abe__slimbus1,
&omap44xx_l4_abe__slimbus1_dma,
&omap44xx_l4_per__slimbus2,
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 2ff6d41..2ba4f57 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -260,6 +260,7 @@ static u32 notrace dmtimer_read_sched_clock(void)
return 0;
}
+#ifdef CONFIG_OMAP_32K_TIMER
/* Setup free-running counter for clocksource */
static int __init omap2_sync32k_clocksource_init(void)
{
@@ -299,6 +300,12 @@ static int __init omap2_sync32k_clocksource_init(void)
return ret;
}
+#else
+static inline int omap2_sync32k_clocksource_init(void)
+{
+ return -ENODEV;
+}
+#endif
static void __init omap2_gptimer_clocksource_init(int gptimer_id,
const char *fck_source)
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 410291c..a6cd14a 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -204,6 +204,13 @@ void __init orion5x_wdt_init(void)
void __init orion5x_init_early(void)
{
orion_time_set_base(TIMER_VIRT_BASE);
+
+ /*
+ * Some Orion5x devices allocate their coherent buffers from atomic
+ * context. Increase the size of the atomic coherent pool to make sure
+ * that such allocations won't fail.
+ */
+ init_dma_coherent_pool_size(SZ_1M);
}
int orion5x_tclk;
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index cf10f92..453a6e5 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -520,13 +520,14 @@ static struct platform_device hdmi_lcdc_device = {
};
/* GPIO KEY */
-#define GPIO_KEY(c, g, d) { .code = c, .gpio = g, .desc = d, .active_low = 1 }
+#define GPIO_KEY(c, g, d, ...) \
+ { .code = c, .gpio = g, .desc = d, .active_low = 1, __VA_ARGS__ }
static struct gpio_keys_button gpio_buttons[] = {
- GPIO_KEY(KEY_POWER, GPIO_PORT99, "SW1"),
- GPIO_KEY(KEY_BACK, GPIO_PORT100, "SW2"),
- GPIO_KEY(KEY_MENU, GPIO_PORT97, "SW3"),
- GPIO_KEY(KEY_HOME, GPIO_PORT98, "SW4"),
+ GPIO_KEY(KEY_POWER, GPIO_PORT99, "SW3", .wakeup = 1),
+ GPIO_KEY(KEY_BACK, GPIO_PORT100, "SW4"),
+ GPIO_KEY(KEY_MENU, GPIO_PORT97, "SW5"),
+ GPIO_KEY(KEY_HOME, GPIO_PORT98, "SW6"),
};
static struct gpio_keys_platform_data gpio_key_info = {
@@ -901,8 +902,8 @@ static struct platform_device *eva_devices[] __initdata = {
&camera_device,
&ceu0_device,
&fsi_device,
- &fsi_hdmi_device,
&fsi_wm8978_device,
+ &fsi_hdmi_device,
};
static void __init eva_clock_init(void)
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index 53b7ea9..3b8a017 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -346,11 +346,11 @@ static struct resource sh_mmcif_resources[] = {
.flags = IORESOURCE_MEM,
},
[1] = {
- .start = gic_spi(141),
+ .start = gic_spi(140),
.flags = IORESOURCE_IRQ,
},
[2] = {
- .start = gic_spi(140),
+ .start = gic_spi(141),
.flags = IORESOURCE_IRQ,
},
};
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index 7ea2b31..c129542 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -695,6 +695,7 @@ static struct platform_device usbhs0_device = {
* - J30 "open"
* - modify usbhs1_get_id() USBHS_HOST -> USBHS_GADGET
* - add .get_vbus = usbhs_get_vbus in usbhs1_private
+ * - check usbhs0_device(pio)/usbhs1_device(irq) order in mackerel_devices.
*/
#define IRQ8 evt2irq(0x0300)
#define USB_PHY_MODE (1 << 4)
@@ -1325,8 +1326,8 @@ static struct platform_device *mackerel_devices[] __initdata = {
&nor_flash_device,
&smc911x_device,
&lcdc_device,
- &usbhs1_device,
&usbhs0_device,
+ &usbhs1_device,
&leds_device,
&fsi_device,
&fsi_ak4643_device,
diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c
index 3a528cf..fcf5a47 100644
--- a/arch/arm/mach-shmobile/board-marzen.c
+++ b/arch/arm/mach-shmobile/board-marzen.c
@@ -67,7 +67,7 @@ static struct smsc911x_platform_config smsc911x_platdata = {
static struct platform_device eth_device = {
.name = "smsc911x",
- .id = 0,
+ .id = -1,
.dev = {
.platform_data = &smsc911x_platdata,
},
diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c
index ee44740..588555a 100644
--- a/arch/arm/mach-shmobile/intc-sh73a0.c
+++ b/arch/arm/mach-shmobile/intc-sh73a0.c
@@ -259,9 +259,9 @@ static int sh73a0_set_wake(struct irq_data *data, unsigned int on)
return 0; /* always allow wakeup */
}
-#define RELOC_BASE 0x1000
+#define RELOC_BASE 0x1200
-/* INTCA IRQ pins at INTCS + 0x1000 to make space for GIC+INTC handling */
+/* INTCA IRQ pins at INTCS + RELOC_BASE to make space for GIC+INTC handling */
#define INTCS_VECT_RELOC(n, vect) INTCS_VECT((n), (vect) + RELOC_BASE)
INTC_IRQ_PINS_32(intca_irq_pins, 0xe6900000,
diff --git a/arch/arm/mach-tegra/board-harmony-power.c b/arch/arm/mach-tegra/board-harmony-power.c
index b7344be..94486e7 100644
--- a/arch/arm/mach-tegra/board-harmony-power.c
+++ b/arch/arm/mach-tegra/board-harmony-power.c
@@ -67,6 +67,13 @@ static struct regulator_init_data ldo0_data = {
}, \
}
+static struct regulator_init_data sys_data = {
+ .supply_regulator = "vdd_5v0",
+ .constraints = {
+ .name = "vdd_sys",
+ },
+};
+
HARMONY_REGULATOR_INIT(sm0, "vdd_sm0", "vdd_sys", 725, 1500, 1);
HARMONY_REGULATOR_INIT(sm1, "vdd_sm1", "vdd_sys", 725, 1500, 1);
HARMONY_REGULATOR_INIT(sm2, "vdd_sm2", "vdd_sys", 3000, 4550, 1);
@@ -74,7 +81,7 @@ HARMONY_REGULATOR_INIT(ldo1, "vdd_ldo1", "vdd_sm2", 725, 1500, 1);
HARMONY_REGULATOR_INIT(ldo2, "vdd_ldo2", "vdd_sm2", 725, 1500, 0);
HARMONY_REGULATOR_INIT(ldo3, "vdd_ldo3", "vdd_sm2", 1250, 3300, 1);
HARMONY_REGULATOR_INIT(ldo4, "vdd_ldo4", "vdd_sm2", 1700, 2475, 1);
-HARMONY_REGULATOR_INIT(ldo5, "vdd_ldo5", NULL, 1250, 3300, 1);
+HARMONY_REGULATOR_INIT(ldo5, "vdd_ldo5", "vdd_sys", 1250, 3300, 1);
HARMONY_REGULATOR_INIT(ldo6, "vdd_ldo6", "vdd_sm2", 1250, 3300, 0);
HARMONY_REGULATOR_INIT(ldo7, "vdd_ldo7", "vdd_sm2", 1250, 3300, 0);
HARMONY_REGULATOR_INIT(ldo8, "vdd_ldo8", "vdd_sm2", 1250, 3300, 0);
@@ -88,6 +95,7 @@ HARMONY_REGULATOR_INIT(ldo9, "vdd_ldo9", "vdd_sm2", 1250, 3300, 1);
}
static struct tps6586x_subdev_info tps_devs[] = {
+ TPS_REG(SYS, &sys_data),
TPS_REG(SM_0, &sm0_data),
TPS_REG(SM_1, &sm1_data),
TPS_REG(SM_2, &sm2_data),
@@ -120,7 +128,7 @@ static struct i2c_board_info __initdata harmony_regulators[] = {
int __init harmony_regulator_init(void)
{
- regulator_register_always_on(0, "vdd_sys",
+ regulator_register_always_on(0, "vdd_5v0",
NULL, 0, 5000000);
if (machine_is_harmony()) {
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 119bc52..4e07eec 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -63,10 +63,11 @@ static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd,
pid = task_pid_nr(thread->task) << ASID_BITS;
asm volatile(
" mrc p15, 0, %0, c13, c0, 1\n"
- " bfi %1, %0, #0, %2\n"
- " mcr p15, 0, %1, c13, c0, 1\n"
+ " and %0, %0, %2\n"
+ " orr %0, %0, %1\n"
+ " mcr p15, 0, %0, c13, c0, 1\n"
: "=r" (contextidr), "+r" (pid)
- : "I" (ASID_BITS));
+ : "I" (~ASID_MASK));
isb();
return NOTIFY_OK;
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 4e7d118..13f555d 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -267,17 +267,19 @@ static void __dma_free_remap(void *cpu_addr, size_t size)
vunmap(cpu_addr);
}
+#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
+
struct dma_pool {
size_t size;
spinlock_t lock;
unsigned long *bitmap;
unsigned long nr_pages;
void *vaddr;
- struct page *page;
+ struct page **pages;
};
static struct dma_pool atomic_pool = {
- .size = SZ_256K,
+ .size = DEFAULT_DMA_COHERENT_POOL_SIZE,
};
static int __init early_coherent_pool(char *p)
@@ -287,6 +289,21 @@ static int __init early_coherent_pool(char *p)
}
early_param("coherent_pool", early_coherent_pool);
+void __init init_dma_coherent_pool_size(unsigned long size)
+{
+ /*
+ * Catch any attempt to set the pool size too late.
+ */
+ BUG_ON(atomic_pool.vaddr);
+
+ /*
+ * Set architecture specific coherent pool size only if
+ * it has not been changed by kernel command line parameter.
+ */
+ if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE)
+ atomic_pool.size = size;
+}
+
/*
* Initialise the coherent pool for atomic allocations.
*/
@@ -297,6 +314,7 @@ static int __init atomic_pool_init(void)
unsigned long nr_pages = pool->size >> PAGE_SHIFT;
unsigned long *bitmap;
struct page *page;
+ struct page **pages;
void *ptr;
int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
@@ -304,21 +322,33 @@ static int __init atomic_pool_init(void)
if (!bitmap)
goto no_bitmap;
+ pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto no_pages;
+
if (IS_ENABLED(CONFIG_CMA))
ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page);
else
ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot,
&page, NULL);
if (ptr) {
+ int i;
+
+ for (i = 0; i < nr_pages; i++)
+ pages[i] = page + i;
+
spin_lock_init(&pool->lock);
pool->vaddr = ptr;
- pool->page = page;
+ pool->pages = pages;
pool->bitmap = bitmap;
pool->nr_pages = nr_pages;
pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
(unsigned)pool->size / 1024);
return 0;
}
+
+ kfree(pages);
+no_pages:
kfree(bitmap);
no_bitmap:
pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
@@ -443,27 +473,45 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
if (pageno < pool->nr_pages) {
bitmap_set(pool->bitmap, pageno, count);
ptr = pool->vaddr + PAGE_SIZE * pageno;
- *ret_page = pool->page + pageno;
+ *ret_page = pool->pages[pageno];
+ } else {
+ pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n"
+ "Please increase it with coherent_pool= kernel parameter!\n",
+ (unsigned)pool->size / 1024);
}
spin_unlock_irqrestore(&pool->lock, flags);
return ptr;
}
+static bool __in_atomic_pool(void *start, size_t size)
+{
+ struct dma_pool *pool = &atomic_pool;
+ void *end = start + size;
+ void *pool_start = pool->vaddr;
+ void *pool_end = pool->vaddr + pool->size;
+
+ if (start < pool_start || start >= pool_end)
+ return false;
+
+ if (end <= pool_end)
+ return true;
+
+ WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n",
+ start, end - 1, pool_start, pool_end - 1);
+
+ return false;
+}
+
static int __free_from_pool(void *start, size_t size)
{
struct dma_pool *pool = &atomic_pool;
unsigned long pageno, count;
unsigned long flags;
- if (start < pool->vaddr || start > pool->vaddr + pool->size)
+ if (!__in_atomic_pool(start, size))
return 0;
- if (start + size > pool->vaddr + pool->size) {
- WARN(1, "freeing wrong coherent size from pool\n");
- return 0;
- }
-
pageno = (start - pool->vaddr) >> PAGE_SHIFT;
count = size >> PAGE_SHIFT;
@@ -1090,10 +1138,22 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
return 0;
}
+static struct page **__atomic_get_pages(void *addr)
+{
+ struct dma_pool *pool = &atomic_pool;
+ struct page **pages = pool->pages;
+ int offs = (addr - pool->vaddr) >> PAGE_SHIFT;
+
+ return pages + offs;
+}
+
static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
{
struct vm_struct *area;
+ if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
+ return __atomic_get_pages(cpu_addr);
+
if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
return cpu_addr;
@@ -1103,6 +1163,34 @@ static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
return NULL;
}
+static void *__iommu_alloc_atomic(struct device *dev, size_t size,
+ dma_addr_t *handle)
+{
+ struct page *page;
+ void *addr;
+
+ addr = __alloc_from_pool(size, &page);
+ if (!addr)
+ return NULL;
+
+ *handle = __iommu_create_mapping(dev, &page, size);
+ if (*handle == DMA_ERROR_CODE)
+ goto err_mapping;
+
+ return addr;
+
+err_mapping:
+ __free_from_pool(addr, size);
+ return NULL;
+}
+
+static void __iommu_free_atomic(struct device *dev, struct page **pages,
+ dma_addr_t handle, size_t size)
+{
+ __iommu_remove_mapping(dev, handle, size);
+ __free_from_pool(page_address(pages[0]), size);
+}
+
static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
{
@@ -1113,6 +1201,9 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
*handle = DMA_ERROR_CODE;
size = PAGE_ALIGN(size);
+ if (gfp & GFP_ATOMIC)
+ return __iommu_alloc_atomic(dev, size, handle);
+
pages = __iommu_alloc_buffer(dev, size, gfp);
if (!pages)
return NULL;
@@ -1179,6 +1270,11 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;
}
+ if (__in_atomic_pool(cpu_addr, size)) {
+ __iommu_free_atomic(dev, pages, handle, size);
+ return;
+ }
+
if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
unmap_kernel_range((unsigned long)cpu_addr, size);
vunmap(cpu_addr);
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 6776160..a8ee92d 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -55,6 +55,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
/* permanent static mappings from iotable_init() */
#define VM_ARM_STATIC_MAPPING 0x40000000
+/* empty mapping */
+#define VM_ARM_EMPTY_MAPPING 0x20000000
+
/* mapping type (attributes) for permanent static mappings */
#define VM_ARM_MTYPE(mt) ((mt) << 20)
#define VM_ARM_MTYPE_MASK (0x1f << 20)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4c2d045..c2fa21d 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -807,7 +807,7 @@ static void __init pmd_empty_section_gap(unsigned long addr)
vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm));
vm->addr = (void *)addr;
vm->size = SECTION_SIZE;
- vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
+ vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING;
vm->caller = pmd_empty_section_gap;
vm_area_add_early(vm);
}
@@ -820,7 +820,7 @@ static void __init fill_pmd_gaps(void)
/* we're still single threaded hence no lock needed here */
for (vm = vmlist; vm; vm = vm->next) {
- if (!(vm->flags & VM_ARM_STATIC_MAPPING))
+ if (!(vm->flags & (VM_ARM_STATIC_MAPPING | VM_ARM_EMPTY_MAPPING)))
continue;
addr = (unsigned long)vm->addr;
if (addr < next)
@@ -961,8 +961,8 @@ void __init sanity_check_meminfo(void)
* Check whether this memory bank would partially overlap
* the vmalloc area.
*/
- if (__va(bank->start + bank->size) > vmalloc_min ||
- __va(bank->start + bank->size) < __va(bank->start)) {
+ if (__va(bank->start + bank->size - 1) >= vmalloc_min ||
+ __va(bank->start + bank->size - 1) <= __va(bank->start)) {
unsigned long newsize = vmalloc_min - __va(bank->start);
printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
"to -%.8llx (vmalloc region overlap).\n",
diff --git a/arch/arm/plat-mxc/include/mach/mx25.h b/arch/arm/plat-mxc/include/mach/mx25.h
index 627d94f..ec46640 100644
--- a/arch/arm/plat-mxc/include/mach/mx25.h
+++ b/arch/arm/plat-mxc/include/mach/mx25.h
@@ -98,6 +98,7 @@
#define MX25_INT_UART1 (NR_IRQS_LEGACY + 45)
#define MX25_INT_GPIO2 (NR_IRQS_LEGACY + 51)
#define MX25_INT_GPIO1 (NR_IRQS_LEGACY + 52)
+#define MX25_INT_GPT1 (NR_IRQS_LEGACY + 54)
#define MX25_INT_FEC (NR_IRQS_LEGACY + 57)
#define MX25_DMA_REQ_SSI2_RX1 22
diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index 766181c..024f3b0 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -68,6 +68,7 @@
static unsigned long omap_sram_start;
static void __iomem *omap_sram_base;
+static unsigned long omap_sram_skip;
static unsigned long omap_sram_size;
static void __iomem *omap_sram_ceil;
@@ -106,6 +107,7 @@ static int is_sram_locked(void)
*/
static void __init omap_detect_sram(void)
{
+ omap_sram_skip = SRAM_BOOTLOADER_SZ;
if (cpu_class_is_omap2()) {
if (is_sram_locked()) {
if (cpu_is_omap34xx()) {
@@ -113,6 +115,7 @@ static void __init omap_detect_sram(void)
if ((omap_type() == OMAP2_DEVICE_TYPE_EMU) ||
(omap_type() == OMAP2_DEVICE_TYPE_SEC)) {
omap_sram_size = 0x7000; /* 28K */
+ omap_sram_skip += SZ_16K;
} else {
omap_sram_size = 0x8000; /* 32K */
}
@@ -175,8 +178,10 @@ static void __init omap_map_sram(void)
return;
#ifdef CONFIG_OMAP4_ERRATA_I688
+ if (cpu_is_omap44xx()) {
omap_sram_start += PAGE_SIZE;
omap_sram_size -= SZ_16K;
+ }
#endif
if (cpu_is_omap34xx()) {
/*
@@ -203,8 +208,8 @@ static void __init omap_map_sram(void)
* Looks like we need to preserve some bootloader code at the
* beginning of SRAM for jumping to flash for reboot to work...
*/
- memset_io(omap_sram_base + SRAM_BOOTLOADER_SZ, 0,
- omap_sram_size - SRAM_BOOTLOADER_SZ);
+ memset_io(omap_sram_base + omap_sram_skip, 0,
+ omap_sram_size - omap_sram_skip);
}
/*
@@ -218,7 +223,7 @@ void *omap_sram_push_address(unsigned long size)
{
unsigned long available, new_ceil = (unsigned long)omap_sram_ceil;
- available = omap_sram_ceil - (omap_sram_base + SRAM_BOOTLOADER_SZ);
+ available = omap_sram_ceil - (omap_sram_base + omap_sram_skip);
if (size > available) {
pr_err("Not enough space in SRAM\n");
diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c
index 65c5eca..d1116e2 100644
--- a/arch/arm/plat-samsung/clock.c
+++ b/arch/arm/plat-samsung/clock.c
@@ -144,6 +144,7 @@ long clk_round_rate(struct clk *clk, unsigned long rate)
int clk_set_rate(struct clk *clk, unsigned long rate)
{
+ unsigned long flags;
int ret;
if (IS_ERR(clk))
@@ -159,9 +160,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
if (clk->ops == NULL || clk->ops->set_rate == NULL)
return -EINVAL;
- spin_lock(&clocks_lock);
+ spin_lock_irqsave(&clocks_lock, flags);
ret = (clk->ops->set_rate)(clk, rate);
- spin_unlock(&clocks_lock);
+ spin_unlock_irqrestore(&clocks_lock, flags);
return ret;
}
@@ -173,17 +174,18 @@ struct clk *clk_get_parent(struct clk *clk)
int clk_set_parent(struct clk *clk, struct clk *parent)
{
+ unsigned long flags;
int ret = 0;
if (IS_ERR(clk))
return -EINVAL;
- spin_lock(&clocks_lock);
+ spin_lock_irqsave(&clocks_lock, flags);
if (clk->ops && clk->ops->set_parent)
ret = (clk->ops->set_parent)(clk, parent);
- spin_unlock(&clocks_lock);
+ spin_unlock_irqrestore(&clocks_lock, flags);
return ret;
}
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index f348619..c7092e6 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -38,6 +38,7 @@ config BLACKFIN
select GENERIC_ATOMIC64
select GENERIC_IRQ_PROBE
select IRQ_PER_CPU if SMP
+ select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_NMI_WATCHDOG if NMI_WATCHDOG
select GENERIC_SMP_IDLE_THREAD
select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
diff --git a/arch/blackfin/Makefile b/arch/blackfin/Makefile
index d3d7e64..66cf000 100644
--- a/arch/blackfin/Makefile
+++ b/arch/blackfin/Makefile
@@ -20,7 +20,6 @@ endif
KBUILD_AFLAGS += $(call cc-option,-mno-fdpic)
KBUILD_CFLAGS_MODULE += -mlong-calls
LDFLAGS += -m elf32bfin
-KALLSYMS += --symbol-prefix=_
KBUILD_DEFCONFIG := BF537-STAMP_defconfig
diff --git a/arch/blackfin/include/asm/smp.h b/arch/blackfin/include/asm/smp.h
index dc3d144..9631598 100644
--- a/arch/blackfin/include/asm/smp.h
+++ b/arch/blackfin/include/asm/smp.h
@@ -18,6 +18,8 @@
#define raw_smp_processor_id() blackfin_core_id()
extern void bfin_relocate_coreb_l1_mem(void);
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#if defined(CONFIG_SMP) && defined(CONFIG_ICACHE_FLUSH_L1)
asmlinkage void blackfin_icache_flush_range_l1(unsigned long *ptr);
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 00bbe67..a401513 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -48,10 +48,13 @@ unsigned long blackfin_iflush_l1_entry[NR_CPUS];
struct blackfin_initial_pda __cpuinitdata initial_pda_coreb;
-#define BFIN_IPI_TIMER 0
-#define BFIN_IPI_RESCHEDULE 1
-#define BFIN_IPI_CALL_FUNC 2
-#define BFIN_IPI_CPU_STOP 3
+enum ipi_message_type {
+ BFIN_IPI_TIMER,
+ BFIN_IPI_RESCHEDULE,
+ BFIN_IPI_CALL_FUNC,
+ BFIN_IPI_CALL_FUNC_SINGLE,
+ BFIN_IPI_CPU_STOP,
+};
struct blackfin_flush_data {
unsigned long start;
@@ -60,35 +63,20 @@ struct blackfin_flush_data {
void *secondary_stack;
-
-struct smp_call_struct {
- void (*func)(void *info);
- void *info;
- int wait;
- cpumask_t *waitmask;
-};
-
static struct blackfin_flush_data smp_flush_data;
static DEFINE_SPINLOCK(stop_lock);
-struct ipi_message {
- unsigned long type;
- struct smp_call_struct call_struct;
-};
-
/* A magic number - stress test shows this is safe for common cases */
#define BFIN_IPI_MSGQ_LEN 5
/* Simple FIFO buffer, overflow leads to panic */
-struct ipi_message_queue {
- spinlock_t lock;
+struct ipi_data {
unsigned long count;
- unsigned long head; /* head of the queue */
- struct ipi_message ipi_message[BFIN_IPI_MSGQ_LEN];
+ unsigned long bits;
};
-static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue);
+static DEFINE_PER_CPU(struct ipi_data, bfin_ipi);
static void ipi_cpu_stop(unsigned int cpu)
{
@@ -129,28 +117,6 @@ static void ipi_flush_icache(void *info)
blackfin_icache_flush_range(fdata->start, fdata->end);
}
-static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
-{
- int wait;
- void (*func)(void *info);
- void *info;
- func = msg->call_struct.func;
- info = msg->call_struct.info;
- wait = msg->call_struct.wait;
- func(info);
- if (wait) {
-#ifdef __ARCH_SYNC_CORE_DCACHE
- /*
- * 'wait' usually means synchronization between CPUs.
- * Invalidate D cache in case shared data was changed
- * by func() to ensure cache coherence.
- */
- resync_core_dcache();
-#endif
- cpumask_clear_cpu(cpu, msg->call_struct.waitmask);
- }
-}
-
/* Use IRQ_SUPPLE_0 to request reschedule.
* When returning from interrupt to user space,
* there is chance to reschedule */
@@ -172,152 +138,95 @@ void ipi_timer(void)
static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
{
- struct ipi_message *msg;
- struct ipi_message_queue *msg_queue;
+ struct ipi_data *bfin_ipi_data;
unsigned int cpu = smp_processor_id();
- unsigned long flags;
+ unsigned long pending;
+ unsigned long msg;
platform_clear_ipi(cpu, IRQ_SUPPLE_1);
- msg_queue = &__get_cpu_var(ipi_msg_queue);
-
- spin_lock_irqsave(&msg_queue->lock, flags);
-
- while (msg_queue->count) {
- msg = &msg_queue->ipi_message[msg_queue->head];
- switch (msg->type) {
- case BFIN_IPI_TIMER:
- ipi_timer();
- break;
- case BFIN_IPI_RESCHEDULE:
- scheduler_ipi();
- break;
- case BFIN_IPI_CALL_FUNC:
- ipi_call_function(cpu, msg);
- break;
- case BFIN_IPI_CPU_STOP:
- ipi_cpu_stop(cpu);
- break;
- default:
- printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n",
- cpu, msg->type);
- break;
- }
- msg_queue->head++;
- msg_queue->head %= BFIN_IPI_MSGQ_LEN;
- msg_queue->count--;
+ bfin_ipi_data = &__get_cpu_var(bfin_ipi);
+
+ while ((pending = xchg(&bfin_ipi_data->bits, 0)) != 0) {
+ msg = 0;
+ do {
+ msg = find_next_bit(&pending, BITS_PER_LONG, msg + 1);
+ switch (msg) {
+ case BFIN_IPI_TIMER:
+ ipi_timer();
+ break;
+ case BFIN_IPI_RESCHEDULE:
+ scheduler_ipi();
+ break;
+ case BFIN_IPI_CALL_FUNC:
+ generic_smp_call_function_interrupt();
+ break;
+
+ case BFIN_IPI_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
+ break;
+
+ case BFIN_IPI_CPU_STOP:
+ ipi_cpu_stop(cpu);
+ break;
+ }
+ } while (msg < BITS_PER_LONG);
+
+ smp_mb();
}
- spin_unlock_irqrestore(&msg_queue->lock, flags);
return IRQ_HANDLED;
}
-static void ipi_queue_init(void)
+static void bfin_ipi_init(void)
{
unsigned int cpu;
- struct ipi_message_queue *msg_queue;
+ struct ipi_data *bfin_ipi_data;
for_each_possible_cpu(cpu) {
- msg_queue = &per_cpu(ipi_msg_queue, cpu);
- spin_lock_init(&msg_queue->lock);
- msg_queue->count = 0;
- msg_queue->head = 0;
+ bfin_ipi_data = &per_cpu(bfin_ipi, cpu);
+ bfin_ipi_data->bits = 0;
+ bfin_ipi_data->count = 0;
}
}
-static inline void smp_send_message(cpumask_t callmap, unsigned long type,
- void (*func) (void *info), void *info, int wait)
+void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg)
{
unsigned int cpu;
- struct ipi_message_queue *msg_queue;
- struct ipi_message *msg;
- unsigned long flags, next_msg;
- cpumask_t waitmask; /* waitmask is shared by all cpus */
-
- cpumask_copy(&waitmask, &callmap);
- for_each_cpu(cpu, &callmap) {
- msg_queue = &per_cpu(ipi_msg_queue, cpu);
- spin_lock_irqsave(&msg_queue->lock, flags);
- if (msg_queue->count < BFIN_IPI_MSGQ_LEN) {
- next_msg = (msg_queue->head + msg_queue->count)
- % BFIN_IPI_MSGQ_LEN;
- msg = &msg_queue->ipi_message[next_msg];
- msg->type = type;
- if (type == BFIN_IPI_CALL_FUNC) {
- msg->call_struct.func = func;
- msg->call_struct.info = info;
- msg->call_struct.wait = wait;
- msg->call_struct.waitmask = &waitmask;
- }
- msg_queue->count++;
- } else
- panic("IPI message queue overflow\n");
- spin_unlock_irqrestore(&msg_queue->lock, flags);
+ struct ipi_data *bfin_ipi_data;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ for_each_cpu(cpu, cpumask) {
+ bfin_ipi_data = &per_cpu(bfin_ipi, cpu);
+ smp_mb();
+ set_bit(msg, &bfin_ipi_data->bits);
+ bfin_ipi_data->count++;
platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1);
}
- if (wait) {
- while (!cpumask_empty(&waitmask))
- blackfin_dcache_invalidate_range(
- (unsigned long)(&waitmask),
- (unsigned long)(&waitmask));
-#ifdef __ARCH_SYNC_CORE_DCACHE
- /*
- * Invalidate D cache in case shared data was changed by
- * other processors to ensure cache coherence.
- */
- resync_core_dcache();
-#endif
- }
+ local_irq_restore(flags);
}
-int smp_call_function(void (*func)(void *info), void *info, int wait)
+void arch_send_call_function_single_ipi(int cpu)
{
- cpumask_t callmap;
-
- preempt_disable();
- cpumask_copy(&callmap, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &callmap);
- if (!cpumask_empty(&callmap))
- smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
-
- preempt_enable();
-
- return 0;
+ send_ipi(cpumask_of(cpu), BFIN_IPI_CALL_FUNC_SINGLE);
}
-EXPORT_SYMBOL_GPL(smp_call_function);
-int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
- int wait)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
- unsigned int cpu = cpuid;
- cpumask_t callmap;
-
- if (cpu_is_offline(cpu))
- return 0;
- cpumask_clear(&callmap);
- cpumask_set_cpu(cpu, &callmap);
-
- smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
-
- return 0;
+ send_ipi(mask, BFIN_IPI_CALL_FUNC);
}
-EXPORT_SYMBOL_GPL(smp_call_function_single);
void smp_send_reschedule(int cpu)
{
- cpumask_t callmap;
- /* simply trigger an ipi */
-
- cpumask_clear(&callmap);
- cpumask_set_cpu(cpu, &callmap);
-
- smp_send_message(callmap, BFIN_IPI_RESCHEDULE, NULL, NULL, 0);
+ send_ipi(cpumask_of(cpu), BFIN_IPI_RESCHEDULE);
return;
}
void smp_send_msg(const struct cpumask *mask, unsigned long type)
{
- smp_send_message(*mask, type, NULL, NULL, 0);
+ send_ipi(mask, type);
}
void smp_timer_broadcast(const struct cpumask *mask)
@@ -333,7 +242,7 @@ void smp_send_stop(void)
cpumask_copy(&callmap, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &callmap);
if (!cpumask_empty(&callmap))
- smp_send_message(callmap, BFIN_IPI_CPU_STOP, NULL, NULL, 0);
+ send_ipi(&callmap, BFIN_IPI_CPU_STOP);
preempt_enable();
@@ -436,7 +345,7 @@ void __init smp_prepare_boot_cpu(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
platform_prepare_cpus(max_cpus);
- ipi_queue_init();
+ bfin_ipi_init();
platform_request_ipi(IRQ_SUPPLE_0, ipi_handler_int0);
platform_request_ipi(IRQ_SUPPLE_1, ipi_handler_int1);
}
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 3af601e..f08e891 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm
generic-y += atomic.h
generic-y += auxvec.h
+generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += cputime.h
diff --git a/arch/c6x/include/asm/barrier.h b/arch/c6x/include/asm/barrier.h
deleted file mode 100644
index 538240e..0000000
--- a/arch/c6x/include/asm/barrier.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Port on Texas Instruments TMS320C6x architecture
- *
- * Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
- * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef _ASM_C6X_BARRIER_H
-#define _ASM_C6X_BARRIER_H
-
-#define nop() asm("NOP\n");
-
-#define mb() barrier()
-#define rmb() barrier()
-#define wmb() barrier()
-#define set_mb(var, value) do { var = value; mb(); } while (0)
-#define set_wmb(var, value) do { var = value; wmb(); } while (0)
-
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
-
-#endif /* _ASM_C6X_BARRIER_H */
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 66fd017..7f65be6 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/elfcore.h>
#include <linux/mqueue.h>
#include <linux/reboot.h>
+#include <linux/rcupdate.h>
//#define DEBUG
@@ -74,6 +75,7 @@ void cpu_idle (void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void);
/*
@@ -86,6 +88,7 @@ void cpu_idle (void)
idle = default_idle;
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index ff95f50..2eb7fa5 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/reboot.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
+#include <linux/rcupdate.h>
#include <asm/asm-offsets.h>
#include <asm/uaccess.h>
@@ -69,12 +70,14 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched()) {
check_pgt_cache();
if (!frv_dma_inprogress && idle)
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 0e9c315..f153ed1 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -36,6 +36,7 @@
#include <linux/reboot.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/traps.h>
@@ -78,8 +79,10 @@ void (*idle)(void) = default_idle;
void cpu_idle(void)
{
while (1) {
+ rcu_idle_enter();
while (!need_resched())
idle();
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 310cf57..3c720ef 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -25,6 +25,7 @@ config IA64
select HAVE_GENERIC_HARDIRQS
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
+ select HAVE_VIRT_CPU_ACCOUNTING
select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
@@ -340,17 +341,6 @@ config FORCE_MAX_ZONEORDER
default "17" if HUGETLB_PAGE
default "11"
-config VIRT_CPU_ACCOUNTING
- bool "Deterministic task and CPU time accounting"
- default n
- help
- Select this option to enable more accurate task and CPU time
- accounting. This is done by reading a CPU counter on each
- kernel entry and exit and on transitions within the kernel
- between system, softirq and hardirq state, so there is a
- small performance impact.
- If in doubt, say N here.
-
config SMP
bool "Symmetric multi-processing support"
select USE_GENERIC_SMP_HELPERS
diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h
index cb2412f..d38c7ea 100644
--- a/arch/ia64/include/asm/switch_to.h
+++ b/arch/ia64/include/asm/switch_to.h
@@ -30,13 +30,6 @@ extern struct task_struct *ia64_switch_to (void *next_task);
extern void ia64_save_extra (struct task_struct *task);
extern void ia64_load_extra (struct task_struct *task);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
-# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
-#else
-# define IA64_ACCOUNT_ON_SWITCH(p,n)
-#endif
-
#ifdef CONFIG_PERFMON
DECLARE_PER_CPU(unsigned long, pfm_syst_info);
# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
@@ -49,7 +42,6 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
|| PERFMON_IS_SYSWIDE())
#define __switch_to(prev,next,last) do { \
- IA64_ACCOUNT_ON_SWITCH(prev, next); \
if (IA64_HAS_EXTRA_STATE(prev)) \
ia64_save_extra(prev); \
if (IA64_HAS_EXTRA_STATE(next)) \
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index dd6fc14..3e316ec 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -29,6 +29,7 @@
#include <linux/kdebug.h>
#include <linux/utsname.h>
#include <linux/tracehook.h>
+#include <linux/rcupdate.h>
#include <asm/cpu.h>
#include <asm/delay.h>
@@ -279,6 +280,7 @@ cpu_idle (void)
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
if (can_do_pal_halt) {
current_thread_info()->status &= ~TS_POLLING;
/*
@@ -309,6 +311,7 @@ cpu_idle (void)
normal_xtp();
#endif
}
+ rcu_idle_exit();
schedule_preempt_disabled();
check_pgt_cache();
if (cpu_is_offline(cpu))
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index ecc904b..80ff9ac 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -83,32 +83,36 @@ static struct clocksource *itc_clocksource;
extern cputime_t cycle_to_cputime(u64 cyc);
+static void vtime_account_user(struct task_struct *tsk)
+{
+ cputime_t delta_utime;
+ struct thread_info *ti = task_thread_info(tsk);
+
+ if (ti->ac_utime) {
+ delta_utime = cycle_to_cputime(ti->ac_utime);
+ account_user_time(tsk, delta_utime, delta_utime);
+ ti->ac_utime = 0;
+ }
+}
+
/*
* Called from the context switch with interrupts disabled, to charge all
* accumulated times to the current process, and to prepare accounting on
* the next process.
*/
-void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+void vtime_task_switch(struct task_struct *prev)
{
struct thread_info *pi = task_thread_info(prev);
- struct thread_info *ni = task_thread_info(next);
- cputime_t delta_stime, delta_utime;
- __u64 now;
+ struct thread_info *ni = task_thread_info(current);
- now = ia64_get_itc();
-
- delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
if (idle_task(smp_processor_id()) != prev)
- account_system_time(prev, 0, delta_stime, delta_stime);
+ vtime_account_system(prev);
else
- account_idle_time(delta_stime);
+ vtime_account_idle(prev);
- if (pi->ac_utime) {
- delta_utime = cycle_to_cputime(pi->ac_utime);
- account_user_time(prev, delta_utime, delta_utime);
- }
+ vtime_account_user(prev);
- pi->ac_stamp = ni->ac_stamp = now;
+ pi->ac_stamp = ni->ac_stamp;
ni->ac_stime = ni->ac_utime = 0;
}
@@ -116,29 +120,32 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
* Account time for a transition between system, hard irq or soft irq state.
* Note that this function is called with interrupts enabled.
*/
-void account_system_vtime(struct task_struct *tsk)
+static cputime_t vtime_delta(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
- unsigned long flags;
cputime_t delta_stime;
__u64 now;
- local_irq_save(flags);
-
now = ia64_get_itc();
delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
- if (irq_count() || idle_task(smp_processor_id()) != tsk)
- account_system_time(tsk, 0, delta_stime, delta_stime);
- else
- account_idle_time(delta_stime);
ti->ac_stime = 0;
-
ti->ac_stamp = now;
- local_irq_restore(flags);
+ return delta_stime;
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+ cputime_t delta = vtime_delta(tsk);
+
+ account_system_time(tsk, 0, delta, delta);
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+ account_idle_time(vtime_delta(tsk));
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
/*
* Called from the timer interrupt handler to charge accumulated user time
@@ -146,14 +153,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
- struct thread_info *ti = task_thread_info(p);
- cputime_t delta_utime;
-
- if (ti->ac_utime) {
- delta_utime = cycle_to_cputime(ti->ac_utime);
- account_user_time(p, delta_utime, delta_utime);
- ti->ac_utime = 0;
- }
+ vtime_account_user(p);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index 3a4a32b..384e63f 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -26,6 +26,7 @@
#include <linux/ptrace.h>
#include <linux/unistd.h>
#include <linux/hardirq.h>
+#include <linux/rcupdate.h>
#include <asm/io.h>
#include <asm/uaccess.h>
@@ -82,6 +83,7 @@ void cpu_idle (void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void) = pm_idle;
@@ -90,6 +92,7 @@ void cpu_idle (void)
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index c488e3c..ac2892e 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/reboot.h>
#include <linux/init_task.h>
#include <linux/mqueue.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/traps.h>
@@ -75,8 +76,10 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
idle();
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/m68k/platform/coldfire/clk.c b/arch/m68k/platform/coldfire/clk.c
index 75f9ee9..9cd13b4 100644
--- a/arch/m68k/platform/coldfire/clk.c
+++ b/arch/m68k/platform/coldfire/clk.c
@@ -146,9 +146,3 @@ struct clk_ops clk_ops1 = {
};
#endif /* MCFPM_PPMCR1 */
#endif /* MCFPM_PPMCR0 */
-
-struct clk *devm_clk_get(struct device *dev, const char *id)
-{
- return NULL;
-}
-EXPORT_SYMBOL(devm_clk_get);
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index e7e03ec..afc379c 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -102,7 +102,7 @@ static void cmp_init_secondary(void)
c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE;
#endif
#ifdef CONFIG_MIPS_MT_SMTC
- c->tc_id = (read_c0_tcbind() >> TCBIND_CURTC_SHIFT) & TCBIND_CURTC;
+ c->tc_id = (read_c0_tcbind() & TCBIND_CURTC) >> TCBIND_CURTC_SHIFT;
#endif
}
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 33aadbc..dcfd573 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -152,6 +152,8 @@ static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
+ if (PageTail(page))
+ get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 7b13a4c..fea823f 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -273,16 +273,19 @@ asmlinkage void plat_irq_dispatch(void)
unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM;
int irq;
+ if (unlikely(!pending)) {
+ spurious_interrupt();
+ return;
+ }
+
irq = irq_ffs(pending);
if (irq == MIPSCPU_INT_I8259A)
malta_hw0_irqdispatch();
else if (gic_present && ((1 << irq) & ipi_map[smp_processor_id()]))
malta_ipi_irqdispatch();
- else if (irq >= 0)
- do_IRQ(MIPS_CPU_IRQ_BASE + irq);
else
- spurious_interrupt();
+ do_IRQ(MIPS_CPU_IRQ_BASE + irq);
}
#ifdef CONFIG_MIPS_MT_SMP
diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c
index 4c35301..80562b8 100644
--- a/arch/mips/mti-malta/malta-platform.c
+++ b/arch/mips/mti-malta/malta-platform.c
@@ -138,11 +138,6 @@ static int __init malta_add_devices(void)
if (err)
return err;
- /*
- * Set RTC to BCD mode to support current alarm code.
- */
- CMOS_WRITE(CMOS_READ(RTC_CONTROL) & ~RTC_DM_BINARY, RTC_CONTROL);
-
return 0;
}
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 7dab0cd..e9cceba 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -107,6 +108,7 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
for (;;) {
+ rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void);
@@ -121,6 +123,7 @@ void cpu_idle(void)
}
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 2c05a929..8c6b6b6 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -48,6 +48,7 @@
#include <linux/unistd.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
+#include <linux/rcupdate.h>
#include <asm/io.h>
#include <asm/asm-offsets.h>
@@ -69,8 +70,10 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
barrier();
+ rcu_idle_exit();
schedule_preempt_disabled();
check_pgt_cache();
}
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index 1c1aadc..c32ae5c 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -1,10 +1,6 @@
addnote
empty.c
hack-coff
-infblock.c
-infblock.h
-infcodes.c
-infcodes.h
inffast.c
inffast.h
inffixed.h
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 53b6dfa..54b73a2 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -386,6 +386,7 @@ extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
+extern void power7_nap(void);
#ifdef CONFIG_PSERIES_IDLE
extern void update_smt_snooze_delay(int snooze);
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 3b4b4a8..c1f2676 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -197,12 +197,6 @@ struct cpu_usage {
DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING)
-#define account_process_vtime(tsk) account_process_tick(tsk, 0)
-#else
-#define account_process_vtime(tsk) do { } while (0)
-#endif
-
extern void secondary_cpu_time_init(void);
DECLARE_PER_CPU(u64, decrementers_next_tb);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 85b05c4..e899572 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ int main(void)
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
+ DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit));
#else
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 5b25c80..a892680 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -28,6 +28,8 @@ void doorbell_setup_this_cpu(void)
void doorbell_cause_ipi(int cpu, unsigned long data)
{
+ /* Order previous accesses vs. msgsnd, which is treated as a store */
+ mb();
ppc_msgsnd(PPC_DBELL, 0, data);
}
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 4b01a25..b40e0b4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -370,6 +370,12 @@ _GLOBAL(ret_from_fork)
li r3,0
b syscall_exit
+ .section ".toc","aw"
+DSCR_DEFAULT:
+ .tc dscr_default[TC],dscr_default
+
+ .section ".text"
+
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
@@ -509,9 +515,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
mr r1,r8 /* start using new stack pointer */
std r7,PACAKSAVE(r13)
- ld r6,_CCR(r1)
- mtcrf 0xFF,r6
-
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
ld r0,THREAD_VRSAVE(r4)
@@ -520,14 +523,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_PPC64
BEGIN_FTR_SECTION
+ lwz r6,THREAD_DSCR_INHERIT(r4)
+ ld r7,DSCR_DEFAULT@toc(2)
ld r0,THREAD_DSCR(r4)
- cmpd r0,r25
- beq 1f
+ cmpwi r6,0
+ bne 1f
+ ld r0,0(r7)
+1: cmpd r0,r25
+ beq 2f
mtspr SPRN_DSCR,r0
-1:
+2:
END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
#endif
+ ld r6,_CCR(r1)
+ mtcrf 0xFF,r6
+
/* r3-r13 are destroyed -- Cort */
REST_8GPRS(14, r1)
REST_10GPRS(22, r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e894515..39aa97d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -186,7 +186,7 @@ hardware_interrupt_hv:
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
- MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer)
+ STD_EXCEPTION_HV(0x980, 0x982, hdecrementer)
STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
@@ -486,6 +486,7 @@ machine_check_common:
STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
+ STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt)
STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 7140d83..e11863f 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -28,7 +28,9 @@ _GLOBAL(power7_idle)
lwz r4,ADDROFF(powersave_nap)(r3)
cmpwi 0,r4,0
beqlr
+ /* fall through */
+_GLOBAL(power7_nap)
/* NAP is a state loss, we create a regs frame on the
* stack, fill it up with the state we care about and
* stick a pointer to it in PACAR1. We really only
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 710f400..e9cb51f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -514,9 +514,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
local_irq_save(flags);
- account_system_vtime(current);
- account_process_vtime(current);
-
/*
* We can't take a PMU exception inside _switch() since there is a
* window where the kernel stack SLB and the kernel stack are out
@@ -802,16 +799,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
#endif /* CONFIG_PPC_STD_MMU_64 */
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_DSCR)) {
- if (current->thread.dscr_inherit) {
- p->thread.dscr_inherit = 1;
- p->thread.dscr = current->thread.dscr;
- } else if (0 != dscr_default) {
- p->thread.dscr_inherit = 1;
- p->thread.dscr = dscr_default;
- } else {
- p->thread.dscr_inherit = 0;
- p->thread.dscr = 0;
- }
+ p->thread.dscr_inherit = current->thread.dscr_inherit;
+ p->thread.dscr = current->thread.dscr;
}
#endif
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 0321007..8d4214a 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -198,8 +198,15 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
+ /*
+ * Order previous accesses before accesses in the IPI handler.
+ */
+ smp_mb();
message[msg] = 1;
- mb();
+ /*
+ * cause_ipi functions are required to include a full barrier
+ * before doing whatever causes the IPI.
+ */
smp_ops->cause_ipi(cpu, info->data);
}
@@ -211,7 +218,7 @@ irqreturn_t smp_ipi_demux(void)
mb(); /* order any irq clear */
do {
- all = xchg_local(&info->messages, 0);
+ all = xchg(&info->messages, 0);
#ifdef __BIG_ENDIAN
if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 3529446..8302af6 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -194,6 +194,14 @@ static ssize_t show_dscr_default(struct device *dev,
return sprintf(buf, "%lx\n", dscr_default);
}
+static void update_dscr(void *dummy)
+{
+ if (!current->thread.dscr_inherit) {
+ current->thread.dscr = dscr_default;
+ mtspr(SPRN_DSCR, dscr_default);
+ }
+}
+
static ssize_t __used store_dscr_default(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
@@ -206,6 +214,8 @@ static ssize_t __used store_dscr_default(struct device *dev,
return -EINVAL;
dscr_default = val;
+ on_each_cpu(update_dscr, NULL, 1);
+
return count;
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index be171ee..eaa9d0e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -291,13 +291,12 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
* Account time for a transition between system, hard irq
* or soft irq state.
*/
-void account_system_vtime(struct task_struct *tsk)
+static u64 vtime_delta(struct task_struct *tsk,
+ u64 *sys_scaled, u64 *stolen)
{
- u64 now, nowscaled, delta, deltascaled;
- unsigned long flags;
- u64 stolen, udelta, sys_scaled, user_scaled;
+ u64 now, nowscaled, deltascaled;
+ u64 udelta, delta, user_scaled;
- local_irq_save(flags);
now = mftb();
nowscaled = read_spurr(now);
get_paca()->system_time += now - get_paca()->starttime;
@@ -305,7 +304,7 @@ void account_system_vtime(struct task_struct *tsk)
deltascaled = nowscaled - get_paca()->startspurr;
get_paca()->startspurr = nowscaled;
- stolen = calculate_stolen_time(now);
+ *stolen = calculate_stolen_time(now);
delta = get_paca()->system_time;
get_paca()->system_time = 0;
@@ -322,35 +321,45 @@ void account_system_vtime(struct task_struct *tsk)
* the user ticks get saved up in paca->user_time_scaled to be
* used by account_process_tick.
*/
- sys_scaled = delta;
+ *sys_scaled = delta;
user_scaled = udelta;
if (deltascaled != delta + udelta) {
if (udelta) {
- sys_scaled = deltascaled * delta / (delta + udelta);
- user_scaled = deltascaled - sys_scaled;
+ *sys_scaled = deltascaled * delta / (delta + udelta);
+ user_scaled = deltascaled - *sys_scaled;
} else {
- sys_scaled = deltascaled;
+ *sys_scaled = deltascaled;
}
}
get_paca()->user_time_scaled += user_scaled;
- if (in_interrupt() || idle_task(smp_processor_id()) != tsk) {
- account_system_time(tsk, 0, delta, sys_scaled);
- if (stolen)
- account_steal_time(stolen);
- } else {
- account_idle_time(delta + stolen);
- }
- local_irq_restore(flags);
+ return delta;
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+ u64 delta, sys_scaled, stolen;
+
+ delta = vtime_delta(tsk, &sys_scaled, &stolen);
+ account_system_time(tsk, 0, delta, sys_scaled);
+ if (stolen)
+ account_steal_time(stolen);
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+ u64 delta, sys_scaled, stolen;
+
+ delta = vtime_delta(tsk, &sys_scaled, &stolen);
+ account_idle_time(delta + stolen);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
/*
* Transfer the user and system times accumulated in the paca
* by the exception entry and exit code to the generic process
* user and system time records.
* Must be called with interrupts disabled.
- * Assumes that account_system_vtime() has been called recently
+ * Assumes that vtime_account() has been called recently
* (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
@@ -366,6 +375,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
account_user_time(tsk, utime, utimescaled);
}
+void vtime_task_switch(struct task_struct *prev)
+{
+ vtime_account(prev);
+ account_process_tick(prev, 0);
+}
+
#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
#define calc_cputime_factors()
#endif
@@ -535,6 +550,15 @@ void timer_interrupt(struct pt_regs * regs)
trace_timer_interrupt_exit(regs);
}
+/*
+ * Hypervisor decrementer interrupts shouldn't occur but are sometimes
+ * left pending on exit from a KVM guest. We don't need to do anything
+ * to clear them, as they are edge-triggered.
+ */
+void hdec_interrupt(struct pt_regs *regs)
+{
+}
+
#ifdef CONFIG_SUSPEND
static void generic_suspend_disable_irqs(void)
{
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1589723..ae0843f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -972,8 +972,9 @@ static int emulate_instruction(struct pt_regs *regs)
cpu_has_feature(CPU_FTR_DSCR)) {
PPC_WARN_EMULATED(mtdscr, regs);
rd = (instword >> 21) & 0x1f;
- mtspr(SPRN_DSCR, regs->gpr[rd]);
+ current->thread.dscr = regs->gpr[rd];
current->thread.dscr_inherit = 1;
+ mtspr(SPRN_DSCR, current->thread.dscr);
return 0;
}
#endif
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index dd223b3..17e5b23 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -20,7 +20,7 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
{
int err;
- err = __put_user(instr, addr);
+ __put_user_size(instr, addr, 4, err);
if (err)
return err;
asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 39b1597..59213cf 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1436,11 +1436,11 @@ static long vphn_get_associativity(unsigned long cpu,
/*
* Update the node maps and sysfs entries for each cpu whose home node
- * has changed.
+ * has changed. Returns 1 when the topology has changed, and 0 otherwise.
*/
int arch_update_cpu_topology(void)
{
- int cpu, nid, old_nid;
+ int cpu, nid, old_nid, changed = 0;
unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
struct device *dev;
@@ -1466,9 +1466,10 @@ int arch_update_cpu_topology(void)
dev = get_cpu_device(cpu);
if (dev)
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+ changed = 1;
}
- return 1;
+ return changed;
}
static void topology_work_fn(struct work_struct *work)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 30fd01d..72afd28 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
config PPC64
bool "64-bit kernel"
default n
+ select HAVE_VIRT_CPU_ACCOUNTING
help
This option selects whether a 32-bit or a 64-bit kernel
will be built.
@@ -337,21 +338,6 @@ config PPC_MM_SLICES
default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
default n
-config VIRT_CPU_ACCOUNTING
- bool "Deterministic task and CPU time accounting"
- depends on PPC64
- default y
- help
- Select this option to enable more accurate task and CPU time
- accounting. This is done by reading a CPU counter on each
- kernel entry and exit and on transitions within the kernel
- between system, softirq and hardirq state, so there is a
- small performance impact. This also enables accounting of
- stolen time on logically-partitioned systems running on
- IBM POWER5-based machines.
-
- If in doubt, say Y here.
-
config PPC_HAVE_PMU_SUPPORT
bool
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 3ef4625..7698b6e 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -106,14 +106,6 @@ static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
- /* If powersave_nap is enabled, use NAP mode, else just
- * spin aimlessly
- */
- if (!powersave_nap) {
- generic_mach_cpu_die();
- return;
- }
-
/* Standard hot unplug procedure */
local_irq_disable();
idle_task_exit();
@@ -128,7 +120,7 @@ static void pnv_smp_cpu_kill_self(void)
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
while (!generic_check_cpu_restart(cpu)) {
- power7_idle();
+ power7_nap();
if (!generic_check_cpu_restart(cpu)) {
DBG("CPU%d Unexpected exit while offline !\n", cpu);
/* We may be getting an IPI, so we re-enable
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 14469cf..df0fc58 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -65,7 +65,11 @@ static inline void icp_hv_set_xirr(unsigned int value)
static inline void icp_hv_set_qirr(int n_cpu , u8 value)
{
int hw_cpu = get_hard_smp_processor_id(n_cpu);
- long rc = plpar_hcall_norets(H_IPI, hw_cpu, value);
+ long rc;
+
+ /* Make sure all previous accesses are ordered before IPI sending */
+ mb();
+ rc = plpar_hcall_norets(H_IPI, hw_cpu, value);
if (rc != H_SUCCESS) {
pr_err("%s: bad return code qirr cpu=%d hw_cpu=%d mfrr=0x%x "
"returned %ld\n", __func__, n_cpu, hw_cpu, value, rc);
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index 9858476..cc45d25 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -5,3 +5,4 @@ obj-$(CONFIG_CRYPTO_HW) += crypto/
obj-$(CONFIG_S390_HYPFS_FS) += hypfs/
obj-$(CONFIG_APPLDATA_BASE) += appldata/
obj-$(CONFIG_MATHEMU) += math-emu/
+obj-y += net/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e..f9acddd 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -49,10 +49,13 @@ config GENERIC_LOCKBREAK
config PGSTE
def_bool y if KVM
-config VIRT_CPU_ACCOUNTING
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
-config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+config KEXEC
+ def_bool y
+
+config AUDIT_ARCH
def_bool y
config S390
@@ -84,11 +87,15 @@ config S390
select HAVE_KERNEL_XZ
select HAVE_ARCH_MUTEX_CPU_RELAX
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
+ select HAVE_BPF_JIT if 64BIT && PACK_STACK
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_CMPXCHG_LOCAL
+ select HAVE_CMPXCHG_DOUBLE
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select VIRT_CPU_ACCOUNTING
select ARCH_DISCARD_MEMBLOCK
select BUILDTIME_EXTABLE_SORT
select ARCH_INLINE_SPIN_TRYLOCK
@@ -133,9 +140,79 @@ source "init/Kconfig"
source "kernel/Kconfig.freezer"
-menu "Base setup"
+menu "Processor type and features"
+
+config HAVE_MARCH_Z900_FEATURES
+ def_bool n
+
+config HAVE_MARCH_Z990_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z900_FEATURES
+
+config HAVE_MARCH_Z9_109_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z990_FEATURES
+
+config HAVE_MARCH_Z10_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z9_109_FEATURES
+
+config HAVE_MARCH_Z196_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z10_FEATURES
+
+choice
+ prompt "Processor type"
+ default MARCH_G5
+
+config MARCH_G5
+ bool "System/390 model G5 and G6"
+ depends on !64BIT
+ help
+ Select this to build a 31 bit kernel that works
+ on all ESA/390 and z/Architecture machines.
-comment "Processor type and features"
+config MARCH_Z900
+ bool "IBM zSeries model z800 and z900"
+ select HAVE_MARCH_Z900_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for model z800/z900 (2064 and
+ 2066 series). This will enable some optimizations that are not
+ available on older ESA/390 (31 Bit) only CPUs.
+
+config MARCH_Z990
+ bool "IBM zSeries model z890 and z990"
+ select HAVE_MARCH_Z990_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for model z890/z990 (2084 and
+ 2086 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z9_109
+ bool "IBM System z9"
+ select HAVE_MARCH_Z9_109_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM System z9 (2094 and
+ 2096 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z10
+ bool "IBM System z10"
+ select HAVE_MARCH_Z10_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM System z10 (2097 and
+ 2098 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z196
+ bool "IBM zEnterprise 114 and 196"
+ select HAVE_MARCH_Z196_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM zEnterprise 114 and 196
+ (2818 and 2817 series). The kernel will be slightly faster but will
+ not work on older machines.
+
+endchoice
config 64BIT
def_bool y
@@ -147,6 +224,24 @@ config 64BIT
config 32BIT
def_bool y if !64BIT
+config COMPAT
+ def_bool y
+ prompt "Kernel support for 31 bit emulation"
+ depends on 64BIT
+ select COMPAT_BINFMT_ELF if BINFMT_ELF
+ select ARCH_WANT_OLD_COMPAT_IPC
+ help
+ Select this option if you want to enable your system kernel to
+ handle system-calls from ELF binaries for 31 bit ESA. This option
+ (and some other stuff like libraries and such) is needed for
+ executing 31 bit applications. It is safe to say "Y".
+
+config SYSVIPC_COMPAT
+ def_bool y if COMPAT && SYSVIPC
+
+config KEYS_COMPAT
+ def_bool y if COMPAT && KEYS
+
config SMP
def_bool y
prompt "Symmetric multi-processing support"
@@ -202,6 +297,8 @@ config SCHED_BOOK
Book scheduler support improves the CPU scheduler's decision making
when dealing with machines that have several books.
+source kernel/Kconfig.preempt
+
config MATHEMU
def_bool y
prompt "IEEE FPU emulation"
@@ -211,100 +308,35 @@ config MATHEMU
on older ESA/390 machines. Say Y unless you know your machine doesn't
need this.
-config COMPAT
- def_bool y
- prompt "Kernel support for 31 bit emulation"
- depends on 64BIT
- select COMPAT_BINFMT_ELF if BINFMT_ELF
- select ARCH_WANT_OLD_COMPAT_IPC
- help
- Select this option if you want to enable your system kernel to
- handle system-calls from ELF binaries for 31 bit ESA. This option
- (and some other stuff like libraries and such) is needed for
- executing 31 bit applications. It is safe to say "Y".
+source kernel/Kconfig.hz
-config SYSVIPC_COMPAT
- def_bool y if COMPAT && SYSVIPC
+endmenu
-config KEYS_COMPAT
- def_bool y if COMPAT && KEYS
+menu "Memory setup"
-config AUDIT_ARCH
+config ARCH_SPARSEMEM_ENABLE
def_bool y
+ select SPARSEMEM_VMEMMAP_ENABLE
+ select SPARSEMEM_VMEMMAP
+ select SPARSEMEM_STATIC if !64BIT
-config HAVE_MARCH_Z900_FEATURES
- def_bool n
-
-config HAVE_MARCH_Z990_FEATURES
- def_bool n
- select HAVE_MARCH_Z900_FEATURES
-
-config HAVE_MARCH_Z9_109_FEATURES
- def_bool n
- select HAVE_MARCH_Z990_FEATURES
-
-config HAVE_MARCH_Z10_FEATURES
- def_bool n
- select HAVE_MARCH_Z9_109_FEATURES
-
-config HAVE_MARCH_Z196_FEATURES
- def_bool n
- select HAVE_MARCH_Z10_FEATURES
-
-comment "Code generation options"
-
-choice
- prompt "Processor type"
- default MARCH_G5
-
-config MARCH_G5
- bool "System/390 model G5 and G6"
- depends on !64BIT
- help
- Select this to build a 31 bit kernel that works
- on all ESA/390 and z/Architecture machines.
-
-config MARCH_Z900
- bool "IBM zSeries model z800 and z900"
- select HAVE_MARCH_Z900_FEATURES if 64BIT
- help
- Select this to enable optimizations for model z800/z900 (2064 and
- 2066 series). This will enable some optimizations that are not
- available on older ESA/390 (31 Bit) only CPUs.
+config ARCH_SPARSEMEM_DEFAULT
+ def_bool y
-config MARCH_Z990
- bool "IBM zSeries model z890 and z990"
- select HAVE_MARCH_Z990_FEATURES if 64BIT
- help
- Select this to enable optimizations for model z890/z990 (2084 and
- 2086 series). The kernel will be slightly faster but will not work
- on older machines.
+config ARCH_SELECT_MEMORY_MODEL
+ def_bool y
-config MARCH_Z9_109
- bool "IBM System z9"
- select HAVE_MARCH_Z9_109_FEATURES if 64BIT
- help
- Select this to enable optimizations for IBM System z9 (2094 and
- 2096 series). The kernel will be slightly faster but will not work
- on older machines.
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y if SPARSEMEM
-config MARCH_Z10
- bool "IBM System z10"
- select HAVE_MARCH_Z10_FEATURES if 64BIT
- help
- Select this to enable optimizations for IBM System z10 (2097 and
- 2098 series). The kernel will be slightly faster but will not work
- on older machines.
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+ def_bool y
-config MARCH_Z196
- bool "IBM zEnterprise 114 and 196"
- select HAVE_MARCH_Z196_FEATURES if 64BIT
- help
- Select this to enable optimizations for IBM zEnterprise 114 and 196
- (2818 and 2817 series). The kernel will be slightly faster but will
- not work on older machines.
+config FORCE_MAX_ZONEORDER
+ int
+ default "9"
-endchoice
+source "mm/Kconfig"
config PACK_STACK
def_bool y
@@ -368,34 +400,9 @@ config WARN_DYNAMIC_STACK
Say N if you are unsure.
-comment "Kernel preemption"
-
-source "kernel/Kconfig.preempt"
-
-config ARCH_SPARSEMEM_ENABLE
- def_bool y
- select SPARSEMEM_VMEMMAP_ENABLE
- select SPARSEMEM_VMEMMAP
- select SPARSEMEM_STATIC if !64BIT
-
-config ARCH_SPARSEMEM_DEFAULT
- def_bool y
-
-config ARCH_SELECT_MEMORY_MODEL
- def_bool y
-
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y if SPARSEMEM
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
- def_bool y
-
-config ARCH_HIBERNATION_POSSIBLE
- def_bool y if 64BIT
-
-source "mm/Kconfig"
+endmenu
-comment "I/O subsystem configuration"
+menu "I/O subsystem"
config QDIO
def_tristate y
@@ -426,13 +433,102 @@ config CHSC_SCH
If unsure, say N.
-comment "Misc"
+config SCM_BUS
+ def_bool y
+ depends on 64BIT
+ prompt "SCM bus driver"
+ help
+ Bus driver for Storage Class Memory.
+
+config EADM_SCH
+ def_tristate m
+ prompt "Support for EADM subchannels"
+ depends on SCM_BUS
+ help
+ This driver allows usage of EADM subchannels. EADM subchannels act
+ as a communication vehicle for SCM increments.
+
+ To compile this driver as a module, choose M here: the
+ module will be called eadm_sch.
+
+endmenu
+
+menu "Dump support"
+
+config CRASH_DUMP
+ bool "kernel crash dumps"
+ depends on 64BIT && SMP
+ select KEXEC
+ help
+ Generate crash dump after being started by kexec.
+ Crash dump kernels are loaded in the main kernel with kexec-tools
+ into a specially reserved region and then later executed after
+ a crash by kdump/kexec.
+ For more details see Documentation/kdump/kdump.txt
+
+config ZFCPDUMP
+ def_bool n
+ prompt "zfcpdump support"
+ select SMP
+ help
+ Select this option if you want to build an zfcpdump enabled kernel.
+ Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+
+endmenu
+
+menu "Executable file formats / Emulations"
source "fs/Kconfig.binfmt"
-config FORCE_MAX_ZONEORDER
- int
- default "9"
+config SECCOMP
+ def_bool y
+ prompt "Enable seccomp to safely compute untrusted bytecode"
+ depends on PROC_FS
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via /proc/<pid>/seccomp, it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
+ If unsure, say Y.
+
+endmenu
+
+menu "Power Management"
+
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y if 64BIT
+
+source "kernel/power/Kconfig"
+
+endmenu
+
+source "net/Kconfig"
+
+config PCMCIA
+ def_bool n
+
+config CCW
+ def_bool y
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/s390/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+menu "Virtualization"
config PFAULT
def_bool y
@@ -448,8 +544,8 @@ config PFAULT
this option.
config SHARED_KERNEL
- def_bool y
- prompt "VM shared kernel support"
+ bool "VM shared kernel support"
+ depends on !JUMP_LABEL
help
Select this option, if you want to share the text segment of the
Linux kernel between different VM guests. This reduces memory
@@ -544,8 +640,6 @@ config APPLDATA_NET_SUM
This can also be compiled as a module, which will be called
appldata_net_sum.o.
-source kernel/Kconfig.hz
-
config S390_HYPFS_FS
def_bool y
prompt "s390 hypervisor file system support"
@@ -554,90 +648,21 @@ config S390_HYPFS_FS
This is a virtual file system intended to provide accounting
information in an s390 hypervisor environment.
-config KEXEC
- def_bool n
- prompt "kexec system call"
- help
- kexec is a system call that implements the ability to shutdown your
- current kernel, and to start another kernel. It is like a reboot
- but is independent of hardware/microcode support.
-
-config CRASH_DUMP
- bool "kernel crash dumps"
- depends on 64BIT && SMP
- select KEXEC
- help
- Generate crash dump after being started by kexec.
- Crash dump kernels are loaded in the main kernel with kexec-tools
- into a specially reserved region and then later executed after
- a crash by kdump/kexec.
- For more details see Documentation/kdump/kdump.txt
-
-config ZFCPDUMP
- def_bool n
- prompt "zfcpdump support"
- select SMP
- help
- Select this option if you want to build an zfcpdump enabled kernel.
- Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+source "arch/s390/kvm/Kconfig"
config S390_GUEST
def_bool y
- prompt "s390 guest support for KVM (EXPERIMENTAL)"
+ prompt "s390 support for virtio devices (EXPERIMENTAL)"
depends on 64BIT && EXPERIMENTAL
select VIRTUALIZATION
select VIRTIO
select VIRTIO_RING
select VIRTIO_CONSOLE
help
- Select this option if you want to run the kernel as a guest under
- the KVM hypervisor. This will add detection for KVM as well as a
- virtio transport. If KVM is detected, the virtio console will be
- the default console.
-
-config SECCOMP
- def_bool y
- prompt "Enable seccomp to safely compute untrusted bytecode"
- depends on PROC_FS
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via /proc/<pid>/seccomp, it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y.
-
-endmenu
+ Enabling this option adds support for virtio based paravirtual device
+ drivers on s390.
-menu "Power Management"
-
-source "kernel/power/Kconfig"
+ Select this option if you want to run the kernel as a guest under
+ the KVM hypervisor.
endmenu
-
-source "net/Kconfig"
-
-config PCMCIA
- def_bool n
-
-config CCW
- def_bool y
-
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-source "arch/s390/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
-
-source "arch/s390/kvm/Kconfig"
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 10e22c4..3ad8f61 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -11,6 +11,7 @@ targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
sizes.h head$(BITS).o
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 465eca7..c4c6a1c 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -71,34 +71,37 @@ void *memset(void *s, int c, size_t n)
{
char *xs;
- if (c == 0)
- return __builtin_memset(s, 0, n);
-
- xs = (char *) s;
- if (n > 0)
- do {
- *xs++ = c;
- } while (--n > 0);
+ xs = s;
+ while (n--)
+ *xs++ = c;
return s;
}
-void *memcpy(void *__dest, __const void *__src, size_t __n)
+void *memcpy(void *dest, const void *src, size_t n)
{
- return __builtin_memcpy(__dest, __src, __n);
+ const char *s = src;
+ char *d = dest;
+
+ while (n--)
+ *d++ = *s++;
+ return dest;
}
-void *memmove(void *__dest, __const void *__src, size_t __n)
+void *memmove(void *dest, const void *src, size_t n)
{
- char *d;
- const char *s;
-
- if (__dest <= __src)
- return __builtin_memcpy(__dest, __src, __n);
- d = __dest + __n;
- s = __src + __n;
- while (__n--)
- *--d = *--s;
- return __dest;
+ const char *s = src;
+ char *d = dest;
+
+ if (d <= s) {
+ while (n--)
+ *d++ = *s++;
+ } else {
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ return dest;
}
static void error(char *x)
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index f39cd71..b74400e 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -16,8 +16,8 @@ CONFIG_CGROUPS=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
CONFIG_CGROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
@@ -32,20 +32,19 @@ CONFIG_EXPERT=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
-CONFIG_PREEMPT=y
+CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
-CONFIG_BINFMT_MISC=m
-CONFIG_CMM=m
-CONFIG_HZ_100=y
CONFIG_CRASH_DUMP=y
+CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -75,6 +74,7 @@ CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
+CONFIG_BPF_JIT=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_LOOP=m
@@ -121,7 +121,6 @@ CONFIG_DEBUG_NOTIFIERS=y
CONFIG_RCU_TRACE=y
CONFIG_KPROBES_SANITY_TEST=y
CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
CONFIG_LATENCYTOP=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_BLK_DEV_IO_TRACE=y
@@ -173,3 +172,4 @@ CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRC7=m
+CONFIG_CMM=m
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index f328294..32a7059 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -70,7 +70,7 @@ static inline int appldata_asm(struct appldata_product_id *id,
int ry;
if (!MACHINE_IS_VM)
- return -ENOSYS;
+ return -EOPNOTSUPP;
parm_list.diag = 0xdc;
parm_list.function = fn;
parm_list.parlist_length = sizeof(parm_list);
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
index bf115b4..aea451f 100644
--- a/arch/s390/include/asm/chsc.h
+++ b/arch/s390/include/asm/chsc.h
@@ -125,32 +125,4 @@ struct chsc_cpd_info {
#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
-#ifdef __KERNEL__
-
-struct css_general_char {
- u64 : 12;
- u32 dynio : 1; /* bit 12 */
- u32 : 28;
- u32 aif : 1; /* bit 41 */
- u32 : 3;
- u32 mcss : 1; /* bit 45 */
- u32 fcs : 1; /* bit 46 */
- u32 : 1;
- u32 ext_mb : 1; /* bit 48 */
- u32 : 7;
- u32 aif_tdd : 1; /* bit 56 */
- u32 : 1;
- u32 qebsm : 1; /* bit 58 */
- u32 : 8;
- u32 aif_osa : 1; /* bit 67 */
- u32 : 14;
- u32 cib : 1; /* bit 82 */
- u32 : 5;
- u32 fcx : 1; /* bit 88 */
- u32 : 7;
-}__attribute__((packed));
-
-extern struct css_general_char css_general_characteristics;
-
-#endif /* __KERNEL__ */
#endif
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 77043aa..55bde60 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -80,6 +80,18 @@ struct erw {
} __attribute__ ((packed));
/**
+ * struct erw_eadm - EADM Subchannel extended report word
+ * @b: aob error
+ * @r: arsb error
+ */
+struct erw_eadm {
+ __u32 : 16;
+ __u32 b : 1;
+ __u32 r : 1;
+ __u32 : 14;
+} __packed;
+
+/**
* struct sublog - subchannel logout area
* @res0: reserved
* @esf: extended status flags
@@ -170,9 +182,22 @@ struct esw3 {
} __attribute__ ((packed));
/**
+ * struct esw_eadm - EADM Subchannel Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ */
+struct esw_eadm {
+ __u32 sublog;
+ struct erw_eadm erw;
+ __u32 : 32;
+ __u32 : 32;
+ __u32 : 32;
+} __packed;
+
+/**
* struct irb - interruption response block
* @scsw: subchannel status word
- * @esw: extened status word, 4 formats
+ * @esw: extended status word
* @ecw: extended control word
*
* The irb that is handed to the device driver when an interrupt occurs. For
@@ -191,6 +216,7 @@ struct irb {
struct esw1 esw1;
struct esw2 esw2;
struct esw3 esw3;
+ struct esw_eadm eadm;
} esw;
__u8 ecw[32];
} __attribute__ ((packed,aligned(4)));
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 8d798e9..0f636cb 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -7,7 +7,9 @@
#ifndef __ASM_CMPXCHG_H
#define __ASM_CMPXCHG_H
+#include <linux/mmdebug.h>
#include <linux/types.h>
+#include <linux/bug.h>
extern void __xchg_called_with_bad_pointer(void);
@@ -203,6 +205,65 @@ static inline unsigned long long __cmpxchg64(void *ptr,
})
#endif /* CONFIG_64BIT */
+#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
+({ \
+ register __typeof__(*(p1)) __old1 asm("2") = (o1); \
+ register __typeof__(*(p2)) __old2 asm("3") = (o2); \
+ register __typeof__(*(p1)) __new1 asm("4") = (n1); \
+ register __typeof__(*(p2)) __new2 asm("5") = (n2); \
+ int cc; \
+ asm volatile( \
+ insn " %[old],%[new],%[ptr]\n" \
+ " ipm %[cc]\n" \
+ " srl %[cc],28" \
+ : [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \
+ : [new] "d" (__new1), "d" (__new2), \
+ [ptr] "Q" (*(p1)), "Q" (*(p2)) \
+ : "memory", "cc"); \
+ !cc; \
+})
+
+#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds")
+
+#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg")
+
+extern void __cmpxchg_double_called_with_bad_pointer(void);
+
+#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+({ \
+ int __ret; \
+ switch (sizeof(*(p1))) { \
+ case 4: \
+ __ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2); \
+ break; \
+ case 8: \
+ __ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2); \
+ break; \
+ default: \
+ __cmpxchg_double_called_with_bad_pointer(); \
+ } \
+ __ret; \
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+({ \
+ __typeof__(p1) __p1 = (p1); \
+ __typeof__(p2) __p2 = (p2); \
+ int __ret; \
+ BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
+ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
+ VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
+ if (sizeof(long) == 4) \
+ __ret = __cmpxchg_double_4(__p1, __p2, o1, o2, n1, n2); \
+ else \
+ __ret = __cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2); \
+ __ret; \
+})
+
+#define system_has_cmpxchg_double() 1
+
#include <asm-generic/cmpxchg-local.h>
static inline unsigned long __cmpxchg_local(void *ptr,
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index a3afecd..35f0020 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -21,11 +21,15 @@
#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */
#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */
#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */
+#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */
+#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program
+ buffer full */
#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \
CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \
CPU_MF_INT_SF_LSDA)
+#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL)
/* CPU measurement facility support */
static inline int cpum_cf_avail(void)
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 8709bde..023d5ae 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -12,6 +12,9 @@
#include <linux/spinlock.h>
#include <asm/div64.h>
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
new file mode 100644
index 0000000..a06ebc2
--- /dev/null
+++ b/arch/s390/include/asm/css_chars.h
@@ -0,0 +1,39 @@
+#ifndef _ASM_CSS_CHARS_H
+#define _ASM_CSS_CHARS_H
+
+#include <linux/types.h>
+
+#ifdef __KERNEL__
+
+struct css_general_char {
+ u64 : 12;
+ u32 dynio : 1; /* bit 12 */
+ u32 : 4;
+ u32 eadm : 1; /* bit 17 */
+ u32 : 23;
+ u32 aif : 1; /* bit 41 */
+ u32 : 3;
+ u32 mcss : 1; /* bit 45 */
+ u32 fcs : 1; /* bit 46 */
+ u32 : 1;
+ u32 ext_mb : 1; /* bit 48 */
+ u32 : 7;
+ u32 aif_tdd : 1; /* bit 56 */
+ u32 : 1;
+ u32 qebsm : 1; /* bit 58 */
+ u32 : 8;
+ u32 aif_osa : 1; /* bit 67 */
+ u32 : 12;
+ u32 eadm_rf : 1; /* bit 80 */
+ u32 : 1;
+ u32 cib : 1; /* bit 82 */
+ u32 : 5;
+ u32 fcx : 1; /* bit 88 */
+ u32 : 19;
+ u32 alt_ssi : 1; /* bit 108 */
+} __packed;
+
+extern struct css_general_char css_general_characteristics;
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
new file mode 100644
index 0000000..8d48471
--- /dev/null
+++ b/arch/s390/include/asm/eadm.h
@@ -0,0 +1,124 @@
+#ifndef _ASM_S390_EADM_H
+#define _ASM_S390_EADM_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+struct arqb {
+ u64 data;
+ u16 fmt:4;
+ u16:12;
+ u16 cmd_code;
+ u16:16;
+ u16 msb_count;
+ u32 reserved[12];
+} __packed;
+
+#define ARQB_CMD_MOVE 1
+
+struct arsb {
+ u16 fmt:4;
+ u32:28;
+ u8 ef;
+ u8:8;
+ u8 ecbi;
+ u8:8;
+ u8 fvf;
+ u16:16;
+ u8 eqc;
+ u32:32;
+ u64 fail_msb;
+ u64 fail_aidaw;
+ u64 fail_ms;
+ u64 fail_scm;
+ u32 reserved[4];
+} __packed;
+
+struct msb {
+ u8 fmt:4;
+ u8 oc:4;
+ u8 flags;
+ u16:12;
+ u16 bs:4;
+ u32 blk_count;
+ u64 data_addr;
+ u64 scm_addr;
+ u64:64;
+} __packed;
+
+struct aidaw {
+ u8 flags;
+ u32 :24;
+ u32 :32;
+ u64 data_addr;
+} __packed;
+
+#define MSB_OC_CLEAR 0
+#define MSB_OC_READ 1
+#define MSB_OC_WRITE 2
+#define MSB_OC_RELEASE 3
+
+#define MSB_FLAG_BNM 0x80
+#define MSB_FLAG_IDA 0x40
+
+#define MSB_BS_4K 0
+#define MSB_BS_1M 1
+
+#define AOB_NR_MSB 124
+
+struct aob {
+ struct arqb request;
+ struct arsb response;
+ struct msb msb[AOB_NR_MSB];
+} __packed __aligned(PAGE_SIZE);
+
+struct aob_rq_header {
+ struct scm_device *scmdev;
+ char data[0];
+};
+
+struct scm_device {
+ u64 address;
+ u64 size;
+ unsigned int nr_max_block;
+ struct device dev;
+ struct {
+ unsigned int persistence:4;
+ unsigned int oper_state:4;
+ unsigned int data_state:4;
+ unsigned int rank:4;
+ unsigned int release:1;
+ unsigned int res_id:8;
+ } __packed attrs;
+};
+
+#define OP_STATE_GOOD 1
+#define OP_STATE_TEMP_ERR 2
+#define OP_STATE_PERM_ERR 3
+
+struct scm_driver {
+ struct device_driver drv;
+ int (*probe) (struct scm_device *scmdev);
+ int (*remove) (struct scm_device *scmdev);
+ void (*notify) (struct scm_device *scmdev);
+ void (*handler) (struct scm_device *scmdev, void *data, int error);
+};
+
+int scm_driver_register(struct scm_driver *scmdrv);
+void scm_driver_unregister(struct scm_driver *scmdrv);
+
+int scm_start_aob(struct aob *aob);
+void scm_irq_handler(struct aob *aob, int error);
+
+struct eadm_ops {
+ int (*eadm_start) (struct aob *aob);
+ struct module *owner;
+};
+
+int scm_get_ref(void);
+void scm_put_ref(void);
+
+void register_eadm_ops(struct eadm_ops *ops);
+void unregister_eadm_ops(struct eadm_ops *ops);
+
+#endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 9b94a16..178ff96 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -101,6 +101,7 @@
#define HWCAP_S390_HPAGE 128
#define HWCAP_S390_ETF3EH 256
#define HWCAP_S390_HIGH_GPRS 512
+#define HWCAP_S390_TE 1024
/*
* These are used to set parameters in the core dumps.
@@ -212,4 +213,6 @@ int arch_setup_additional_pages(struct linux_binprm *, int);
extern unsigned long arch_randomize_brk(struct mm_struct *mm);
#define arch_randomize_brk arch_randomize_brk
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa);
+
#endif
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
index a24b03b..629b79a 100644
--- a/arch/s390/include/asm/etr.h
+++ b/arch/s390/include/asm/etr.h
@@ -140,7 +140,7 @@ struct etr_ptff_qto {
/* Inline assembly helper functions */
static inline int etr_setr(struct etr_eacr *ctrl)
{
- int rc = -ENOSYS;
+ int rc = -EOPNOTSUPP;
asm volatile(
" .insn s,0xb2160000,%1\n"
@@ -154,7 +154,7 @@ static inline int etr_setr(struct etr_eacr *ctrl)
/* Stores a format 1 aib with 64 bytes */
static inline int etr_stetr(struct etr_aib *aib)
{
- int rc = -ENOSYS;
+ int rc = -EOPNOTSUPP;
asm volatile(
" .insn s,0xb2170000,%1\n"
@@ -169,7 +169,7 @@ static inline int etr_stetr(struct etr_aib *aib)
static inline int etr_steai(struct etr_aib *aib, unsigned int func)
{
register unsigned int reg0 asm("0") = func;
- int rc = -ENOSYS;
+ int rc = -EOPNOTSUPP;
asm volatile(
" .insn s,0xb2b30000,%1\n"
@@ -190,7 +190,7 @@ static inline int etr_ptff(void *ptff_block, unsigned int func)
{
register unsigned int reg0 asm("0") = func;
register unsigned long reg1 asm("1") = (unsigned long) ptff_block;
- int rc = -ENOSYS;
+ int rc = -EOPNOTSUPP;
asm volatile(
" .word 0x0104\n"
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 799ed0f..2d6e6e3 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -66,16 +66,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
return pte;
}
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- pte_t pte = huge_ptep_get(ptep);
-
- mm->context.flush_mm = 1;
- pmd_clear((pmd_t *) ptep);
- return pte;
-}
-
static inline void __pmd_csp(pmd_t *pmdp)
{
register unsigned long reg2 asm("2") = pmd_val(*pmdp);
@@ -117,6 +107,15 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm,
__pmd_csp(pmdp);
}
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = huge_ptep_get(ptep);
+
+ huge_ptep_invalidate(mm, addr, ptep);
+ return pte;
+}
+
#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
({ \
int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \
@@ -131,10 +130,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm,
({ \
pte_t __pte = huge_ptep_get(__ptep); \
if (pte_write(__pte)) { \
- (__mm)->context.flush_mm = 1; \
- if (atomic_read(&(__mm)->context.attach_count) > 1 || \
- (__mm) != current->active_mm) \
- huge_ptep_invalidate(__mm, __addr, __ptep); \
+ huge_ptep_invalidate(__mm, __addr, __ptep); \
set_huge_pte_at(__mm, __addr, __ptep, \
huge_pte_wrprotect(__pte)); \
} \
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 2b9d418..6703dd9 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -19,6 +19,7 @@ enum interruption_class {
EXTINT_IUC,
EXTINT_CMS,
EXTINT_CMC,
+ EXTINT_CMR,
IOINT_CIO,
IOINT_QAI,
IOINT_DAS,
@@ -30,6 +31,7 @@ enum interruption_class {
IOINT_CLW,
IOINT_CTC,
IOINT_APB,
+ IOINT_ADM,
IOINT_CSC,
NMI_NMI,
NR_IRQS,
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
index 1420a11..5ae6064 100644
--- a/arch/s390/include/asm/isc.h
+++ b/arch/s390/include/asm/isc.h
@@ -14,6 +14,7 @@
/* Regular I/O interrupts. */
#define IO_SCH_ISC 3 /* regular I/O subchannels */
#define CONSOLE_ISC 1 /* console I/O subchannel */
+#define EADM_SCH_ISC 4 /* EADM subchannels */
#define CHSC_SCH_ISC 7 /* CHSC subchannels */
/* Adapter interrupts. */
#define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index aab5555..bbf8141 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -329,9 +329,13 @@ struct _lowcore {
__u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */
__u32 access_regs_save_area[16]; /* 0x1340 */
__u64 cregs_save_area[16]; /* 0x1380 */
+ __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */
+
+ /* Transaction abort diagnostic block */
+ __u8 pgm_tdb[256]; /* 0x1800 */
/* align to the top of the prefix area */
- __u8 pad_0x1400[0x2000-0x1400]; /* 0x1400 */
+ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
} __packed;
#endif /* CONFIG_32BIT */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index b749c57..084e775 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -57,7 +57,7 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
pgd_t *pgd = mm->pgd;
S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
- if (addressing_mode != HOME_SPACE_MODE) {
+ if (s390_user_mode != HOME_SPACE_MODE) {
/* Load primary space page table origin. */
asm volatile(LCTL_OPCODE" 1,1,%0\n"
: : "m" (S390_lowcore.user_asce) );
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 6537e72..86fe0ee 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -20,7 +20,7 @@
#endif
#define arch_this_cpu_to_op(pcp, val, op) \
-do { \
+({ \
typedef typeof(pcp) pcp_op_T__; \
pcp_op_T__ old__, new__, prev__; \
pcp_op_T__ *ptr__; \
@@ -39,13 +39,19 @@ do { \
} \
} while (prev__ != old__); \
preempt_enable(); \
-} while (0)
+ new__; \
+})
#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+
#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &)
#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &)
#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &)
@@ -61,7 +67,7 @@ do { \
#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
-#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
+#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
({ \
typedef typeof(pcp) pcp_op_T__; \
pcp_op_T__ ret__; \
@@ -84,6 +90,44 @@ do { \
#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define arch_this_cpu_xchg(pcp, nval) \
+({ \
+ typeof(pcp) *ptr__; \
+ typeof(pcp) ret__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ ret__ = xchg(ptr__, nval); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#ifdef CONFIG_64BIT
+#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#endif
+
+#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \
+({ \
+ typeof(pcp1) o1__ = (o1), n1__ = (n1); \
+ typeof(pcp2) o2__ = (o2), n2__ = (n2); \
+ typeof(pcp1) *p1__; \
+ typeof(pcp2) *p2__; \
+ int ret__; \
+ preempt_disable(); \
+ p1__ = __this_cpu_ptr(&(pcp1)); \
+ p2__ = __this_cpu_ptr(&(pcp2)); \
+ ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
+#ifdef CONFIG_64BIT
+#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
+#endif
+
#include <asm-generic/percpu.h>
#endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 11e4e32..f3e0aab 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -11,12 +11,15 @@
#ifndef __ASM_S390_PROCESSOR_H
#define __ASM_S390_PROCESSOR_H
+#ifndef __ASSEMBLY__
+
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <asm/cpu.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/setup.h>
+#include <asm/runtime_instr.h>
/*
* Default implementation of macro that returns current
@@ -75,11 +78,20 @@ struct thread_struct {
unsigned long gmap_addr; /* address of last gmap fault. */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
+ unsigned long per_flags; /* Flags to control debug behavior */
/* pfault_wait is used to block the process on a pfault event */
unsigned long pfault_wait;
struct list_head list;
+ /* cpu runtime instrumentation */
+ struct runtime_instr_cb *ri_cb;
+ int ri_signum;
+#ifdef CONFIG_64BIT
+ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
+#endif
};
+#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */
+
typedef struct thread_struct thread_struct;
/*
@@ -130,6 +142,12 @@ struct task_struct;
struct mm_struct;
struct seq_file;
+#ifdef CONFIG_64BIT
+extern void show_cacheinfo(struct seq_file *m);
+#else
+static inline void show_cacheinfo(struct seq_file *m) { }
+#endif
+
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
@@ -140,6 +158,7 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
extern unsigned long thread_saved_pc(struct task_struct *t);
extern void show_code(struct pt_regs *regs);
+extern void print_fn_code(unsigned char *code, unsigned long len);
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
@@ -331,23 +350,6 @@ extern void (*s390_base_ext_handler_fn)(void);
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
-/*
- * Helper macro for exception table entries
- */
-#ifndef CONFIG_64BIT
-#define EX_TABLE(_fault,_target) \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long " #_fault "," #_target "\n" \
- ".previous\n"
-#else
-#define EX_TABLE(_fault,_target) \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad " #_fault "," #_target "\n" \
- ".previous\n"
-#endif
-
extern int memcpy_real(void *, void *, size_t);
extern void memcpy_absolute(void *, void *, size_t);
@@ -358,4 +360,25 @@ extern void memcpy_absolute(void *, void *, size_t);
memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
}
-#endif /* __ASM_S390_PROCESSOR_H */
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target) \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".long (" #_fault ") - .\n" \
+ ".long (" #_target ") - .\n" \
+ ".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target) \
+ .section __ex_table,"a" ; \
+ .align 4 ; \
+ .long (_fault) - . ; \
+ .long (_target) - . ; \
+ .previous
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index d5f08ea..ce20a53 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -235,6 +235,7 @@ typedef struct
#define PSW_MASK_ASC 0x0000C000UL
#define PSW_MASK_CC 0x00003000UL
#define PSW_MASK_PM 0x00000F00UL
+#define PSW_MASK_RI 0x00000000UL
#define PSW_MASK_EA 0x00000000UL
#define PSW_MASK_BA 0x00000000UL
@@ -264,10 +265,11 @@ typedef struct
#define PSW_MASK_ASC 0x0000C00000000000UL
#define PSW_MASK_CC 0x0000300000000000UL
#define PSW_MASK_PM 0x00000F0000000000UL
+#define PSW_MASK_RI 0x0000008000000000UL
#define PSW_MASK_EA 0x0000000100000000UL
#define PSW_MASK_BA 0x0000000080000000UL
-#define PSW_MASK_USER 0x00003F0180000000UL
+#define PSW_MASK_USER 0x00003F8180000000UL
#define PSW_ADDR_AMODE 0x0000000000000000UL
#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
@@ -359,17 +361,19 @@ struct per_struct_kernel {
unsigned char access_id; /* PER trap access identification */
};
-#define PER_EVENT_MASK 0xE9000000UL
+#define PER_EVENT_MASK 0xEB000000UL
#define PER_EVENT_BRANCH 0x80000000UL
#define PER_EVENT_IFETCH 0x40000000UL
#define PER_EVENT_STORE 0x20000000UL
#define PER_EVENT_STORE_REAL 0x08000000UL
+#define PER_EVENT_TRANSACTION_END 0x02000000UL
#define PER_EVENT_NULLIFICATION 0x01000000UL
-#define PER_CONTROL_MASK 0x00a00000UL
+#define PER_CONTROL_MASK 0x00e00000UL
#define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL
+#define PER_CONTROL_SUSPENSION 0x00400000UL
#define PER_CONTROL_ALTERATION 0x00200000UL
#endif
@@ -483,6 +487,8 @@ typedef struct
#define PTRACE_GET_LAST_BREAK 0x5006
#define PTRACE_PEEK_SYSTEM_CALL 0x5007
#define PTRACE_POKE_SYSTEM_CALL 0x5008
+#define PTRACE_ENABLE_TE 0x5009
+#define PTRACE_DISABLE_TE 0x5010
/*
* PT_PROT definition is loosely based on hppa bsd definition in
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
new file mode 100644
index 0000000..830da73
--- /dev/null
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -0,0 +1,98 @@
+#ifndef _RUNTIME_INSTR_H
+#define _RUNTIME_INSTR_H
+
+#define S390_RUNTIME_INSTR_START 0x1
+#define S390_RUNTIME_INSTR_STOP 0x2
+
+struct runtime_instr_cb {
+ __u64 buf_current;
+ __u64 buf_origin;
+ __u64 buf_limit;
+
+ __u32 valid : 1;
+ __u32 pstate : 1;
+ __u32 pstate_set_buf : 1;
+ __u32 home_space : 1;
+ __u32 altered : 1;
+ __u32 : 3;
+ __u32 pstate_sample : 1;
+ __u32 sstate_sample : 1;
+ __u32 pstate_collect : 1;
+ __u32 sstate_collect : 1;
+ __u32 : 1;
+ __u32 halted_int : 1;
+ __u32 int_requested : 1;
+ __u32 buffer_full_int : 1;
+ __u32 key : 4;
+ __u32 : 9;
+ __u32 rgs : 3;
+
+ __u32 mode : 4;
+ __u32 next : 1;
+ __u32 mae : 1;
+ __u32 : 2;
+ __u32 call_type_br : 1;
+ __u32 return_type_br : 1;
+ __u32 other_type_br : 1;
+ __u32 bc_other_type : 1;
+ __u32 emit : 1;
+ __u32 tx_abort : 1;
+ __u32 : 2;
+ __u32 bp_xn : 1;
+ __u32 bp_xt : 1;
+ __u32 bp_ti : 1;
+ __u32 bp_ni : 1;
+ __u32 suppr_y : 1;
+ __u32 suppr_z : 1;
+
+ __u32 dc_miss_extra : 1;
+ __u32 lat_lev_ignore : 1;
+ __u32 ic_lat_lev : 4;
+ __u32 dc_lat_lev : 4;
+
+ __u64 reserved1;
+ __u64 scaling_factor;
+ __u64 rsic;
+ __u64 reserved2;
+} __packed __aligned(8);
+
+extern struct runtime_instr_cb runtime_instr_empty_cb;
+
+static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */
+ : : "Q" (*cb));
+}
+
+static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */
+ : "=Q" (*cb) : : "cc");
+}
+
+static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
+{
+#ifdef CONFIG_64BIT
+ if (cb_prev)
+ store_runtime_instr_cb(cb_prev);
+#endif
+}
+
+static inline void restore_ri_cb(struct runtime_instr_cb *cb_next,
+ struct runtime_instr_cb *cb_prev)
+{
+#ifdef CONFIG_64BIT
+ if (cb_next)
+ load_runtime_instr_cb(cb_next);
+ else if (cb_prev)
+ load_runtime_instr_cb(&runtime_instr_empty_cb);
+#endif
+}
+
+#ifdef CONFIG_64BIT
+extern void exit_thread_runtime_instr(void);
+#else
+static inline void exit_thread_runtime_instr(void) { }
+#endif
+
+#endif /* _RUNTIME_INSTR_H */
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 4071d00..4af99cd 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -1,7 +1,7 @@
/*
* Helper functions for scsw access.
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2012
* Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
*/
@@ -9,7 +9,7 @@
#define _ASM_S390_SCSW_H_
#include <linux/types.h>
-#include <asm/chsc.h>
+#include <asm/css_chars.h>
#include <asm/cio.h>
/**
@@ -100,14 +100,46 @@ struct tm_scsw {
} __attribute__ ((packed));
/**
+ * struct eadm_scsw - subchannel status word for eadm subchannels
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @ectl: extended control
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @aob: AOB address
+ * @dstat: device status
+ * @cstat: subchannel status
+ */
+struct eadm_scsw {
+ /*
+ * First 32-bit word: 4+1+1+2+6+1+2+3+7+5 = 32 bits. The unnamed
+ * bit-fields only occupy space and cannot be accessed by name.
+ */
+ u32 key:4;
+ u32:1;
+ u32 eswf:1;
+ u32 cc:2;
+ u32:6;
+ u32 ectl:1;
+ u32:2;
+ u32 fctl:3;
+ u32 actl:7;
+ u32 stctl:5;
+ /* Second 32-bit word. */
+ u32 aob;
+ /* Third 32-bit word: 8+8+16 = 32 bits. */
+ u32 dstat:8;
+ u32 cstat:8;
+ u32:16;
+} __packed;
+
+/**
* union scsw - subchannel status word
* @cmd: command-mode SCSW
* @tm: transport-mode SCSW
+ * @eadm: eadm SCSW
*/
union scsw {
struct cmd_scsw cmd;
struct tm_scsw tm;
-} __attribute__ ((packed));
+ struct eadm_scsw eadm;
+} __packed;
#define SCSW_FCTL_CLEAR_FUNC 0x1
#define SCSW_FCTL_HALT_FUNC 0x2
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index e6859d1..87b47ca 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -60,7 +60,7 @@ void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr,
#define SECONDARY_SPACE_MODE 2
#define HOME_SPACE_MODE 3
-extern unsigned int addressing_mode;
+extern unsigned int s390_user_mode;
/*
* Machine features detected in head.S
@@ -80,6 +80,7 @@ extern unsigned int addressing_mode;
#define MACHINE_FLAG_LPAR (1UL << 12)
#define MACHINE_FLAG_SPP (1UL << 13)
#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
+#define MACHINE_FLAG_TE (1UL << 15)
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -98,6 +99,7 @@ extern unsigned int addressing_mode;
#define MACHINE_HAS_PFMF (0)
#define MACHINE_HAS_SPP (0)
#define MACHINE_HAS_TOPOLOGY (0)
+#define MACHINE_HAS_TE (0)
#else /* CONFIG_64BIT */
#define MACHINE_HAS_IEEE (1)
#define MACHINE_HAS_CSP (1)
@@ -109,6 +111,7 @@ extern unsigned int addressing_mode;
#define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF)
#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
+#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#endif /* CONFIG_64BIT */
#define ZFCPDUMP_HSA_SIZE (32UL<<20)
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index ce26ac3..b64f15c 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -30,6 +30,8 @@ extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
extern void smp_yield(void);
extern void smp_stop_cpu(void);
+extern void smp_cpu_set_polarization(int cpu, int val);
+extern int smp_cpu_get_polarization(int cpu);
#else /* CONFIG_SMP */
@@ -43,7 +45,7 @@ static inline void smp_call_online_cpu(void (*func)(void *), void *data)
func(data);
}
-static inline int smp_find_processor_id(int address) { return 0; }
+static inline int smp_find_processor_id(u16 address) { return 0; }
static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 1bd1352..7e2dcd7 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -96,7 +96,6 @@ static inline char *strcat(char *dst, const char *src)
static inline char *strcpy(char *dst, const char *src)
{
-#if __GNUC__ < 4
register int r0 asm("0") = 0;
char *ret = dst;
@@ -106,14 +105,10 @@ static inline char *strcpy(char *dst, const char *src)
: "+&a" (dst), "+&a" (src) : "d" (r0)
: "cc", "memory");
return ret;
-#else
- return __builtin_strcpy(dst, src);
-#endif
}
static inline size_t strlen(const char *s)
{
-#if __GNUC__ < 4
register unsigned long r0 asm("0") = 0;
const char *tmp = s;
@@ -122,9 +117,6 @@ static inline size_t strlen(const char *s)
" jo 0b"
: "+d" (r0), "+a" (tmp) : : "cc");
return r0 - (unsigned long) s;
-#else
- return __builtin_strlen(s);
-#endif
}
static inline size_t strnlen(const char * s, size_t n)
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index f223068..f3a9e0f 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -80,21 +80,19 @@ static inline void restore_access_regs(unsigned int *acrs)
if (prev->mm) { \
save_fp_regs(&prev->thread.fp_regs); \
save_access_regs(&prev->thread.acrs[0]); \
+ save_ri_cb(prev->thread.ri_cb); \
} \
if (next->mm) { \
restore_fp_regs(&next->thread.fp_regs); \
restore_access_regs(&next->thread.acrs[0]); \
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
update_per_regs(next); \
} \
prev = __switch_to(prev,next); \
} while (0)
-extern void account_vtime(struct task_struct *, struct task_struct *);
-extern void account_tick_vtime(struct task_struct *);
-
#define finish_arch_switch(prev) do { \
set_fs(current->thread.mm_segment); \
- account_vtime(prev, current); \
} while (0)
#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 282ee36..f92428e 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -17,7 +17,10 @@
#include <asm/bitsperlong.h>
struct sysinfo_1_1_1 {
- unsigned short :16;
+ unsigned char p:1;
+ unsigned char :6;
+ unsigned char t:1;
+ unsigned char :8;
unsigned char ccr;
unsigned char cai;
char reserved_0[28];
@@ -30,9 +33,14 @@ struct sysinfo_1_1_1 {
char model[16];
char model_perm_cap[16];
char model_temp_cap[16];
- char model_cap_rating[4];
- char model_perm_cap_rating[4];
- char model_temp_cap_rating[4];
+ unsigned int model_cap_rating;
+ unsigned int model_perm_cap_rating;
+ unsigned int model_temp_cap_rating;
+ unsigned char typepct[5];
+ unsigned char reserved_2[3];
+ unsigned int ncr;
+ unsigned int npr;
+ unsigned int ntr;
};
struct sysinfo_1_2_1 {
@@ -47,8 +55,9 @@ struct sysinfo_1_2_2 {
char format;
char reserved_0[1];
unsigned short acc_offset;
- char reserved_1[24];
- unsigned int secondary_capability;
+ char reserved_1[20];
+ unsigned int nominal_cap;
+ unsigned int secondary_cap;
unsigned int capability;
unsigned short cpus_total;
unsigned short cpus_configured;
@@ -109,6 +118,8 @@ struct sysinfo_3_2_2 {
char reserved_544[3552];
};
+extern int topology_max_mnest;
+
#define TOPOLOGY_CPU_BITS 64
#define TOPOLOGY_NR_MAG 6
@@ -142,21 +153,7 @@ struct sysinfo_15_1_x {
union topology_entry tle[0];
};
-static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
-{
- register int r0 asm("0") = (fc << 28) | sel1;
- register int r1 asm("1") = sel2;
-
- asm volatile(
- " stsi 0(%2)\n"
- "0: jz 2f\n"
- "1: lhi %0,%3\n"
- "2:\n"
- EX_TABLE(0b, 1b)
- : "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
- : "cc", "memory");
- return r0;
-}
+int stsi(void *sysinfo, int fc, int sel1, int sel2);
/*
* Service level reporting interface.
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 9fde315..1d8fe2b 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -90,12 +90,10 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
{
- spin_lock(&mm->page_table_lock);
if (mm->context.flush_mm) {
__tlb_flush_mm(mm);
mm->context.flush_mm = 0;
}
- spin_unlock(&mm->page_table_lock);
}
/*
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 0837de8..9ca3053 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -2,8 +2,8 @@
#define _ASM_S390_TOPOLOGY_H
#include <linux/cpumask.h>
-#include <asm/sysinfo.h>
+struct sysinfo_15_1_x;
struct cpu;
#ifdef CONFIG_SCHED_BOOK
@@ -51,24 +51,6 @@ static inline void topology_expect_change(void) { }
#define POLARIZATION_VM (2)
#define POLARIZATION_VH (3)
-extern int cpu_polarization[];
-
-static inline void cpu_set_polarization(int cpu, int val)
-{
-#ifdef CONFIG_SCHED_BOOK
- cpu_polarization[cpu] = val;
-#endif
-}
-
-static inline int cpu_read_polarization(int cpu)
-{
-#ifdef CONFIG_SCHED_BOOK
- return cpu_polarization[cpu];
-#else
- return POLARIZATION_HRZ;
-#endif
-}
-
#ifdef CONFIG_SCHED_BOOK
void s390_init_cpu_topology(void);
#else
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index a8ab18b..34268df 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -76,9 +76,22 @@ static inline int __range_ok(unsigned long addr, unsigned long size)
struct exception_table_entry
{
- unsigned long insn, fixup;
+ int insn, fixup;
};
+/*
+ * Turn the relative @insn field of an exception table entry into an
+ * address: the address of the field itself plus the (int) offset stored
+ * in the field.
+ */
+static inline unsigned long extable_insn(const struct exception_table_entry *x)
+{
+	unsigned long field_addr = (unsigned long)&x->insn;
+
+	return field_addr + x->insn;
+}
+
+/*
+ * Turn the relative @fixup field of an exception table entry into an
+ * address: the address of the field itself plus the (int) offset stored
+ * in the field.
+ */
+static inline unsigned long extable_fixup(const struct exception_table_entry *x)
+{
+	unsigned long field_addr = (unsigned long)&x->fixup;
+
+	return field_addr + x->fixup;
+}
+
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
struct uaccess_ops {
size_t (*copy_from_user)(size_t, const void __user *, void *);
size_t (*copy_from_user_small)(size_t, const void __user *, void *);
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 6756e78..4e64b5c 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -277,7 +277,9 @@
#define __NR_setns 339
#define __NR_process_vm_readv 340
#define __NR_process_vm_writev 341
-#define NR_syscalls 342
+#define __NR_s390_runtime_instr 342
+#define __NR_kcmp 343
+#define NR_syscalls 344
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9733b3f..4da52fe 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -23,10 +23,11 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \
processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \
debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \
- sysinfo.o jump_label.o lgr.o os_info.o
+ sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
+obj-y += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o)
extra-y += head.o vmlinux.lds
extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o)
@@ -48,12 +49,11 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o
-# Kexec part
-S390_KEXEC_OBJS := machine_kexec.o crash.o
-S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o)
-obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS)
+ifdef CONFIG_64BIT
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o
+obj-y += runtime_instr.o cache.o
+endif
# vdso
obj-$(CONFIG_64BIT) += vdso64/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 45ef1a7..fface87 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -157,6 +157,8 @@ int main(void)
DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
+ DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
+ DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
#endif /* CONFIG_32BIT */
return 0;
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
new file mode 100644
index 0000000..8df8d8a1
--- /dev/null
+++ b/arch/s390/kernel/cache.c
@@ -0,0 +1,385 @@
+/*
+ * Extract CPU cache information and expose it via sysfs.
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/notifier.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+
+/*
+ * One entry of cache_list, filled in by cache_add().
+ */
+struct cache {
+ /* value returned by ecag(EXTRACT_SIZE, ...); printed as size >> 10 with a "K" suffix */
+ unsigned long size;
+ /* value returned by ecag(EXTRACT_LINE_SIZE, ...) */
+ unsigned int line_size;
+ /* value returned by ecag(EXTRACT_ASSOCIATIVITY, ...) */
+ unsigned int associativity;
+ /* size / associativity / line_size, computed in cache_add() */
+ unsigned int nr_sets;
+ /* topology level index + 1 */
+ unsigned int level : 3;
+ /* CACHE_TYPE_* value - 1; used as index into cache_type_string[] */
+ unsigned int type : 2;
+ /* 1 for CACHE_SCOPE_PRIVATE, 0 for CACHE_SCOPE_SHARED */
+ unsigned int private : 1;
+ /* link in cache_list */
+ struct list_head list;
+};
+
+/*
+ * Per-CPU bookkeeping stored in cache_dir_cpu[]: the "cache" kobject created
+ * below the CPU device and the head of the singly linked list of index
+ * directories created for that CPU.
+ */
+struct cache_dir {
+ struct kobject *kobj;
+ struct cache_index_dir *index;
+};
+
+/*
+ * One "index%d" directory. The embedded kobject owns the allocation: the
+ * structure is freed by cache_index_release() when the kobject is released.
+ */
+struct cache_index_dir {
+ struct kobject kobj;
+ /* CPU this directory was created for */
+ int cpu;
+ /* cache_list entry described by this directory */
+ struct cache *cache;
+ /* next directory of the same CPU, NULL at the end of the list */
+ struct cache_index_dir *next;
+};
+
+/* Values of the 2-bit scope field of struct cache_info (see cache_build_info()). */
+enum {
+ CACHE_SCOPE_NOTEXISTS,
+ CACHE_SCOPE_PRIVATE,
+ CACHE_SCOPE_SHARED,
+ CACHE_SCOPE_RESERVED,
+};
+
+/*
+ * Values of the 2-bit type field of struct cache_info. cache_add() stores
+ * "value - 1" in struct cache, so DATA/INSTRUCTION/UNIFIED map to indexes
+ * 0/1/2 of cache_type_string[]; SEPARATE is split into a DATA and an
+ * INSTRUCTION entry by cache_build_info() and is never stored.
+ */
+enum {
+ CACHE_TYPE_SEPARATE,
+ CACHE_TYPE_DATA,
+ CACHE_TYPE_INSTRUCTION,
+ CACHE_TYPE_UNIFIED,
+};
+
+/* Values passed as the first ("ai") argument of ecag(). */
+enum {
+ EXTRACT_TOPOLOGY,
+ EXTRACT_LINE_SIZE,
+ EXTRACT_SIZE,
+ EXTRACT_ASSOCIATIVITY,
+};
+
+/*
+ * Values passed as the third ("ti") argument of ecag(). Note that UNIFIED
+ * and INSTRUCTION are both 0; DATA is 1.
+ */
+enum {
+ CACHE_TI_UNIFIED = 0,
+ CACHE_TI_INSTRUCTION = 0,
+ CACHE_TI_DATA,
+};
+
+/*
+ * One byte of the topology value: four unnamed bits followed by the 2-bit
+ * scope (CACHE_SCOPE_*) and the 2-bit type (CACHE_TYPE_*).
+ */
+struct cache_info {
+ unsigned char : 4;
+ unsigned char scope : 2;
+ unsigned char type : 2;
+};
+
+/* Number of struct cache_info entries overlaid on the 64-bit topology value. */
+#define CACHE_MAX_LEVEL 8
+
+/*
+ * The value returned by ecag(EXTRACT_TOPOLOGY, 0, 0), accessible either as
+ * a whole (raw) or as one struct cache_info per level (ci[]).
+ */
+union cache_topology {
+ struct cache_info ci[CACHE_MAX_LEVEL];
+ unsigned long long raw;
+};
+
+/* Printable names, indexed by the type member of struct cache (CACHE_TYPE_* - 1). */
+static const char * const cache_type_string[] = {
+ "Data",
+ "Instruction",
+ "Unified",
+};
+
+/* Per-CPU sysfs bookkeeping, set by cache_create_cache_dir(), cleared by cache_remove_cpu(). */
+static struct cache_dir *cache_dir_cpu[NR_CPUS];
+/* All caches found by cache_build_info(), in the order they were added. */
+static LIST_HEAD(cache_list);
+
+/*
+ * Write one line per entry of cache_list to @m, in list order:
+ *   cache<index>: level=.. type=.. scope=.. size=..K line_size=.. associativity=..
+ * <index> counts from 0 and is left-aligned in an 11 character field.
+ */
+void show_cacheinfo(struct seq_file *m)
+{
+	struct cache *cache;
+	int index = 0;
+
+	list_for_each_entry(cache, &cache_list, list) {
+		seq_printf(m, "cache%-11d: ", index);
+		seq_printf(m, "level=%d ", cache->level);
+		seq_printf(m, "type=%s ", cache_type_string[cache->type]);
+		seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared");
+		seq_printf(m, "size=%luK ", cache->size >> 10);
+		seq_printf(m, "line_size=%u ", cache->line_size);
+		/*
+		 * associativity is an unsigned int; use %u like the
+		 * ways_of_associativity sysfs attribute does.
+		 */
+		seq_printf(m, "associativity=%u", cache->associativity);
+		seq_puts(m, "\n");
+		index++;
+	}
+}
+
+/*
+ * Issue the ECAG instruction (hand-encoded through .insn) and return the
+ * value it places in the result register.
+ *
+ * The three arguments are packed into one command value,
+ * ai << 4 | li << 1 | ti, which is handed to the instruction as the
+ * address part of its second operand (0(%1)).
+ */
+static inline unsigned long ecag(int ai, int li, int ti)
+{
+ unsigned long cmd, val;
+
+ cmd = ai << 4 | li << 1 | ti;
+ asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
+ : "=d" (val) : "a" (cmd));
+ return val;
+}
+
+/*
+ * Allocate a struct cache for topology level @level, query its size, line
+ * size and associativity with ecag() and append it to cache_list.
+ *
+ * @type is one of CACHE_TYPE_DATA/INSTRUCTION/UNIFIED; only DATA selects
+ * CACHE_TI_DATA for the queries, everything else uses CACHE_TI_UNIFIED.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int __init cache_add(int level, int private, int type)
+{
+	struct cache *entry;
+	int ti = CACHE_TI_UNIFIED;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+	if (type == CACHE_TYPE_DATA)
+		ti = CACHE_TI_DATA;
+	entry->size = ecag(EXTRACT_SIZE, level, ti);
+	entry->line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
+	entry->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
+	/* two steps on purpose: the intermediate result is stored in nr_sets */
+	entry->nr_sets = entry->size / entry->associativity;
+	entry->nr_sets /= entry->line_size;
+	entry->private = private;
+	/* stored level is 1-based, stored type indexes cache_type_string[] */
+	entry->level = level + 1;
+	entry->type = type - 1;
+	list_add_tail(&entry->list, &cache_list);
+	return 0;
+}
+
+/*
+ * Populate cache_list from the topology value returned by
+ * ecag(EXTRACT_TOPOLOGY, 0, 0).
+ *
+ * Levels are walked in ascending order until one reports scope NOTEXISTS or
+ * RESERVED. A level of type SEPARATE yields two entries (data, then
+ * instruction); any other type yields one. If adding an entry fails, every
+ * entry added so far is removed again and freed, leaving cache_list empty.
+ */
+static void __init cache_build_info(void)
+{
+	struct cache *cache, *next;
+	union cache_topology ct;
+	unsigned int scope;
+	int level, private, rc = 0;
+
+	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+	for (level = 0; level < CACHE_MAX_LEVEL; level++) {
+		scope = ct.ci[level].scope;
+		if (scope == CACHE_SCOPE_NOTEXISTS ||
+		    scope == CACHE_SCOPE_RESERVED)
+			return;
+		/* the only scopes left are PRIVATE and SHARED */
+		private = scope == CACHE_SCOPE_PRIVATE;
+		if (ct.ci[level].type != CACHE_TYPE_SEPARATE) {
+			rc = cache_add(level, private, ct.ci[level].type);
+		} else {
+			rc = cache_add(level, private, CACHE_TYPE_DATA);
+			rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION);
+		}
+		if (rc)
+			break;
+	}
+	if (!rc)
+		return;
+	/* undo everything on failure */
+	list_for_each_entry_safe(cache, next, &cache_list, list) {
+		list_del(&cache->list);
+		kfree(cache);
+	}
+}
+
+/*
+ * Create the "cache" kobject below the device of @cpu and the struct
+ * cache_dir that tracks it, and record the latter in cache_dir_cpu[@cpu].
+ *
+ * Returns the new cache_dir, or NULL if the CPU device lookup, the kobject
+ * creation or the allocation fails. On failure a kobject that was already
+ * created is dropped again.
+ */
+static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu)
+{
+	struct cache_dir *dir = NULL;
+	struct kobject *kobj = NULL;
+	struct device *dev;
+
+	dev = get_cpu_device(cpu);
+	if (dev)
+		kobj = kobject_create_and_add("cache", &dev->kobj);
+	if (kobj)
+		dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+	if (!dir) {
+		/* kobj is still NULL if we failed before creating it */
+		kobject_put(kobj);
+		return NULL;
+	}
+	dir->kobj = kobj;
+	cache_dir_cpu[cpu] = dir;
+	return dir;
+}
+
+/* Map a kobject embedded in a struct cache_index_dir back to its container. */
+static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj)
+{
+ return container_of(kobj, struct cache_index_dir, kobj);
+}
+
+/* kobj_type release callback: free the cache_index_dir that embeds @kobj. */
+static void cache_index_release(struct kobject *kobj)
+{
+	kfree(kobj_to_cache_index_dir(kobj));
+}
+
+/*
+ * sysfs_ops show callback: @attr is embedded in a struct kobj_attribute, so
+ * recover that wrapper and forward the call to its own show method.
+ */
+static ssize_t cache_index_show(struct kobject *kobj,
+				struct attribute *attr, char *buf)
+{
+	struct kobj_attribute *wrapper =
+		container_of(attr, struct kobj_attribute, attr);
+
+	return wrapper->show(kobj, wrapper, buf);
+}
+
+/*
+ * DEFINE_CACHE_ATTR(_name, _format, _value) generates
+ *  - cache_<_name>_show(), which resolves the cache_index_dir behind the
+ *    kobject into a local named "index" and prints _value with _format;
+ *  - cache_<_name>_attr, a read-only (0444) kobj_attribute named <_name>
+ *    using that show function.
+ * _value is an expression evaluated inside the generated function and may
+ * therefore refer to "index".
+ */
+#define DEFINE_CACHE_ATTR(_name, _format, _value) \
+static ssize_t cache_##_name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *buf) \
+{ \
+ struct cache_index_dir *index; \
+ \
+ index = kobj_to_cache_index_dir(kobj); \
+ return sprintf(buf, _format, _value); \
+} \
+static struct kobj_attribute cache_##_name##_attr = \
+ __ATTR(_name, 0444, cache_##_name##_show, NULL);
+
+/* One attribute per struct cache member exported in an index directory. */
+DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10);
+DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size);
+DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets);
+DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity);
+DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]);
+DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level);
+
+/*
+ * Format the single-CPU mask of the index directory behind @kobj into @buf,
+ * followed by a newline, and return the number of characters written.
+ *
+ * @type selects the representation: non-zero uses cpulist_scnprintf(),
+ * zero uses cpumask_scnprintf().
+ */
+static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf)
+{
+	struct cache_index_dir *index = kobj_to_cache_index_dir(kobj);
+	const struct cpumask *mask = cpumask_of(index->cpu);
+	int len;
+
+	/* PAGE_SIZE - 2 leaves room for the newline and the terminator */
+	if (type)
+		len = cpulist_scnprintf(buf, PAGE_SIZE - 2, mask);
+	else
+		len = cpumask_scnprintf(buf, PAGE_SIZE - 2, mask);
+	len += sprintf(&buf[len], "\n");
+	return len;
+}
+
+/* show method of "shared_cpu_map": shared_cpu_map_func() with type 0 (cpumask_scnprintf form). */
+static ssize_t shared_cpu_map_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return shared_cpu_map_func(kobj, 0, buf);
+}
+static struct kobj_attribute cache_shared_cpu_map_attr =
+ __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+
+/* show method of "shared_cpu_list": shared_cpu_map_func() with type 1 (cpulist_scnprintf form). */
+static ssize_t shared_cpu_list_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return shared_cpu_map_func(kobj, 1, buf);
+}
+static struct kobj_attribute cache_shared_cpu_list_attr =
+ __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
+
+/* NULL-terminated list of the attributes installed as .default_attrs of cache_index_type. */
+static struct attribute *cache_index_default_attrs[] = {
+ &cache_type_attr.attr,
+ &cache_size_attr.attr,
+ &cache_number_of_sets_attr.attr,
+ &cache_ways_of_associativity_attr.attr,
+ &cache_level_attr.attr,
+ &cache_coherency_line_size_attr.attr,
+ &cache_shared_cpu_map_attr.attr,
+ &cache_shared_cpu_list_attr.attr,
+ NULL,
+};
+
+/* Only a show operation is provided; no store callback is set. */
+static const struct sysfs_ops cache_index_ops = {
+ .show = cache_index_show,
+};
+
+/*
+ * kobj_type of every index directory kobject: attribute reads go through
+ * cache_index_show(), and the final put frees the cache_index_dir via
+ * cache_index_release().
+ */
+static struct kobj_type cache_index_type = {
+ .sysfs_ops = &cache_index_ops,
+ .release = cache_index_release,
+ .default_attrs = cache_index_default_attrs,
+};
+
+/*
+ * Create the "index<index>" directory for @cache below @cache_dir->kobj and
+ * push it onto the front of @cache_dir's list of index directories.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error
+ * returned by kobject_init_and_add().
+ */
+static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir,
+					    struct cache *cache, int index,
+					    int cpu)
+{
+	struct cache_index_dir *index_dir;
+	int rc;
+
+	index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
+	if (!index_dir)
+		return -ENOMEM;
+	index_dir->cache = cache;
+	index_dir->cpu = cpu;
+	rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
+				  cache_dir->kobj, "index%d", index);
+	if (rc) {
+		/*
+		 * The kobject has been initialized even though adding it
+		 * failed, so it must be dropped with kobject_put() rather
+		 * than freed directly: that releases the name allocated for
+		 * it and frees index_dir through cache_index_release().
+		 */
+		kobject_put(&index_dir->kobj);
+		return rc;
+	}
+	index_dir->next = cache_dir->index;
+	cache_dir->index = index_dir;
+	return 0;
+}
+
+/*
+ * Create the sysfs cache directory of @cpu with one index directory per
+ * leading private entry of cache_list; the walk stops at the first entry
+ * that is not private.
+ *
+ * Returns 0 on success (also when cache_list is empty), -ENOMEM if the
+ * cache directory cannot be created, or the error of the first index
+ * directory that fails. Nothing is torn down here on failure.
+ */
+static int __cpuinit cache_add_cpu(int cpu)
+{
+	struct cache_dir *cache_dir;
+	struct cache *cache;
+	int index = 0;
+	int rc;
+
+	if (list_empty(&cache_list))
+		return 0;
+	cache_dir = cache_create_cache_dir(cpu);
+	if (!cache_dir)
+		return -ENOMEM;
+	list_for_each_entry(cache, &cache_list, list) {
+		if (!cache->private)
+			break;
+		rc = cache_create_index_dir(cache_dir, cache, index++, cpu);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+/*
+ * Drop every index directory recorded for @cpu, then the "cache" kobject,
+ * free the bookkeeping structure and clear cache_dir_cpu[@cpu]. Does nothing
+ * if no cache directory is recorded for @cpu.
+ */
+static void __cpuinit cache_remove_cpu(int cpu)
+{
+	struct cache_dir *cache_dir = cache_dir_cpu[cpu];
+	struct cache_index_dir *cur, *following;
+
+	if (!cache_dir)
+		return;
+	for (cur = cache_dir->index; cur; cur = following) {
+		/* fetch the link first: the put may free cur */
+		following = cur->next;
+		kobject_put(&cur->kobj);
+	}
+	kobject_put(cache_dir->kobj);
+	kfree(cache_dir);
+	cache_dir_cpu[cpu] = NULL;
+}
+
+/*
+ * CPU notifier callback. With the CPU_TASKS_FROZEN bit masked out of
+ * @action:
+ *  - CPU_ONLINE: add the cache directories of the CPU; if that fails, remove
+ *    whatever was created and return NOTIFY_BAD;
+ *  - CPU_DEAD: remove the cache directories of the CPU;
+ *  - anything else: ignored.
+ * Returns NOTIFY_OK in every case except the CPU_ONLINE failure.
+ */
+static int __cpuinit cache_hotplug(struct notifier_block *nfb,
+				   unsigned long action, void *hcpu)
+{
+	unsigned long event = action & ~CPU_TASKS_FROZEN;
+	int cpu = (long)hcpu;
+
+	if (event == CPU_DEAD) {
+		cache_remove_cpu(cpu);
+		return NOTIFY_OK;
+	}
+	if (event != CPU_ONLINE)
+		return NOTIFY_OK;
+	if (!cache_add_cpu(cpu))
+		return NOTIFY_OK;
+	cache_remove_cpu(cpu);
+	return NOTIFY_BAD;
+}
+
+/*
+ * Initcall: when facility bit 34 is available, build cache_list, create the
+ * sysfs directories for every online CPU (return values are not checked)
+ * and register cache_hotplug() as CPU notifier. Always returns 0.
+ */
+static int __init cache_init(void)
+{
+	int cpu;
+
+	if (test_facility(34)) {
+		cache_build_info();
+		for_each_online_cpu(cpu)
+			cache_add_cpu(cpu);
+		hotcpu_notifier(cache_hotplug, 0);
+	}
+	return 0;
+}
+device_initcall(cache_init);
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 2d82cfc..3afba80 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1646,3 +1646,16 @@ ENTRY(compat_sys_process_vm_writev_wrapper)
llgf %r0,164(%r15) # unsigned long
stg %r0,160(%r15)
jg compat_sys_process_vm_writev
+
+/*
+ * Compat entry point: widen the two int arguments in %r2 and %r3 (lgfr,
+ * sign-extending) and jump to sys_s390_runtime_instr.
+ */
+ENTRY(sys_s390_runtime_instr_wrapper)
+ lgfr %r2,%r2 # int
+ lgfr %r3,%r3 # int
+ jg sys_s390_runtime_instr
+
+/*
+ * Compat entry point: widen the signed arguments in %r2-%r4 with lgfr
+ * (sign-extending) and the unsigned long arguments in %r5/%r6 with llgfr
+ * (zero-extending), then jump to sys_kcmp.
+ */
+ENTRY(sys_kcmp_wrapper)
+ lgfr %r2,%r2 # pid_t
+ lgfr %r3,%r3 # pid_t
+ lgfr %r4,%r4 # int
+ llgfr %r5,%r5 # unsigned long
+ llgfr %r6,%r6 # unsigned long
+ jg sys_kcmp
diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c
deleted file mode 100644
index 3819153..0000000
--- a/arch/s390/kernel/crash.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright IBM Corp. 2005
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- *
- */
-
-#include <linux/threads.h>
-#include <linux/kexec.h>
-#include <linux/reboot.h>
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index cc1172b..fb8d878 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -13,8 +13,9 @@
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/elf.h>
-#include <asm/ipl.h>
#include <asm/os_info.h>
+#include <asm/elf.h>
+#include <asm/ipl.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 619c5d3..cc84a24 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -315,6 +315,11 @@ enum {
LONG_INSN_POPCNT,
LONG_INSN_RISBHG,
LONG_INSN_RISBLG,
+ LONG_INSN_RINEXT,
+ LONG_INSN_RIEMIT,
+ LONG_INSN_TABORT,
+ LONG_INSN_TBEGIN,
+ LONG_INSN_TBEGINC,
};
static char *long_insn_name[] = {
@@ -329,7 +334,12 @@ static char *long_insn_name[] = {
[LONG_INSN_LLGHRL] = "llghrl",
[LONG_INSN_POPCNT] = "popcnt",
[LONG_INSN_RISBHG] = "risbhg",
- [LONG_INSN_RISBLG] = "risblk",
+ [LONG_INSN_RISBLG] = "risblg",
+ [LONG_INSN_RINEXT] = "rinext",
+ [LONG_INSN_RIEMIT] = "riemit",
+ [LONG_INSN_TABORT] = "tabort",
+ [LONG_INSN_TBEGIN] = "tbegin",
+ [LONG_INSN_TBEGINC] = "tbeginc",
};
static struct insn opcode[] = {
@@ -582,6 +592,17 @@ static struct insn opcode_a7[] = {
{ "", 0, INSTR_INVALID }
};
+/*
+ * Instructions whose first opcode byte is 0xaa (selected by find_insn()).
+ * The real entries exist only for CONFIG_64BIT. Mnemonics given as
+ * { 0, LONG_INSN_x } refer to long_insn_name[] instead of being stored
+ * inline. The closing INSTR_INVALID entry is the same one that ends
+ * opcode_a7[] - presumably the end-of-table marker.
+ */
+static struct insn opcode_aa[] = {
+#ifdef CONFIG_64BIT
+ { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI },
+ { "rion", 0x01, INSTR_RI_RI },
+ { "tric", 0x02, INSTR_RI_RI },
+ { "rioff", 0x03, INSTR_RI_RI },
+ { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI },
+#endif
+ { "", 0, INSTR_INVALID }
+};
+
static struct insn opcode_b2[] = {
#ifdef CONFIG_64BIT
{ "sske", 0x2b, INSTR_RRF_M0RR },
@@ -594,6 +615,9 @@ static struct insn opcode_b2[] = {
{ "lpswe", 0xb2, INSTR_S_RD },
{ "srnmt", 0xb9, INSTR_S_RD },
{ "lfas", 0xbd, INSTR_S_RD },
+ { "etndg", 0xec, INSTR_RRE_R0 },
+ { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD },
+ { "tend", 0xf8, INSTR_S_RD },
#endif
{ "stidp", 0x02, INSTR_S_RD },
{ "sck", 0x04, INSTR_S_RD },
@@ -1150,6 +1174,7 @@ static struct insn opcode_e3[] = {
{ "stfh", 0xcb, INSTR_RXY_RRRD },
{ "chf", 0xcd, INSTR_RXY_RRRD },
{ "clhf", 0xcf, INSTR_RXY_RRRD },
+ { "ntstg", 0x25, INSTR_RXY_RRRD },
#endif
{ "lrv", 0x1e, INSTR_RXY_RRRD },
{ "lrvh", 0x1f, INSTR_RXY_RRRD },
@@ -1173,6 +1198,8 @@ static struct insn opcode_e5[] = {
{ "mvhhi", 0x44, INSTR_SIL_RDI },
{ "mvhi", 0x4c, INSTR_SIL_RDI },
{ "mvghi", 0x48, INSTR_SIL_RDI },
+ { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU },
+ { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU },
#endif
{ "lasp", 0x00, INSTR_SSE_RDRD },
{ "tprot", 0x01, INSTR_SSE_RDRD },
@@ -1210,6 +1237,9 @@ static struct insn opcode_eb[] = {
{ "cliy", 0x55, INSTR_SIY_URD },
{ "oiy", 0x56, INSTR_SIY_URD },
{ "xiy", 0x57, INSTR_SIY_URD },
+ { "lric", 0x60, INSTR_RSY_RDRM },
+ { "stric", 0x61, INSTR_RSY_RDRM },
+ { "mric", 0x62, INSTR_RSY_RDRM },
{ "icmh", 0x80, INSTR_RSE_RURD },
{ "icmh", 0x80, INSTR_RSY_RURD },
{ "icmy", 0x81, INSTR_RSY_RURD },
@@ -1408,6 +1438,9 @@ static struct insn *find_insn(unsigned char *code)
case 0xa7:
table = opcode_a7;
break;
+ case 0xaa:
+ table = opcode_aa;
+ break;
case 0xb2:
table = opcode_b2;
break;
@@ -1601,3 +1634,26 @@ void show_code(struct pt_regs *regs)
}
printk("\n");
}
+
+/*
+ * Disassemble @len bytes of code starting at @code and print one line per
+ * instruction: the address, the instruction bytes in hex, and the text
+ * produced by print_insn().
+ *
+ * Printing stops early if the last instruction would extend beyond @len.
+ */
+void print_fn_code(unsigned char *code, unsigned long len)
+{
+	char buffer[64], *ptr;
+	int opsize, i;
+
+	while (len) {
+		ptr = buffer;
+		opsize = insn_length(*code);
+		/*
+		 * A truncated trailing instruction would read past the given
+		 * range and make the unsigned "len -= opsize" below wrap
+		 * around, so the loop would never terminate properly.
+		 */
+		if (opsize > len)
+			break;
+		ptr += sprintf(ptr, "%p: ", code);
+		for (i = 0; i < opsize; i++)
+			ptr += sprintf(ptr, "%02x", code[i]);
+		*ptr++ = '\t';
+		/* short hex dumps get a second tab to keep the columns aligned */
+		if (i < 4)
+			*ptr++ = '\t';
+		ptr += print_insn(ptr, code, (unsigned long) code);
+		*ptr++ = '\n';
+		*ptr++ = 0;
+		/*
+		 * The disassembly contains '%' characters (register operands
+		 * such as %r1), so it must never be used as the format
+		 * string itself.
+		 */
+		printk("%s", buffer);
+		code += opsize;
+		len -= opsize;
+	}
+}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 83c3271..7f47176 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -215,36 +215,54 @@ static noinline __init void init_kernel_storage_key(void)
PAGE_DEFAULT_KEY, 0);
}
-static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE);
+static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
static noinline __init void detect_machine_type(void)
{
+ struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+
/* Check current-configuration-level */
- if ((stsi(NULL, 0, 0, 0) >> 28) <= 2) {
+ if (stsi(NULL, 0, 0, 0) <= 2) {
S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
return;
}
/* Get virtual-machine cpu information. */
- if (stsi(&vmms, 3, 2, 2) == -ENOSYS || !vmms.count)
+ if (stsi(vmms, 3, 2, 2) || !vmms->count)
return;
/* Running under KVM? If not we assume z/VM */
- if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3))
+ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
else
S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
}
+/*
+ * If facility bit 11 is available, set MACHINE_FLAG_TOPOLOGY and probe
+ * stsi(15, 1, n) into sysinfo_page for n = 6 down to 2. The first n for
+ * which stsi() returns 0 is stored in topology_max_mnest; if none succeeds
+ * the value 1 is stored. Compiles to an empty function unless CONFIG_64BIT
+ * is set.
+ */
+static __init void setup_topology(void)
+{
+#ifdef CONFIG_64BIT
+	int max_mnest = 6;
+
+	if (!test_facility(11))
+		return;
+	S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
+	while (max_mnest > 1 && stsi(&sysinfo_page, 15, 1, max_mnest) != 0)
+		max_mnest--;
+	topology_max_mnest = max_mnest;
+#endif
+}
+
static void early_pgm_check_handler(void)
{
- unsigned long addr;
const struct exception_table_entry *fixup;
+ unsigned long addr;
addr = S390_lowcore.program_old_psw.addr;
fixup = search_exception_tables(addr & PSW_ADDR_INSN);
if (!fixup)
disabled_wait(0);
- S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE;
+ S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
}
static noinline __init void setup_lowcore_early(void)
@@ -267,12 +285,10 @@ static noinline __init void setup_facility_list(void)
static noinline __init void setup_hpage(void)
{
-#ifndef CONFIG_DEBUG_PAGEALLOC
if (!test_facility(2) || !test_facility(8))
return;
S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE;
__ctl_set_bit(0, 23);
-#endif
}
static __init void detect_mvpg(void)
@@ -366,12 +382,12 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
if (test_facility(8))
S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF;
- if (test_facility(11))
- S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
if (test_facility(27))
S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
if (test_facility(40))
S390_lowcore.machine_flags |= MACHINE_FLAG_SPP;
+ if (test_facility(50) && test_facility(73))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
#endif
}
@@ -441,7 +457,6 @@ static void __init setup_boot_command_line(void)
append_to_cmdline(append_ipl_scpdata);
}
-
/*
* Save ipl parameters, clear bss memory, initialize storage keys
* and create a kernel NSS at startup if the SAVESYS= parm is defined
@@ -468,6 +483,7 @@ void __init startup_init(void)
detect_diag44();
detect_machine_facilities();
setup_hpage();
+ setup_topology();
sclp_facilities_detect();
detect_memory_layout(memory_chunk);
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 349b7ee..7549985 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
@@ -412,6 +413,11 @@ ENTRY(pgm_check_handler)
1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
LAST_BREAK %r14
lg %r15,__LC_KERNEL_STACK
+ lg %r14,__TI_task(%r12)
+ lghi %r13,__LC_PGM_TDB
+ tm __LC_PGM_ILC+2,0x02 # check for transaction abort
+ jz 2f
+ mvc __THREAD_trap_tdb(256,%r14),0(%r13)
2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -422,13 +428,12 @@ ENTRY(pgm_check_handler)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
jz 0f
- lg %r1,__TI_task(%r12)
tmhh %r8,0x0001 # kernel per event ?
jz pgm_kprobe
oi __TI_flags+7(%r12),_TIF_PER_TRAP
- mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS
- mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE
- mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID
+ mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
+ mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE
+ mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID
0: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
@@ -1004,9 +1009,7 @@ sie_fault:
.Lhost_id:
.quad 0
- .section __ex_table,"a"
- .quad sie_loop,sie_fault
- .previous
+ EX_TABLE(sie_loop,sie_fault)
#endif
.section .rodata, "a"
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index dd7630d..6cdc55b 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -30,33 +30,35 @@ struct irq_class {
};
static const struct irq_class intrclass_names[] = {
- {.name = "EXT" },
- {.name = "I/O" },
- {.name = "CLK", .desc = "[EXT] Clock Comparator" },
- {.name = "EXC", .desc = "[EXT] External Call" },
- {.name = "EMS", .desc = "[EXT] Emergency Signal" },
- {.name = "TMR", .desc = "[EXT] CPU Timer" },
- {.name = "TAL", .desc = "[EXT] Timing Alert" },
- {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" },
- {.name = "DSD", .desc = "[EXT] DASD Diag" },
- {.name = "VRT", .desc = "[EXT] Virtio" },
- {.name = "SCP", .desc = "[EXT] Service Call" },
- {.name = "IUC", .desc = "[EXT] IUCV" },
- {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling" },
- {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter" },
- {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" },
- {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" },
- {.name = "DAS", .desc = "[I/O] DASD" },
- {.name = "C15", .desc = "[I/O] 3215" },
- {.name = "C70", .desc = "[I/O] 3270" },
- {.name = "TAP", .desc = "[I/O] Tape" },
- {.name = "VMR", .desc = "[I/O] Unit Record Devices" },
- {.name = "LCS", .desc = "[I/O] LCS" },
- {.name = "CLW", .desc = "[I/O] CLAW" },
- {.name = "CTC", .desc = "[I/O] CTC" },
- {.name = "APB", .desc = "[I/O] AP Bus" },
- {.name = "CSC", .desc = "[I/O] CHSC Subchannel" },
- {.name = "NMI", .desc = "[NMI] Machine Check" },
+ [EXTERNAL_INTERRUPT] = {.name = "EXT"},
+ [IO_INTERRUPT] = {.name = "I/O"},
+ [EXTINT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"},
+ [EXTINT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"},
+ [EXTINT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"},
+ [EXTINT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"},
+ [EXTINT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"},
+ [EXTINT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+ [EXTINT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"},
+ [EXTINT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"},
+ [EXTINT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"},
+ [EXTINT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"},
+ [EXTINT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+ [EXTINT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+ [EXTINT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
+ [IOINT_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+ [IOINT_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
+ [IOINT_DAS] = {.name = "DAS", .desc = "[I/O] DASD"},
+ [IOINT_C15] = {.name = "C15", .desc = "[I/O] 3215"},
+ [IOINT_C70] = {.name = "C70", .desc = "[I/O] 3270"},
+ [IOINT_TAP] = {.name = "TAP", .desc = "[I/O] Tape"},
+ [IOINT_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"},
+ [IOINT_LCS] = {.name = "LCS", .desc = "[I/O] LCS"},
+ [IOINT_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"},
+ [IOINT_CTC] = {.name = "CTC", .desc = "[I/O] CTC"},
+ [IOINT_APB] = {.name = "APB", .desc = "[I/O] AP Bus"},
+ [IOINT_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"},
+ [IOINT_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+ [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
};
/*
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 8aa634f..d1c7214 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -547,7 +547,7 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
*/
entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
if (entry) {
- regs->psw.addr = entry->fixup | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
return 1;
}
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index eca94e7..6ea6d69 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -51,16 +51,6 @@ static struct lgr_info lgr_info_cur;
static struct debug_info *lgr_dbf;
/*
- * Return number of valid stsi levels
- */
-static inline int stsi_0(void)
-{
- int rc = stsi(NULL, 0, 0, 0);
-
- return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28);
-}
-
-/*
* Copy buffer and then convert it to ASCII
*/
static void cpascii(char *dst, char *src, int size)
@@ -76,7 +66,7 @@ static void lgr_stsi_1_1_1(struct lgr_info *lgr_info)
{
struct sysinfo_1_1_1 *si = (void *) lgr_page;
- if (stsi(si, 1, 1, 1) == -ENOSYS)
+ if (stsi(si, 1, 1, 1))
return;
cpascii(lgr_info->manufacturer, si->manufacturer,
sizeof(si->manufacturer));
@@ -93,7 +83,7 @@ static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
{
struct sysinfo_2_2_2 *si = (void *) lgr_page;
- if (stsi(si, 2, 2, 2) == -ENOSYS)
+ if (stsi(si, 2, 2, 2))
return;
cpascii(lgr_info->name, si->name, sizeof(si->name));
memcpy(&lgr_info->lpar_number, &si->lpar_number,
@@ -108,7 +98,7 @@ static void lgr_stsi_3_2_2(struct lgr_info *lgr_info)
struct sysinfo_3_2_2 *si = (void *) lgr_page;
int i;
- if (stsi(si, 3, 2, 2) == -ENOSYS)
+ if (stsi(si, 3, 2, 2))
return;
for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) {
cpascii(lgr_info->vm[i].name, si->vm[i].name,
@@ -124,16 +114,17 @@ static void lgr_stsi_3_2_2(struct lgr_info *lgr_info)
*/
static void lgr_info_get(struct lgr_info *lgr_info)
{
+ int level;
+
memset(lgr_info, 0, sizeof(*lgr_info));
stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list));
- lgr_info->level = stsi_0();
- if (lgr_info->level == -ENOSYS)
- return;
- if (lgr_info->level >= 1)
+ level = stsi(NULL, 0, 0, 0);
+ lgr_info->level = level;
+ if (level >= 1)
lgr_stsi_1_1_1(lgr_info);
- if (lgr_info->level >= 2)
+ if (level >= 2)
lgr_stsi_2_2_2(lgr_info);
- if (lgr_info->level >= 3)
+ if (level >= 3)
lgr_stsi_3_2_2(lgr_info);
}
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 493304b..b3de277 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -21,6 +21,7 @@
#include <asm/reset.h>
#include <asm/ipl.h>
#include <asm/diag.h>
+#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/os_info.h>
@@ -31,8 +32,6 @@ extern const unsigned long long relocate_kernel_len;
#ifdef CONFIG_CRASH_DUMP
-void *fill_cpu_elf_notes(void *ptr, struct save_area *sa);
-
/*
* Create ELF notes for one CPU
*/
@@ -159,7 +158,7 @@ int machine_kexec_prepare(struct kimage *image)
/* Can't replace kernel image since it is read-only. */
if (ipl_flags & IPL_NSS_VALID)
- return -ENOSYS;
+ return -EOPNOTSUPP;
if (image->type == KEXEC_TYPE_CRASH)
return machine_kexec_prepare_kdump();
@@ -191,6 +190,10 @@ void machine_shutdown(void)
{
}
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+}
+
/*
* Do normal kexec
*/
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7331753..5024be2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -26,10 +26,12 @@
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/vtimer.h>
+#include <asm/exec.h>
#include <asm/irq.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -132,6 +134,7 @@ EXPORT_SYMBOL(kernel_thread);
*/
void exit_thread(void)
{
+ exit_thread_runtime_instr();
}
void flush_thread(void)
@@ -170,6 +173,11 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
/* Save access registers to new thread structure. */
save_access_regs(&p->thread.acrs[0]);
+ /* Don't copy runtime instrumentation info */
+ p->thread.ri_cb = NULL;
+ p->thread.ri_signum = 0;
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
+
#ifndef CONFIG_64BIT
/*
* save fprs to current->thread.fp_regs to merge them with
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 572d4c9..753c41d 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -39,9 +39,9 @@ void __cpuinit cpu_init(void)
*/
static int show_cpuinfo(struct seq_file *m, void *v)
{
- static const char *hwcap_str[10] = {
+ static const char *hwcap_str[] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs"
+ "edat", "etf3eh", "highgprs", "te"
};
unsigned long n = (unsigned long) v - 1;
int i;
@@ -54,10 +54,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
num_online_cpus(), loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ))%100);
seq_puts(m, "features\t: ");
- for (i = 0; i < 10; i++)
+ for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
seq_printf(m, "%s ", hwcap_str[i]);
seq_puts(m, "\n");
+ show_cacheinfo(m);
}
get_online_cpus();
if (cpu_online(n)) {
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index e4be113..a314c57 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -42,6 +42,7 @@ enum s390_regset {
REGSET_GENERAL,
REGSET_FP,
REGSET_LAST_BREAK,
+ REGSET_TDB,
REGSET_SYSTEM_CALL,
REGSET_GENERAL_EXTENDED,
};
@@ -52,6 +53,22 @@ void update_per_regs(struct task_struct *task)
struct thread_struct *thread = &task->thread;
struct per_regs old, new;
+#ifdef CONFIG_64BIT
+ /* Take care of the enable/disable of transactional execution. */
+ if (MACHINE_HAS_TE) {
+ unsigned long cr0, cr0_new;
+
+ __ctl_store(cr0, 0, 0);
+ /* Set or clear transactional execution bits 8 and 9. */
+ if (task->thread.per_flags & PER_FLAG_NO_TE)
+ cr0_new = cr0 & ~(3UL << 54);
+ else
+ cr0_new = cr0 | (3UL << 54);
+ /* Only load control register 0 if necessary. */
+ if (cr0 != cr0_new)
+ __ctl_load(cr0_new, 0, 0);
+ }
+#endif
/* Copy user specified PER registers */
new.control = thread->per_user.control;
new.start = thread->per_user.start;
@@ -60,6 +77,10 @@ void update_per_regs(struct task_struct *task)
/* merge TIF_SINGLE_STEP into user specified PER registers. */
if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) {
new.control |= PER_EVENT_IFETCH;
+#ifdef CONFIG_64BIT
+ new.control |= PER_CONTROL_SUSPENSION;
+ new.control |= PER_EVENT_TRANSACTION_END;
+#endif
new.start = 0;
new.end = PSW_ADDR_INSN;
}
@@ -100,6 +121,7 @@ void ptrace_disable(struct task_struct *task)
memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
clear_tsk_thread_flag(task, TIF_PER_TRAP);
+ task->thread.per_flags = 0;
}
#ifndef CONFIG_64BIT
@@ -416,6 +438,16 @@ long arch_ptrace(struct task_struct *child, long request,
put_user(task_thread_info(child)->last_break,
(unsigned long __user *) data);
return 0;
+ case PTRACE_ENABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+ child->thread.per_flags &= ~PER_FLAG_NO_TE;
+ return 0;
+ case PTRACE_DISABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+ child->thread.per_flags |= PER_FLAG_NO_TE;
+ return 0;
default:
/* Removing high order bit from addr (only for 31 bit). */
addr &= PSW_ADDR_INSN;
@@ -903,6 +935,28 @@ static int s390_last_break_set(struct task_struct *target,
return 0;
}
+static int s390_tdb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct pt_regs *regs = task_pt_regs(target);
+ unsigned char *data;
+
+ if (!(regs->int_code & 0x200))
+ return -ENODATA;
+ data = target->thread.trap_tdb;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256);
+}
+
+static int s390_tdb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
#endif
static int s390_system_call_get(struct task_struct *target,
@@ -951,6 +1005,14 @@ static const struct user_regset s390_regsets[] = {
.get = s390_last_break_get,
.set = s390_last_break_set,
},
+ [REGSET_TDB] = {
+ .core_note_type = NT_S390_TDB,
+ .n = 1,
+ .size = 256,
+ .align = 1,
+ .get = s390_tdb_get,
+ .set = s390_tdb_set,
+ },
#endif
[REGSET_SYSTEM_CALL] = {
.core_note_type = NT_S390_SYSTEM_CALL,
@@ -1148,6 +1210,14 @@ static const struct user_regset s390_compat_regsets[] = {
.get = s390_compat_last_break_get,
.set = s390_compat_last_break_set,
},
+ [REGSET_TDB] = {
+ .core_note_type = NT_S390_TDB,
+ .n = 1,
+ .size = 256,
+ .align = 1,
+ .get = s390_tdb_get,
+ .set = s390_tdb_set,
+ },
[REGSET_SYSTEM_CALL] = {
.core_note_type = NT_S390_SYSTEM_CALL,
.n = 1,
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
new file mode 100644
index 0000000..61066f6
--- /dev/null
+++ b/arch/s390/kernel/runtime_instr.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright IBM Corp. 2012
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <asm/runtime_instr.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+/* empty control block to disable RI by loading it */
+struct runtime_instr_cb runtime_instr_empty_cb;
+
+static int runtime_instr_avail(void)
+{
+ return test_facility(64);
+}
+
+static void disable_runtime_instr(void)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+
+ load_runtime_instr_cb(&runtime_instr_empty_cb);
+
+ /*
+ * Make sure the RI bit is deleted from the PSW. If the user did not
+ * switch off RI before the system call the process will get a
+ * specification exception otherwise.
+ */
+ regs->psw.mask &= ~PSW_MASK_RI;
+}
+
+static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ cb->buf_limit = 0xfff;
+ if (s390_user_mode == HOME_SPACE_MODE)
+ cb->home_space = 1;
+ cb->int_requested = 1;
+ cb->pstate = 1;
+ cb->pstate_set_buf = 1;
+ cb->pstate_sample = 1;
+ cb->pstate_collect = 1;
+ cb->key = PAGE_DEFAULT_KEY;
+ cb->valid = 1;
+}
+
+void exit_thread_runtime_instr(void)
+{
+ struct task_struct *task = current;
+
+ if (!task->thread.ri_cb)
+ return;
+ disable_runtime_instr();
+ kfree(task->thread.ri_cb);
+ task->thread.ri_signum = 0;
+ task->thread.ri_cb = NULL;
+}
+
+static void runtime_instr_int_handler(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct siginfo info;
+
+ if (!(param32 & CPU_MF_INT_RI_MASK))
+ return;
+
+ kstat_cpu(smp_processor_id()).irqs[EXTINT_CMR]++;
+
+ if (!current->thread.ri_cb)
+ return;
+ if (current->thread.ri_signum < SIGRTMIN ||
+ current->thread.ri_signum > SIGRTMAX) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = current->thread.ri_signum;
+ info.si_code = SI_QUEUE;
+ if (param32 & CPU_MF_INT_RI_BUF_FULL)
+ info.si_int = ENOBUFS;
+ else if (param32 & CPU_MF_INT_RI_HALTED)
+ info.si_int = ECANCELED;
+ else
+ return; /* unknown reason */
+
+ send_sig_info(current->thread.ri_signum, &info, current);
+}
+
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+{
+ struct runtime_instr_cb *cb;
+
+ if (!runtime_instr_avail())
+ return -EOPNOTSUPP;
+
+ if (command == S390_RUNTIME_INSTR_STOP) {
+ preempt_disable();
+ exit_thread_runtime_instr();
+ preempt_enable();
+ return 0;
+ }
+
+ if (command != S390_RUNTIME_INSTR_START ||
+ (signum < SIGRTMIN || signum > SIGRTMAX))
+ return -EINVAL;
+
+ if (!current->thread.ri_cb) {
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb)
+ return -ENOMEM;
+ } else {
+ cb = current->thread.ri_cb;
+ memset(cb, 0, sizeof(*cb));
+ }
+
+ init_runtime_instr_cb(cb);
+ current->thread.ri_signum = signum;
+
+ /* now load the control block to make it available */
+ preempt_disable();
+ current->thread.ri_cb = cb;
+ load_runtime_instr_cb(cb);
+ preempt_enable();
+ return 0;
+}
+
+static int __init runtime_instr_init(void)
+{
+ int rc;
+
+ if (!runtime_instr_avail())
+ return 0;
+
+ measurement_alert_subclass_register();
+ rc = register_external_interrupt(0x1407, runtime_instr_int_handler);
+ if (rc)
+ measurement_alert_subclass_unregister();
+ else
+ pr_info("Runtime instrumentation facility initialized\n");
+ return rc;
+}
+device_initcall(runtime_instr_init);
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 57b5366..9bdbcef 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -8,3 +8,5 @@ EXPORT_SYMBOL(_mcount);
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
EXPORT_SYMBOL(sie64a);
#endif
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f86c81e..afa9fdb 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -302,10 +302,10 @@ static int __init parse_vmalloc(char *arg)
}
early_param("vmalloc", parse_vmalloc);
-unsigned int addressing_mode = HOME_SPACE_MODE;
-EXPORT_SYMBOL_GPL(addressing_mode);
+unsigned int s390_user_mode = PRIMARY_SPACE_MODE;
+EXPORT_SYMBOL_GPL(s390_user_mode);
-static int set_amode_primary(void)
+static void __init set_user_mode_primary(void)
{
psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME;
psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY;
@@ -313,48 +313,30 @@ static int set_amode_primary(void)
psw32_user_bits =
(psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY;
#endif
-
- if (MACHINE_HAS_MVCOS) {
- memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess));
- return 1;
- } else {
- memcpy(&uaccess, &uaccess_pt, sizeof(uaccess));
- return 0;
- }
-}
-
-/*
- * Switch kernel/user addressing modes?
- */
-static int __init early_parse_switch_amode(char *p)
-{
- addressing_mode = PRIMARY_SPACE_MODE;
- return 0;
+ uaccess = MACHINE_HAS_MVCOS ? uaccess_mvcos_switch : uaccess_pt;
}
-early_param("switch_amode", early_parse_switch_amode);
static int __init early_parse_user_mode(char *p)
{
if (p && strcmp(p, "primary") == 0)
- addressing_mode = PRIMARY_SPACE_MODE;
+ s390_user_mode = PRIMARY_SPACE_MODE;
else if (!p || strcmp(p, "home") == 0)
- addressing_mode = HOME_SPACE_MODE;
+ s390_user_mode = HOME_SPACE_MODE;
else
return 1;
return 0;
}
early_param("user_mode", early_parse_user_mode);
-static void setup_addressing_mode(void)
+static void __init setup_addressing_mode(void)
{
- if (addressing_mode == PRIMARY_SPACE_MODE) {
- if (set_amode_primary())
- pr_info("Address spaces switched, "
- "mvcos available\n");
- else
- pr_info("Address spaces switched, "
- "mvcos not available\n");
- }
+ if (s390_user_mode != PRIMARY_SPACE_MODE)
+ return;
+ set_user_mode_primary();
+ if (MACHINE_HAS_MVCOS)
+ pr_info("Address spaces switched, mvcos available\n");
+ else
+ pr_info("Address spaces switched, mvcos not available\n");
}
void *restart_stack __attribute__((__section__(".data")));
@@ -602,9 +584,7 @@ static void __init setup_memory_end(void)
static void __init setup_vmcoreinfo(void)
{
-#ifdef CONFIG_KEXEC
mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
-#endif
}
#ifdef CONFIG_CRASH_DUMP
@@ -974,12 +954,20 @@ static void __init setup_hwcaps(void)
if (MACHINE_HAS_HPAGE)
elf_hwcap |= HWCAP_S390_HPAGE;
+#if defined(CONFIG_64BIT)
/*
* 64-bit register support for 31-bit processes
* HWCAP_S390_HIGH_GPRS is bit 9.
*/
elf_hwcap |= HWCAP_S390_HIGH_GPRS;
+ /*
+ * Transactional execution support HWCAP_S390_TE is bit 10.
+ */
+ if (test_facility(50) && test_facility(73))
+ elf_hwcap |= HWCAP_S390_TE;
+#endif
+
get_cpu_id(&cpu_id);
switch (cpu_id.machine) {
case 0x9672:
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 720fda1..ea431e5 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -66,7 +66,7 @@ struct pcpu {
unsigned long panic_stack; /* panic stack for the cpu */
unsigned long ec_mask; /* bit mask for ec_xxx functions */
int state; /* physical cpu state */
- u32 status; /* last status received via sigp */
+ int polarization; /* physical polarization */
u16 address; /* physical cpu address */
};
@@ -74,6 +74,10 @@ static u8 boot_cpu_type;
static u16 boot_cpu_address;
static struct pcpu pcpu_devices[NR_CPUS];
+/*
+ * The smp_cpu_state_mutex must be held when changing the state or polarization
+ * member of a pcpu data structure within the pcpu_devices array.
+ */
DEFINE_MUTEX(smp_cpu_state_mutex);
/*
@@ -99,7 +103,7 @@ static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
int cc;
while (1) {
- cc = __pcpu_sigp(addr, order, parm, status);
+ cc = __pcpu_sigp(addr, order, parm, NULL);
if (cc != SIGP_CC_BUSY)
return cc;
cpu_relax();
@@ -111,7 +115,7 @@ static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
int cc, retry;
for (retry = 0; ; retry++) {
- cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status);
+ cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
if (cc != SIGP_CC_BUSY)
break;
if (retry >= 3)
@@ -122,16 +126,18 @@ static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
static inline int pcpu_stopped(struct pcpu *pcpu)
{
+ u32 uninitialized_var(status);
+
if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
- 0, &pcpu->status) != SIGP_CC_STATUS_STORED)
+ 0, &status) != SIGP_CC_STATUS_STORED)
return 0;
- return !!(pcpu->status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
+ return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
}
static inline int pcpu_running(struct pcpu *pcpu)
{
if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
- 0, &pcpu->status) != SIGP_CC_STATUS_STORED)
+ 0, NULL) != SIGP_CC_STATUS_STORED)
return 1;
/* Status stored condition code is equivalent to cpu not running. */
return 0;
@@ -586,6 +592,16 @@ static inline void smp_get_save_area(int cpu, u16 address) { }
#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
+void smp_cpu_set_polarization(int cpu, int val)
+{
+ pcpu_devices[cpu].polarization = val;
+}
+
+int smp_cpu_get_polarization(int cpu)
+{
+ return pcpu_devices[cpu].polarization;
+}
+
static struct sclp_cpu_info *smp_get_cpu_info(void)
{
static int use_sigp_detection;
@@ -628,7 +644,7 @@ static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info,
pcpu->address = info->cpu[i].address;
pcpu->state = (cpu >= info->configured) ?
CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
- cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
set_cpu_present(cpu, true);
if (sysfs_add && smp_add_present_cpu(cpu) != 0)
set_cpu_present(cpu, false);
@@ -796,7 +812,7 @@ void __init smp_prepare_boot_cpu(void)
pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE;
pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE;
S390_lowcore.percpu_offset = __per_cpu_offset[0];
- cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
set_cpu_present(0, true);
set_cpu_online(0, true);
}
@@ -862,7 +878,7 @@ static ssize_t cpu_configure_store(struct device *dev,
if (rc)
break;
pcpu->state = CPU_STATE_STANDBY;
- cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
topology_expect_change();
break;
case 1:
@@ -872,7 +888,7 @@ static ssize_t cpu_configure_store(struct device *dev,
if (rc)
break;
pcpu->state = CPU_STATE_CONFIGURED;
- cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
topology_expect_change();
break;
default:
@@ -959,23 +975,17 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
struct device *s = &c->dev;
int err = 0;
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
break;
case CPU_DEAD:
- case CPU_DEAD_FROZEN:
sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
break;
}
return notifier_from_errno(err);
}
-static struct notifier_block __cpuinitdata smp_cpu_nb = {
- .notifier_call = smp_cpu_notify,
-};
-
static int __devinit smp_add_present_cpu(int cpu)
{
struct cpu *c = &pcpu_devices[cpu].cpu;
@@ -1050,7 +1060,7 @@ static int __init s390_smp_init(void)
{
int cpu, rc;
- register_cpu_notifier(&smp_cpu_nb);
+ hotcpu_notifier(smp_cpu_notify, 0);
#ifdef CONFIG_HOTPLUG_CPU
rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
if (rc)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index bcab2f0..4817485 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -350,3 +350,5 @@ SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)
SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */
SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper)
+SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper)
+SYSCALL(sys_kcmp,sys_kcmp,sys_kcmp_wrapper)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index fa0eb23..62f89d9 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -22,17 +22,41 @@
#include <math-emu/soft-fp.h>
#include <math-emu/single.h>
-static inline int stsi_0(void)
+int topology_max_mnest;
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
{
- int rc = stsi(NULL, 0, 0, 0);
- return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28);
+ register int r0 asm("0") = (fc << 28) | sel1;
+ register int r1 asm("1") = sel2;
+ int rc = 0;
+
+ asm volatile(
+ " stsi 0(%3)\n"
+ "0: jz 2f\n"
+ "1: lhi %1,%4\n"
+ "2:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (r0), "+d" (rc)
+ : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
+ : "cc", "memory");
+ if (rc)
+ return rc;
+ return fc ? 0 : ((unsigned int) r0) >> 28;
}
+EXPORT_SYMBOL(stsi);
-static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len)
+static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
{
- if (stsi(info, 1, 1, 1) == -ENOSYS)
- return len;
+ int i;
+ if (stsi(info, 1, 1, 1))
+ return;
EBCASC(info->manufacturer, sizeof(info->manufacturer));
EBCASC(info->type, sizeof(info->type));
EBCASC(info->model, sizeof(info->model));
@@ -41,242 +65,197 @@ static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len)
EBCASC(info->model_capacity, sizeof(info->model_capacity));
EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
- len += sprintf(page + len, "Manufacturer: %-16.16s\n",
- info->manufacturer);
- len += sprintf(page + len, "Type: %-4.4s\n",
- info->type);
+ seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
+ seq_printf(m, "Type: %-4.4s\n", info->type);
+ /*
+ * Sigh: the model field has been renamed with System z9
+ * to model_capacity and a new model field has been added
+ * after the plant field. To avoid confusing older programs
+ * the "Model:" prints "model_capacity model" or just
+ * "model_capacity" if the model string is empty.
+ */
+ seq_printf(m, "Model: %-16.16s", info->model_capacity);
if (info->model[0] != '\0')
- /*
- * Sigh: the model field has been renamed with System z9
- * to model_capacity and a new model field has been added
- * after the plant field. To avoid confusing older programs
- * the "Model:" prints "model_capacity model" or just
- * "model_capacity" if the model string is empty .
- */
- len += sprintf(page + len,
- "Model: %-16.16s %-16.16s\n",
- info->model_capacity, info->model);
- else
- len += sprintf(page + len, "Model: %-16.16s\n",
- info->model_capacity);
- len += sprintf(page + len, "Sequence Code: %-16.16s\n",
- info->sequence);
- len += sprintf(page + len, "Plant: %-4.4s\n",
- info->plant);
- len += sprintf(page + len, "Model Capacity: %-16.16s %08u\n",
- info->model_capacity, *(u32 *) info->model_cap_rating);
- if (info->model_perm_cap[0] != '\0')
- len += sprintf(page + len,
- "Model Perm. Capacity: %-16.16s %08u\n",
- info->model_perm_cap,
- *(u32 *) info->model_perm_cap_rating);
- if (info->model_temp_cap[0] != '\0')
- len += sprintf(page + len,
- "Model Temp. Capacity: %-16.16s %08u\n",
- info->model_temp_cap,
- *(u32 *) info->model_temp_cap_rating);
+ seq_printf(m, " %-16.16s", info->model);
+ seq_putc(m, '\n');
+ seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence);
+ seq_printf(m, "Plant: %-4.4s\n", info->plant);
+ seq_printf(m, "Model Capacity: %-16.16s %08u\n",
+ info->model_capacity, info->model_cap_rating);
+ if (info->model_perm_cap_rating)
+ seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n",
+ info->model_perm_cap,
+ info->model_perm_cap_rating);
+ if (info->model_temp_cap_rating)
+ seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
+ info->model_temp_cap,
+ info->model_temp_cap_rating);
+ if (info->ncr)
+ seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr);
+ if (info->npr)
+ seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
+ if (info->ntr)
+ seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
if (info->cai) {
- len += sprintf(page + len,
- "Capacity Adj. Ind.: %d\n",
- info->cai);
- len += sprintf(page + len, "Capacity Ch. Reason: %d\n",
- info->ccr);
+ seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai);
+ seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr);
+ seq_printf(m, "Capacity Transient: %d\n", info->t);
+ }
+ if (info->p) {
+ for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) {
+ seq_printf(m, "Type %d Percentage: %d\n",
+ i, info->typepct[i - 1]);
+ }
}
- return len;
}
-static int stsi_15_1_x(struct sysinfo_15_1_x *info, char *page, int len)
+static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
{
static int max_mnest;
int i, rc;
- len += sprintf(page + len, "\n");
+ seq_putc(m, '\n');
if (!MACHINE_HAS_TOPOLOGY)
- return len;
- if (max_mnest) {
- stsi(info, 15, 1, max_mnest);
- } else {
- for (max_mnest = 6; max_mnest > 1; max_mnest--) {
- rc = stsi(info, 15, 1, max_mnest);
- if (rc != -ENOSYS)
- break;
- }
- }
- len += sprintf(page + len, "CPU Topology HW: ");
+ return;
+ if (stsi(info, 15, 1, topology_max_mnest))
+ return;
+ seq_printf(m, "CPU Topology HW: ");
for (i = 0; i < TOPOLOGY_NR_MAG; i++)
- len += sprintf(page + len, " %d", info->mag[i]);
- len += sprintf(page + len, "\n");
+ seq_printf(m, " %d", info->mag[i]);
+ seq_putc(m, '\n');
#ifdef CONFIG_SCHED_MC
store_topology(info);
- len += sprintf(page + len, "CPU Topology SW: ");
+ seq_printf(m, "CPU Topology SW: ");
for (i = 0; i < TOPOLOGY_NR_MAG; i++)
- len += sprintf(page + len, " %d", info->mag[i]);
- len += sprintf(page + len, "\n");
+ seq_printf(m, " %d", info->mag[i]);
+ seq_putc(m, '\n');
#endif
- return len;
}
-static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len)
+static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info)
{
struct sysinfo_1_2_2_extension *ext;
int i;
- if (stsi(info, 1, 2, 2) == -ENOSYS)
- return len;
+ if (stsi(info, 1, 2, 2))
+ return;
ext = (struct sysinfo_1_2_2_extension *)
((unsigned long) info + info->acc_offset);
-
- len += sprintf(page + len, "CPUs Total: %d\n",
- info->cpus_total);
- len += sprintf(page + len, "CPUs Configured: %d\n",
- info->cpus_configured);
- len += sprintf(page + len, "CPUs Standby: %d\n",
- info->cpus_standby);
- len += sprintf(page + len, "CPUs Reserved: %d\n",
- info->cpus_reserved);
-
- if (info->format == 1) {
- /*
- * Sigh 2. According to the specification the alternate
- * capability field is a 32 bit floating point number
- * if the higher order 8 bits are not zero. Printing
- * a floating point number in the kernel is a no-no,
- * always print the number as 32 bit unsigned integer.
- * The user-space needs to know about the strange
- * encoding of the alternate cpu capability.
- */
- len += sprintf(page + len, "Capability: %u %u\n",
- info->capability, ext->alt_capability);
- for (i = 2; i <= info->cpus_total; i++)
- len += sprintf(page + len,
- "Adjustment %02d-way: %u %u\n",
- i, info->adjustment[i-2],
- ext->alt_adjustment[i-2]);
-
- } else {
- len += sprintf(page + len, "Capability: %u\n",
- info->capability);
- for (i = 2; i <= info->cpus_total; i++)
- len += sprintf(page + len,
- "Adjustment %02d-way: %u\n",
- i, info->adjustment[i-2]);
+ seq_printf(m, "CPUs Total: %d\n", info->cpus_total);
+ seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured);
+ seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby);
+ seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved);
+ /*
+ * Sigh 2. According to the specification the alternate
+ * capability field is a 32 bit floating point number
+ * if the higher order 8 bits are not zero. Printing
+ * a floating point number in the kernel is a no-no,
+ * always print the number as 32 bit unsigned integer.
+ * The user-space needs to know about the strange
+ * encoding of the alternate cpu capability.
+ */
+ seq_printf(m, "Capability: %u", info->capability);
+ if (info->format == 1)
+ seq_printf(m, " %u", ext->alt_capability);
+ seq_putc(m, '\n');
+ if (info->nominal_cap)
+ seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap);
+ if (info->secondary_cap)
+ seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap);
+ for (i = 2; i <= info->cpus_total; i++) {
+ seq_printf(m, "Adjustment %02d-way: %u",
+ i, info->adjustment[i-2]);
+ if (info->format == 1)
+ seq_printf(m, " %u", ext->alt_adjustment[i-2]);
+ seq_putc(m, '\n');
}
-
- if (info->secondary_capability != 0)
- len += sprintf(page + len, "Secondary Capability: %d\n",
- info->secondary_capability);
- return len;
}
-static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len)
+static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
{
- if (stsi(info, 2, 2, 2) == -ENOSYS)
- return len;
-
+ if (stsi(info, 2, 2, 2))
+ return;
EBCASC(info->name, sizeof(info->name));
-
- len += sprintf(page + len, "\n");
- len += sprintf(page + len, "LPAR Number: %d\n",
- info->lpar_number);
-
- len += sprintf(page + len, "LPAR Characteristics: ");
+ seq_putc(m, '\n');
+ seq_printf(m, "LPAR Number: %d\n", info->lpar_number);
+ seq_printf(m, "LPAR Characteristics: ");
if (info->characteristics & LPAR_CHAR_DEDICATED)
- len += sprintf(page + len, "Dedicated ");
+ seq_printf(m, "Dedicated ");
if (info->characteristics & LPAR_CHAR_SHARED)
- len += sprintf(page + len, "Shared ");
+ seq_printf(m, "Shared ");
if (info->characteristics & LPAR_CHAR_LIMITED)
- len += sprintf(page + len, "Limited ");
- len += sprintf(page + len, "\n");
-
- len += sprintf(page + len, "LPAR Name: %-8.8s\n",
- info->name);
-
- len += sprintf(page + len, "LPAR Adjustment: %d\n",
- info->caf);
-
- len += sprintf(page + len, "LPAR CPUs Total: %d\n",
- info->cpus_total);
- len += sprintf(page + len, "LPAR CPUs Configured: %d\n",
- info->cpus_configured);
- len += sprintf(page + len, "LPAR CPUs Standby: %d\n",
- info->cpus_standby);
- len += sprintf(page + len, "LPAR CPUs Reserved: %d\n",
- info->cpus_reserved);
- len += sprintf(page + len, "LPAR CPUs Dedicated: %d\n",
- info->cpus_dedicated);
- len += sprintf(page + len, "LPAR CPUs Shared: %d\n",
- info->cpus_shared);
- return len;
+ seq_printf(m, "Limited ");
+ seq_putc(m, '\n');
+ seq_printf(m, "LPAR Name: %-8.8s\n", info->name);
+ seq_printf(m, "LPAR Adjustment: %d\n", info->caf);
+ seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total);
+ seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured);
+ seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby);
+ seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved);
+ seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated);
+ seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared);
}
-static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len)
+static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
{
int i;
- if (stsi(info, 3, 2, 2) == -ENOSYS)
- return len;
+ if (stsi(info, 3, 2, 2))
+ return;
for (i = 0; i < info->count; i++) {
EBCASC(info->vm[i].name, sizeof(info->vm[i].name));
EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi));
- len += sprintf(page + len, "\n");
- len += sprintf(page + len, "VM%02d Name: %-8.8s\n",
- i, info->vm[i].name);
- len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n",
- i, info->vm[i].cpi);
-
- len += sprintf(page + len, "VM%02d Adjustment: %d\n",
- i, info->vm[i].caf);
-
- len += sprintf(page + len, "VM%02d CPUs Total: %d\n",
- i, info->vm[i].cpus_total);
- len += sprintf(page + len, "VM%02d CPUs Configured: %d\n",
- i, info->vm[i].cpus_configured);
- len += sprintf(page + len, "VM%02d CPUs Standby: %d\n",
- i, info->vm[i].cpus_standby);
- len += sprintf(page + len, "VM%02d CPUs Reserved: %d\n",
- i, info->vm[i].cpus_reserved);
+ seq_putc(m, '\n');
+ seq_printf(m, "VM%02d Name: %-8.8s\n", i, info->vm[i].name);
+ seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi);
+ seq_printf(m, "VM%02d Adjustment: %d\n", i, info->vm[i].caf);
+ seq_printf(m, "VM%02d CPUs Total: %d\n", i, info->vm[i].cpus_total);
+ seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured);
+ seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby);
+ seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved);
}
- return len;
}
-static int proc_read_sysinfo(char *page, char **start,
- off_t off, int count,
- int *eof, void *data)
+static int sysinfo_show(struct seq_file *m, void *v)
{
- unsigned long info = get_zeroed_page(GFP_KERNEL);
- int level, len;
+ void *info = (void *)get_zeroed_page(GFP_KERNEL);
+ int level;
if (!info)
return 0;
-
- len = 0;
- level = stsi_0();
+ level = stsi(NULL, 0, 0, 0);
if (level >= 1)
- len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len);
-
+ stsi_1_1_1(m, info);
if (level >= 1)
- len = stsi_15_1_x((struct sysinfo_15_1_x *) info, page, len);
-
+ stsi_15_1_x(m, info);
if (level >= 1)
- len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len);
-
+ stsi_1_2_2(m, info);
if (level >= 2)
- len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len);
-
+ stsi_2_2_2(m, info);
if (level >= 3)
- len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len);
+ stsi_3_2_2(m, info);
+ free_page((unsigned long)info);
+ return 0;
+}
- free_page(info);
- return len;
+static int sysinfo_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, sysinfo_show, NULL);
}
-static __init int create_proc_sysinfo(void)
+static const struct file_operations sysinfo_fops = {
+ .open = sysinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init sysinfo_create_proc(void)
{
- create_proc_read_entry("sysinfo", 0444, NULL,
- proc_read_sysinfo, NULL);
+ proc_create("sysinfo", 0444, NULL, &sysinfo_fops);
return 0;
}
-device_initcall(create_proc_sysinfo);
+device_initcall(sysinfo_create_proc);
/*
* Service levels interface.
@@ -407,7 +386,7 @@ void s390_adjust_jiffies(void)
if (!info)
return;
- if (stsi(info, 1, 2, 2) != -ENOSYS) {
+ if (stsi(info, 1, 2, 2) == 0) {
/*
* Major sigh. The cpu capability encoding is "special".
* If the first 9 bits of info->capability are 0 then it
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index dcec960..2db1011 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -329,7 +329,7 @@ static unsigned long clock_sync_flags;
* The synchronous get_clock function. It will write the current clock
* value to the clock pointer and return 0 if the clock is in sync with
* the external time source. If the clock mode is local it will return
- * -ENOSYS and -EAGAIN if the clock is not in sync with the external
+ * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external
* reference.
*/
int get_sync_clock(unsigned long long *clock)
@@ -347,7 +347,7 @@ int get_sync_clock(unsigned long long *clock)
return 0;
if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
- return -ENOSYS;
+ return -EOPNOTSUPP;
if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
return -EACCES;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 05151e0..54d93f4 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -17,6 +17,7 @@
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <asm/sysinfo.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
@@ -44,9 +45,6 @@ static struct mask_info book_info;
cpumask_t cpu_book_map[NR_CPUS];
unsigned char cpu_book_id[NR_CPUS];
-/* smp_cpu_state_mutex must be held when accessing this array */
-int cpu_polarization[NR_CPUS];
-
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
cpumask_t mask;
@@ -75,10 +73,7 @@ static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
{
unsigned int cpu;
- for (cpu = find_first_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS);
- cpu < TOPOLOGY_CPU_BITS;
- cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1))
- {
+ for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) {
unsigned int rcpu;
int lcpu;
@@ -94,7 +89,7 @@ static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
} else {
cpu_core_id[lcpu] = core->id;
}
- cpu_set_polarization(lcpu, tl_cpu->pp);
+ smp_cpu_set_polarization(lcpu, tl_cpu->pp);
}
}
return core;
@@ -201,7 +196,7 @@ static void topology_update_polarization_simple(void)
mutex_lock(&smp_cpu_state_mutex);
for_each_possible_cpu(cpu)
- cpu_set_polarization(cpu, POLARIZATION_HRZ);
+ smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
mutex_unlock(&smp_cpu_state_mutex);
}
@@ -231,7 +226,7 @@ int topology_set_cpu_management(int fc)
if (rc)
return -EBUSY;
for_each_possible_cpu(cpu)
- cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
return rc;
}
@@ -250,12 +245,10 @@ static void update_cpu_core_map(void)
void store_topology(struct sysinfo_15_1_x *info)
{
- int rc;
-
- rc = stsi(info, 15, 1, 3);
- if (rc != -ENOSYS)
- return;
- stsi(info, 15, 1, 2);
+ if (topology_max_mnest >= 3)
+ stsi(info, 15, 1, 3);
+ else
+ stsi(info, 15, 1, 2);
}
int arch_update_cpu_topology(void)
@@ -415,7 +408,7 @@ static ssize_t cpu_polarization_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- switch (cpu_read_polarization(cpu)) {
+ switch (smp_cpu_get_polarization(cpu)) {
case POLARIZATION_HRZ:
count = sprintf(buf, "horizontal\n");
break;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 01775c0..3d2b0fa3 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -57,6 +57,23 @@ static int kstack_depth_to_print = 12;
static int kstack_depth_to_print = 20;
#endif /* CONFIG_64BIT */
+static inline void __user *get_trap_ip(struct pt_regs *regs)
+{
+#ifdef CONFIG_64BIT
+ unsigned long address;
+
+ if (regs->int_code & 0x200)
+ address = *(unsigned long *)(current->thread.trap_tdb + 24);
+ else
+ address = regs->psw.addr;
+ return (void __user *)
+ ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+#else
+ return (void __user *)
+ ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+#endif
+}
+
/*
* For show_trace we have tree different stack to consider:
* - the panic stack which is used if the kernel stack has overflown
@@ -214,7 +231,6 @@ void show_registers(struct pt_regs *regs)
void show_regs(struct pt_regs *regs)
{
- print_modules();
printk("CPU: %d %s %s %.*s\n",
task_thread_info(current)->cpu, print_tainted(),
init_utsname()->release,
@@ -254,6 +270,7 @@ void die(struct pt_regs *regs, const char *str)
#endif
printk("\n");
notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
+ print_modules();
show_regs(regs);
bust_spinlocks(0);
add_taint(TAINT_DIE);
@@ -285,12 +302,6 @@ int is_valid_bugaddr(unsigned long addr)
return 1;
}
-static inline void __user *get_psw_address(struct pt_regs *regs)
-{
- return (void __user *)
- ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN);
-}
-
static void __kprobes do_trap(struct pt_regs *regs,
int si_signo, int si_code, char *str)
{
@@ -304,14 +315,14 @@ static void __kprobes do_trap(struct pt_regs *regs,
info.si_signo = si_signo;
info.si_errno = 0;
info.si_code = si_code;
- info.si_addr = get_psw_address(regs);
+ info.si_addr = get_trap_ip(regs);
force_sig_info(si_signo, &info, current);
report_user_fault(regs, si_signo);
} else {
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
if (fixup)
- regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
else {
enum bug_trap_type btt;
@@ -381,6 +392,11 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN,
"translation exception")
+#ifdef CONFIG_64BIT
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
+ "transaction constraint exception")
+#endif
+
static inline void do_fp_trap(struct pt_regs *regs, int fpc)
{
int si_code = 0;
@@ -408,7 +424,7 @@ static void __kprobes illegal_op(struct pt_regs *regs)
__u16 __user *location;
int signal = 0;
- location = get_psw_address(regs);
+ location = get_trap_ip(regs);
if (user_mode(regs)) {
if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
@@ -476,7 +492,7 @@ void specification_exception(struct pt_regs *regs)
__u16 __user *location = NULL;
int signal = 0;
- location = (__u16 __user *) get_psw_address(regs);
+ location = (__u16 __user *) get_trap_ip(regs);
if (user_mode(regs)) {
get_user(*((__u16 *) opcode), location);
@@ -525,7 +541,7 @@ static void data_exception(struct pt_regs *regs)
__u16 __user *location;
int signal = 0;
- location = get_psw_address(regs);
+ location = get_trap_ip(regs);
if (MACHINE_HAS_IEEE)
asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
@@ -641,6 +657,7 @@ void __init trap_init(void)
pgm_check_table[0x12] = &translation_exception;
pgm_check_table[0x13] = &special_op_exception;
#ifdef CONFIG_64BIT
+ pgm_check_table[0x18] = &transaction_exception;
pgm_check_table[0x38] = &do_asce_exception;
pgm_check_table[0x39] = &do_dat_exception;
pgm_check_table[0x3A] = &do_dat_exception;
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 9a19ca3..d777628 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -85,7 +85,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data;
static void vdso_init_data(struct vdso_data *vd)
{
vd->ectg_available =
- addressing_mode != HOME_SPACE_MODE && test_facility(31);
+ s390_user_mode != HOME_SPACE_MODE && test_facility(31);
}
#ifdef CONFIG_64BIT
@@ -102,7 +102,7 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
lowcore->vdso_per_cpu_data = __LC_PASTE;
- if (addressing_mode == HOME_SPACE_MODE || !vdso_enabled)
+ if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
return 0;
segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
@@ -147,7 +147,7 @@ void vdso_free_per_cpu(struct _lowcore *lowcore)
unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;
- if (addressing_mode == HOME_SPACE_MODE || !vdso_enabled)
+ if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
return;
psal = (u32 *)(addr_t) lowcore->paste[4];
@@ -165,7 +165,7 @@ static void vdso_init_cr5(void)
{
unsigned long cr5;
- if (addressing_mode == HOME_SPACE_MODE || !vdso_enabled)
+ if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
return;
cr5 = offsetof(struct _lowcore, paste);
__ctl_load(cr5, 5, 5);
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 4fc97b4..7903344 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -99,7 +99,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
return virt_timer_forward(user + system);
}
-void account_vtime(struct task_struct *prev, struct task_struct *next)
+void vtime_task_switch(struct task_struct *prev)
{
struct thread_info *ti;
@@ -107,7 +107,7 @@ void account_vtime(struct task_struct *prev, struct task_struct *next)
ti = task_thread_info(prev);
ti->user_timer = S390_lowcore.user_timer;
ti->system_timer = S390_lowcore.system_timer;
- ti = task_thread_info(next);
+ ti = task_thread_info(current);
S390_lowcore.user_timer = ti->user_timer;
S390_lowcore.system_timer = ti->system_timer;
}
@@ -122,7 +122,7 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-void account_system_vtime(struct task_struct *tsk)
+void vtime_account(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
u64 timer, system;
@@ -138,7 +138,7 @@ void account_system_vtime(struct task_struct *tsk)
virt_timer_forward(system);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
+EXPORT_SYMBOL_GPL(vtime_account);
void __kprobes vtime_stop_cpu(void)
{
@@ -378,9 +378,8 @@ static int __cpuinit s390_nohz_notify(struct notifier_block *self,
long cpu = (long) hcpu;
idle = &per_cpu(s390_idle, cpu);
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DYING:
- case CPU_DYING_FROZEN:
idle->nohz_delay = 0;
default:
break;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 78eb984..9b04a32 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -5,7 +5,7 @@ source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
def_bool y
- prompt "Virtualization"
+ prompt "KVM"
---help---
Say Y here to get to see options for using your Linux host to run other
operating systems inside virtual machines (guests).
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 60da903..310be61 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -211,7 +211,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
spin_unlock(&fi->lock);
/* deal with other level 3 hypervisors */
- if (stsi(mem, 3, 2, 2) == -ENOSYS)
+ if (stsi(mem, 3, 2, 2))
mem->count = 0;
if (mem->count < 8)
mem->count++;
@@ -259,7 +259,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
goto out_fail;
- if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
+ if (stsi((void *) mem, fc, sel1, sel2))
goto out_mem;
break;
case 3:
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 761ab8b..6ab0d0b 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -4,6 +4,7 @@
lib-y += delay.o string.o uaccess_std.o uaccess_pt.o
obj-y += usercopy.o
-obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o
+obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o
+obj-$(CONFIG_64BIT) += mem64.o
lib-$(CONFIG_64BIT) += uaccess_mvcos.o
lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/mem32.S b/arch/s390/lib/mem32.S
new file mode 100644
index 0000000..14ca924
--- /dev/null
+++ b/arch/s390/lib/mem32.S
@@ -0,0 +1,92 @@
+/*
+ * String handling functions.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * memset implementation
+ *
+ * This code corresponds to the C construct below. We do distinguish
+ * between clearing (c == 0) and setting a memory array (c != 0) simply
+ * because nearly all memset invocations in the kernel clear memory and
+ * the xc instruction is preferred in such cases.
+ *
+ * void *memset(void *s, int c, size_t n)
+ * {
+ * if (likely(c == 0))
+ * return __builtin_memset(s, 0, n);
+ * return __builtin_memset(s, c, n);
+ * }
+ */
+ENTRY(memset)
+ basr %r5,%r0
+.Lmemset_base:
+ ltr %r4,%r4
+ bzr %r14
+ ltr %r3,%r3
+ jnz .Lmemset_fill
+ ahi %r4,-1
+ lr %r3,%r4
+ srl %r3,8
+ ltr %r3,%r3
+ lr %r1,%r2
+ je .Lmemset_clear_rest
+.Lmemset_clear_loop:
+ xc 0(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brct %r3,.Lmemset_clear_loop
+.Lmemset_clear_rest:
+ ex %r4,.Lmemset_xc-.Lmemset_base(%r5)
+ br %r14
+.Lmemset_fill:
+ stc %r3,0(%r2)
+ chi %r4,1
+ lr %r1,%r2
+ ber %r14
+ ahi %r4,-2
+ lr %r3,%r4
+ srl %r3,8
+ ltr %r3,%r3
+ je .Lmemset_fill_rest
+.Lmemset_fill_loop:
+ mvc 1(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brct %r3,.Lmemset_fill_loop
+.Lmemset_fill_rest:
+ ex %r4,.Lmemset_mvc-.Lmemset_base(%r5)
+ br %r14
+.Lmemset_xc:
+ xc 0(1,%r1),0(%r1)
+.Lmemset_mvc:
+ mvc 1(1,%r1),0(%r1)
+
+/*
+ * memcpy implementation
+ *
+ * void *memcpy(void *dest, const void *src, size_t n)
+ */
+ENTRY(memcpy)
+ basr %r5,%r0
+.Lmemcpy_base:
+ ltr %r4,%r4
+ bzr %r14
+ ahi %r4,-1
+ lr %r0,%r4
+ srl %r0,8
+ ltr %r0,%r0
+ lr %r1,%r2
+ jnz .Lmemcpy_loop
+.Lmemcpy_rest:
+ ex %r4,.Lmemcpy_mvc-.Lmemcpy_base(%r5)
+ br %r14
+.Lmemcpy_loop:
+ mvc 0(256,%r1),0(%r3)
+ la %r1,256(%r1)
+ la %r3,256(%r3)
+ brct %r0,.Lmemcpy_loop
+ j .Lmemcpy_rest
+.Lmemcpy_mvc:
+ mvc 0(1,%r1),0(%r3)
diff --git a/arch/s390/lib/mem64.S b/arch/s390/lib/mem64.S
new file mode 100644
index 0000000..c6d553e
--- /dev/null
+++ b/arch/s390/lib/mem64.S
@@ -0,0 +1,88 @@
+/*
+ * String handling functions.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * memset implementation
+ *
+ * This code corresponds to the C construct below. We do distinguish
+ * between clearing (c == 0) and setting a memory array (c != 0) simply
+ * because nearly all memset invocations in the kernel clear memory and
+ * the xc instruction is preferred in such cases.
+ *
+ * void *memset(void *s, int c, size_t n)
+ * {
+ * if (likely(c == 0))
+ * return __builtin_memset(s, 0, n);
+ * return __builtin_memset(s, c, n);
+ * }
+ */
+ENTRY(memset)
+ ltgr %r4,%r4
+ bzr %r14
+ ltgr %r3,%r3
+ jnz .Lmemset_fill
+ aghi %r4,-1
+ srlg %r3,%r4,8
+ ltgr %r3,%r3
+ lgr %r1,%r2
+ jz .Lmemset_clear_rest
+.Lmemset_clear_loop:
+ xc 0(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brctg %r3,.Lmemset_clear_loop
+.Lmemset_clear_rest:
+ larl %r3,.Lmemset_xc
+ ex %r4,0(%r3)
+ br %r14
+.Lmemset_fill:
+ stc %r3,0(%r2)
+ cghi %r4,1
+ lgr %r1,%r2
+ ber %r14
+ aghi %r4,-2
+ srlg %r3,%r4,8
+ ltgr %r3,%r3
+ jz .Lmemset_fill_rest
+.Lmemset_fill_loop:
+ mvc 1(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brctg %r3,.Lmemset_fill_loop
+.Lmemset_fill_rest:
+ larl %r3,.Lmemset_mvc
+ ex %r4,0(%r3)
+ br %r14
+.Lmemset_xc:
+ xc 0(1,%r1),0(%r1)
+.Lmemset_mvc:
+ mvc 1(1,%r1),0(%r1)
+
+/*
+ * memcpy implementation
+ *
+ * void *memcpy(void *dest, const void *src, size_t n)
+ */
+ENTRY(memcpy)
+ ltgr %r4,%r4
+ bzr %r14
+ aghi %r4,-1
+ srlg %r5,%r4,8
+ ltgr %r5,%r5
+ lgr %r1,%r2
+ jnz .Lmemcpy_loop
+.Lmemcpy_rest:
+ larl %r5,.Lmemcpy_mvc
+ ex %r4,0(%r5)
+ br %r14
+.Lmemcpy_loop:
+ mvc 0(256,%r1),0(%r3)
+ la %r1,256(%r1)
+ la %r3,256(%r3)
+ brctg %r5,.Lmemcpy_loop
+ j .Lmemcpy_rest
+.Lmemcpy_mvc:
+ mvc 0(1,%r1),0(%r3)
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 846ec64..b647d5f 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -43,11 +43,7 @@ static inline char *__strnend(const char *s, size_t n)
*/
size_t strlen(const char *s)
{
-#if __GNUC__ < 4
return __strend(s) - s;
-#else
- return __builtin_strlen(s);
-#endif
}
EXPORT_SYMBOL(strlen);
@@ -73,7 +69,6 @@ EXPORT_SYMBOL(strnlen);
*/
char *strcpy(char *dest, const char *src)
{
-#if __GNUC__ < 4
register int r0 asm("0") = 0;
char *ret = dest;
@@ -82,9 +77,6 @@ char *strcpy(char *dest, const char *src)
: "+&a" (dest), "+&a" (src) : "d" (r0)
: "cc", "memory" );
return ret;
-#else
- return __builtin_strcpy(dest, src);
-#endif
}
EXPORT_SYMBOL(strcpy);
@@ -106,7 +98,7 @@ size_t strlcpy(char *dest, const char *src, size_t size)
if (size) {
size_t len = (ret >= size) ? size-1 : ret;
dest[len] = '\0';
- __builtin_memcpy(dest, src, len);
+ memcpy(dest, src, len);
}
return ret;
}
@@ -124,8 +116,8 @@ EXPORT_SYMBOL(strlcpy);
char *strncpy(char *dest, const char *src, size_t n)
{
size_t len = __strnend(src, n) - src;
- __builtin_memset(dest + len, 0, n - len);
- __builtin_memcpy(dest, src, len);
+ memset(dest + len, 0, n - len);
+ memcpy(dest, src, len);
return dest;
}
EXPORT_SYMBOL(strncpy);
@@ -171,7 +163,7 @@ size_t strlcat(char *dest, const char *src, size_t n)
if (len >= n)
len = n - 1;
dest[len] = '\0';
- __builtin_memcpy(dest, src, len);
+ memcpy(dest, src, len);
}
return res;
}
@@ -194,7 +186,7 @@ char *strncat(char *dest, const char *src, size_t n)
char *p = __strend(dest);
p[len] = '\0';
- __builtin_memcpy(p, src, len);
+ memcpy(p, src, len);
return dest;
}
EXPORT_SYMBOL(strncat);
@@ -348,41 +340,3 @@ void *memscan(void *s, int c, size_t n)
return (void *) ret;
}
EXPORT_SYMBOL(memscan);
-
-/**
- * memcpy - Copy one area of memory to another
- * @dest: Where to copy to
- * @src: Where to copy from
- * @n: The size of the area.
- *
- * returns a pointer to @dest
- */
-void *memcpy(void *dest, const void *src, size_t n)
-{
- return __builtin_memcpy(dest, src, n);
-}
-EXPORT_SYMBOL(memcpy);
-
-/**
- * memset - Fill a region of memory with the given value
- * @s: Pointer to the start of the area.
- * @c: The byte to fill the area with
- * @n: The size of the area.
- *
- * returns a pointer to @s
- */
-void *memset(void *s, int c, size_t n)
-{
- char *xs;
-
- if (c == 0)
- return __builtin_memset(s, 0, n);
-
- xs = (char *) s;
- if (n > 0)
- do {
- *xs++ = c;
- } while (--n > 0);
- return s;
-}
-EXPORT_SYMBOL(memset);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 60ee2b8..2d37bb8 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -2,69 +2,82 @@
* User access functions based on page table walks for enhanced
* system layout without hardware support.
*
- * Copyright IBM Corp. 2006
+ * Copyright IBM Corp. 2006, 2012
* Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com)
*/
#include <linux/errno.h>
#include <linux/hardirq.h>
#include <linux/mm.h>
+#include <linux/hugetlb.h>
#include <asm/uaccess.h>
#include <asm/futex.h>
#include "uaccess.h"
-static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr)
+
+/*
+ * Returns kernel address for user virtual address. If the returned address is
+ * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the address
+ * contains the (negative) exception code.
+ */
+static __always_inline unsigned long follow_table(struct mm_struct *mm,
+ unsigned long addr, int write)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
+ pte_t *ptep;
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- return (pte_t *) 0x3a;
+ return -0x3aUL;
pud = pud_offset(pgd, addr);
if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- return (pte_t *) 0x3b;
+ return -0x3bUL;
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- return (pte_t *) 0x10;
+ if (pmd_none(*pmd))
+ return -0x10UL;
+ if (pmd_huge(*pmd)) {
+ if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO))
+ return -0x04UL;
+ return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK);
+ }
+ if (unlikely(pmd_bad(*pmd)))
+ return -0x10UL;
+
+ ptep = pte_offset_map(pmd, addr);
+ if (!pte_present(*ptep))
+ return -0x11UL;
+ if (write && !pte_write(*ptep))
+ return -0x04UL;
- return pte_offset_map(pmd, addr);
+ return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK);
}
static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr,
size_t n, int write_user)
{
struct mm_struct *mm = current->mm;
- unsigned long offset, pfn, done, size;
- pte_t *pte;
+ unsigned long offset, done, size, kaddr;
void *from, *to;
done = 0;
retry:
spin_lock(&mm->page_table_lock);
do {
- pte = follow_table(mm, uaddr);
- if ((unsigned long) pte < 0x1000)
+ kaddr = follow_table(mm, uaddr, write_user);
+ if (IS_ERR_VALUE(kaddr))
goto fault;
- if (!pte_present(*pte)) {
- pte = (pte_t *) 0x11;
- goto fault;
- } else if (write_user && !pte_write(*pte)) {
- pte = (pte_t *) 0x04;
- goto fault;
- }
- pfn = pte_pfn(*pte);
- offset = uaddr & (PAGE_SIZE - 1);
+ offset = uaddr & ~PAGE_MASK;
size = min(n - done, PAGE_SIZE - offset);
if (write_user) {
- to = (void *)((pfn << PAGE_SHIFT) + offset);
+ to = (void *) kaddr;
from = kptr + done;
} else {
- from = (void *)((pfn << PAGE_SHIFT) + offset);
+ from = (void *) kaddr;
to = kptr + done;
}
memcpy(to, from, size);
@@ -75,7 +88,7 @@ retry:
return n - done;
fault:
spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, (unsigned long) pte, write_user))
+ if (__handle_fault(uaddr, -kaddr, write_user))
return n - done;
goto retry;
}
@@ -84,27 +97,22 @@ fault:
* Do DAT for user address by page table walk, return kernel address.
* This function needs to be called with current->mm->page_table_lock held.
*/
-static __always_inline unsigned long __dat_user_addr(unsigned long uaddr)
+static __always_inline unsigned long __dat_user_addr(unsigned long uaddr,
+ int write)
{
struct mm_struct *mm = current->mm;
- unsigned long pfn;
- pte_t *pte;
+ unsigned long kaddr;
int rc;
retry:
- pte = follow_table(mm, uaddr);
- if ((unsigned long) pte < 0x1000)
- goto fault;
- if (!pte_present(*pte)) {
- pte = (pte_t *) 0x11;
+ kaddr = follow_table(mm, uaddr, write);
+ if (IS_ERR_VALUE(kaddr))
goto fault;
- }
- pfn = pte_pfn(*pte);
- return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1));
+ return kaddr;
fault:
spin_unlock(&mm->page_table_lock);
- rc = __handle_fault(uaddr, (unsigned long) pte, 0);
+ rc = __handle_fault(uaddr, -kaddr, write);
spin_lock(&mm->page_table_lock);
if (!rc)
goto retry;
@@ -159,11 +167,9 @@ static size_t clear_user_pt(size_t n, void __user *to)
static size_t strnlen_user_pt(size_t count, const char __user *src)
{
- char *addr;
unsigned long uaddr = (unsigned long) src;
struct mm_struct *mm = current->mm;
- unsigned long offset, pfn, done, len;
- pte_t *pte;
+ unsigned long offset, done, len, kaddr;
size_t len_str;
if (segment_eq(get_fs(), KERNEL_DS))
@@ -172,19 +178,13 @@ static size_t strnlen_user_pt(size_t count, const char __user *src)
retry:
spin_lock(&mm->page_table_lock);
do {
- pte = follow_table(mm, uaddr);
- if ((unsigned long) pte < 0x1000)
- goto fault;
- if (!pte_present(*pte)) {
- pte = (pte_t *) 0x11;
+ kaddr = follow_table(mm, uaddr, 0);
+ if (IS_ERR_VALUE(kaddr))
goto fault;
- }
- pfn = pte_pfn(*pte);
- offset = uaddr & (PAGE_SIZE-1);
- addr = (char *)(pfn << PAGE_SHIFT) + offset;
+ offset = uaddr & ~PAGE_MASK;
len = min(count - done, PAGE_SIZE - offset);
- len_str = strnlen(addr, len);
+ len_str = strnlen((char *) kaddr, len);
done += len_str;
uaddr += len_str;
} while ((len_str == len) && (done < count));
@@ -192,7 +192,7 @@ retry:
return done + 1;
fault:
spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, (unsigned long) pte, 0))
+ if (__handle_fault(uaddr, -kaddr, 0))
return 0;
goto retry;
}
@@ -225,11 +225,10 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
const void __user *from)
{
struct mm_struct *mm = current->mm;
- unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
- uaddr, done, size, error_code;
+ unsigned long offset_max, uaddr, done, size, error_code;
unsigned long uaddr_from = (unsigned long) from;
unsigned long uaddr_to = (unsigned long) to;
- pte_t *pte_from, *pte_to;
+ unsigned long kaddr_to, kaddr_from;
int write_user;
if (segment_eq(get_fs(), KERNEL_DS)) {
@@ -242,38 +241,23 @@ retry:
do {
write_user = 0;
uaddr = uaddr_from;
- pte_from = follow_table(mm, uaddr_from);
- error_code = (unsigned long) pte_from;
- if (error_code < 0x1000)
- goto fault;
- if (!pte_present(*pte_from)) {
- error_code = 0x11;
+ kaddr_from = follow_table(mm, uaddr_from, 0);
+ error_code = kaddr_from;
+ if (IS_ERR_VALUE(error_code))
goto fault;
- }
write_user = 1;
uaddr = uaddr_to;
- pte_to = follow_table(mm, uaddr_to);
- error_code = (unsigned long) pte_to;
- if (error_code < 0x1000)
- goto fault;
- if (!pte_present(*pte_to)) {
- error_code = 0x11;
+ kaddr_to = follow_table(mm, uaddr_to, 1);
+ error_code = (unsigned long) kaddr_to;
+ if (IS_ERR_VALUE(error_code))
goto fault;
- } else if (!pte_write(*pte_to)) {
- error_code = 0x04;
- goto fault;
- }
- pfn_from = pte_pfn(*pte_from);
- pfn_to = pte_pfn(*pte_to);
- offset_from = uaddr_from & (PAGE_SIZE-1);
- offset_to = uaddr_from & (PAGE_SIZE-1);
- offset_max = max(offset_from, offset_to);
+ offset_max = max(uaddr_from & ~PAGE_MASK,
+ uaddr_to & ~PAGE_MASK);
size = min(n - done, PAGE_SIZE - offset_max);
- memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to,
- (void *)(pfn_from << PAGE_SHIFT) + offset_from, size);
+ memcpy((void *) kaddr_to, (void *) kaddr_from, size);
done += size;
uaddr_from += size;
uaddr_to += size;
@@ -282,7 +266,7 @@ retry:
return n - done;
fault:
spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, error_code, write_user))
+ if (__handle_fault(uaddr, -error_code, write_user))
return n - done;
goto retry;
}
@@ -341,7 +325,7 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
return __futex_atomic_op_pt(op, uaddr, oparg, old);
spin_lock(&current->mm->page_table_lock);
uaddr = (u32 __force __user *)
- __dat_user_addr((__force unsigned long) uaddr);
+ __dat_user_addr((__force unsigned long) uaddr, 1);
if (!uaddr) {
spin_unlock(&current->mm->page_table_lock);
return -EFAULT;
@@ -378,7 +362,7 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
spin_lock(&current->mm->page_table_lock);
uaddr = (u32 __force __user *)
- __dat_user_addr((__force unsigned long) uaddr);
+ __dat_user_addr((__force unsigned long) uaddr, 1);
if (!uaddr) {
spin_unlock(&current->mm->page_table_lock);
return -EFAULT;
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index d98fe90..0f5536b 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -3,7 +3,7 @@
#
obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
- page-states.o gup.o
+ page-states.o gup.o extable.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
new file mode 100644
index 0000000..4d1ee88
--- /dev/null
+++ b/arch/s390/mm/extable.c
@@ -0,0 +1,81 @@
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/uaccess.h>
+
+/*
+ * Binary search of a single exception table, which must already be
+ * sorted.  @first and @last bound the table inclusively and @value is
+ * the instruction address to look up.  Returns the entry whose
+ * instruction address equals @value, or NULL if there is none.
+ */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+	       const struct exception_table_entry *last,
+	       unsigned long value)
+{
+	while (first <= last) {
+		const struct exception_table_entry *probe;
+		unsigned long insn_addr;
+
+		/* Midpoint computed from the distance to avoid overflow */
+		probe = first + ((last - first) >> 1);
+		insn_addr = extable_insn(probe);
+		if (insn_addr == value)
+			return probe;
+		if (insn_addr < value)
+			first = probe + 1;
+		else
+			last = probe - 1;
+	}
+	return NULL;
+}
+
+/*
+ * The exception table needs to be sorted so that the binary
+ * search that we use to find entries in it works properly.
+ * This is used both for the kernel exception table and for
+ * the exception tables of modules that get loaded.
+ *
+ * cmp_ex() is the sort() comparison callback: it orders two entries
+ * by their insn field and returns <0, 0 or >0.
+ */
+static int cmp_ex(const void *a, const void *b)
+{
+	const struct exception_table_entry *x = a, *y = b;
+
+	/*
+	 * This compare is only valid after normalization.
+	 * Compare explicitly instead of returning the difference: a
+	 * subtraction can overflow and report the wrong ordering.
+	 */
+	if (x->insn > y->insn)
+		return 1;
+	if (x->insn < y->insn)
+		return -1;
+	return 0;
+}
+
+/*
+ * Sort the exception table [start, finish) by instruction address.
+ * The insn fields are temporarily rebased so that they can be compared
+ * with each other, then restored after sorting.
+ */
+void sort_extable(struct exception_table_entry *start,
+		  struct exception_table_entry *finish)
+{
+	struct exception_table_entry *entry;
+	int delta;
+
+	/*
+	 * Normalize entries to being relative to the start of the section.
+	 * The step of 8 is presumably the size of one table entry
+	 * (TODO confirm against struct exception_table_entry).
+	 */
+	delta = 0;
+	for (entry = start; entry < finish; entry++) {
+		entry->insn += delta;
+		delta += 8;
+	}
+
+	sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
+
+	/* Denormalize all entries, now by their new position */
+	delta = 0;
+	for (entry = start; entry < finish; entry++) {
+		entry->insn -= delta;
+		delta += 8;
+	}
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Drop exception table entries that refer to the module init section.
+ * With a sorted table these can only sit at the beginning or at the
+ * end, so both ends are trimmed until a non-init entry is found.
+ */
+void trim_init_extable(struct module *m)
+{
+	/* Trim the beginning: advance the table start past init entries */
+	for (; m->num_exentries; m->extable++, m->num_exentries--) {
+		if (!within_module_init(extable_insn(&m->extable[0]), m))
+			break;
+	}
+	/* Trim the end: shrink the count while the last entry is init */
+	for (; m->num_exentries; m->num_exentries--) {
+		if (!within_module_init(
+			extable_insn(&m->extable[m->num_exentries - 1]), m))
+			break;
+	}
+}
+#endif /* CONFIG_MODULES */
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 6c013f5..ac9122c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -111,7 +111,7 @@ static inline int user_space_fault(unsigned long trans_exc_code)
if (trans_exc_code == 2)
/* Access via secondary space, set_fs setting decides */
return current->thread.mm_segment.ar4;
- if (addressing_mode == HOME_SPACE_MODE)
+ if (s390_user_mode == HOME_SPACE_MODE)
/* User space if the access has been done via home space. */
return trans_exc_code == 3;
/*
@@ -163,7 +163,7 @@ static noinline void do_no_context(struct pt_regs *regs)
/* Are we prepared to handle this kernel fault? */
fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
if (fixup) {
- regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
return;
}
@@ -628,9 +628,8 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
struct thread_struct *thread, *next;
struct task_struct *tsk;
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DEAD:
- case CPU_DEAD_FROZEN:
spin_lock_irq(&pfault_lock);
list_for_each_entry_safe(thread, next, &pfault_list, list) {
thread->pfault_wait = 0;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 65cb06e..eeaf802 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -154,6 +154,43 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
return 1;
}
+/*
+ * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ *
+ * Walks the page tables of current->mm with interrupts disabled for
+ * @nr_pages pages starting at the page containing @start and returns
+ * the count reported by gup_pud_range() via its last argument, which
+ * may be less than @nr_pages (0 if access_ok() rejects the range).
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end, next;
+	unsigned long irq_flags;
+	pgd_t *pgdp, pgd;
+	int pinned = 0;
+
+	start &= PAGE_MASK;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	addr = start;
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+				(void __user *)start, len)))
+		return 0;
+
+	local_irq_save(irq_flags);
+	pgdp = pgd_offset(mm, addr);
+	for (;;) {
+		/* Read the entry once; barrier() keeps the copy stable */
+		pgd = *pgdp;
+		barrier();
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgdp, pgd, addr, next, write, pages,
+				   &pinned))
+			break;
+		pgdp++;
+		addr = next;
+		if (addr == end)
+			break;
+	}
+	local_irq_restore(irq_flags);
+
+	return pinned;
+}
+
+
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 6adbc08..81e596c 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -42,7 +42,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
-static unsigned long setup_zero_pages(void)
+static unsigned long __init setup_zero_pages(void)
{
struct cpuid cpu_id;
unsigned int order;
@@ -212,7 +212,7 @@ void free_initmem(void)
}
#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
+void __init free_initrd_mem(unsigned long start, unsigned long end)
{
free_init_pages("initrd memory", start, end);
}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 18df31d..b402991 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -609,8 +609,8 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
*/
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
- struct page *page;
- unsigned long *table;
+ unsigned long *uninitialized_var(table);
+ struct page *uninitialized_var(page);
unsigned int mask, bit;
if (mm_has_pgste(mm))
@@ -796,7 +796,7 @@ int s390_enable_sie(void)
struct mm_struct *mm, *old_mm;
/* Do we have switched amode? If no, we cannot do sie */
- if (addressing_mode == HOME_SPACE_MODE)
+ if (s390_user_mode == HOME_SPACE_MODE)
return -EINVAL;
/* Do we have pgstes? if yes, we are done */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 6f896e7..c22abf9 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -107,7 +107,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
pm_dir = pmd_offset(pu_dir, address);
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
(address + HPAGE_SIZE <= start + size) &&
(address >= HPAGE_SIZE)) {
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
new file mode 100644
index 0000000..90568c3
--- /dev/null
+++ b/arch/s390/net/Makefile
@@ -0,0 +1,4 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
new file mode 100644
index 0000000..7e45d13
--- /dev/null
+++ b/arch/s390/net/bpf_jit.S
@@ -0,0 +1,130 @@
+/*
+ * BPF Jit compiler for s390, help functions.
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/linkage.h>
+
+/*
+ * Calling convention:
+ * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved
+ *   %r2: skb pointer
+ *   %r3: offset parameter
+ *   %r5: BPF A accumulator
+ *   %r8: return address
+ *   %r9: save register for skb pointer
+ *   %r10: skb->data
+ *   %r11: skb->len - skb->data_len (headlen)
+ *   %r12: BPF X accumulator
+ *
+ * Every helper returns via %r8 with the condition code set: cc == 0
+ * means the load succeeded, cc != 0 means the caller must bail out.
+ * %r1 is used as scratch (zero-extended offset).
+ *
+ * skb_copy_bits(skb, offset, to, len) takes 4 parameters:
+ *   %r2 = skb pointer
+ *   %r3 = offset into skb data
+ *   %r4 = pointer to temp buffer
+ *   %r5 = length to copy
+ * The temp buffer is the word at 160(%r15); shorter loads are copied
+ * right-aligned into it and the leading bytes are cleared.
+ */
+/* NOTE(review): SKBDATA is not referenced anywhere in this file. */
+#define SKBDATA	%r8
+
+	/* A = *(u32 *) (skb->data+K+X) */
+ENTRY(sk_load_word_ind)
+	ar	%r3,%r12		# offset += X
+	bmr	%r8			# < 0 -> return with cc
+
+	/* A = *(u32 *) (skb->data+K) */
+ENTRY(sk_load_word)
+	llgfr	%r1,%r3			# extend offset
+	ahi	%r3,4			# offset + 4
+	clr	%r11,%r3		# hlen < offset + 4 ?
+	jl	sk_load_word_slow
+	l	%r5,0(%r1,%r10)		# get word from skb
+	xr	%r1,%r1			# set cc to zero
+	br	%r8
+
+sk_load_word_slow:
+	lgr	%r9,%r2			# save %r2
+	lgr	%r3,%r1			# offset (undo the ahi above)
+	la	%r4,160(%r15)		# pointer to temp buffer
+	lghi	%r5,4			# 4 bytes
+	brasl	%r14,skb_copy_bits	# get data from skb
+	l	%r5,160(%r15)		# load result from temp buffer
+	ltgr	%r2,%r2			# set cc to (%r2 != 0)
+	lgr	%r2,%r9			# restore %r2
+	br	%r8
+
+	/* A = *(u16 *) (skb->data+K+X) */
+ENTRY(sk_load_half_ind)
+	ar	%r3,%r12		# offset += X
+	bmr	%r8			# < 0 -> return with cc
+
+	/* A = *(u16 *) (skb->data+K) */
+ENTRY(sk_load_half)
+	llgfr	%r1,%r3			# extend offset
+	ahi	%r3,2			# offset + 2
+	clr	%r11,%r3		# hlen < offset + 2 ?
+	jl	sk_load_half_slow
+	llgh	%r5,0(%r1,%r10)		# get half from skb
+	xr	%r1,%r1			# set cc to zero
+	br	%r8
+
+sk_load_half_slow:
+	lgr	%r9,%r2			# save %r2
+	lgr	%r3,%r1			# offset (undo the ahi above)
+	la	%r4,162(%r15)		# pointer to temp buffer
+	lghi	%r5,2			# 2 bytes
+	brasl	%r14,skb_copy_bits	# get data from skb
+	xc	160(2,%r15),160(%r15)	# clear the upper half of the word
+	l	%r5,160(%r15)		# load result from temp buffer
+	ltgr	%r2,%r2			# set cc to (%r2 != 0)
+	lgr	%r2,%r9			# restore %r2
+	br	%r8
+
+	/* A = *(u8 *) (skb->data+K+X) */
+ENTRY(sk_load_byte_ind)
+	ar	%r3,%r12		# offset += X
+	bmr	%r8			# < 0 -> return with cc
+
+	/* A = *(u8 *) (skb->data+K) */
+ENTRY(sk_load_byte)
+	llgfr	%r1,%r3			# extend offset
+	clr	%r11,%r3		# hlen <= offset ?
+	jle	sk_load_byte_slow
+	lhi	%r5,0
+	ic	%r5,0(%r1,%r10)		# get byte from skb
+	xr	%r1,%r1			# set cc to zero
+	br	%r8
+
+sk_load_byte_slow:
+	lgr	%r9,%r2			# save %r2
+	lgr	%r3,%r1			# offset
+	la	%r4,163(%r15)		# pointer to temp buffer
+	lghi	%r5,1			# 1 byte
+	brasl	%r14,skb_copy_bits	# get data from skb
+	xc	160(3,%r15),160(%r15)	# clear the upper 3 bytes of the word
+	l	%r5,160(%r15)		# load result from temp buffer
+	ltgr	%r2,%r2			# set cc to (%r2 != 0)
+	lgr	%r2,%r9			# restore %r2
+	br	%r8
+
+	/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
+ENTRY(sk_load_byte_msh)
+	llgfr	%r1,%r3			# extend offset
+	clr	%r11,%r3		# hlen <= offset ?
+	jle	sk_load_byte_msh_slow	# must load into X, not A
+	lhi	%r12,0
+	ic	%r12,0(%r1,%r10)	# get byte from skb
+	nill	%r12,0x0f
+	sll	%r12,2
+	xr	%r1,%r1			# set cc to zero
+	br	%r8
+
+	/*
+	 * NOTE(review): %r5 (BPF A) is used as an argument register for
+	 * skb_copy_bits here and is not reloaded afterwards, so A does
+	 * not survive this slow path - confirm that callers can cope.
+	 */
+sk_load_byte_msh_slow:
+	lgr	%r9,%r2			# save %r2
+	lgr	%r3,%r1			# offset
+	la	%r4,163(%r15)		# pointer to temp buffer
+	lghi	%r5,1			# 1 byte
+	brasl	%r14,skb_copy_bits	# get data from skb
+	xc	160(3,%r15),160(%r15)	# clear the upper 3 bytes of the word
+	l	%r12,160(%r15)		# load result from temp buffer
+	nill	%r12,0x0f
+	sll	%r12,2
+	ltgr	%r2,%r2			# set cc to (%r2 != 0)
+	lgr	%r2,%r9			# restore %r2
+	br	%r8
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
new file mode 100644
index 0000000..9b355b4
--- /dev/null
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -0,0 +1,776 @@
+/*
+ * BPF Jit compiler for s390.
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/moduleloader.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <asm/cacheflush.h>
+#include <asm/processor.h>
+#include <asm/facility.h>
+
+/*
+ * Conventions:
+ * %r2 = skb pointer
+ * %r3 = offset parameter
+ * %r4 = scratch register / length parameter
+ * %r5 = BPF A accumulator
+ * %r8 = return address
+ * %r9 = save register for skb pointer
+ * %r10 = skb->data
+ * %r11 = skb->len - skb->data_len (headlen)
+ * %r12 = BPF X accumulator
+ * %r13 = literal pool pointer
+ * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS
+ */
+int bpf_jit_enable __read_mostly;
+
+/*
+ * assembly code in arch/s390/net/bpf_jit.S
+ */
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+
+/*
+ * State of one JIT compilation.  The image consists of a code area
+ * [start, mid) followed by a literal pool [mid, end).  While no image
+ * has been allocated yet (start == NULL) the emit macros only advance
+ * prg/lit, which measures the required sizes.
+ */
+struct bpf_jit {
+ unsigned int seen; /* bitmask of SEEN_* flags */
+ u8 *start; /* start of the image, NULL while sizing */
+ u8 *prg; /* current position for emitted code */
+ u8 *mid; /* end of code area, start of literal pool */
+ u8 *lit; /* current position in the literal pool */
+ u8 *end; /* end of the literal pool */
+ u8 *base_ip; /* address after "basr %r13,0", base for literals */
+ u8 *ret0_ip; /* address of the "return 0" sequence */
+ u8 *exit_ip; /* address of the common exit sequence */
+ unsigned int off_load_word; /* literal offset of sk_load_word */
+ unsigned int off_load_half; /* literal offset of sk_load_half */
+ unsigned int off_load_byte; /* literal offset of sk_load_byte */
+ unsigned int off_load_bmsh; /* literal offset of sk_load_byte_msh */
+ unsigned int off_load_iword; /* literal offset of sk_load_word_ind */
+ unsigned int off_load_ihalf; /* literal offset of sk_load_half_ind */
+ unsigned int off_load_ibyte; /* literal offset of sk_load_byte_ind */
+};
+
+#define BPF_SIZE_MAX 4096 /* Max size for program */
+
+/* Flags collected in bpf_jit.seen while translating the filter */
+#define SEEN_DATAREF 1 /* might call external helpers */
+#define SEEN_XREG 2 /* BPF X register (%r12) is used */
+#define SEEN_MEM 4 /* use mem[] for temporary storage */
+#define SEEN_RET0 8 /* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL 16 /* code uses literals */
+#define SEEN_LOAD_WORD 32 /* code uses sk_load_word */
+#define SEEN_LOAD_HALF 64 /* code uses sk_load_half */
+#define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */
+#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */
+#define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */
+#define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */
+#define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */
+
+/*
+ * Emit helpers.  All of them expect a local "struct bpf_jit *jit".
+ * An instruction is only stored if it fits below jit->mid, but
+ * jit->prg is always advanced, so a pass without an image just
+ * counts the bytes that would be emitted.
+ */
+
+/* Emit a 2 byte instruction */
+#define EMIT2(op) \
+({ \
+ if (jit->prg + 2 <= jit->mid) \
+ *(u16 *) jit->prg = op; \
+ jit->prg += 2; \
+})
+
+/* Emit a 4 byte instruction */
+#define EMIT4(op) \
+({ \
+ if (jit->prg + 4 <= jit->mid) \
+ *(u32 *) jit->prg = op; \
+ jit->prg += 4; \
+})
+
+/* Emit a 4 byte instruction with the low 12 bits of disp or'ed in */
+#define EMIT4_DISP(op, disp) \
+({ \
+ unsigned int __disp = (disp) & 0xfff; \
+ EMIT4(op | __disp); \
+})
+
+/* Emit a 4 byte instruction with the low 16 bits of imm or'ed in */
+#define EMIT4_IMM(op, imm) \
+({ \
+ unsigned int __imm = (imm) & 0xffff; \
+ EMIT4(op | __imm); \
+})
+
+/*
+ * Emit a 4 byte instruction with a 16 bit relative target; pcrel is a
+ * byte distance and is halved before it is or'ed into the instruction.
+ */
+#define EMIT4_PCREL(op, pcrel) \
+({ \
+ long __pcrel = ((pcrel) >> 1) & 0xffff; \
+ EMIT4(op | __pcrel); \
+})
+
+/* Emit a 6 byte instruction given as a 4 byte and a 2 byte part */
+#define EMIT6(op1, op2) \
+({ \
+ if (jit->prg + 6 <= jit->mid) { \
+ *(u32 *) jit->prg = op1; \
+ *(u16 *) (jit->prg + 4) = op2; \
+ } \
+ jit->prg += 6; \
+})
+
+/* Emit a 6 byte instruction with the low 12 bits of disp in op1 */
+#define EMIT6_DISP(op1, op2, disp) \
+({ \
+ unsigned int __disp = (disp) & 0xfff; \
+ EMIT6(op1 | __disp, op2); \
+})
+
+/* Emit a 6 byte instruction with a 32 bit immediate split over both parts */
+#define EMIT6_IMM(op, imm) \
+({ \
+ unsigned int __imm = (imm); \
+ EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+})
+
+/*
+ * Append a 32 bit constant to the literal pool and return its offset
+ * relative to jit->base_ip.  Sets SEEN_LITERAL.
+ */
+#define EMIT_CONST(val) \
+({ \
+ unsigned int ret; \
+ ret = (unsigned int) (jit->lit - jit->base_ip); \
+ jit->seen |= SEEN_LITERAL; \
+ if (jit->lit + 4 <= jit->end) \
+ *(u32 *) jit->lit = val; \
+ jit->lit += 4; \
+ ret; \
+})
+
+/*
+ * If the flag "bit" is set in jit->seen, append the 8 byte address fn
+ * to the literal pool.  Always returns the current literal offset
+ * relative to jit->base_ip, whether or not a slot was added.
+ */
+#define EMIT_FN_CONST(bit, fn) \
+({ \
+ unsigned int ret; \
+ ret = (unsigned int) (jit->lit - jit->base_ip); \
+ if (jit->seen & bit) { \
+ jit->seen |= SEEN_LITERAL; \
+ if (jit->lit + 8 <= jit->end) \
+ *(void **) jit->lit = fn; \
+ jit->lit += 8; \
+ } \
+ ret; \
+})
+
+/*
+ * Emit the function prologue.  What is emitted depends on the flags in
+ * jit->seen collected so far: the register save sequence, the literal
+ * pool base setup (which also records jit->base_ip), the literal slots
+ * for the helper functions that are used, and the loads of
+ * %r11 (len - data_len) and %r10 (data) from the skb in %r2.
+ */
+static void bpf_jit_prologue(struct bpf_jit *jit)
+{
+ /* Save registers and create stack frame if necessary */
+ if (jit->seen & SEEN_DATAREF) {
+ /* stmg %r8,%r15,88(%r15) */
+ EMIT6(0xeb8ff058, 0x0024);
+ /* lgr %r14,%r15 */
+ EMIT4(0xb90400ef);
+ /* ahi %r15,<offset> */
+ EMIT4_IMM(0xa7fa0000, (jit->seen & SEEN_MEM) ? -112 : -80);
+ /* stg %r14,152(%r15) */
+ EMIT6(0xe3e0f098, 0x0024);
+ } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
+ /* stmg %r12,%r13,120(%r15) */
+ EMIT6(0xebcdf078, 0x0024);
+ else if (jit->seen & SEEN_XREG)
+ /* stg %r12,120(%r15) */
+ EMIT6(0xe3c0f078, 0x0024);
+ else if (jit->seen & SEEN_LITERAL)
+ /* stg %r13,128(%r15) */
+ EMIT6(0xe3d0f080, 0x0024);
+
+ /* Setup literal pool */
+ if (jit->seen & SEEN_LITERAL) {
+ /* basr %r13,0 */
+ EMIT2(0x0dd0);
+ /* literal offsets are relative to the address after the basr */
+ jit->base_ip = jit->prg;
+ }
+ /* Reserve a literal slot for every helper flagged in jit->seen */
+ jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word);
+ jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half);
+ jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte);
+ jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh);
+ jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind);
+ jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind);
+ jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind);
+
+ /* Filter needs to access skb data */
+ if (jit->seen & SEEN_DATAREF) {
+ /* l %r11,<len>(%r2) */
+ EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len));
+ /* s %r11,<data_len>(%r2) */
+ EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len));
+ /* lg %r10,<data>(%r2) */
+ EMIT6_DISP(0xe3a02000, 0x0004,
+ offsetof(struct sk_buff, data));
+ }
+}
+
+/*
+ * Emit the function epilogue and record the two jump targets used by
+ * the instruction translation: jit->ret0_ip (sets the return value to
+ * 0, only emitted if SEEN_RET0 is set) and jit->exit_ip (restores the
+ * registers saved by the prologue and returns via %r14).
+ */
+static void bpf_jit_epilogue(struct bpf_jit *jit)
+{
+ /* Return 0 */
+ if (jit->seen & SEEN_RET0) {
+ jit->ret0_ip = jit->prg;
+ /* lghi %r2,0 */
+ EMIT4(0xa7290000);
+ }
+ jit->exit_ip = jit->prg;
+ /* Restore registers */
+ if (jit->seen & SEEN_DATAREF)
+ /* lmg %r8,%r15,<offset>(%r15) */
+ /* offset = 88 + frame size chosen by the prologue (112 or 80) */
+ EMIT6_DISP(0xeb8ff000, 0x0004,
+ (jit->seen & SEEN_MEM) ? 200 : 168);
+ else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
+ /* lmg %r12,%r13,120(%r15) */
+ EMIT6(0xebcdf078, 0x0004);
+ else if (jit->seen & SEEN_XREG)
+ /* lg %r12,120(%r15) */
+ EMIT6(0xe3c0f078, 0x0004);
+ else if (jit->seen & SEEN_LITERAL)
+ /* lg %r13,128(%r15) */
+ EMIT6(0xe3d0f080, 0x0004);
+ /* br %r14 */
+ EMIT2(0x07fe);
+}
+
+/*
+ * make sure we dont leak kernel information to user
+ *
+ * Emits code that clears the scratch memory (if SEEN_MEM), the X
+ * register (if SEEN_XREG) and the A register, so that a filter cannot
+ * observe stale register or stack contents.  Clearing A is skipped
+ * only if the first instruction in @filter overwrites A itself or
+ * returns a constant.
+ */
+static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter)
+{
+	/* Clear temporary memory if (seen & SEEN_MEM) */
+	if (jit->seen & SEEN_MEM)
+		/* xc 0(64,%r15),0(%r15) */
+		EMIT6(0xd73ff000, 0xf000);
+	/* Clear X if (seen & SEEN_XREG) */
+	if (jit->seen & SEEN_XREG)
+		/* lhi %r12,0 */
+		EMIT4(0xa7c80000);
+	/* Clear A if the first instruction does not set it. */
+	switch (filter[0].code) {
+	case BPF_S_LD_W_ABS:
+	case BPF_S_LD_H_ABS:
+	case BPF_S_LD_B_ABS:
+	case BPF_S_LD_W_LEN:
+	case BPF_S_LD_W_IND:
+	case BPF_S_LD_H_IND:
+	case BPF_S_LD_B_IND:
+	case BPF_S_LD_IMM:
+	case BPF_S_LD_MEM:
+	case BPF_S_MISC_TXA:
+	case BPF_S_ANC_PROTOCOL:
+	case BPF_S_ANC_PKTTYPE:
+	case BPF_S_ANC_IFINDEX:
+	case BPF_S_ANC_MARK:
+	case BPF_S_ANC_QUEUE:
+	case BPF_S_ANC_HATYPE:
+	case BPF_S_ANC_RXHASH:
+	case BPF_S_ANC_CPU:
+	case BPF_S_RET_K:
+		/* first instruction sets A register */
+		break;
+	default: /* A = 0 */
+		/*
+		 * This includes BPF_S_LDX_B_MSH: it loads X, not A, so A
+		 * still has to be cleared.
+		 */
+		/* lhi %r5,0 */
+		EMIT4(0xa7580000);
+	}
+}
+
+/*
+ * Translate one BPF instruction to s390 machine code.
+ *
+ * @jit:    compilation state, code is appended at jit->prg
+ * @filter: the BPF instruction to translate
+ * @addrs:  per instruction end offsets (relative to jit->start) from
+ *          the previous pass, used to compute jump distances;
+ *          addrs[i] is updated for this instruction
+ * @i:      index of @filter within the program
+ * @last:   non-zero if this is the last instruction of the program
+ *
+ * BPF arithmetic is unsigned 32 bit, so compares, divides and the
+ * reciprocal multiply use the logical (unsigned) instructions.
+ *
+ * Returns 0 on success and -1 if the instruction cannot be translated.
+ */
+static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
+			unsigned int *addrs, int i, int last)
+{
+	unsigned int K;
+	int offset;
+	unsigned int mask;
+
+	K = filter->k;
+	switch (filter->code) {
+	case BPF_S_ALU_ADD_X: /* A += X */
+		jit->seen |= SEEN_XREG;
+		/* ar %r5,%r12 */
+		EMIT2(0x1a5c);
+		break;
+	case BPF_S_ALU_ADD_K: /* A += K */
+		if (!K)
+			break;
+		if (K <= 16383)
+			/* ahi %r5,<K> */
+			EMIT4_IMM(0xa75a0000, K);
+		else if (test_facility(21))
+			/* alfi %r5,<K> */
+			EMIT6_IMM(0xc25b0000, K);
+		else
+			/* a %r5,<d(K)>(%r13) */
+			EMIT4_DISP(0x5a50d000, EMIT_CONST(K));
+		break;
+	case BPF_S_ALU_SUB_X: /* A -= X */
+		jit->seen |= SEEN_XREG;
+		/* sr %r5,%r12 */
+		EMIT2(0x1b5c);
+		break;
+	case BPF_S_ALU_SUB_K: /* A -= K */
+		if (!K)
+			break;
+		if (K <= 16384)
+			/* ahi %r5,-K */
+			EMIT4_IMM(0xa75a0000, -K);
+		else if (test_facility(21))
+			/* alfi %r5,-K */
+			EMIT6_IMM(0xc25b0000, -K);
+		else
+			/* s %r5,<d(K)>(%r13) */
+			EMIT4_DISP(0x5b50d000, EMIT_CONST(K));
+		break;
+	case BPF_S_ALU_MUL_X: /* A *= X */
+		jit->seen |= SEEN_XREG;
+		/* msr %r5,%r12 */
+		EMIT4(0xb252005c);
+		break;
+	case BPF_S_ALU_MUL_K: /* A *= K */
+		if (K <= 16383)
+			/* mhi %r5,K */
+			EMIT4_IMM(0xa75c0000, K);
+		else if (test_facility(34))
+			/* msfi %r5,<K> */
+			EMIT6_IMM(0xc2510000, K);
+		else
+			/* ms %r5,<d(K)>(%r13) */
+			EMIT4_DISP(0x7150d000, EMIT_CONST(K));
+		break;
+	case BPF_S_ALU_DIV_X: /* A /= X */
+		jit->seen |= SEEN_XREG | SEEN_RET0;
+		/* ltr %r12,%r12 */
+		EMIT2(0x12cc);
+		/* jz <ret0> */
+		EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
+		/* lhi %r4,0 */
+		EMIT4(0xa7480000);
+		/*
+		 * Unsigned divide of %r4:%r5 by X, quotient in %r5.  The
+		 * signed "dr" would misinterpret X >= 0x80000000 and can
+		 * raise a divide exception for A >= 0x80000000.
+		 */
+		/* dlr %r4,%r12 */
+		EMIT4(0xb997004c);
+		break;
+	case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */
+		/*
+		 * Unsigned 32x32->64 multiply of %r5 with K, the high
+		 * word of the product ends up in %r4.
+		 */
+		/* ml %r4,<d(K)>(%r13) */
+		EMIT6_DISP(0xe340d000, 0x0096, EMIT_CONST(K));
+		/* lr %r5,%r4 */
+		EMIT2(0x1854);
+		break;
+	case BPF_S_ALU_AND_X: /* A &= X */
+		jit->seen |= SEEN_XREG;
+		/* nr %r5,%r12 */
+		EMIT2(0x145c);
+		break;
+	case BPF_S_ALU_AND_K: /* A &= K */
+		if (test_facility(21))
+			/* nilf %r5,<K> */
+			EMIT6_IMM(0xc05b0000, K);
+		else
+			/* n %r5,<d(K)>(%r13) */
+			EMIT4_DISP(0x5450d000, EMIT_CONST(K));
+		break;
+	case BPF_S_ALU_OR_X: /* A |= X */
+		jit->seen |= SEEN_XREG;
+		/* or %r5,%r12 */
+		EMIT2(0x165c);
+		break;
+	case BPF_S_ALU_OR_K: /* A |= K */
+		if (test_facility(21))
+			/* oilf %r5,<K> */
+			EMIT6_IMM(0xc05d0000, K);
+		else
+			/* o %r5,<d(K)>(%r13) */
+			EMIT4_DISP(0x5650d000, EMIT_CONST(K));
+		break;
+	case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
+		jit->seen |= SEEN_XREG;
+		/* xr %r5,%r12 */
+		EMIT2(0x175c);
+		break;
+	case BPF_S_ALU_LSH_X: /* A <<= X; */
+		jit->seen |= SEEN_XREG;
+		/* sll %r5,0(%r12) */
+		EMIT4(0x8950c000);
+		break;
+	case BPF_S_ALU_LSH_K: /* A <<= K */
+		if (K == 0)
+			break;
+		/* sll %r5,K */
+		EMIT4_DISP(0x89500000, K);
+		break;
+	case BPF_S_ALU_RSH_X: /* A >>= X; */
+		jit->seen |= SEEN_XREG;
+		/* srl %r5,0(%r12) */
+		EMIT4(0x8850c000);
+		break;
+	case BPF_S_ALU_RSH_K: /* A >>= K; */
+		if (K == 0)
+			break;
+		/* srl %r5,K */
+		EMIT4_DISP(0x88500000, K);
+		break;
+	case BPF_S_ALU_NEG: /* A = -A */
+		/*
+		 * Load complement negates the value; "lnr" (load
+		 * negative) would compute -|A| instead.
+		 */
+		/* lcr %r5,%r5 */
+		EMIT2(0x1355);
+		break;
+	case BPF_S_JMP_JA: /* ip += K */
+		offset = addrs[i + K] + jit->start - jit->prg;
+		EMIT4_PCREL(0xa7f40000, offset);
+		break;
+	case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */
+		mask = 0x200000; /* jh */
+		goto kbranch;
+	case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */
+		mask = 0xa00000; /* jhe */
+		goto kbranch;
+	case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */
+		mask = 0x800000; /* je */
+kbranch:	/* Emit compare if the branch targets are different */
+		if (filter->jt != filter->jf) {
+			/* BPF compares are unsigned: use logical compares */
+			if (test_facility(21))
+				/* clfi %r5,<K> */
+				EMIT6_IMM(0xc25f0000, K);
+			else
+				/* cl %r5,<d(K)>(%r13) */
+				EMIT4_DISP(0x5550d000, EMIT_CONST(K));
+		}
+branch:		if (filter->jt == filter->jf) {
+			if (filter->jt == 0)
+				break;
+			/* j <jt> */
+			offset = addrs[i + filter->jt] + jit->start - jit->prg;
+			EMIT4_PCREL(0xa7f40000, offset);
+			break;
+		}
+		if (filter->jt != 0) {
+			/* brc <mask>,<jt> */
+			offset = addrs[i + filter->jt] + jit->start - jit->prg;
+			EMIT4_PCREL(0xa7040000 | mask, offset);
+		}
+		if (filter->jf != 0) {
+			/* brc <mask^15>,<jf> */
+			offset = addrs[i + filter->jf] + jit->start - jit->prg;
+			EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset);
+		}
+		break;
+	case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */
+		mask = 0x700000; /* jnz */
+		/* Emit test if the branch targets are different */
+		if (filter->jt != filter->jf) {
+			if (K > 65535) {
+				/* lr %r4,%r5 */
+				EMIT2(0x1845);
+				/* n %r4,<d(K)>(%r13) */
+				EMIT4_DISP(0x5440d000, EMIT_CONST(K));
+			} else
+				/* tmll %r5,K */
+				EMIT4_IMM(0xa7510000, K);
+		}
+		goto branch;
+	case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */
+		mask = 0x200000; /* jh */
+		goto xbranch;
+	case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */
+		mask = 0xa00000; /* jhe */
+		goto xbranch;
+	case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */
+		mask = 0x800000; /* je */
+xbranch:	/* Emit compare if the branch targets are different */
+		if (filter->jt != filter->jf) {
+			jit->seen |= SEEN_XREG;
+			/* unsigned compare, see kbranch */
+			/* clr %r5,%r12 */
+			EMIT2(0x155c);
+		}
+		goto branch;
+	case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */
+		mask = 0x700000; /* jnz */
+		/* Emit test if the branch targets are different */
+		if (filter->jt != filter->jf) {
+			jit->seen |= SEEN_XREG;
+			/* lr %r4,%r5 */
+			EMIT2(0x1845);
+			/* nr %r4,%r12 */
+			EMIT2(0x144c);
+		}
+		goto branch;
+	case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */
+		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD;
+		offset = jit->off_load_word;
+		goto load_abs;
+	case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */
+		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF;
+		offset = jit->off_load_half;
+		goto load_abs;
+	case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */
+		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE;
+		offset = jit->off_load_byte;
+load_abs:	if ((int) K < 0)
+			goto out;
+call_fn:	/* lg %r1,<d(function)>(%r13) */
+		EMIT6_DISP(0xe310d000, 0x0004, offset);
+		/* l %r3,<d(K)>(%r13) */
+		EMIT4_DISP(0x5830d000, EMIT_CONST(K));
+		/* basr %r8,%r1 */
+		EMIT2(0x0d81);
+		/* jnz <ret0> */
+		EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg));
+		break;
+	case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */
+		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD;
+		offset = jit->off_load_iword;
+		goto call_fn;
+	case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */
+		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF;
+		offset = jit->off_load_ihalf;
+		goto call_fn;
+	case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */
+		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE;
+		offset = jit->off_load_ibyte;
+		goto call_fn;
+	case BPF_S_LDX_B_MSH:
+		/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
+		jit->seen |= SEEN_RET0;
+		if ((int) K < 0) {
+			/* j <ret0> */
+			EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg));
+			break;
+		}
+		jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH;
+		offset = jit->off_load_bmsh;
+		goto call_fn;
+	case BPF_S_LD_W_LEN: /*	A = skb->len; */
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+		/* l %r5,<d(len)>(%r2) */
+		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len));
+		break;
+	case BPF_S_LDX_W_LEN: /* X = skb->len; */
+		jit->seen |= SEEN_XREG;
+		/* l %r12,<d(len)>(%r2) */
+		EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len));
+		break;
+	case BPF_S_LD_IMM: /* A = K */
+		if (K <= 16383)
+			/* lhi %r5,K */
+			EMIT4_IMM(0xa7580000, K);
+		else if (test_facility(21))
+			/* llilf %r5,<K> */
+			EMIT6_IMM(0xc05f0000, K);
+		else
+			/* l %r5,<d(K)>(%r13) */
+			EMIT4_DISP(0x5850d000, EMIT_CONST(K));
+		break;
+	case BPF_S_LDX_IMM: /* X = K */
+		jit->seen |= SEEN_XREG;
+		if (K <= 16383)
+			/* lhi %r12,<K> */
+			EMIT4_IMM(0xa7c80000, K);
+		else if (test_facility(21))
+			/* llilf %r12,<K> */
+			EMIT6_IMM(0xc0cf0000, K);
+		else
+			/* l %r12,<d(K)>(%r13) */
+			EMIT4_DISP(0x58c0d000, EMIT_CONST(K));
+		break;
+	case BPF_S_LD_MEM: /* A = mem[K] */
+		jit->seen |= SEEN_MEM;
+		/* l %r5,<K>(%r15) */
+		EMIT4_DISP(0x5850f000,
+			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
+		break;
+	case BPF_S_LDX_MEM: /* X = mem[K] */
+		jit->seen |= SEEN_XREG | SEEN_MEM;
+		/* l %r12,<K>(%r15) */
+		EMIT4_DISP(0x58c0f000,
+			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
+		break;
+	case BPF_S_MISC_TAX: /* X = A */
+		jit->seen |= SEEN_XREG;
+		/* lr %r12,%r5 */
+		EMIT2(0x18c5);
+		break;
+	case BPF_S_MISC_TXA: /* A = X */
+		jit->seen |= SEEN_XREG;
+		/* lr %r5,%r12 */
+		EMIT2(0x185c);
+		break;
+	case BPF_S_RET_K:
+		if (K == 0) {
+			jit->seen |= SEEN_RET0;
+			/* the epilogue's "return 0" follows directly */
+			if (last)
+				break;
+			/* j <ret0> */
+			EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg);
+		} else {
+			if (K <= 16383)
+				/* lghi %r2,K */
+				EMIT4_IMM(0xa7290000, K);
+			else
+				/* llgf %r2,<K>(%r13) */
+				EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K));
+			/* j <exit> */
+			if (last && !(jit->seen & SEEN_RET0))
+				break;
+			EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+		}
+		break;
+	case BPF_S_RET_A:
+		/* llgfr %r2,%r5 */
+		EMIT4(0xb9160025);
+		/* j <exit> */
+		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+		break;
+	case BPF_S_ST: /* mem[K] = A */
+		jit->seen |= SEEN_MEM;
+		/* st %r5,<K>(%r15) */
+		EMIT4_DISP(0x5050f000,
+			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
+		break;
+	case BPF_S_STX: /* mem[K] = X */
+		jit->seen |= SEEN_XREG | SEEN_MEM;
+		/* st %r12,<K>(%r15) */
+		EMIT4_DISP(0x50c0f000,
+			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
+		break;
+	case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+		/* lhi %r5,0 */
+		EMIT4(0xa7580000);
+		/* icm	%r5,3,<d(protocol)>(%r2) */
+		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol));
+		break;
+	case BPF_S_ANC_IFINDEX:	/* if (!skb->dev) return 0;
+				 * A = skb->dev->ifindex */
+		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+		jit->seen |= SEEN_RET0;
+		/* lg %r1,<d(dev)>(%r2) */
+		EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev));
+		/* ltgr %r1,%r1 */
+		EMIT4(0xb9020011);
+		/* jz <ret0> */
+		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+		/* l %r5,<d(ifindex)>(%r1) */
+		EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex));
+		break;
+	case BPF_S_ANC_MARK: /* A = skb->mark */
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+		/* l %r5,<d(mark)>(%r2) */
+		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark));
+		break;
+	case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+		/* lhi %r5,0 */
+		EMIT4(0xa7580000);
+		/* icm	%r5,3,<d(queue_mapping)>(%r2) */
+		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping));
+		break;
+	case BPF_S_ANC_HATYPE:	/* if (!skb->dev) return 0;
+				 * A = skb->dev->type */
+		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+		jit->seen |= SEEN_RET0;
+		/* lg %r1,<d(dev)>(%r2) */
+		EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev));
+		/* ltgr %r1,%r1 */
+		EMIT4(0xb9020011);
+		/* jz <ret0> */
+		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+		/* lhi %r5,0 */
+		EMIT4(0xa7580000);
+		/* icm	%r5,3,<d(type)>(%r1) */
+		EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
+		break;
+	case BPF_S_ANC_RXHASH: /* A = skb->rxhash */
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
+		/* l %r5,<d(rxhash)>(%r2) */
+		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash));
+		break;
+	case BPF_S_ANC_CPU: /* A = smp_processor_id() */
+#ifdef CONFIG_SMP
+		/* l %r5,<d(cpu_nr)> */
+		EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr));
+#else
+		/* lhi %r5,0 */
+		EMIT4(0xa7580000);
+#endif
+		break;
+	default: /* too complex, give up */
+		goto out;
+	}
+	/* Record where this instruction ends, i.e. where the next starts */
+	addrs[i] = jit->prg - jit->start;
+	return 0;
+out:
+	return -1;
+}
+
+/*
+ * Try to compile the filter @fp to native code.
+ *
+ * The program is translated in up to 10 passes.  The first passes run
+ * without an image and only measure the code and literal pool sizes;
+ * once two consecutive passes agree, the image is allocated and later
+ * passes emit into it.  The result is installed in fp->bpf_func only
+ * if an emitting pass reproduces the state of the previous pass
+ * exactly.  On any failure fp->bpf_func is left untouched and a
+ * possibly allocated image is freed again.
+ */
+void bpf_jit_compile(struct sk_filter *fp)
+{
+	unsigned long size, prg_len, lit_len;
+	struct bpf_jit jit, cjit;
+	unsigned int *addrs;
+	int pass, i;
+	int converged = 0;
+
+	if (!bpf_jit_enable)
+		return;
+	/* zeroed array, kcalloc also checks the multiplication */
+	addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL);
+	if (addrs == NULL)
+		return;
+	memset(&jit, 0, sizeof(jit));
+	memset(&cjit, 0, sizeof(cjit));
+
+	for (pass = 0; pass < 10; pass++) {
+		jit.prg = jit.start;
+		jit.lit = jit.mid;
+
+		bpf_jit_prologue(&jit);
+		bpf_jit_noleaks(&jit, fp->insns);
+		for (i = 0; i < fp->len; i++) {
+			if (bpf_jit_insn(&jit, fp->insns + i, addrs, i,
+					 i == fp->len - 1))
+				goto out_free;
+		}
+		bpf_jit_epilogue(&jit);
+		if (jit.start) {
+			WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit);
+			if (memcmp(&jit, &cjit, sizeof(jit)) == 0) {
+				converged = 1;
+				break;
+			}
+		} else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
+			/* Sizes are stable: allocate the image */
+			prg_len = jit.prg - jit.start;
+			lit_len = jit.lit - jit.mid;
+			/* bpf_jit_free() puts a work_struct into the image */
+			size = max_t(unsigned long, prg_len + lit_len,
+				     sizeof(struct work_struct));
+			if (size >= BPF_SIZE_MAX)
+				goto out;
+			jit.start = module_alloc(size);
+			if (!jit.start)
+				goto out;
+			jit.prg = jit.mid = jit.start + prg_len;
+			jit.lit = jit.end = jit.start + prg_len + lit_len;
+			/* turn the offsets measured so far into addresses */
+			jit.base_ip += (unsigned long) jit.start;
+			jit.exit_ip += (unsigned long) jit.start;
+			jit.ret0_ip += (unsigned long) jit.start;
+		}
+		cjit = jit;
+	}
+	if (bpf_jit_enable > 1) {
+		pr_err("flen=%d proglen=%lu pass=%d image=%p\n",
+		       fp->len, jit.end - jit.start, pass, jit.start);
+		if (jit.start) {
+			printk(KERN_ERR "JIT code:\n");
+			print_fn_code(jit.start, jit.mid - jit.start);
+			print_hex_dump(KERN_ERR, "JIT literals:\n",
+				       DUMP_PREFIX_ADDRESS, 16, 1,
+				       jit.mid, jit.end - jit.mid, false);
+		}
+	}
+	if (converged) {
+		fp->bpf_func = (void *) jit.start;
+		goto out;
+	}
+out_free:
+	/*
+	 * Translation failed or never reached a fixed point: jump
+	 * targets in the image may be stale, so do not install it.
+	 */
+	if (jit.start)
+		module_free(NULL, jit.start);
+out:
+	kfree(addrs);
+}
+
+/*
+ * Work handler that frees a JIT image.  @arg is the pointer that was
+ * queued by bpf_jit_free(), i.e. the start of the image itself.
+ */
+static void jit_free_defer(struct work_struct *arg)
+{
+	void *image = arg;
+
+	module_free(NULL, image);
+}
+
+/*
+ * Release the JIT image of @fp, if it has one.
+ *
+ * This runs from softirq, so module_free() cannot be called directly;
+ * it is deferred to process context through a work_struct that is
+ * placed at the start of the image being freed.
+ */
+void bpf_jit_free(struct sk_filter *fp)
+{
+	if (fp->bpf_func != sk_run_filter) {
+		struct work_struct *work = (struct work_struct *)fp->bpf_func;
+
+		INIT_WORK(work, jit_free_defer);
+		schedule_work(work);
+	}
+}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index a1e9d69..584b936 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -169,7 +169,7 @@ static ssize_t hw_interval_write(struct file *file, char const __user *buf,
if (*offset)
return -EINVAL;
retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
+ if (retval <= 0)
return retval;
if (val < oprofile_min_interval)
oprofile_hw_interval = oprofile_min_interval;
@@ -212,7 +212,7 @@ static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
return -EINVAL;
retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
+ if (retval <= 0)
return retval;
if (val != 0)
return -EINVAL;
@@ -243,7 +243,7 @@ static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
return -EINVAL;
retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
+ if (retval <= 0)
return retval;
if (val != 0 && val != 1)
@@ -278,7 +278,7 @@ static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
return -EINVAL;
retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
+ if (retval <= 0)
return retval;
if (val != 0 && val != 1)
@@ -317,7 +317,7 @@ static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
return -EINVAL;
retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
+ if (retval <= 0)
return retval;
if (val != 0 && val != 1)
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index 2707023..637970c 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -27,6 +27,7 @@
#include <linux/reboot.h>
#include <linux/elfcore.h>
#include <linux/pm.h>
+#include <linux/rcupdate.h>
void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
@@ -50,9 +51,10 @@ void __noreturn cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
barrier();
-
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index b7cf6a5..7e605b9 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -933,7 +933,7 @@ ret_with_reschedule:
pta restore_all, tr1
- movi _TIF_SIGPENDING, r8
+ movi (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME), r8
and r8, r7, r8
pta work_notifysig, tr0
bne r8, ZERO, tr0
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index f67601c..b96489d 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -139,7 +139,7 @@ work_pending:
! r8: current_thread_info
! t: result of "tst #_TIF_NEED_RESCHED, r0"
bf/s work_resched
- tst #_TIF_SIGPENDING, r0
+ tst #(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME), r0
work_notifysig:
bt/s __restore_all
mov r15, r4
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 15e0a16..f1ddc0d 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -48,9 +48,7 @@ void *module_alloc(unsigned long size)
return NULL;
ret = module_map(size);
- if (!ret)
- ret = ERR_PTR(-ENOMEM);
- else
+ if (ret)
memset(ret, 0, size);
return ret;
@@ -116,6 +114,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
v = sym->st_value + rel[i].r_addend;
switch (ELF_R_TYPE(rel[i].r_info) & 0xff) {
+ case R_SPARC_DISP32:
+ v -= (Elf_Addr) location;
+ *loc32 = v;
+ break;
#ifdef CONFIG_SPARC64
case R_SPARC_64:
location[0] = v >> 56;
@@ -128,11 +130,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
location[7] = v >> 0;
break;
- case R_SPARC_DISP32:
- v -= (Elf_Addr) location;
- *loc32 = v;
- break;
-
case R_SPARC_WDISP19:
v -= (Elf_Addr) location;
*loc32 = (*loc32 & ~0x7ffff) |
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
index 7a7ce390..d5e86c9 100644
--- a/arch/tile/include/asm/topology.h
+++ b/arch/tile/include/asm/topology.h
@@ -69,7 +69,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 0*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h
index 15fb779..58105c3 100644
--- a/arch/tile/include/gxio/iorpc_trio.h
+++ b/arch/tile/include/gxio/iorpc_trio.h
@@ -25,21 +25,23 @@
#include <linux/module.h>
#include <asm/pgtable.h>
-#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400)
+#define GXIO_TRIO_OP_DEALLOC_ASID IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400)
+#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1401)
-#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1402)
+#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404)
-#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e)
-#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140f)
+#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412)
-#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1417)
-#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1418)
-#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1419)
-#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x141a)
+#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414)
-#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141c)
-#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141d)
-#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e)
+#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e)
+#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141f)
+#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1420)
+#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1421)
+
+#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1423)
+#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1424)
+#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1425)
#define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
#define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 664a60e..c17de0d 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -705,6 +705,7 @@ static void stack_proc(void *arg)
struct task_struct *from = current, *to = arg;
to->thread.saved_task = from;
+ rcu_switch(from, to);
switch_to(from, to, from);
}
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 69f1c57..33a6a24 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -20,14 +20,6 @@ struct mm_struct;
struct thread_struct {
struct task_struct *saved_task;
- /*
- * This flag is set to 1 before calling do_fork (and analyzed in
- * copy_thread) to mark that we are begin called from userspace (fork /
- * vfork / clone), and reset to 0 after. It is left to 0 when called
- * from kernelspace (i.e. kernel_thread() or fork_idle(),
- * as of 2.6.11).
- */
- int forking;
struct pt_regs regs;
int singlestep_syscall;
void *fault_addr;
@@ -58,7 +50,6 @@ struct thread_struct {
#define INIT_THREAD \
{ \
- .forking = 0, \
.regs = EMPTY_REGS, \
.fault_addr = NULL, \
.prev_sched = NULL, \
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 40db8f7..2df313b 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -7,16 +7,6 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
-DEFINE_STR(UM_KERN_EMERG, KERN_EMERG);
-DEFINE_STR(UM_KERN_ALERT, KERN_ALERT);
-DEFINE_STR(UM_KERN_CRIT, KERN_CRIT);
-DEFINE_STR(UM_KERN_ERR, KERN_ERR);
-DEFINE_STR(UM_KERN_WARNING, KERN_WARNING);
-DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE);
-DEFINE_STR(UM_KERN_INFO, KERN_INFO);
-DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG);
-DEFINE_STR(UM_KERN_CONT, KERN_CONT);
-
DEFINE(UM_ELF_CLASS, ELF_CLASS);
DEFINE(UM_ELFCLASS32, ELFCLASS32);
DEFINE(UM_ELFCLASS64, ELFCLASS64);
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index 4fa82c0..cef0685 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -26,6 +26,17 @@
extern void panic(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
+/* Requires preincluding include/linux/kern_levels.h */
+#define UM_KERN_EMERG KERN_EMERG
+#define UM_KERN_ALERT KERN_ALERT
+#define UM_KERN_CRIT KERN_CRIT
+#define UM_KERN_ERR KERN_ERR
+#define UM_KERN_WARNING KERN_WARNING
+#define UM_KERN_NOTICE KERN_NOTICE
+#define UM_KERN_INFO KERN_INFO
+#define UM_KERN_DEBUG KERN_DEBUG
+#define UM_KERN_CONT KERN_CONT
+
#ifdef UML_CONFIG_PRINTK
extern int printk(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 6cade93..8c82786 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -39,34 +39,21 @@ void flush_thread(void)
void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
{
+ get_safe_registers(regs->regs.gp, regs->regs.fp);
PT_REGS_IP(regs) = eip;
PT_REGS_SP(regs) = esp;
-}
-EXPORT_SYMBOL(start_thread);
-
-static long execve1(const char *file,
- const char __user *const __user *argv,
- const char __user *const __user *env)
-{
- long error;
-
- error = do_execve(file, argv, env, &current->thread.regs);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
+ current->ptrace &= ~PT_DTRACE;
#ifdef SUBARCH_EXECVE1
- SUBARCH_EXECVE1(&current->thread.regs.regs);
+ SUBARCH_EXECVE1(regs->regs);
#endif
- task_unlock(current);
- }
- return error;
}
+EXPORT_SYMBOL(start_thread);
long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env)
{
long err;
- err = execve1(file, argv, env);
+ err = do_execve(file, argv, env, &current->thread.regs);
if (!err)
UML_LONGJMP(current->thread.exec_buf, 1);
return err;
@@ -81,7 +68,7 @@ long sys_execve(const char __user *file, const char __user *const __user *argv,
filename = getname(file);
error = PTR_ERR(filename);
if (IS_ERR(filename)) goto out;
- error = execve1(filename, argv, env);
+ error = do_execve(filename, argv, env, &current->thread.regs);
putname(filename);
out:
return error;
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 57fc702..c5f5afa 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
struct pt_regs *regs)
{
void (*handler)(void);
+ int kthread = current->flags & PF_KTHREAD;
int ret = 0;
p->thread = (struct thread_struct) INIT_THREAD;
- if (current->thread.forking) {
+ if (!kthread) {
memcpy(&p->thread.regs.regs, &regs->regs,
sizeof(p->thread.regs.regs));
PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0);
@@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
handler = fork_handler;
arch_copy_thread(&current->thread.arch, &p->thread.arch);
- }
- else {
+ } else {
get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
p->thread.request.u.thread = current->thread.request.u.thread;
handler = new_thread_handler;
@@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
- if (current->thread.forking) {
+ if (!kthread) {
clear_flushed_tls(p);
/*
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 7362d58..cc9c235 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
struct k_sigaction *ka, siginfo_t *info)
{
sigset_t *oldset = sigmask_to_save();
+ int singlestep = 0;
unsigned long sp;
int err;
+ if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
+ singlestep = 1;
+
/* Did we come from a system call? */
if (PT_REGS_SYSCALL_NR(regs) >= 0) {
/* If so, check system call restarting.. */
@@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
if (err)
force_sigsegv(signr, current);
else
- signal_delivered(signr, info, ka, regs, 0);
+ signal_delivered(signr, info, ka, regs, singlestep);
}
static int kern_do_signal(struct pt_regs *regs)
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index f958cb8..a4c6d8e 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -17,25 +17,25 @@
long sys_fork(void)
{
- long ret;
-
- current->thread.forking = 1;
- ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
+ return do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
&current->thread.regs, 0, NULL, NULL);
- current->thread.forking = 0;
- return ret;
}
long sys_vfork(void)
{
- long ret;
-
- current->thread.forking = 1;
- ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
UPT_SP(&current->thread.regs.regs),
&current->thread.regs, 0, NULL, NULL);
- current->thread.forking = 0;
- return ret;
+}
+
+long sys_clone(unsigned long clone_flags, unsigned long newsp,
+ void __user *parent_tid, void __user *child_tid)
+{
+ if (!newsp)
+ newsp = UPT_SP(&current->thread.regs.regs);
+
+ return do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
+ child_tid);
}
long old_mmap(unsigned long addr, unsigned long len,
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index f602385..0748fe0 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -114,7 +114,7 @@ static void deliver_alarm(void)
skew += this_tick - last_tick;
while (skew >= one_tick) {
- alarm_handler(SIGVTALRM, NULL);
+ alarm_handler(SIGVTALRM, NULL, NULL);
skew -= one_tick;
}
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index d50270d..15889df 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS))
USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
$(USER_OBJS:.o=.%): \
- c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o)
+ c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o)
# These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of
# using it directly.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1a..9436670 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -7,11 +7,13 @@ config 64BIT
Say no to build a 32-bit kernel - formerly known as i386
config X86_32
- def_bool !64BIT
+ def_bool y
+ depends on !64BIT
select CLKSRC_I8253
config X86_64
- def_bool 64BIT
+ def_bool y
+ depends on 64BIT
select X86_DEV_DMA_OPS
### Arch settings
@@ -36,6 +38,7 @@ config X86
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
@@ -60,6 +63,8 @@ config X86
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
select ANON_INODES
select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
select HAVE_CMPXCHG_LOCAL if !M386
@@ -97,9 +102,12 @@ config X86
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
+ select HAVE_RCU_USER_QS if X86_64
+ select HAVE_IRQ_TIME_ACCOUNTING
config INSTRUCTION_DECODER
- def_bool (KPROBES || PERF_EVENTS || UPROBES)
+ def_bool y
+ depends on KPROBES || PERF_EVENTS || UPROBES
config OUTPUT_FORMAT
string
@@ -127,13 +135,15 @@ config SBUS
bool
config NEED_DMA_MAP_STATE
- def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
+ def_bool y
+ depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
config NEED_SG_DMA_LENGTH
def_bool y
config GENERIC_ISA_DMA
- def_bool ISA_DMA_API
+ def_bool y
+ depends on ISA_DMA_API
config GENERIC_BUG
def_bool y
@@ -150,13 +160,16 @@ config GENERIC_GPIO
bool
config ARCH_MAY_HAVE_PC_FDC
- def_bool ISA_DMA_API
+ def_bool y
+ depends on ISA_DMA_API
config RWSEM_GENERIC_SPINLOCK
- def_bool !X86_XADD
+ def_bool y
+ depends on !X86_XADD
config RWSEM_XCHGADD_ALGORITHM
- def_bool X86_XADD
+ def_bool y
+ depends on X86_XADD
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -746,13 +759,14 @@ config SWIOTLB
def_bool y if X86_64
---help---
Support for software bounce buffers used on x86-64 systems
- which don't have a hardware IOMMU (e.g. the current generation
- of Intel's x86-64 CPUs). Using this PCI devices which can only
- access 32-bits of memory can be used on systems with more than
- 3 GB of memory. If unsure, say Y.
+ which don't have a hardware IOMMU. Using this, PCI devices
+ which can only access 32-bits of memory can be used on systems
+ with more than 3 GB of memory.
+ If unsure, say Y.
config IOMMU_HELPER
- def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+ def_bool y
+ depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
@@ -796,17 +810,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
@@ -871,6 +874,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
config X86_MCE
bool "Machine Check / overheating reporting"
+ default y
---help---
Machine Check support allows the processor to notify the
kernel if it detects a problem (e.g. overheating, data corruption).
@@ -982,25 +986,25 @@ config X86_REBOOTFIXUPS
Say N otherwise.
config MICROCODE
- tristate "/dev/cpu/microcode - microcode support"
+ tristate "CPU microcode loading support"
select FW_LOADER
---help---
+
If you say Y here, you will be able to update the microcode on
certain Intel and AMD processors. The Intel support is for the
- IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
- Pentium 4, Xeon etc. The AMD support is for family 0x10 and
- 0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
- You will obviously need the actual microcode binary data itself
- which is not shipped with the Linux kernel.
+ IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
+ Xeon etc. The AMD support is for families 0x10 and later. You will
+ obviously need the actual microcode binary data itself which is not
+ shipped with the Linux kernel.
This option selects the general module only, you need to select
at least one vendor specific module as well.
- To compile this driver as a module, choose M here: the
- module will be called microcode.
+ To compile this driver as a module, choose M here: the module
+ will be called microcode.
config MICROCODE_INTEL
- bool "Intel microcode patch loading support"
+ bool "Intel microcode loading support"
depends on MICROCODE
default MICROCODE
select FW_LOADER
@@ -1013,7 +1017,7 @@ config MICROCODE_INTEL
<http://www.urbanmyth.org/microcode/>.
config MICROCODE_AMD
- bool "AMD microcode patch loading support"
+ bool "AMD microcode loading support"
depends on MICROCODE
select FW_LOADER
---help---
@@ -1159,10 +1163,12 @@ config X86_PAE
consumes more pagetable space per process.
config ARCH_PHYS_ADDR_T_64BIT
- def_bool X86_64 || X86_PAE
+ def_bool y
+ depends on X86_64 || X86_PAE
config ARCH_DMA_ADDR_T_64BIT
- def_bool X86_64 || HIGHMEM64G
+ def_bool y
+ depends on X86_64 || HIGHMEM64G
config DIRECT_GBPAGES
bool "Enable 1GB pages for kernel pagetables" if EXPERT
@@ -1285,8 +1291,8 @@ config ARCH_SELECT_MEMORY_MODEL
depends on ARCH_SPARSEMEM_ENABLE
config ARCH_MEMORY_PROBE
- def_bool X86_64
- depends on MEMORY_HOTPLUG
+ def_bool y
+ depends on X86_64 && MEMORY_HOTPLUG
config ARCH_PROC_KCORE_TEXT
def_bool y
@@ -1975,7 +1981,6 @@ config PCI_MMCONFIG
config PCI_CNB20LE_QUIRK
bool "Read CNB20LE Host Bridge Windows" if EXPERT
- default n
depends on PCI && EXPERIMENTAL
help
Read the PCI windows out of the CNB20LE host bridge. This allows
@@ -2186,18 +2191,18 @@ config COMPAT
depends on IA32_EMULATION || X86_X32
select ARCH_WANT_OLD_COMPAT_IPC
+if COMPAT
config COMPAT_FOR_U64_ALIGNMENT
- def_bool COMPAT
- depends on X86_64
+ def_bool y
config SYSVIPC_COMPAT
def_bool y
- depends on COMPAT && SYSVIPC
+ depends on SYSVIPC
config KEYS_COMPAT
- bool
- depends on COMPAT && KEYS
- default y
+ def_bool y
+ depends on KEYS
+endif
endmenu
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 706e12e..f3b86d0 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT
default X86_L1_CACHE_SHIFT
config X86_CMPXCHG
- def_bool X86_64 || (X86_32 && !M386)
+ def_bool y
+ depends on X86_64 || (X86_32 && !M386)
config X86_L1_CACHE_SHIFT
int
@@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT
config X86_XADD
def_bool y
- depends on X86_64 || !M386
+ depends on !M386
config X86_PPRO_FENCE
bool "PentiumPro memory ordering errata workaround"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 682e9c2..474ca35 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -142,7 +142,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
-archscripts:
+archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
###
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e398bb5..8a84501 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
$(obj)/piggy.o
+$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
+$(obj)/efi_stub_$(BITS).o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
+
ifeq ($(CONFIG_EFI_STUB), y)
VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index b3e0227..c760e07 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -276,8 +276,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
nr_gops = size / sizeof(void *);
for (i = 0; i < nr_gops; i++) {
struct efi_graphics_output_mode_info *info;
- efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
- void *pciio;
+ efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+ bool conout_found = false;
+ void *dummy;
void *h = gop_handle[i];
status = efi_call_phys3(sys_table->boottime->handle_protocol,
@@ -285,19 +286,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
if (status != EFI_SUCCESS)
continue;
- efi_call_phys3(sys_table->boottime->handle_protocol,
- h, &pciio_proto, &pciio);
+ status = efi_call_phys3(sys_table->boottime->handle_protocol,
+ h, &conout_proto, &dummy);
+
+ if (status == EFI_SUCCESS)
+ conout_found = true;
status = efi_call_phys4(gop->query_mode, gop,
gop->mode->mode, &size, &info);
- if (status == EFI_SUCCESS && (!first_gop || pciio)) {
+ if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
- * Apple provide GOPs that are not backed by
- * real hardware (they're used to handle
- * multiple displays). The workaround is to
- * search for a GOP implementing the PCIIO
- * protocol, and if one isn't found, to just
- * fallback to the first GOP.
+ * Systems that use the UEFI Console Splitter may
+ * provide multiple GOP devices, not all of which are
+ * backed by real hardware. The workaround is to search
+ * for a GOP implementing the ConOut protocol, and if
+ * one isn't found, to just fall back to the first GOP.
*/
width = info->horizontal_resolution;
height = info->vertical_resolution;
@@ -308,10 +311,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
pixels_per_scan_line = info->pixels_per_scan_line;
/*
- * Once we've found a GOP supporting PCIIO,
+ * Once we've found a GOP supporting ConOut,
* don't bother looking any further.
*/
- if (pciio)
+ if (conout_found)
break;
first_gop = gop;
@@ -328,7 +331,6 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
si->lfb_width = width;
si->lfb_height = height;
si->lfb_base = fb_base;
- si->lfb_size = fb_size;
si->pages = 1;
if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
@@ -376,6 +378,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
si->rsvd_pos = 0;
}
+ si->lfb_size = si->lfb_linelength * si->lfb_height;
+
+ si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+
free_handle:
efi_call_phys1(sys_table->boottime->free_pool, gop_handle);
return status;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 3b6e156..e5b0a8f 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -14,6 +14,10 @@
#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
#define EFI_READ_CHUNK_SIZE (1024 * 1024)
+#define EFI_CONSOLE_OUT_DEVICE_GUID \
+ EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
+ 0x3f, 0xc1, 0x4d)
+
#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
#define PIXEL_BIT_MASK 2
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b4e15dd..2a01744 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -32,10 +32,6 @@ SYSSEG = 0x1000 /* historical load address >> 4 */
#define SVGA_MODE ASK_VGA
#endif
-#ifndef RAMDISK
-#define RAMDISK 0
-#endif
-
#ifndef ROOT_RDONLY
#define ROOT_RDONLY 1
#endif
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 119db67..5598547 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
@@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_SMP=y
CONFIG_X86_GENERIC=y
CONFIG_HPET_TIMER=y
@@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
@@ -231,8 +229,6 @@ CONFIG_SND_HRTIMER=y
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
CONFIG_HIDRAW=y
-CONFIG_HID_PID=y
-CONFIG_USB_HIDDEV=y
CONFIG_HID_GYRATION=y
CONFIG_LOGITECH_FF=y
CONFIG_HID_NTRIG=y
@@ -243,11 +239,11 @@ CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_TOPSEED=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_DEBUG=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -262,10 +258,9 @@ CONFIG_RTC_CLASS=y
CONFIG_DMADEVICES=y
CONFIG_EEEPC_LAPTOP=y
CONFIG_EFI_VARS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
@@ -299,13 +293,11 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
-CONFIG_DEBUG_NX_TEST=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
@@ -316,4 +308,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_CRYPTO_AES_586=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 76eb290..671524d 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
@@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_SMP=y
CONFIG_CALGARY_IOMMU=y
CONFIG_NR_CPUS=64
@@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
@@ -227,8 +225,6 @@ CONFIG_SND_HRTIMER=y
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
CONFIG_HIDRAW=y
-CONFIG_HID_PID=y
-CONFIG_USB_HIDDEV=y
CONFIG_HID_GYRATION=y
CONFIG_LOGITECH_FF=y
CONFIG_HID_NTRIG=y
@@ -239,11 +235,11 @@ CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_TOPSEED=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_DEBUG=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -262,10 +258,9 @@ CONFIG_AMD_IOMMU_STATS=y
CONFIG_INTEL_IOMMU=y
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_EFI_VARS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
@@ -298,13 +292,11 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
-CONFIG_DEBUG_NX_TEST=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
@@ -314,4 +306,3 @@ CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 673ac9b..8c77c64 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -162,7 +162,8 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
}
seg = get_fs();
set_fs(KERNEL_DS);
- ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp);
+ ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
+ (stack_t __force __user *) &uoss, regs->sp);
set_fs(seg);
if (ret >= 0 && uoss_ptr) {
if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
@@ -250,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
get_user_ex(tmp, &sc->fpstate);
buf = compat_ptr(tmp);
- err |= restore_i387_xstate_ia32(buf);
+ err |= restore_xstate_sig(buf, 1);
get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
@@ -361,7 +362,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
*/
static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
size_t frame_size,
- void **fpstate)
+ void __user **fpstate)
{
unsigned long sp;
@@ -381,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
sp = (unsigned long) ka->sa.sa_restorer;
if (used_math()) {
- sp = sp - sig_xstate_ia32_size;
- *fpstate = (struct _fpstate_ia32 *) sp;
- if (save_i387_xstate_ia32(*fpstate) < 0)
+ unsigned long fx_aligned, math_size;
+
+ sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
+ *fpstate = (struct _fpstate_ia32 __user *) sp;
+ if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
+ math_size) < 0)
return (void __user *) -1L;
}
@@ -448,7 +452,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
* These are actually not used anymore, but left because some
* gdb versions depend on them as a marker.
*/
- put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
+ put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
} put_user_catch(err);
if (err)
@@ -529,7 +533,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* Not actually used anymore, but left because some gdb
* versions need it.
*/
- put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
+ put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
} put_user_catch(err);
if (err)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 4540bec..c5b938d 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -287,7 +287,7 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
return ret;
}
-asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
+asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
int options)
{
return compat_sys_wait4(pid, stat_addr, options, NULL);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7078068..444704c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end);
extern void alternatives_smp_module_del(struct module *mod);
-extern void alternatives_smp_switch(int smp);
+extern void alternatives_enable_smp(void);
extern int alternatives_text_reserved(void *start, void *end);
extern bool skip_smp_alternatives;
#else
@@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end) {}
static inline void alternatives_smp_module_del(struct module *mod) {}
-static inline void alternatives_smp_switch(int smp) {}
+static inline void alternatives_enable_smp(void) {}
static inline int alternatives_text_reserved(void *start, void *end)
{
return 0;
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 72f5009..6dfd019 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -355,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
*/
static inline unsigned long __ffs(unsigned long word)
{
- asm("bsf %1,%0"
+ asm("rep; bsf %1,%0"
: "=r" (word)
: "rm" (word));
return word;
@@ -369,7 +369,7 @@ static inline unsigned long __ffs(unsigned long word)
*/
static inline unsigned long ffz(unsigned long word)
{
- asm("bsf %1,%0"
+ asm("rep; bsf %1,%0"
: "=r" (word)
: "r" (~word));
return word;
@@ -417,10 +417,9 @@ static inline int ffs(int x)
* We cannot do this on 32 bits because at the very least some
* 486 CPUs did not behave this way.
*/
- long tmp = -1;
asm("bsfl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (tmp));
+ : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsfl %1,%0\n\t"
"cmovzl %2,%0"
@@ -459,10 +458,9 @@ static inline int fls(int x)
* We cannot do this on 32 bits because at the very least some
* 486 CPUs did not behave this way.
*/
- long tmp = -1;
asm("bsrl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (tmp));
+ : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsrl %1,%0\n\t"
"cmovzl %2,%0"
@@ -490,13 +488,13 @@ static inline int fls(int x)
#ifdef CONFIG_X86_64
static __always_inline int fls64(__u64 x)
{
- long bitpos = -1;
+ int bitpos = -1;
/*
* AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
* dest reg is undefined if x==0, but their CPU architect says its
* value is written to set it to the same as before.
*/
- asm("bsrq %1,%0"
+ asm("bsrq %1,%q0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index a9e3a74..7f8422a 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -49,38 +49,36 @@ For 32-bit we have the following conventions - kernel is built with
#include "dwarf2.h"
/*
- * 64-bit system call stack frame layout defines and helpers, for
- * assembly code (note that the seemingly unnecessary parentheses
- * are to prevent cpp from inserting spaces in expressions that get
- * passed to macros):
+ * 64-bit system call stack frame layout defines and helpers,
+ * for assembly code:
*/
-#define R15 (0)
-#define R14 (8)
-#define R13 (16)
-#define R12 (24)
-#define RBP (32)
-#define RBX (40)
+#define R15 0
+#define R14 8
+#define R13 16
+#define R12 24
+#define RBP 32
+#define RBX 40
/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11 (48)
-#define R10 (56)
-#define R9 (64)
-#define R8 (72)
-#define RAX (80)
-#define RCX (88)
-#define RDX (96)
-#define RSI (104)
-#define RDI (112)
-#define ORIG_RAX (120) /* + error_code */
+#define R11 48
+#define R10 56
+#define R9 64
+#define R8 72
+#define RAX 80
+#define RCX 88
+#define RDX 96
+#define RSI 104
+#define RDI 112
+#define ORIG_RAX 120 /* + error_code */
/* end of arguments */
/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP (128)
-#define CS (136)
-#define EFLAGS (144)
-#define RSP (152)
-#define SS (160)
+#define RIP 128
+#define CS 136
+#define EFLAGS 144
+#define RSP 152
+#define SS 160
#define ARGOFFSET R11
#define SWFRAME ORIG_RAX
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 6b7ee5f..16cae42 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -97,6 +97,7 @@
#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
@@ -209,6 +210,7 @@
#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@@ -299,12 +301,14 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
+#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT)
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
+#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 75f4c6d..92f3c6e 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -12,6 +12,7 @@
#include <linux/kernel_stat.h>
#include <linux/regset.h>
+#include <linux/compat.h>
#include <linux/slab.h>
#include <asm/asm.h>
#include <asm/cpufeature.h>
@@ -21,42 +22,74 @@
#include <asm/uaccess.h>
#include <asm/xsave.h>
-extern unsigned int sig_xstate_size;
+#ifdef CONFIG_X86_64
+# include <asm/sigcontext32.h>
+# include <asm/user32.h>
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ compat_sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+ compat_sigset_t *set, struct pt_regs *regs);
+#else
+# define user_i387_ia32_struct user_i387_struct
+# define user32_fxsr_struct user_fxsr_struct
+# define ia32_setup_frame __setup_frame
+# define ia32_setup_rt_frame __setup_rt_frame
+#endif
+
+extern unsigned int mxcsr_feature_mask;
extern void fpu_init(void);
+extern void eager_fpu_init(void);
DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
+extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
+ struct task_struct *tsk);
+extern void convert_to_fxsr(struct task_struct *tsk,
+ const struct user_i387_ia32_struct *env);
+
extern user_regset_active_fn fpregs_active, xfpregs_active;
extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
xstateregs_get;
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
xstateregs_set;
-
/*
* xstateregs_active == fpregs_active. Please refer to the comment
* at the definition of fpregs_active.
*/
#define xstateregs_active fpregs_active
-extern struct _fpx_sw_bytes fx_sw_reserved;
-#ifdef CONFIG_IA32_EMULATION
-extern unsigned int sig_xstate_ia32_size;
-extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
-struct _fpstate_ia32;
-struct _xstate_ia32;
-extern int save_i387_xstate_ia32(void __user *buf);
-extern int restore_i387_xstate_ia32(void __user *buf);
-#endif
-
#ifdef CONFIG_MATH_EMULATION
+# define HAVE_HWFP (boot_cpu_data.hard_math)
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
+# define HAVE_HWFP 1
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
+static inline int is_ia32_compat_frame(void)
+{
+ return config_enabled(CONFIG_IA32_EMULATION) &&
+ test_thread_flag(TIF_IA32);
+}
+
+static inline int is_ia32_frame(void)
+{
+ return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
+}
+
+static inline int is_x32_frame(void)
+{
+ return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+}
+
#define X87_FSW_ES (1 << 7) /* Exception Summary */
+static __always_inline __pure bool use_eager_fpu(void)
+{
+ return static_cpu_has(X86_FEATURE_EAGER_FPU);
+}
+
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -72,6 +105,13 @@ static __always_inline __pure bool use_fxsr(void)
return static_cpu_has(X86_FEATURE_FXSR);
}
+static inline void fx_finit(struct i387_fxsave_struct *fx)
+{
+ memset(fx, 0, xstate_size);
+ fx->cwd = 0x37f;
+ fx->mxcsr = MXCSR_DEFAULT;
+}
+
extern void __sanitize_i387_state(struct task_struct *);
static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -81,131 +121,88 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
__sanitize_i387_state(tsk);
}
-#ifdef CONFIG_X86_64
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+#define check_insn(insn, output, input...) \
+({ \
+ int err; \
+ asm volatile("1:" #insn "\n\t" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err), output \
+ : "0"(0), input); \
+ err; \
+})
+
+static inline int fsave_user(struct i387_fsave_struct __user *fx)
{
- int err;
-
- /* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
- asm volatile("1: fxrstorq %[fx]\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err)
- : [fx] "m" (*fx), "0" (0));
-#else
- asm volatile("1: rex64/fxrstor (%[fx])\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err)
- : [fx] "R" (fx), "m" (*fx), "0" (0));
-#endif
- return err;
+ return check_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
}
static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
{
- int err;
+ if (config_enabled(CONFIG_X86_32))
+ return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
- /*
- * Clear the bytes not touched by the fxsave and reserved
- * for the SW usage.
- */
- err = __clear_user(&fx->sw_reserved,
- sizeof(struct _fpx_sw_bytes));
- if (unlikely(err))
- return -EFAULT;
-
- /* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
- asm volatile("1: fxsaveq %[fx]\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err), [fx] "=m" (*fx)
- : "0" (0));
-#else
- asm volatile("1: rex64/fxsave (%[fx])\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err), "=m" (*fx)
- : [fx] "R" (fx), "0" (0));
-#endif
- if (unlikely(err) &&
- __clear_user(fx, sizeof(struct i387_fxsave_struct)))
- err = -EFAULT;
- /* No need to clear here because the caller clears USED_MATH */
- return err;
+ /* See comment in fpu_fxsave() below. */
+ return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}
-static inline void fpu_fxsave(struct fpu *fpu)
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
- /* Using "rex64; fxsave %0" is broken because, if the memory operand
- uses any extended registers for addressing, a second REX prefix
- will be generated (to the assembler, rex64 followed by semicolon
- is a separate instruction), and hence the 64-bitness is lost. */
+ if (config_enabled(CONFIG_X86_32))
+ return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-#ifdef CONFIG_AS_FXSAVEQ
- /* Using "fxsaveq %0" would be the ideal choice, but is only supported
- starting with gas 2.16. */
- __asm__ __volatile__("fxsaveq %0"
- : "=m" (fpu->state->fxsave));
-#else
- /* Using, as a workaround, the properly prefixed form below isn't
- accepted by any binutils version so far released, complaining that
- the same type of prefix is used twice if an extended register is
- needed for addressing (fix submitted to mainline 2005-11-21).
- asm volatile("rex64/fxsave %0"
- : "=m" (fpu->state->fxsave));
- This, however, we can work around by forcing the compiler to select
- an addressing mode that doesn't require extended registers. */
- asm volatile("rex64/fxsave (%[fx])"
- : "=m" (fpu->state->fxsave)
- : [fx] "R" (&fpu->state->fxsave));
-#endif
+ /* See comment in fpu_fxsave() below. */
+ return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
+ "m" (*fx));
}
-#else /* CONFIG_X86_32 */
-
-/* perform fxrstor iff the processor has extended states, otherwise frstor */
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+static inline int frstor_checking(struct i387_fsave_struct *fx)
{
- /*
- * The "nop" is needed to make the instructions the same
- * length.
- */
- alternative_input(
- "nop ; frstor %1",
- "fxrstor %1",
- X86_FEATURE_FXSR,
- "m" (*fx));
-
- return 0;
+ return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline void fpu_fxsave(struct fpu *fpu)
{
- asm volatile("fxsave %[fx]"
- : [fx] "=m" (fpu->state->fxsave));
+ if (config_enabled(CONFIG_X86_32))
+ asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
+ else {
+ /* Using "rex64; fxsave %0" is broken because, if the memory
+ * operand uses any extended registers for addressing, a second
+ * REX prefix will be generated (to the assembler, rex64
+ * followed by semicolon is a separate instruction), and hence
+ * the 64-bitness is lost.
+ *
+ * Using "fxsaveq %0" would be the ideal choice, but is only
+ * supported starting with gas 2.16.
+ *
+ * Using, as a workaround, the properly prefixed form below
+ * isn't accepted by any binutils version so far released,
+ * complaining that the same type of prefix is used twice if
+ * an extended register is needed for addressing (fix submitted
+ * to mainline 2005-11-21).
+ *
+ * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
+ *
+ * This, however, we can work around by forcing the compiler to
+ * select an addressing mode that doesn't require extended
+ * registers.
+ */
+ asm volatile( "rex64/fxsave (%[fx])"
+ : "=m" (fpu->state->fxsave)
+ : [fx] "R" (&fpu->state->fxsave));
+ }
}
-#endif /* CONFIG_X86_64 */
-
/*
* These must be called with preempt disabled. Returns
* 'true' if the FPU state is still intact.
@@ -248,17 +245,14 @@ static inline int __save_init_fpu(struct task_struct *tsk)
return fpu_save_init(&tsk->thread.fpu);
}
-static inline int fpu_fxrstor_checking(struct fpu *fpu)
-{
- return fxrstor_checking(&fpu->state->fxsave);
-}
-
static inline int fpu_restore_checking(struct fpu *fpu)
{
if (use_xsave())
- return fpu_xrstor_checking(fpu);
+ return fpu_xrstor_checking(&fpu->state->xsave);
+ else if (use_fxsr())
+ return fxrstor_checking(&fpu->state->fxsave);
else
- return fpu_fxrstor_checking(fpu);
+ return frstor_checking(&fpu->state->fsave);
}
static inline int restore_fpu_checking(struct task_struct *tsk)
@@ -310,15 +304,52 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
static inline void __thread_fpu_end(struct task_struct *tsk)
{
__thread_clear_has_fpu(tsk);
- stts();
+ if (!use_eager_fpu())
+ stts();
}
static inline void __thread_fpu_begin(struct task_struct *tsk)
{
- clts();
+ if (!use_eager_fpu())
+ clts();
__thread_set_has_fpu(tsk);
}
+static inline void __drop_fpu(struct task_struct *tsk)
+{
+ if (__thread_has_fpu(tsk)) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ __thread_fpu_end(tsk);
+ }
+}
+
+static inline void drop_fpu(struct task_struct *tsk)
+{
+ /*
+ * Forget coprocessor state..
+ */
+ preempt_disable();
+ tsk->fpu_counter = 0;
+ __drop_fpu(tsk);
+ clear_used_math();
+ preempt_enable();
+}
+
+static inline void drop_init_fpu(struct task_struct *tsk)
+{
+ if (!use_eager_fpu())
+ drop_fpu(tsk);
+ else {
+ if (use_xsave())
+ xrstor_state(init_xstate_buf, -1);
+ else
+ fxrstor_checking(&init_xstate_buf->i387);
+ }
+}
+
/*
* FPU state switching for scheduling.
*
@@ -352,7 +383,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
{
fpu_switch_t fpu;
- fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+ /*
+ * If the task has used the math, pre-load the FPU when eager FPU restore
+ * is in use or if the past 5 consecutive context-switches used math.
+ */
+ fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
+ new->fpu_counter > 5);
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
cpu = ~0;
@@ -364,14 +400,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
new->fpu_counter++;
__thread_set_has_fpu(new);
prefetch(new->thread.fpu.state);
- } else
+ } else if (!use_eager_fpu())
stts();
} else {
old->fpu_counter = 0;
old->thread.fpu.last_cpu = ~0;
if (fpu.preload) {
new->fpu_counter++;
- if (fpu_lazy_restore(new, cpu))
+ if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
@@ -391,44 +427,40 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
{
if (fpu.preload) {
if (unlikely(restore_fpu_checking(new)))
- __thread_fpu_end(new);
+ drop_init_fpu(new);
}
}
/*
* Signal frame handlers...
*/
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
+extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
-static inline void __clear_fpu(struct task_struct *tsk)
+static inline int xstate_sigframe_size(void)
{
- if (__thread_has_fpu(tsk)) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- __thread_fpu_end(tsk);
+ return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+}
+
+static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
+{
+ void __user *buf_fx = buf;
+ int size = xstate_sigframe_size();
+
+ if (ia32_frame && use_fxsr()) {
+ buf_fx = buf + sizeof(struct i387_fsave_struct);
+ size += sizeof(struct i387_fsave_struct);
}
+
+ return __restore_xstate_sig(buf, buf_fx, size);
}
/*
- * The actual user_fpu_begin/end() functions
- * need to be preemption-safe.
+ * user_fpu_begin() needs to be preemption-safe.
*
- * NOTE! user_fpu_end() must be used only after you
- * have saved the FP state, and user_fpu_begin() must
- * be used only immediately before restoring it.
- * These functions do not do any save/restore on
- * their own.
+ * NOTE! user_fpu_begin() must be used only immediately before restoring
+ * the FP state. This function does not do any save/restore on its own.
*/
-static inline void user_fpu_end(void)
-{
- preempt_disable();
- __thread_fpu_end(current);
- preempt_enable();
-}
-
static inline void user_fpu_begin(void)
{
preempt_disable();
@@ -437,25 +469,32 @@ static inline void user_fpu_begin(void)
preempt_enable();
}
+static inline void __save_fpu(struct task_struct *tsk)
+{
+ if (use_xsave())
+ xsave_state(&tsk->thread.fpu.state->xsave, -1);
+ else
+ fpu_fxsave(&tsk->thread.fpu);
+}
+
/*
* These disable preemption on their own and are safe
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
WARN_ON_ONCE(!__thread_has_fpu(tsk));
+
+ if (use_eager_fpu()) {
+ __save_fpu(tsk);
+ return;
+ }
+
preempt_disable();
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
preempt_enable();
}
-static inline void clear_fpu(struct task_struct *tsk)
-{
- preempt_disable();
- __clear_fpu(tsk);
- preempt_enable();
-}
-
/*
* i387 state interaction
*/
@@ -510,11 +549,34 @@ static inline void fpu_free(struct fpu *fpu)
}
}
-static inline void fpu_copy(struct fpu *dst, struct fpu *src)
+static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
{
- memcpy(dst->state, src->state, xstate_size);
+ if (use_eager_fpu()) {
+ memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
+ __save_fpu(dst);
+ } else {
+ struct fpu *dfpu = &dst->thread.fpu;
+ struct fpu *sfpu = &src->thread.fpu;
+
+ unlazy_fpu(src);
+ memcpy(dfpu->state, sfpu->state, xstate_size);
+ }
}
-extern void fpu_finit(struct fpu *fpu);
+static inline unsigned long
+alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
+ unsigned long *size)
+{
+ unsigned long frame_size = xstate_sigframe_size();
+
+ *buf_fx = sp = round_down(sp - frame_size, 64);
+ if (ia32_frame && use_fxsr()) {
+ frame_size += sizeof(struct i387_fsave_struct);
+ sp -= sizeof(struct i387_fsave_struct);
+ }
+
+ *size = frame_size;
+ return sp;
+}
#endif
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b0767bc..9a25b52 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -3,38 +3,54 @@
#ifdef __ASSEMBLY__
- .macro MCOUNT_SAVE_FRAME
- /* taken from glibc */
- subq $0x38, %rsp
- movq %rax, (%rsp)
- movq %rcx, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rsi, 24(%rsp)
- movq %rdi, 32(%rsp)
- movq %r8, 40(%rsp)
- movq %r9, 48(%rsp)
+ /* skip is set if the stack was already partially adjusted */
+ .macro MCOUNT_SAVE_FRAME skip=0
+ /*
+ * We add enough stack to save all regs.
+ */
+ subq $(SS+8-\skip), %rsp
+ movq %rax, RAX(%rsp)
+ movq %rcx, RCX(%rsp)
+ movq %rdx, RDX(%rsp)
+ movq %rsi, RSI(%rsp)
+ movq %rdi, RDI(%rsp)
+ movq %r8, R8(%rsp)
+ movq %r9, R9(%rsp)
+ /* Move RIP to its proper location */
+ movq SS+8(%rsp), %rdx
+ movq %rdx, RIP(%rsp)
.endm
- .macro MCOUNT_RESTORE_FRAME
- movq 48(%rsp), %r9
- movq 40(%rsp), %r8
- movq 32(%rsp), %rdi
- movq 24(%rsp), %rsi
- movq 16(%rsp), %rdx
- movq 8(%rsp), %rcx
- movq (%rsp), %rax
- addq $0x38, %rsp
+ .macro MCOUNT_RESTORE_FRAME skip=0
+ movq R9(%rsp), %r9
+ movq R8(%rsp), %r8
+ movq RDI(%rsp), %rdi
+ movq RSI(%rsp), %rsi
+ movq RDX(%rsp), %rdx
+ movq RCX(%rsp), %rcx
+ movq RAX(%rsp), %rax
+ addq $(SS+8-\skip), %rsp
.endm
#endif
#ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR ((long)(mcount))
+#ifdef CC_USING_FENTRY
+# define MCOUNT_ADDR ((long)(__fentry__))
+#else
+# define MCOUNT_ADDR ((long)(mcount))
+#endif
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
+#ifdef CONFIG_DYNAMIC_FTRACE
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#endif
+
#ifndef __ASSEMBLY__
extern void mcount(void);
extern atomic_t modifying_ftrace_code;
+extern void __fentry__(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index d3895db..81f04ce 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -18,6 +18,10 @@ typedef struct {
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
+ /*
+ * irq_tlb_count is double-counted in irq_call_count, so it must be
+ * subtracted from irq_call_count when displaying irq_call_count
+ */
unsigned int irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 2c392d6..434e210 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -35,8 +35,6 @@
#define HPET_ID_NUMBER_SHIFT 8
#define HPET_ID_VENDOR_SHIFT 16
-#define HPET_ID_VENDOR_8086 0x8086
-
#define HPET_CFG_ENABLE 0x001
#define HPET_CFG_LEGACY 0x002
#define HPET_LEGACY_8254 2
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 257d9cc..ed8089d6 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -19,12 +19,37 @@ struct pt_regs;
struct user_i387_struct;
extern int init_fpu(struct task_struct *child);
+extern void fpu_finit(struct fpu *fpu);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
extern void math_state_restore(void);
extern bool irq_fpu_usable(void);
-extern void kernel_fpu_begin(void);
-extern void kernel_fpu_end(void);
+
+/*
+ * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
+ * and they don't touch the preempt state on their own.
+ * If you enable preemption after __kernel_fpu_begin(), the preempt notifier
+ * should call __kernel_fpu_end() to prevent the kernel/user FPU
+ * state from getting corrupted. KVM for example uses this model.
+ *
+ * All other cases use kernel_fpu_begin/end() which disable preemption
+ * during kernel FPU usage.
+ */
+extern void __kernel_fpu_begin(void);
+extern void __kernel_fpu_end(void);
+
+static inline void kernel_fpu_begin(void)
+{
+ WARN_ON_ONCE(!irq_fpu_usable());
+ preempt_disable();
+ __kernel_fpu_begin();
+}
+
+static inline void kernel_fpu_end(void)
+{
+ __kernel_fpu_end();
+ preempt_enable();
+}
/*
* Some instructions like VIA's padlock instructions generate a spurious
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
index f229b13..f42a047 100644
--- a/arch/x86/include/asm/iommu_table.h
+++ b/arch/x86/include/asm/iommu_table.h
@@ -48,7 +48,7 @@ struct iommu_table_entry {
#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\
- static const struct iommu_table_entry const \
+ static const struct iommu_table_entry \
__iommu_entry_##_detect __used \
__attribute__ ((unused, __section__(".iommu_table"), \
aligned((sizeof(void *))))) \
@@ -63,10 +63,10 @@ struct iommu_table_entry {
* to stop detecting the other IOMMUs after yours has been detected.
*/
#define IOMMU_INIT_POST(_detect) \
- __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0)
+ __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0)
#define IOMMU_INIT_POST_FINISH(detect) \
- __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1)
+ __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1)
/*
* A more sophisticated version of IOMMU_INIT. This variant requires:
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 5478825..d3ddd17 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -27,6 +27,7 @@
#include <asm/insn.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define ARCH_SUPPORTS_KPROBES_ON_FTRACE
struct pt_regs;
struct kprobe;
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 246617e..41e08cb 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,22 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#define DE_VECTOR 0
+#define DB_VECTOR 1
+#define BP_VECTOR 3
+#define OF_VECTOR 4
+#define BR_VECTOR 5
+#define UD_VECTOR 6
+#define NM_VECTOR 7
+#define DF_VECTOR 8
+#define TS_VECTOR 10
+#define NP_VECTOR 11
+#define SS_VECTOR 12
+#define GP_VECTOR 13
+#define PF_VECTOR 14
+#define MF_VECTOR 16
+#define MC_VECTOR 18
+
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
#define __KVM_HAVE_IOAPIC
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09155d6..1eaa6b0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -75,22 +75,6 @@
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-#define DE_VECTOR 0
-#define DB_VECTOR 1
-#define BP_VECTOR 3
-#define OF_VECTOR 4
-#define BR_VECTOR 5
-#define UD_VECTOR 6
-#define NM_VECTOR 7
-#define DF_VECTOR 8
-#define TS_VECTOR 10
-#define NP_VECTOR 11
-#define SS_VECTOR 12
-#define GP_VECTOR 13
-#define PF_VECTOR 14
-#define MF_VECTOR 16
-#define MC_VECTOR 18
-
#define SELECTOR_TI_MASK (1 << 2)
#define SELECTOR_RPL_MASK 0x03
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index a3ac52b..54d73b1 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,19 +116,9 @@ struct mce_log {
/* Software defined banks */
#define MCE_EXTENDED_BANK 128
#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
-
-#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */
-#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9)
-#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9)
-#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9)
-#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9)
-#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9)
-#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
-#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
-
+#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
#ifdef __KERNEL__
-
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -171,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
#ifdef CONFIG_X86_MCE_INTEL
extern int mce_cmci_disabled;
extern int mce_ignore_ce;
+extern int mce_bios_cmci_threshold;
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 4ebe157..43d921b 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -15,8 +15,8 @@ struct microcode_ops {
enum ucode_state (*request_microcode_user) (int cpu,
const void __user *buf, size_t size);
- enum ucode_state (*request_microcode_fw) (int cpu,
- struct device *device);
+ enum ucode_state (*request_microcode_fw) (int cpu, struct device *,
+ bool refresh_fw);
void (*microcode_fini_cpu) (int cpu);
@@ -49,12 +49,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
#ifdef CONFIG_MICROCODE_AMD
extern struct microcode_ops * __init init_amd_microcode(void);
extern void __exit exit_amd_microcode(void);
-
-static inline void get_ucode_data(void *to, const u8 *from, size_t n)
-{
- memcpy(to, from, n);
-}
-
#else
static inline struct microcode_ops * __init init_amd_microcode(void)
{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index cb4e43b..4fabcdf 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { }
static inline void amd_pmu_disable_virt(void) { }
#endif
+#define arch_perf_out_copy_user copy_from_user_nmi
+
#endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h
new file mode 100644
index 0000000..3f2207b
--- /dev/null
+++ b/arch/x86/include/asm/perf_regs.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_X86_PERF_REGS_H
+#define _ASM_X86_PERF_REGS_H
+
+enum perf_event_x86_regs { /* one index per x86 register; values are implicit, starting at 0 */
+ PERF_REG_X86_AX,
+ PERF_REG_X86_BX,
+ PERF_REG_X86_CX,
+ PERF_REG_X86_DX,
+ PERF_REG_X86_SI,
+ PERF_REG_X86_DI,
+ PERF_REG_X86_BP,
+ PERF_REG_X86_SP,
+ PERF_REG_X86_IP,
+ PERF_REG_X86_FLAGS,
+ PERF_REG_X86_CS,
+ PERF_REG_X86_SS,
+ PERF_REG_X86_DS,
+ PERF_REG_X86_ES,
+ PERF_REG_X86_FS,
+ PERF_REG_X86_GS, /* last index counted by PERF_REG_X86_32_MAX */
+ PERF_REG_X86_R8,
+ PERF_REG_X86_R9,
+ PERF_REG_X86_R10,
+ PERF_REG_X86_R11,
+ PERF_REG_X86_R12,
+ PERF_REG_X86_R13,
+ PERF_REG_X86_R14,
+ PERF_REG_X86_R15, /* last index counted by PERF_REG_X86_64_MAX */
+
+ PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, /* 16: covers AX..GS */
+ PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, /* 24: covers AX..R15 */
+};
+#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 013286a1..db8fec6 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -303,11 +303,9 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
-extern void native_pagetable_setup_start(pgd_t *base);
-extern void native_pagetable_setup_done(pgd_t *base);
+extern void native_pagetable_init(void);
#else
-#define native_pagetable_setup_start x86_init_pgd_noop
-#define native_pagetable_setup_done x86_init_pgd_noop
+#define native_pagetable_init paging_init
#endif
struct seq_file;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad..b98c0d9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
-extern unsigned long kernel_eflags;
extern asmlinkage void ignore_sysret(void);
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
@@ -759,6 +758,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
+extern void set_task_blockstep(struct task_struct *task, bool on);
+
/*
* from system description table in BIOS. Mostly for MCA use, but
* others may find it useful:
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
new file mode 100644
index 0000000..d1ac07a
--- /dev/null
+++ b/arch/x86/include/asm/rcu.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_X86_RCU_H /* C: exception_enter()/exception_exit(); asm: SCHEDULE_USER */
+#define _ASM_X86_RCU_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/rcupdate.h>
+#include <asm/ptrace.h>
+
+static inline void exception_enter(struct pt_regs *regs) /* regs is not read in this body */
+{
+ rcu_user_exit(); /* not guarded by CONFIG_RCU_USER_QS; presumably a stub when it is off - confirm in <linux/rcupdate.h> */
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+#ifdef CONFIG_RCU_USER_QS
+ if (user_mode(regs)) /* rcu_user_enter() runs only if user_mode(regs) is true */
+ rcu_user_enter();
+#endif /* CONFIG_RCU_USER_QS */
+}
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_RCU_USER_QS
+# define SCHEDULE_USER call schedule_user
+#else /* !CONFIG_RCU_USER_QS */
+# define SCHEDULE_USER call schedule
+#endif /* CONFIG_RCU_USER_QS */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_X86_RCU_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 598457c..323973f 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,6 +31,10 @@ typedef struct {
unsigned long sig[_NSIG_WORDS];
} sigset_t;
+#ifndef CONFIG_COMPAT
+typedef sigset_t compat_sigset_t; /* without CONFIG_COMPAT, compat_sigset_t is the sigset_t defined just above */
+#endif /* !CONFIG_COMPAT */
+
#else
/* Here we must cater to libcs that poke about in kernel headers. */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f2b83bc..cdf5674 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -1,6 +1,135 @@
#ifndef __SVM_H
#define __SVM_H
+#define SVM_EXIT_READ_CR0 0x000
+#define SVM_EXIT_READ_CR3 0x003
+#define SVM_EXIT_READ_CR4 0x004
+#define SVM_EXIT_READ_CR8 0x008
+#define SVM_EXIT_WRITE_CR0 0x010
+#define SVM_EXIT_WRITE_CR3 0x013
+#define SVM_EXIT_WRITE_CR4 0x014
+#define SVM_EXIT_WRITE_CR8 0x018
+#define SVM_EXIT_READ_DR0 0x020
+#define SVM_EXIT_READ_DR1 0x021
+#define SVM_EXIT_READ_DR2 0x022
+#define SVM_EXIT_READ_DR3 0x023
+#define SVM_EXIT_READ_DR4 0x024
+#define SVM_EXIT_READ_DR5 0x025
+#define SVM_EXIT_READ_DR6 0x026
+#define SVM_EXIT_READ_DR7 0x027
+#define SVM_EXIT_WRITE_DR0 0x030
+#define SVM_EXIT_WRITE_DR1 0x031
+#define SVM_EXIT_WRITE_DR2 0x032
+#define SVM_EXIT_WRITE_DR3 0x033
+#define SVM_EXIT_WRITE_DR4 0x034
+#define SVM_EXIT_WRITE_DR5 0x035
+#define SVM_EXIT_WRITE_DR6 0x036
+#define SVM_EXIT_WRITE_DR7 0x037
+#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_INTR 0x060
+#define SVM_EXIT_NMI 0x061
+#define SVM_EXIT_SMI 0x062
+#define SVM_EXIT_INIT 0x063
+#define SVM_EXIT_VINTR 0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ 0x066
+#define SVM_EXIT_GDTR_READ 0x067
+#define SVM_EXIT_LDTR_READ 0x068
+#define SVM_EXIT_TR_READ 0x069
+#define SVM_EXIT_IDTR_WRITE 0x06a
+#define SVM_EXIT_GDTR_WRITE 0x06b
+#define SVM_EXIT_LDTR_WRITE 0x06c
+#define SVM_EXIT_TR_WRITE 0x06d
+#define SVM_EXIT_RDTSC 0x06e
+#define SVM_EXIT_RDPMC 0x06f
+#define SVM_EXIT_PUSHF 0x070
+#define SVM_EXIT_POPF 0x071
+#define SVM_EXIT_CPUID 0x072
+#define SVM_EXIT_RSM 0x073
+#define SVM_EXIT_IRET 0x074
+#define SVM_EXIT_SWINT 0x075
+#define SVM_EXIT_INVD 0x076
+#define SVM_EXIT_PAUSE 0x077
+#define SVM_EXIT_HLT 0x078
+#define SVM_EXIT_INVLPG 0x079
+#define SVM_EXIT_INVLPGA 0x07a
+#define SVM_EXIT_IOIO 0x07b
+#define SVM_EXIT_MSR 0x07c
+#define SVM_EXIT_TASK_SWITCH 0x07d
+#define SVM_EXIT_FERR_FREEZE 0x07e
+#define SVM_EXIT_SHUTDOWN 0x07f
+#define SVM_EXIT_VMRUN 0x080
+#define SVM_EXIT_VMMCALL 0x081
+#define SVM_EXIT_VMLOAD 0x082
+#define SVM_EXIT_VMSAVE 0x083
+#define SVM_EXIT_STGI 0x084
+#define SVM_EXIT_CLGI 0x085
+#define SVM_EXIT_SKINIT 0x086
+#define SVM_EXIT_RDTSCP 0x087
+#define SVM_EXIT_ICEBP 0x088
+#define SVM_EXIT_WBINVD 0x089
+#define SVM_EXIT_MONITOR 0x08a
+#define SVM_EXIT_MWAIT 0x08b
+#define SVM_EXIT_MWAIT_COND 0x08c
+#define SVM_EXIT_XSETBV 0x08d
+#define SVM_EXIT_NPF 0x400
+
+#define SVM_EXIT_ERR -1
+
+#define SVM_EXIT_REASONS /* { exit code, "name" } initializer pairs. NOTE(review): some SVM_EXIT_* codes defined above (e.g. READ_DR4..7, RDTSC, PAUSE) have no entry - confirm intentional */ \
+ { SVM_EXIT_READ_CR0, "read_cr0" }, \
+ { SVM_EXIT_READ_CR3, "read_cr3" }, \
+ { SVM_EXIT_READ_CR4, "read_cr4" }, \
+ { SVM_EXIT_READ_CR8, "read_cr8" }, \
+ { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
+ { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
+ { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
+ { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
+ { SVM_EXIT_READ_DR0, "read_dr0" }, \
+ { SVM_EXIT_READ_DR1, "read_dr1" }, \
+ { SVM_EXIT_READ_DR2, "read_dr2" }, \
+ { SVM_EXIT_READ_DR3, "read_dr3" }, \
+ { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
+ { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
+ { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
+ { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
+ { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
+ { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
+ { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
+ { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
+ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
+ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
+ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
+ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
+ { SVM_EXIT_INTR, "interrupt" }, \
+ { SVM_EXIT_NMI, "nmi" }, \
+ { SVM_EXIT_SMI, "smi" }, \
+ { SVM_EXIT_INIT, "init" }, \
+ { SVM_EXIT_VINTR, "vintr" }, \
+ { SVM_EXIT_CPUID, "cpuid" }, \
+ { SVM_EXIT_INVD, "invd" }, \
+ { SVM_EXIT_HLT, "hlt" }, \
+ { SVM_EXIT_INVLPG, "invlpg" }, \
+ { SVM_EXIT_INVLPGA, "invlpga" }, \
+ { SVM_EXIT_IOIO, "io" }, \
+ { SVM_EXIT_MSR, "msr" }, \
+ { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
+ { SVM_EXIT_SHUTDOWN, "shutdown" }, \
+ { SVM_EXIT_VMRUN, "vmrun" }, \
+ { SVM_EXIT_VMMCALL, "hypercall" }, \
+ { SVM_EXIT_VMLOAD, "vmload" }, \
+ { SVM_EXIT_VMSAVE, "vmsave" }, \
+ { SVM_EXIT_STGI, "stgi" }, \
+ { SVM_EXIT_CLGI, "clgi" }, \
+ { SVM_EXIT_SKINIT, "skinit" }, \
+ { SVM_EXIT_WBINVD, "wbinvd" }, \
+ { SVM_EXIT_MONITOR, "monitor" }, \
+ { SVM_EXIT_MWAIT, "mwait" }, \
+ { SVM_EXIT_XSETBV, "xsetbv" }, \
+ { SVM_EXIT_NPF, "npf" }
+
+#ifdef __KERNEL__
+
enum {
INTERCEPT_INTR,
INTERCEPT_NMI,
@@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EXITINFO_REG_MASK 0x0F
-#define SVM_EXIT_READ_CR0 0x000
-#define SVM_EXIT_READ_CR3 0x003
-#define SVM_EXIT_READ_CR4 0x004
-#define SVM_EXIT_READ_CR8 0x008
-#define SVM_EXIT_WRITE_CR0 0x010
-#define SVM_EXIT_WRITE_CR3 0x013
-#define SVM_EXIT_WRITE_CR4 0x014
-#define SVM_EXIT_WRITE_CR8 0x018
-#define SVM_EXIT_READ_DR0 0x020
-#define SVM_EXIT_READ_DR1 0x021
-#define SVM_EXIT_READ_DR2 0x022
-#define SVM_EXIT_READ_DR3 0x023
-#define SVM_EXIT_READ_DR4 0x024
-#define SVM_EXIT_READ_DR5 0x025
-#define SVM_EXIT_READ_DR6 0x026
-#define SVM_EXIT_READ_DR7 0x027
-#define SVM_EXIT_WRITE_DR0 0x030
-#define SVM_EXIT_WRITE_DR1 0x031
-#define SVM_EXIT_WRITE_DR2 0x032
-#define SVM_EXIT_WRITE_DR3 0x033
-#define SVM_EXIT_WRITE_DR4 0x034
-#define SVM_EXIT_WRITE_DR5 0x035
-#define SVM_EXIT_WRITE_DR6 0x036
-#define SVM_EXIT_WRITE_DR7 0x037
-#define SVM_EXIT_EXCP_BASE 0x040
-#define SVM_EXIT_INTR 0x060
-#define SVM_EXIT_NMI 0x061
-#define SVM_EXIT_SMI 0x062
-#define SVM_EXIT_INIT 0x063
-#define SVM_EXIT_VINTR 0x064
-#define SVM_EXIT_CR0_SEL_WRITE 0x065
-#define SVM_EXIT_IDTR_READ 0x066
-#define SVM_EXIT_GDTR_READ 0x067
-#define SVM_EXIT_LDTR_READ 0x068
-#define SVM_EXIT_TR_READ 0x069
-#define SVM_EXIT_IDTR_WRITE 0x06a
-#define SVM_EXIT_GDTR_WRITE 0x06b
-#define SVM_EXIT_LDTR_WRITE 0x06c
-#define SVM_EXIT_TR_WRITE 0x06d
-#define SVM_EXIT_RDTSC 0x06e
-#define SVM_EXIT_RDPMC 0x06f
-#define SVM_EXIT_PUSHF 0x070
-#define SVM_EXIT_POPF 0x071
-#define SVM_EXIT_CPUID 0x072
-#define SVM_EXIT_RSM 0x073
-#define SVM_EXIT_IRET 0x074
-#define SVM_EXIT_SWINT 0x075
-#define SVM_EXIT_INVD 0x076
-#define SVM_EXIT_PAUSE 0x077
-#define SVM_EXIT_HLT 0x078
-#define SVM_EXIT_INVLPG 0x079
-#define SVM_EXIT_INVLPGA 0x07a
-#define SVM_EXIT_IOIO 0x07b
-#define SVM_EXIT_MSR 0x07c
-#define SVM_EXIT_TASK_SWITCH 0x07d
-#define SVM_EXIT_FERR_FREEZE 0x07e
-#define SVM_EXIT_SHUTDOWN 0x07f
-#define SVM_EXIT_VMRUN 0x080
-#define SVM_EXIT_VMMCALL 0x081
-#define SVM_EXIT_VMLOAD 0x082
-#define SVM_EXIT_VMSAVE 0x083
-#define SVM_EXIT_STGI 0x084
-#define SVM_EXIT_CLGI 0x085
-#define SVM_EXIT_SKINIT 0x086
-#define SVM_EXIT_RDTSCP 0x087
-#define SVM_EXIT_ICEBP 0x088
-#define SVM_EXIT_WBINVD 0x089
-#define SVM_EXIT_MONITOR 0x08a
-#define SVM_EXIT_MWAIT 0x08b
-#define SVM_EXIT_MWAIT_COND 0x08c
-#define SVM_EXIT_XSETBV 0x08d
-#define SVM_EXIT_NPF 0x400
-
-#define SVM_EXIT_ERR -1
-
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
@@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb {
#endif
+#endif
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 3fda9db4..4ca1c61 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -40,7 +40,7 @@ asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
struct old_sigaction32 __user *);
asmlinkage long sys32_alarm(unsigned int);
-asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
+asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
asmlinkage long sys32_sysfs(int, u32, u32);
asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89f794f..c535d847 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,6 +89,7 @@ struct thread_info {
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_FORK 18 /* ret_from_fork */
+#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_DEBUG 21 /* uses debug registers */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -114,6 +115,7 @@ struct thread_info {
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
+#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_DEBUG (1 << TIF_DEBUG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
@@ -126,12 +128,13 @@ struct thread_info {
/* work to do in syscall_trace_enter() */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_NOHZ)
/* work to do in syscall_trace_leave() */
#define _TIF_WORK_SYSCALL_EXIT \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
- _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
@@ -141,7 +144,8 @@ struct thread_info {
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
- ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
+ ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_NOHZ)
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index f3971bb..8ff8be7 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -42,10 +42,11 @@ struct arch_uprobe {
};
struct arch_uprobe_task {
- unsigned long saved_trap_nr;
#ifdef CONFIG_X86_64
unsigned long saved_scratch_register;
#endif
+ unsigned int saved_trap_nr;
+ unsigned int saved_tf;
};
extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index bb05228..fddb53d 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -11,7 +11,8 @@ extern const char VDSO32_PRELINK[];
#define VDSO32_SYMBOL(base, name) \
({ \
extern const char VDSO32_##name[]; \
- (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \
+ (void __user *)(VDSO32_##name - VDSO32_PRELINK + \
+ (unsigned long)(base)); \
})
#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 74fcb96..36ec21c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,88 @@
*
*/
+#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_INVPCID 58
+
+#define VMX_EXIT_REASONS /* { exit reason, "NAME" } initializer pairs. NOTE(review): EXIT_REASON_INVD, _INVALID_STATE, _XSETBV and _INVPCID are defined above but have no entry - confirm intentional */ \
+ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
+ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
+ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
+ { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
+ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
+ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
+ { EXIT_REASON_CPUID, "CPUID" }, \
+ { EXIT_REASON_HLT, "HLT" }, \
+ { EXIT_REASON_INVLPG, "INVLPG" }, \
+ { EXIT_REASON_RDPMC, "RDPMC" }, \
+ { EXIT_REASON_RDTSC, "RDTSC" }, \
+ { EXIT_REASON_VMCALL, "VMCALL" }, \
+ { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
+ { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
+ { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
+ { EXIT_REASON_VMPTRST, "VMPTRST" }, \
+ { EXIT_REASON_VMREAD, "VMREAD" }, \
+ { EXIT_REASON_VMRESUME, "VMRESUME" }, \
+ { EXIT_REASON_VMWRITE, "VMWRITE" }, \
+ { EXIT_REASON_VMOFF, "VMOFF" }, \
+ { EXIT_REASON_VMON, "VMON" }, \
+ { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
+ { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
+ { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
+ { EXIT_REASON_MSR_READ, "MSR_READ" }, \
+ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
+ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
+ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
+ { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
+ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
+ { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
+ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
+ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
+ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
+ { EXIT_REASON_WBINVD, "WBINVD" }
+
+#ifdef __KERNEL__
+
#include <linux/types.h>
/*
@@ -241,49 +323,6 @@ enum vmcs_field {
HOST_RIP = 0x00006c16,
};
-#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
-
-#define EXIT_REASON_EXCEPTION_NMI 0
-#define EXIT_REASON_EXTERNAL_INTERRUPT 1
-#define EXIT_REASON_TRIPLE_FAULT 2
-
-#define EXIT_REASON_PENDING_INTERRUPT 7
-#define EXIT_REASON_NMI_WINDOW 8
-#define EXIT_REASON_TASK_SWITCH 9
-#define EXIT_REASON_CPUID 10
-#define EXIT_REASON_HLT 12
-#define EXIT_REASON_INVD 13
-#define EXIT_REASON_INVLPG 14
-#define EXIT_REASON_RDPMC 15
-#define EXIT_REASON_RDTSC 16
-#define EXIT_REASON_VMCALL 18
-#define EXIT_REASON_VMCLEAR 19
-#define EXIT_REASON_VMLAUNCH 20
-#define EXIT_REASON_VMPTRLD 21
-#define EXIT_REASON_VMPTRST 22
-#define EXIT_REASON_VMREAD 23
-#define EXIT_REASON_VMRESUME 24
-#define EXIT_REASON_VMWRITE 25
-#define EXIT_REASON_VMOFF 26
-#define EXIT_REASON_VMON 27
-#define EXIT_REASON_CR_ACCESS 28
-#define EXIT_REASON_DR_ACCESS 29
-#define EXIT_REASON_IO_INSTRUCTION 30
-#define EXIT_REASON_MSR_READ 31
-#define EXIT_REASON_MSR_WRITE 32
-#define EXIT_REASON_INVALID_STATE 33
-#define EXIT_REASON_MWAIT_INSTRUCTION 36
-#define EXIT_REASON_MONITOR_INSTRUCTION 39
-#define EXIT_REASON_PAUSE_INSTRUCTION 40
-#define EXIT_REASON_MCE_DURING_VMENTRY 41
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS 44
-#define EXIT_REASON_EPT_VIOLATION 48
-#define EXIT_REASON_EPT_MISCONFIG 49
-#define EXIT_REASON_WBINVD 54
-#define EXIT_REASON_XSETBV 55
-#define EXIT_REASON_INVPCID 58
-
/*
* Interruption-information format
*/
@@ -488,3 +527,5 @@ enum vm_instruction_error_number {
};
#endif
+
+#endif
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 38155f6..5769349 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -81,12 +81,13 @@ struct x86_init_mapping {
/**
* struct x86_init_paging - platform specific paging functions
- * @pagetable_setup_start: platform specific pre paging_init() call
- * @pagetable_setup_done: platform specific post paging_init() call
+ * @pagetable_init: platform specific paging initialization call to set up
+ * the kernel pagetables and prepare accessor functions.
+ * Callback must call paging_init(). Called once after the
+ * direct mapping for phys memory is available.
*/
struct x86_init_paging {
- void (*pagetable_setup_start)(pgd_t *base);
- void (*pagetable_setup_done)(pgd_t *base);
+ void (*pagetable_init)(void);
};
/**
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 93971e8..472b9b7 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
extern int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op);
-extern int m2p_remove_override(struct page *page, bool clear_pte);
+extern int m2p_remove_override(struct page *page,
+ struct gnttab_map_grant_ref *kmap_op);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 4545708..aabd585 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
* Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
*/
-#define XMMS_SAVE \
-do { \
- preempt_disable(); \
- cr0 = read_cr0(); \
- clts(); \
- asm volatile( \
- "movups %%xmm0,(%0) ;\n\t" \
- "movups %%xmm1,0x10(%0) ;\n\t" \
- "movups %%xmm2,0x20(%0) ;\n\t" \
- "movups %%xmm3,0x30(%0) ;\n\t" \
- : \
- : "r" (xmm_save) \
- : "memory"); \
-} while (0)
-
-#define XMMS_RESTORE \
-do { \
- asm volatile( \
- "sfence ;\n\t" \
- "movups (%0),%%xmm0 ;\n\t" \
- "movups 0x10(%0),%%xmm1 ;\n\t" \
- "movups 0x20(%0),%%xmm2 ;\n\t" \
- "movups 0x30(%0),%%xmm3 ;\n\t" \
- : \
- : "r" (xmm_save) \
- : "memory"); \
- write_cr0(cr0); \
- preempt_enable(); \
-} while (0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
#define OFFS(x) "16*("#x")"
#define PF_OFFS(x) "256+16*("#x")"
#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
@@ -587,10 +555,8 @@ static void
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
unsigned long lines = bytes >> 8;
- char xmm_save[16*4] ALIGN16;
- int cr0;
- XMMS_SAVE;
+ kernel_fpu_begin();
asm volatile(
#undef BLOCK
@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
:
: "memory");
- XMMS_RESTORE;
+ kernel_fpu_end();
}
static void
@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3)
{
unsigned long lines = bytes >> 8;
- char xmm_save[16*4] ALIGN16;
- int cr0;
- XMMS_SAVE;
+ kernel_fpu_begin();
asm volatile(
#undef BLOCK
@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
:
: "memory" );
- XMMS_RESTORE;
+ kernel_fpu_end();
}
static void
@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4)
{
unsigned long lines = bytes >> 8;
- char xmm_save[16*4] ALIGN16;
- int cr0;
- XMMS_SAVE;
+ kernel_fpu_begin();
asm volatile(
#undef BLOCK
@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
:
: "memory" );
- XMMS_RESTORE;
+ kernel_fpu_end();
}
static void
@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
unsigned long lines = bytes >> 8;
- char xmm_save[16*4] ALIGN16;
- int cr0;
- XMMS_SAVE;
+ kernel_fpu_begin();
/* Make sure GCC forgets anything it knows about p4 or p5,
such that it won't pass to the asm volatile below a
@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
like assuming they have some legal value. */
asm("" : "=r" (p4), "=r" (p5));
- XMMS_RESTORE;
+ kernel_fpu_end();
}
static struct xor_block_template xor_block_pIII_sse = {
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index b9b2323..5fc06d0 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -34,41 +34,7 @@
* no advantages to be gotten from x86-64 here anyways.
*/
-typedef struct {
- unsigned long a, b;
-} __attribute__((aligned(16))) xmm_store_t;
-
-/* Doesn't use gcc to save the XMM registers, because there is no easy way to
- tell it to do a clts before the register saving. */
-#define XMMS_SAVE \
-do { \
- preempt_disable(); \
- asm volatile( \
- "movq %%cr0,%0 ;\n\t" \
- "clts ;\n\t" \
- "movups %%xmm0,(%1) ;\n\t" \
- "movups %%xmm1,0x10(%1) ;\n\t" \
- "movups %%xmm2,0x20(%1) ;\n\t" \
- "movups %%xmm3,0x30(%1) ;\n\t" \
- : "=&r" (cr0) \
- : "r" (xmm_save) \
- : "memory"); \
-} while (0)
-
-#define XMMS_RESTORE \
-do { \
- asm volatile( \
- "sfence ;\n\t" \
- "movups (%1),%%xmm0 ;\n\t" \
- "movups 0x10(%1),%%xmm1 ;\n\t" \
- "movups 0x20(%1),%%xmm2 ;\n\t" \
- "movups 0x30(%1),%%xmm3 ;\n\t" \
- "movq %0,%%cr0 ;\n\t" \
- : \
- : "r" (cr0), "r" (xmm_save) \
- : "memory"); \
- preempt_enable(); \
-} while (0)
+#include <asm/i387.h>
#define OFFS(x) "16*("#x")"
#define PF_OFFS(x) "256+16*("#x")"
@@ -91,10 +57,8 @@ static void
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
unsigned int lines = bytes >> 8;
- unsigned long cr0;
- xmm_store_t xmm_save[4];
- XMMS_SAVE;
+ kernel_fpu_begin();
asm volatile(
#undef BLOCK
@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
: [inc] "r" (256UL)
: "memory");
- XMMS_RESTORE;
+ kernel_fpu_end();
}
static void
@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3)
{
unsigned int lines = bytes >> 8;
- xmm_store_t xmm_save[4];
- unsigned long cr0;
-
- XMMS_SAVE;
+ kernel_fpu_begin();
asm volatile(
#undef BLOCK
#define BLOCK(i) \
@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
[p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
: [inc] "r" (256UL)
: "memory");
- XMMS_RESTORE;
+ kernel_fpu_end();
}
static void
@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4)
{
unsigned int lines = bytes >> 8;
- xmm_store_t xmm_save[4];
- unsigned long cr0;
- XMMS_SAVE;
+ kernel_fpu_begin();
asm volatile(
#undef BLOCK
@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: [inc] "r" (256UL)
: "memory" );
- XMMS_RESTORE;
+ kernel_fpu_end();
}
static void
@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
unsigned int lines = bytes >> 8;
- xmm_store_t xmm_save[4];
- unsigned long cr0;
- XMMS_SAVE;
+ kernel_fpu_begin();
asm volatile(
#undef BLOCK
@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: [inc] "r" (256UL)
: "memory");
- XMMS_RESTORE;
+ kernel_fpu_end();
}
static struct xor_block_template xor_block_sse = {
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 2510d35..7ea79c5 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -20,32 +20,6 @@
#include <linux/compiler.h>
#include <asm/i387.h>
-#define ALIGN32 __aligned(32)
-
-#define YMM_SAVED_REGS 4
-
-#define YMMS_SAVE \
-do { \
- preempt_disable(); \
- cr0 = read_cr0(); \
- clts(); \
- asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
- asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
- asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
- asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
-} while (0);
-
-#define YMMS_RESTORE \
-do { \
- asm volatile("sfence" : : : "memory"); \
- asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
- asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
- asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
- asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
- write_cr0(cr0); \
- preempt_enable(); \
-} while (0);
-
#define BLOCK4(i) \
BLOCK(32 * i, 0) \
BLOCK(32 * (i + 1), 1) \
@@ -60,10 +34,9 @@ do { \
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
- unsigned long cr0, lines = bytes >> 9;
- char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+ unsigned long lines = bytes >> 9;
- YMMS_SAVE
+ kernel_fpu_begin();
while (lines--) {
#undef BLOCK
@@ -82,16 +55,15 @@ do { \
p1 = (unsigned long *)((uintptr_t)p1 + 512);
}
- YMMS_RESTORE
+ kernel_fpu_end();
}
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2)
{
- unsigned long cr0, lines = bytes >> 9;
- char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+ unsigned long lines = bytes >> 9;
- YMMS_SAVE
+ kernel_fpu_begin();
while (lines--) {
#undef BLOCK
@@ -113,16 +85,15 @@ do { \
p2 = (unsigned long *)((uintptr_t)p2 + 512);
}
- YMMS_RESTORE
+ kernel_fpu_end();
}
static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2, unsigned long *p3)
{
- unsigned long cr0, lines = bytes >> 9;
- char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+ unsigned long lines = bytes >> 9;
- YMMS_SAVE
+ kernel_fpu_begin();
while (lines--) {
#undef BLOCK
@@ -147,16 +118,15 @@ do { \
p3 = (unsigned long *)((uintptr_t)p3 + 512);
}
- YMMS_RESTORE
+ kernel_fpu_end();
}
static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
- unsigned long cr0, lines = bytes >> 9;
- char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+ unsigned long lines = bytes >> 9;
- YMMS_SAVE
+ kernel_fpu_begin();
while (lines--) {
#undef BLOCK
@@ -184,7 +154,7 @@ do { \
p4 = (unsigned long *)((uintptr_t)p4 + 512);
}
- YMMS_RESTORE
+ kernel_fpu_end();
}
static struct xor_block_template xor_block_avx = {
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 8a1b6f9..2ddee1b8 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -34,17 +34,14 @@
extern unsigned int xstate_size;
extern u64 pcntxt_mask;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+extern struct xsave_struct *init_xstate_buf;
extern void xsave_init(void);
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
extern int init_fpu(struct task_struct *child);
-extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
- void __user *fpstate,
- struct _fpx_sw_bytes *sw);
-static inline int fpu_xrstor_checking(struct fpu *fpu)
+static inline int fpu_xrstor_checking(struct xsave_struct *fx)
{
- struct xsave_struct *fx = &fpu->state->xsave;
int err;
asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
@@ -69,8 +66,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
* Clear the xsave header first, so that reserved fields are
* initialized to zero.
*/
- err = __clear_user(&buf->xsave_hdr,
- sizeof(struct xsave_hdr_struct));
+ err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
if (unlikely(err))
return -EFAULT;
@@ -84,9 +80,6 @@ static inline int xsave_user(struct xsave_struct __user *buf)
: [err] "=r" (err)
: "D" (buf), "a" (-1), "d" (-1), "0" (0)
: "memory");
- if (unlikely(err) && __clear_user(buf, xstate_size))
- err = -EFAULT;
- /* No need to clear here because the caller clears USED_MATH */
return err;
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e56..8d7a619 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_OF) += devicetree.o
obj-$(CONFIG_UPROBES) += uprobes.o
+obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e5..e651f7a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
acpi_register_lapic(physid, ACPI_MADT_ENABLED);
/*
- * If mp_register_lapic successfully generates a new logical cpu
+ * If acpi_register_lapic successfully generates a new logical cpu
* number, then the following will get us exactly what was mapped
*/
cpumask_andnot(new_map, cpu_present_mask, tmp_map);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ced4534..ef5ccca 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,19 +23,6 @@
#define MAX_PATCH_LEN (255-1)
-#ifdef CONFIG_HOTPLUG_CPU
-static int smp_alt_once;
-
-static int __init bootonly(char *str)
-{
- smp_alt_once = 1;
- return 1;
-}
-__setup("smp-alt-boot", bootonly);
-#else
-#define smp_alt_once 1
-#endif
-
static int __initdata_or_module debug_alternative;
static int __init debug_alt(char *str)
@@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
/* turn DS segment override prefix into lock prefix */
if (*ptr == 0x3e)
text_poke(ptr, ((unsigned char []){0xf0}), 1);
- };
+ }
mutex_unlock(&text_mutex);
}
@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
{
const s32 *poff;
- if (noreplace_smp)
- return;
-
mutex_lock(&text_mutex);
for (poff = start; poff < end; poff++) {
u8 *ptr = (u8 *)poff + *poff;
@@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
/* turn lock prefix into DS segment override prefix */
if (*ptr == 0xf0)
text_poke(ptr, ((unsigned char []){0x3E}), 1);
- };
+ }
mutex_unlock(&text_mutex);
}
@@ -359,7 +343,7 @@ struct smp_alt_module {
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
-static int smp_mode = 1; /* protected by smp_alt */
+static bool uniproc_patched = false; /* protected by smp_alt */
void __init_or_module alternatives_smp_module_add(struct module *mod,
char *name,
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
{
struct smp_alt_module *smp;
- if (noreplace_smp)
- return;
+ mutex_lock(&smp_alt);
+ if (!uniproc_patched)
+ goto unlock;
- if (smp_alt_once) {
- if (boot_cpu_has(X86_FEATURE_UP))
- alternatives_smp_unlock(locks, locks_end,
- text, text_end);
- return;
- }
+ if (num_possible_cpus() == 1)
+ /* Don't bother remembering, we'll never have to undo it. */
+ goto smp_unlock;
smp = kzalloc(sizeof(*smp), GFP_KERNEL);
if (NULL == smp)
- return; /* we'll run the (safe but slow) SMP code then ... */
+ /* we'll run the (safe but slow) SMP code then ... */
+ goto unlock;
smp->mod = mod;
smp->name = name;
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
__func__, smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
- mutex_lock(&smp_alt);
list_add_tail(&smp->next, &smp_alt_modules);
- if (boot_cpu_has(X86_FEATURE_UP))
- alternatives_smp_unlock(smp->locks, smp->locks_end,
- smp->text, smp->text_end);
+smp_unlock:
+ alternatives_smp_unlock(locks, locks_end, text, text_end);
+unlock:
mutex_unlock(&smp_alt);
}
@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
{
struct smp_alt_module *item;
- if (smp_alt_once || noreplace_smp)
- return;
-
mutex_lock(&smp_alt);
list_for_each_entry(item, &smp_alt_modules, next) {
if (mod != item->mod)
continue;
list_del(&item->next);
- mutex_unlock(&smp_alt);
- DPRINTK("%s: %s\n", __func__, item->name);
kfree(item);
- return;
+ break;
}
mutex_unlock(&smp_alt);
}
-bool skip_smp_alternatives;
-void alternatives_smp_switch(int smp)
+void alternatives_enable_smp(void)
{
struct smp_alt_module *mod;
@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp)
pr_info("lockdep: fixing up alternatives\n");
#endif
- if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
- return;
- BUG_ON(!smp && (num_online_cpus() > 1));
+ /* Why bother if there are no other CPUs? */
+ BUG_ON(num_possible_cpus() == 1);
mutex_lock(&smp_alt);
- /*
- * Avoid unnecessary switches because it forces JIT based VMs to
- * throw away all cached translations, which can be quite costly.
- */
- if (smp == smp_mode) {
- /* nothing */
- } else if (smp) {
+ if (uniproc_patched) {
pr_info("switching to SMP code\n");
+ BUG_ON(num_online_cpus() != 1);
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_lock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
- } else {
- pr_info("switching to UP code\n");
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
- set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
- list_for_each_entry(mod, &smp_alt_modules, next)
- alternatives_smp_unlock(mod->locks, mod->locks_end,
- mod->text, mod->text_end);
+ uniproc_patched = false;
}
- smp_mode = smp;
mutex_unlock(&smp_alt);
}
@@ -540,40 +503,22 @@ void __init alternative_instructions(void)
apply_alternatives(__alt_instructions, __alt_instructions_end);
- /* switch to patch-once-at-boottime-only mode and free the
- * tables in case we know the number of CPUs will never ever
- * change */
-#ifdef CONFIG_HOTPLUG_CPU
- if (num_possible_cpus() < 2)
- smp_alt_once = 1;
-#endif
-
#ifdef CONFIG_SMP
- if (smp_alt_once) {
- if (1 == num_possible_cpus()) {
- pr_info("switching to UP code\n");
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
- set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-
- alternatives_smp_unlock(__smp_locks, __smp_locks_end,
- _text, _etext);
- }
- } else {
+ /* Patch to UP if other cpus not imminent. */
+ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
+ uniproc_patched = true;
alternatives_smp_module_add(NULL, "core kernel",
__smp_locks, __smp_locks_end,
_text, _etext);
-
- /* Only switch to UP mode if we don't immediately boot others */
- if (num_present_cpus() == 1 || setup_max_cpus <= 1)
- alternatives_smp_switch(0);
}
-#endif
- apply_paravirt(__parainstructions, __parainstructions_end);
- if (smp_alt_once)
+ if (!uniproc_patched || num_possible_cpus() == 1)
free_init_pages("SMP alternatives",
(unsigned long)__smp_locks,
(unsigned long)__smp_locks_end);
+#endif
+
+ apply_paravirt(__parainstructions, __parainstructions_end);
restart_nmi();
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24deb30..b17416e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs)
apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
i++;
v1 >>= 1;
- };
+ }
apic_printk(APIC_DEBUG, KERN_CONT "\n");
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9d92e19..f7e98a2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
}
#endif
+static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has_invlpg)
+ return;
+
+ tlb_flushall_shift = 5;
+
+ if (c->x86 <= 0x11)
+ tlb_flushall_shift = 4;
+}
+
+static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
+{
+ u32 ebx, eax, ecx, edx;
+ u16 mask = 0xfff;
+
+ if (c->x86 < 0xf)
+ return;
+
+ if (c->extended_cpuid_level < 0x80000006)
+ return;
+
+ cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
+
+ tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
+ tlb_lli_4k[ENTRIES] = ebx & mask;
+
+ /*
+ * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
+ * characteristics from the CPUID function 0x80000005 instead.
+ */
+ if (c->x86 == 0xf) {
+ cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+ mask = 0xff;
+ }
+
+ /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+ if (!((eax >> 16) & mask)) {
+ u32 a, b, c, d;
+
+ cpuid(0x80000005, &a, &b, &c, &d);
+ tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
+ } else {
+ tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+ }
+
+ /* a 4M entry uses two 2M entries */
+ tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+
+ /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+ if (!(eax & mask)) {
+ /* Erratum 658 */
+ if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
+ tlb_lli_2m[ENTRIES] = 1024;
+ } else {
+ cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+ tlb_lli_2m[ENTRIES] = eax & 0xff;
+ }
+ } else
+ tlb_lli_2m[ENTRIES] = eax & mask;
+
+ tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+ cpu_set_tlb_flushall_shift(c);
+}
+
static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
.c_vendor = "AMD",
.c_ident = { "AuthenticAMD" },
@@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
.c_size_cache = amd_size_cache,
#endif
.c_early_init = early_init_amd,
+ .c_detect_tlb = cpu_detect_tlb_amd,
.c_bsp_init = bsp_init_amd,
.c_init = init_amd,
.c_x86_vendor = X86_VENDOR_AMD,
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b..d0e910d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
#endif
check_config();
- check_fpu();
check_hlt();
check_popad();
init_utsname()->machine[1] =
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();
+
+ /*
+ * kernel_fpu_begin/end() in check_fpu() relies on the patched
+ * alternative instructions.
+ */
+ check_fpu();
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c..532691b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,7 +476,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
- "tlb_flushall_shift is 0x%x\n",
+ "tlb_flushall_shift: %d\n",
tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
@@ -942,8 +942,7 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
- if (boot_cpu_data.cpuid_level >= 2)
- cpu_detect_tlb(&boot_cpu_data);
+ cpu_detect_tlb(&boot_cpu_data);
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1023,14 +1022,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
printk(KERN_CONT "%s ", vendor);
if (c->x86_model_id[0])
- printk(KERN_CONT "%s", c->x86_model_id);
+ printk(KERN_CONT "%s", strim(c->x86_model_id));
else
printk(KERN_CONT "%d86", c->x86);
+ printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+
if (c->x86_mask || c->cpuid_level >= 0)
- printk(KERN_CONT " stepping %02x\n", c->x86_mask);
+ printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
else
- printk(KERN_CONT "\n");
+ printk(KERN_CONT ")\n");
print_cpu_msr(c);
}
@@ -1116,8 +1117,6 @@ void syscall_init(void)
X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
}
-unsigned long kernel_eflags;
-
/*
* Copies of the original ist values from the tss are only accessed during
* debugging, no special alignment required.
@@ -1297,9 +1296,6 @@ void __cpuinit cpu_init(void)
dbg_restore_debug_regs();
fpu_init();
- xsave_init();
-
- raw_local_save_flags(kernel_eflags);
if (is_uv_system())
uv_cpu_init();
@@ -1352,6 +1348,5 @@ void __cpuinit cpu_init(void)
dbg_restore_debug_regs();
fpu_init();
- xsave_init();
}
#endif
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0a4ce29..198e019 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
int i, j, n;
unsigned int regs[4];
unsigned char *desc = (unsigned char *)regs;
+
+ if (c->cpuid_level < 2)
+ return;
+
/* Number of times to iterate */
n = cpuid_eax(2) & 0xFF;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb3..ddc72f8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
}
static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
put_online_cpus();
} else
#endif
+ {
+ preempt_disable();
raise_local();
+ preempt_enable();
+ }
}
/* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
* so do it a jiffie or two later everywhere.
*/
schedule_timeout(2);
+
+ mutex_lock(&mce_inject_mutex);
raise_mce(&m);
+ mutex_unlock(&mce_inject_mutex);
return usize;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a..6a05c1d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
extern struct mce_bank *mce_banks;
+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d025..29e87d3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
int mce_cmci_disabled __read_mostly;
int mce_ignore_ce __read_mostly;
int mce_ser __read_mostly;
+int mce_bios_cmci_threshold __read_mostly;
struct mce_bank *mce_banks __read_mostly;
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+ return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+ mce_adjust_timer_default;
+
static void mce_timer_fn(unsigned long data)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
if (mce_available(__this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
+ mce_intel_cmci_poll();
}
/*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
* polling interval, otherwise increase the polling interval.
*/
iv = __this_cpu_read(mce_next_interval);
- if (mce_notify_irq())
+ if (mce_notify_irq()) {
iv = max(iv / 2, (unsigned long) HZ/100);
- else
+ } else {
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+ iv = mce_adjust_timer(iv);
+ }
__this_cpu_write(mce_next_interval, iv);
+ /* Might have become 0 after CMCI storm subsided */
+ if (iv) {
+ t->expires = jiffies + iv;
+ add_timer_on(t, smp_processor_id());
+ }
+}
- t->expires = jiffies + iv;
- add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+ struct timer_list *t = &__get_cpu_var(mce_timer);
+ unsigned long when = jiffies + interval;
+ unsigned long iv = __this_cpu_read(mce_next_interval);
+
+ if (timer_pending(t)) {
+ if (time_before(when, t->expires))
+ mod_timer_pinned(t, when);
+ } else {
+ t->expires = round_jiffies(when);
+ add_timer_on(t, smp_processor_id());
+ }
+ if (interval < iv)
+ __this_cpu_write(mce_next_interval, interval);
}
/* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
mce_intel_feature_init(c);
+ mce_adjust_timer = mce_intel_adjust_timer;
break;
case X86_VENDOR_AMD:
mce_amd_feature_init(c);
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
}
}
-static void __mcheck_cpu_init_timer(void)
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
{
- struct timer_list *t = &__get_cpu_var(mce_timer);
- unsigned long iv = check_interval * HZ;
+ unsigned long iv = mce_adjust_timer(check_interval * HZ);
- setup_timer(t, mce_timer_fn, smp_processor_id());
+ __this_cpu_write(mce_next_interval, iv);
- if (mce_ignore_ce)
+ if (mce_ignore_ce || !iv)
return;
- __this_cpu_write(mce_next_interval, iv);
- if (!iv)
- return;
t->expires = round_jiffies(jiffies + iv);
add_timer_on(t, smp_processor_id());
}
+static void __mcheck_cpu_init_timer(void)
+{
+ struct timer_list *t = &__get_cpu_var(mce_timer);
+ unsigned int cpu = smp_processor_id();
+
+ setup_timer(t, mce_timer_fn, cpu);
+ mce_start_timer(cpu, t);
+}
+
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
* check, or 0 to not wait
* mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
* mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
*/
static int __init mcheck_enable(char *str)
{
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
mce_ignore_ce = 1;
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
mce_bootlog = (str[0] == 'b');
+ else if (!strcmp(str, "bios_cmci_threshold"))
+ mce_bios_cmci_threshold = 1;
else if (isdigit(str[0])) {
get_option(&str, &tolerant);
if (*str == ',') {
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
&mce_cmci_disabled
};
+static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
+ __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
+ &mce_bios_cmci_threshold
+};
+
static struct device_attribute *mce_device_attrs[] = {
&dev_attr_tolerant.attr,
&dev_attr_check_interval.attr,
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
&dev_attr_dont_log_ce.attr,
&dev_attr_ignore_ce.attr,
&dev_attr_cmci_disabled.attr,
+ &dev_attr_bios_cmci_threshold.attr,
NULL
};
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
unsigned int cpu = (unsigned long)hcpu;
struct timer_list *t = &per_cpu(mce_timer, cpu);
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
mce_device_create(cpu);
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
break;
case CPU_DEAD:
- case CPU_DEAD_FROZEN:
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
mce_device_remove(cpu);
+ mce_intel_hcpu_update(cpu);
break;
case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- del_timer_sync(t);
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+ del_timer_sync(t);
break;
case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- if (!mce_ignore_ce && check_interval) {
- t->expires = round_jiffies(jiffies +
- per_cpu(mce_next_interval, cpu));
- add_timer_on(t, cpu);
- }
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+ mce_start_timer(cpu, t);
break;
- case CPU_POST_DEAD:
+ }
+
+ if (action == CPU_POST_DEAD) {
/* intentionally ignoring frozen here */
cmci_rediscover(cpu);
- break;
}
+
return NOTIFY_OK;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc9..5f88abf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
#include <asm/msr.h>
#include <asm/mce.h>
+#include "mce-internal.h"
+
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
*/
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
-#define CMCI_THRESHOLD 1
+#define CMCI_THRESHOLD 1
+#define CMCI_POLL_INTERVAL (30 * HZ)
+#define CMCI_STORM_INTERVAL (1 * HZ)
+#define CMCI_STORM_THRESHOLD 15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+ CMCI_STORM_NONE,
+ CMCI_STORM_ACTIVE,
+ CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;
static int cmci_supported(int *banks)
{
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
return !!(cap & MCG_CMCI_P);
}
+void mce_intel_cmci_poll(void)
+{
+ if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+ return;
+ machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+ if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+ atomic_dec(&cmci_storm_on_cpus);
+
+ per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+ int r;
+
+ if (interval < CMCI_POLL_INTERVAL)
+ return interval;
+
+ switch (__this_cpu_read(cmci_storm_state)) {
+ case CMCI_STORM_ACTIVE:
+ /*
+ * We switch back to interrupt mode once the poll timer has
+ * silenced itself. That means no events recorded and the
+ * timer interval is back to our poll interval.
+ */
+ __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+ r = atomic_sub_return(1, &cmci_storm_on_cpus);
+ if (r == 0)
+ pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+ /* FALLTHROUGH */
+
+ case CMCI_STORM_SUBSIDED:
+ /*
+ * We wait for all cpus to go back to SUBSIDED
+ * state. When that happens we switch back to
+ * interrupt mode.
+ */
+ if (!atomic_read(&cmci_storm_on_cpus)) {
+ __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+ cmci_reenable();
+ cmci_recheck();
+ }
+ return CMCI_POLL_INTERVAL;
+ default:
+ /*
+ * We have shiny weather. Let the poll do whatever it
+ * thinks.
+ */
+ return interval;
+ }
+}
+
+static bool cmci_storm_detect(void)
+{
+ unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+ unsigned long ts = __this_cpu_read(cmci_time_stamp);
+ unsigned long now = jiffies;
+ int r;
+
+ if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+ return true;
+
+ if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+ cnt++;
+ } else {
+ cnt = 1;
+ __this_cpu_write(cmci_time_stamp, now);
+ }
+ __this_cpu_write(cmci_storm_cnt, cnt);
+
+ if (cnt <= CMCI_STORM_THRESHOLD)
+ return false;
+
+ cmci_clear();
+ __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+ r = atomic_add_return(1, &cmci_storm_on_cpus);
+ mce_timer_kick(CMCI_POLL_INTERVAL);
+
+ if (r == 1)
+ pr_notice("CMCI storm detected: switching to poll mode\n");
+ return true;
+}
+
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
*/
static void intel_threshold_interrupt(void)
{
+ if (cmci_storm_detect())
+ return;
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
mce_notify_irq();
}
-static void print_update(char *type, int *hdr, int num)
-{
- if (*hdr == 0)
- printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
- *hdr = 1;
- printk(KERN_CONT " %s:%d", type, num);
-}
-
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
unsigned long flags;
- int hdr = 0;
int i;
+ int bios_wrong_thresh = 0;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
+ int bios_zero_thresh = 0;
if (test_bit(i, owned))
continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot)
/* Already owned by someone else? */
if (val & MCI_CTL2_CMCI_EN) {
- if (test_and_clear_bit(i, owned) && !boot)
- print_update("SHD", &hdr, i);
+ clear_bit(i, owned);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
continue;
}
- val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
- val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
+ if (!mce_bios_cmci_threshold) {
+ val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+ val |= CMCI_THRESHOLD;
+ } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
+ /*
+ * If bios_cmci_threshold boot option was specified
+ * but the threshold is zero, we'll try to initialize
+ * it to 1.
+ */
+ bios_zero_thresh = 1;
+ val |= CMCI_THRESHOLD;
+ }
+
+ val |= MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Did the enable bit stick? -- the bank supports CMCI */
if (val & MCI_CTL2_CMCI_EN) {
- if (!test_and_set_bit(i, owned) && !boot)
- print_update("CMCI", &hdr, i);
+ set_bit(i, owned);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
+ /*
+ * We are able to set thresholds for some banks that
+ * had a threshold of 0. This means the BIOS has not
+ * set the thresholds properly or does not work with
+ * this boot option. Note down now and report later.
+ */
+ if (mce_bios_cmci_threshold && bios_zero_thresh &&
+ (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
+ bios_wrong_thresh = 1;
} else {
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
- if (hdr)
- printk(KERN_CONT "\n");
+ if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+ pr_info_once(
+ "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
+ pr_info_once(
+ "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
+ }
}
/*
@@ -156,7 +278,7 @@ void cmci_clear(void)
continue;
/* Disable CMCI */
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
- val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
+ val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
continue;
/* Recheck banks in case CPUs don't all have the same */
if (cmci_supported(&banks))
- cmci_discover(banks, 0);
+ cmci_discover(banks);
}
set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
{
int banks;
if (cmci_supported(&banks))
- cmci_discover(banks, 0);
+ cmci_discover(banks);
}
static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
return;
mce_threshold_vector = intel_threshold_interrupt;
- cmci_discover(banks, 1);
+ cmci_discover(banks);
/*
* For CPU #0 this runs with still disabled APIC, but that's
* ok because only the vector is set up. We still do another
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6605a81..8b6defe 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[];
extern struct event_constraint intel_snb_pebs_event_constraints[];
+extern struct event_constraint intel_ivb_pebs_event_constraints[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 7bfb5be..eebd5ff 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
return -EOPNOTSUPP;
}
+static const struct perf_event_attr ibs_notsupp = {
+ .exclude_user = 1,
+ .exclude_kernel = 1,
+ .exclude_hv = 1,
+ .exclude_idle = 1,
+ .exclude_host = 1,
+ .exclude_guest = 1,
+};
+
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event)
if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
+ if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+ return -EINVAL;
+
if (config & ~perf_ibs->config_mask)
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7f2739e..6bca492 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2008,6 +2008,7 @@ __init int intel_pmu_init(void)
break;
case 28: /* Atom */
+ case 54: /* Cedarview */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2047,7 +2048,6 @@ __init int intel_pmu_init(void)
case 42: /* SandyBridge */
case 45: /* SandyBridge, "Romely-EP" */
x86_add_quirk(intel_sandybridge_quirk);
- case 58: /* IvyBridge */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2072,6 +2072,29 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
+ case 58: /* IvyBridge */
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_snb_event_constraints;
+ x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.extra_regs = intel_snb_extra_regs;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+ /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+ pr_cont("IvyBridge events, ");
+ break;
+
default:
switch (x86_pmu.version) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index e38d97b..826054a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 520b426..da02e9c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void)
* to have an operational LBR which can freeze
* on PMU interrupt
*/
- if (boot_cpu_data.x86_mask < 10) {
+ if (boot_cpu_data.x86_model == 28
+ && boot_cpu_data.x86_mask < 10) {
pr_cont("LBR disabled due to erratum");
return;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 0a55710..99d96a4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
}
}
+static struct uncore_event_desc snb_uncore_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ { /* end: all zeroes */ },
+};
+
static struct attribute *snb_uncore_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = {
.constraints = snb_uncore_cbox_constraints,
.ops = &snb_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
+ .event_descs = snb_uncore_events,
};
static struct intel_uncore_type *snb_msr_uncores[] = {
@@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
- static struct intel_uncore_box *box;
+ struct intel_uncore_box *box;
box = *per_cpu_ptr(pmu->box, cpu);
if (box)
@@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event)
return ret;
}
+/*
+ * Show callback for the "cpumask" device attribute: formats
+ * uncore_cpu_mask into @buf with cpulist_scnprintf() and appends a newline.
+ *
+ * The formatter is limited to PAGE_SIZE - 2 bytes so that the '\n' and the
+ * terminating '\0' written afterwards still fit (this assumes @buf is at
+ * least PAGE_SIZE bytes -- confirm against the attribute show contract).
+ * @dev and @attr are unused.
+ *
+ * Returns the number of bytes placed in @buf, counting the newline but not
+ * the terminating NUL.
+ */
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
+
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ return n;
+}
+
+/* "cpumask" attribute: S_IRUGO permissions, show callback only (no store). */
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+/* NULL-terminated list of the attributes in uncore_pmu_attr_group. */
+static struct attribute *uncore_pmu_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+/*
+ * Unnamed attribute group holding the cpumask attribute;
+ * uncore_type_init() installs it as type->pmu_group for every uncore type.
+ */
+static struct attribute_group uncore_pmu_attr_group = {
+ .attrs = uncore_pmu_attrs,
+};
+
static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
int ret;
@@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
free_percpu(type->pmus[i].box);
kfree(type->pmus);
type->pmus = NULL;
- kfree(type->attr_groups[1]);
- type->attr_groups[1] = NULL;
+ kfree(type->events_group);
+ type->events_group = NULL;
}
static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
for (j = 0; j < i; j++)
attrs[j] = &type->event_descs[j].attr.attr;
- type->attr_groups[1] = events_group;
+ type->events_group = events_group;
}
+ type->pmu_group = &uncore_pmu_attr_group;
type->pmus = pmus;
return 0;
fail:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 5b81c18..e68a455 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@ struct intel_uncore_type {
struct intel_uncore_pmu *pmus;
struct intel_uncore_ops *ops;
struct uncore_event_desc *event_descs;
- const struct attribute_group *attr_groups[3];
+ const struct attribute_group *attr_groups[4];
};
-#define format_group attr_groups[0]
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
struct intel_uncore_ops {
void (*init_box)(struct intel_uncore_box *);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 8022c66..fbd8955 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
static void *c_start(struct seq_file *m, loff_t *pos)
{
- if (*pos == 0) /* just in case, cpu 0 is not the first */
- *pos = cpumask_first(cpu_online_mask);
- else
- *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
if ((*pos) < nr_cpu_ids)
return &cpu_data(*pos);
return NULL;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 39472dd..60c7891 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -199,12 +199,14 @@ static int __init cpuid_init(void)
goto out_chrdev;
}
cpuid_class->devnode = cpuid_devnode;
+ get_online_cpus();
for_each_online_cpu(i) {
err = cpuid_device_create(i);
if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&cpuid_class_cpu_notifier);
+ put_online_cpus();
err = 0;
goto out;
@@ -214,6 +216,7 @@ out_class:
for_each_online_cpu(i) {
cpuid_device_destroy(i);
}
+ put_online_cpus();
class_destroy(cpuid_class);
out_chrdev:
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void)
{
int cpu = 0;
+ get_online_cpus();
for_each_online_cpu(cpu)
cpuid_device_destroy(cpu);
class_destroy(cpuid_class);
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
+ put_online_cpus();
}
module_init(cpuid_init);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3ae2ced..b158152 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = {
.xlate = ioapic_xlate,
};
+/*
+ * Create a linear irq domain for IO-APIC number @ioapic_num, described by
+ * device tree node @np, and set up the irq mappings for its GSI range.
+ *
+ * The domain size is derived from the IO-APIC's GSI routing entry
+ * (gsi_end - gsi_base + 1) and @ioapic_num is passed along as the domain's
+ * private data. Failing to create the domain is fatal (BUG_ON); failures
+ * while creating the mappings are only reported with pr_err().
+ *
+ * NOTE(review): @ioapic_num is an unsigned int cast directly to void *;
+ * on a build where pointers are wider than int this would presumably
+ * trigger an int-to-pointer size warning -- confirm which configurations
+ * build this file.
+ */
+static void dt_add_ioapic_domain(unsigned int ioapic_num,
+ struct device_node *np)
+{
+ struct irq_domain *id;
+ struct mp_ioapic_gsi *gsi_cfg;
+ int ret;
+ int num;
+
+ gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
+ num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+
+ id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
+ (void *)ioapic_num);
+ BUG_ON(!id);
+ if (gsi_cfg->gsi_base == 0) {
+ /*
+ * The first NR_IRQS_LEGACY irq descs are allocated in
+ * early_irq_init() and need just a mapping. The
+ * remaining irqs need both a desc and a mapping. All of
+ * them are preallocated and assigned, so we can keep the
+ * 1:1 mapping that the ioapic uses.
+ */
+ ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
+ if (ret)
+ pr_err("Error mapping legacy IRQs: %d\n", ret);
+
+ if (num > NR_IRQS_LEGACY) {
+ ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
+ NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
+ if (ret)
+ pr_err("Error creating mapping for the "
+ "remaining IRQs: %d\n", ret);
+ }
+ /* The IO-APIC whose range starts at GSI 0 becomes the default host. */
+ irq_set_default_host(id);
+ } else {
+ /* Any other IO-APIC: one strict mapping call covering all @num irqs. */
+ ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
+ if (ret)
+ pr_err("Error creating IRQ mapping: %d\n", ret);
+ }
+}
+
static void __init ioapic_add_ofnode(struct device_node *np)
{
struct resource r;
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np)
for (i = 0; i < nr_ioapics; i++) {
if (r.start == mpc_ioapic_addr(i)) {
- struct irq_domain *id;
- struct mp_ioapic_gsi *gsi_cfg;
-
- gsi_cfg = mp_ioapic_gsi_routing(i);
-
- id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
- &ioapic_irq_domain_ops,
- (void*)i);
- BUG_ON(!id);
+ dt_add_ioapic_domain(i, np);
return;
}
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f288..f438a44 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1109,17 +1109,21 @@ ENTRY(ftrace_caller)
pushl %eax
pushl %ecx
pushl %edx
- movl 0xc(%esp), %eax
+ pushl $0 /* Pass NULL as regs pointer */
+ movl 4*4(%esp), %eax
movl 0x4(%ebp), %edx
+ leal function_trace_op, %ecx
subl $MCOUNT_INSN_SIZE, %eax
.globl ftrace_call
ftrace_call:
call ftrace_stub
+ addl $4,%esp /* skip NULL pointer */
popl %edx
popl %ecx
popl %eax
+ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
@@ -1131,6 +1135,71 @@ ftrace_stub:
ret
END(ftrace_caller)
+/*
+ * ftrace_regs_caller: variant of ftrace_caller that pushes a full register
+ * frame onto the stack and passes a pointer to it as the 4th parameter of
+ * the tracing callback (ftrace_caller pushes a NULL there instead).
+ *
+ * Flags are saved first because the function_trace_stop compare would
+ * clobber them. If tracing is stopped, the flags are popped again and
+ * control goes to ftrace_stub. Otherwise the frame is built, the call site
+ * at ftrace_regs_call is invoked, the registers are popped back (flags
+ * last, via popf, so that no addl runs after them) and execution continues
+ * at ftrace_ret.
+ */
+ENTRY(ftrace_regs_caller)
+ pushf /* push flags before compare (in cs location) */
+ cmpl $0, function_trace_stop
+ jne ftrace_restore_flags
+
+ /*
+ * i386 does not save SS and ESP when coming from kernel.
+ * Instead, to get sp, &regs->sp is used (see ptrace.h).
+ * Unfortunately, that means eflags must be at the same location
+ * as the current return ip is. We move the return ip into the
+ * ip location, and move flags into the return ip location.
+ */
+ pushl 4(%esp) /* save return ip into ip slot */
+
+ pushl $0 /* Load 0 into orig_ax */
+ pushl %gs
+ pushl %fs
+ pushl %es
+ pushl %ds
+ pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+
+ movl 13*4(%esp), %eax /* Get the saved flags */
+ movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
+ /* clobbering return ip */
+ movl $__KERNEL_CS,13*4(%esp)
+
+ movl 12*4(%esp), %eax /* Load ip (1st parameter) */
+ subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
+ movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
+ leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+ pushl %esp /* Save pt_regs as 4th parameter */
+
+GLOBAL(ftrace_regs_call)
+ call ftrace_stub
+
+ addl $4, %esp /* Skip pt_regs */
+ movl 14*4(%esp), %eax /* Move flags back into cs */
+ movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
+ movl 12*4(%esp), %eax /* Get return ip from regs->ip */
+ movl %eax, 14*4(%esp) /* Put return ip back for ret */
+
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ popl %ds
+ popl %es
+ popl %fs
+ popl %gs
+ addl $8, %esp /* Skip orig_ax and ip */
+ popf /* Pop flags at end (no addl to corrupt flags) */
+ jmp ftrace_ret
+
+ftrace_restore_flags:
+ popf
+ jmp ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
@@ -1171,9 +1240,6 @@ END(mcount)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
pushl %eax
pushl %ecx
pushl %edx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8..066334b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,7 @@
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
+#include <asm/rcu.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -68,25 +69,51 @@
.section .entry.text, "ax"
#ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CC_USING_FENTRY
+# define function_hook __fentry__
+#else
+# define function_hook mcount
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(mcount)
+
+ENTRY(function_hook)
retq
-END(mcount)
+END(function_hook)
+
+/*
+ * Common prologue shared by ftrace_caller and ftrace_regs_caller.
+ *
+ * Saves the frame with MCOUNT_SAVE_FRAME and loads the first three
+ * parameters of the tracing callback:
+ *   %rdi - ip: value of the RIP slot minus MCOUNT_INSN_SIZE
+ *   %rsi - parent ip: read from SS+16(%rsp) when CC_USING_FENTRY is
+ *          defined, otherwise from 8(%rbp)
+ *   %rdx - address of function_trace_op
+ *
+ * skip is set if stack has been adjusted; it is handed straight to
+ * MCOUNT_SAVE_FRAME.
+ */
+.macro ftrace_caller_setup skip=0
+ MCOUNT_SAVE_FRAME \skip
+
+ /* Load the ftrace_ops into the 3rd parameter */
+ leaq function_trace_op, %rdx
+
+ /* Load ip into the first parameter */
+ movq RIP(%rsp), %rdi
+ subq $MCOUNT_INSN_SIZE, %rdi
+ /* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
+ movq 8(%rbp), %rsi
+#endif
+.endm
ENTRY(ftrace_caller)
+ /* Check if tracing was disabled (quick check) */
cmpl $0, function_trace_stop
jne ftrace_stub
- MCOUNT_SAVE_FRAME
-
- movq 0x38(%rsp), %rdi
- movq 8(%rbp), %rsi
- subq $MCOUNT_INSN_SIZE, %rdi
+ ftrace_caller_setup
+ /* regs go into 4th parameter (but make it NULL) */
+ movq $0, %rcx
GLOBAL(ftrace_call)
call ftrace_stub
MCOUNT_RESTORE_FRAME
+ftrace_return:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
@@ -97,8 +124,78 @@ GLOBAL(ftrace_stub)
retq
END(ftrace_caller)
+/*
+ * ftrace_regs_caller: variant of ftrace_caller that fills in a complete
+ * pt_regs on the stack and passes its address in %rcx as the 4th parameter
+ * of the tracing callback (ftrace_caller passes 0 there instead).
+ *
+ * Flags are pushed first, into what becomes the SS slot, because the
+ * function_trace_stop compare modifies them. After the callback returns,
+ * the flags and the (possibly handler-modified) RIP are copied back to the
+ * slots they are restored from, the registers are reloaded, flags are
+ * popped and execution continues at ftrace_return.
+ */
+ENTRY(ftrace_regs_caller)
+ /* Save the current flags before compare (in SS location)*/
+ pushfq
+
+ /* Check if tracing was disabled (quick check) */
+ cmpl $0, function_trace_stop
+ jne ftrace_restore_flags
+
+ /* skip=8 to skip flags saved in SS */
+ ftrace_caller_setup 8
+
+ /* Save the rest of pt_regs */
+ movq %r15, R15(%rsp)
+ movq %r14, R14(%rsp)
+ movq %r13, R13(%rsp)
+ movq %r12, R12(%rsp)
+ movq %r11, R11(%rsp)
+ movq %r10, R10(%rsp)
+ movq %rbp, RBP(%rsp)
+ movq %rbx, RBX(%rsp)
+ /* Copy saved flags */
+ movq SS(%rsp), %rcx
+ movq %rcx, EFLAGS(%rsp)
+ /* Kernel segments */
+ movq $__KERNEL_DS, %rcx
+ movq %rcx, SS(%rsp)
+ movq $__KERNEL_CS, %rcx
+ movq %rcx, CS(%rsp)
+ /* Stack - skipping return address */
+ leaq SS+16(%rsp), %rcx
+ movq %rcx, RSP(%rsp)
+
+ /* regs go into 4th parameter */
+ leaq (%rsp), %rcx
+
+GLOBAL(ftrace_regs_call)
+ call ftrace_stub
+
+ /* Copy flags back to SS, to restore them */
+ movq EFLAGS(%rsp), %rax
+ movq %rax, SS(%rsp)
+
+ /* Handlers can change the RIP */
+ movq RIP(%rsp), %rax
+ movq %rax, SS+8(%rsp)
+
+ /*
+ * restore the rest of pt_regs
+ *
+ * NOTE(review): %r11 is stored to R11(%rsp) above but is not reloaded
+ * in this list, unlike the other registers saved there -- presumably
+ * intentional or covered by MCOUNT_RESTORE_FRAME; confirm.
+ */
+ movq R15(%rsp), %r15
+ movq R14(%rsp), %r14
+ movq R13(%rsp), %r13
+ movq R12(%rsp), %r12
+ movq R10(%rsp), %r10
+ movq RBP(%rsp), %rbp
+ movq RBX(%rsp), %rbx
+
+ /* skip=8 to skip flags saved in SS */
+ MCOUNT_RESTORE_FRAME 8
+
+ /* Restore flags */
+ popfq
+
+ jmp ftrace_return
+ftrace_restore_flags:
+ /* Tracing is stopped: undo the pushfq and leave through ftrace_stub */
+ popfq
+ jmp ftrace_stub
+
+END(ftrace_regs_caller)
+
+
#else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(mcount)
+
+ENTRY(function_hook)
cmpl $0, function_trace_stop
jne ftrace_stub
@@ -119,8 +216,12 @@ GLOBAL(ftrace_stub)
trace:
MCOUNT_SAVE_FRAME
- movq 0x38(%rsp), %rdi
+ movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
movq 8(%rbp), %rsi
+#endif
subq $MCOUNT_INSN_SIZE, %rdi
call *ftrace_trace_function
@@ -128,20 +229,22 @@ trace:
MCOUNT_RESTORE_FRAME
jmp ftrace_stub
-END(mcount)
+END(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
MCOUNT_SAVE_FRAME
+#ifdef CC_USING_FENTRY
+ leaq SS+16(%rsp), %rdi
+ movq $0, %rdx /* No framepointers needed */
+#else
leaq 8(%rbp), %rdi
- movq 0x38(%rsp), %rsi
movq (%rbp), %rdx
+#endif
+ movq RIP(%rsp), %rsi
subq $MCOUNT_INSN_SIZE, %rsi
call prepare_ftrace_return
@@ -342,15 +445,15 @@ ENDPROC(native_usergs_sysret64)
.macro SAVE_ARGS_IRQ
cld
/* start from rbp in pt_regs and jump over */
- movq_cfi rdi, RDI-RBP
- movq_cfi rsi, RSI-RBP
- movq_cfi rdx, RDX-RBP
- movq_cfi rcx, RCX-RBP
- movq_cfi rax, RAX-RBP
- movq_cfi r8, R8-RBP
- movq_cfi r9, R9-RBP
- movq_cfi r10, R10-RBP
- movq_cfi r11, R11-RBP
+ movq_cfi rdi, (RDI-RBP)
+ movq_cfi rsi, (RSI-RBP)
+ movq_cfi rdx, (RDX-RBP)
+ movq_cfi rcx, (RCX-RBP)
+ movq_cfi rax, (RAX-RBP)
+ movq_cfi r8, (R8-RBP)
+ movq_cfi r9, (R9-RBP)
+ movq_cfi r10, (R10-RBP)
+ movq_cfi r11, (R11-RBP)
/* Save rbp so that we can unwind from get_irq_regs() */
movq_cfi rbp, 0
@@ -384,7 +487,7 @@ ENDPROC(native_usergs_sysret64)
.endm
ENTRY(save_rest)
- PARTIAL_FRAME 1 REST_SKIP+8
+ PARTIAL_FRAME 1 (REST_SKIP+8)
movq 5*8+16(%rsp), %r11 /* save return address */
movq_cfi rbx, RBX+16
movq_cfi rbp, RBP+16
@@ -440,7 +543,7 @@ ENTRY(ret_from_fork)
LOCK ; btr $TIF_FORK,TI_flags(%r8)
- pushq_cfi kernel_eflags(%rip)
+ pushq_cfi $0x0002
popfq_cfi # reset kernel eflags
call schedule_tail # rdi: 'prev' task parameter
@@ -565,7 +668,7 @@ sysret_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
jmp sysret_check
@@ -678,7 +781,7 @@ int_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
@@ -974,7 +1077,7 @@ retint_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1449,7 +1552,7 @@ paranoid_userspace:
paranoid_schedule:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
- call schedule
+ SCHEDULE_USER
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
jmp paranoid_userspace
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c3a7cb4..1d41402 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -206,6 +206,21 @@ static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code);
+/*
+ * Stub: unconditionally warns (WARN_ON) and returns -EINVAL; @rec,
+ * @old_addr and @addr are ignored.
+ *
+ * Should never be called:
+ * As it is only called by __ftrace_replace_code() which is called by
+ * ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
+ * which is called to turn mcount into nops or nops into function calls
+ * but not to convert a function from not using regs to one that uses
+ * regs, which ftrace_modify_call() is for.
+ */
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ WARN_ON(1);
+ return -EINVAL;
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
ret = ftrace_modify_code(ip, old, new);
+ /* Also update the regs callback function */
+ if (!ret) {
+ ip = (unsigned long)(&ftrace_regs_call);
+ memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ ret = ftrace_modify_code(ip, old, new);
+ }
+
atomic_dec(&modifying_ftrace_code);
return ret;
@@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
return add_break(rec->ip, old);
}
+/*
+ * If the record has the FTRACE_FL_REGS set, that means that it
+ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
+ * is not set, then it wants to convert to the normal callback.
+ *
+ * Returns FTRACE_REGS_ADDR in the first case and FTRACE_ADDR in the
+ * second, i.e. the call target the record should end up with.
+ */
+static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
+{
+ if (rec->flags & FTRACE_FL_REGS)
+ return (unsigned long)FTRACE_REGS_ADDR;
+ else
+ return (unsigned long)FTRACE_ADDR;
+}
+
+/*
+ * The FTRACE_FL_REGS_EN is set when the record already points to
+ * a function that saves all the regs. Basically the '_EN' version
+ * represents the current state of the function.
+ *
+ * Returns FTRACE_REGS_ADDR when FTRACE_FL_REGS_EN is set and FTRACE_ADDR
+ * otherwise, i.e. the call target the record currently has.
+ */
+static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
+{
+ if (rec->flags & FTRACE_FL_REGS_EN)
+ return (unsigned long)FTRACE_REGS_ADDR;
+ else
+ return (unsigned long)FTRACE_ADDR;
+}
+
static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
unsigned long ftrace_addr;
@@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
ret = ftrace_test_record(rec, enable);
- ftrace_addr = (unsigned long)FTRACE_ADDR;
+ ftrace_addr = get_ftrace_addr(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
@@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
/* converting nop to call */
return add_brk_on_nop(rec);
+ case FTRACE_UPDATE_MODIFY_CALL_REGS:
+ case FTRACE_UPDATE_MODIFY_CALL:
+ ftrace_addr = get_ftrace_old_addr(rec);
+ /* fall through */
case FTRACE_UPDATE_MAKE_NOP:
/* converting a call to a nop */
return add_brk_on_call(rec, ftrace_addr);
@@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
* If not, don't touch the breakpoint, we make just create
* a disaster.
*/
- ftrace_addr = (unsigned long)FTRACE_ADDR;
+ ftrace_addr = get_ftrace_addr(rec);
+ nop = ftrace_call_replace(ip, ftrace_addr);
+
+ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
+ goto update;
+
+ /* Check both ftrace_addr and ftrace_old_addr */
+ ftrace_addr = get_ftrace_old_addr(rec);
nop = ftrace_call_replace(ip, ftrace_addr);
if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
return -EINVAL;
}
+ update:
return probe_kernel_write((void *)ip, &nop[0], 1);
}
@@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable)
ret = ftrace_test_record(rec, enable);
- ftrace_addr = (unsigned long)FTRACE_ADDR;
+ ftrace_addr = get_ftrace_addr(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
+ case FTRACE_UPDATE_MODIFY_CALL_REGS:
+ case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
return add_update_call(rec, ftrace_addr);
@@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
ret = ftrace_update_record(rec, enable);
- ftrace_addr = (unsigned long)FTRACE_ADDR;
+ ftrace_addr = get_ftrace_addr(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
+ case FTRACE_UPDATE_MODIFY_CALL_REGS:
+ case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
return finish_update_call(rec, ftrace_addr);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f250431..675a050 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -19,24 +19,17 @@
#include <asm/fpu-internal.h>
#include <asm/user.h>
-#ifdef CONFIG_X86_64
-# include <asm/sigcontext32.h>
-# include <asm/user32.h>
-#else
-# define save_i387_xstate_ia32 save_i387_xstate
-# define restore_i387_xstate_ia32 restore_i387_xstate
-# define _fpstate_ia32 _fpstate
-# define _xstate_ia32 _xstate
-# define sig_xstate_ia32_size sig_xstate_size
-# define fx_sw_reserved_ia32 fx_sw_reserved
-# define user_i387_ia32_struct user_i387_struct
-# define user32_fxsr_struct user_fxsr_struct
-#endif
-
/*
* Were we in an interrupt that interrupted kernel mode?
*
- * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * For now, with eagerfpu we will return interrupted kernel FPU
+ * state as not-idle. TBD: Ideally we can change the return value
+ * to something like __thread_has_fpu(current). But we need to
+ * be careful of doing __thread_clear_has_fpu() before saving
+ * the FPU etc for supporting nested uses etc. For now, take
+ * the simple route!
+ *
+ * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
* pair does nothing at all: the thread must not have fpu (so
* that we don't try to save the FPU state), and TS must
* be set (so that the clts/stts pair does nothing that is
@@ -44,6 +37,9 @@
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
+ if (use_eager_fpu())
+ return 0;
+
return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS);
}
@@ -77,29 +73,29 @@ bool irq_fpu_usable(void)
}
EXPORT_SYMBOL(irq_fpu_usable);
-void kernel_fpu_begin(void)
+void __kernel_fpu_begin(void)
{
struct task_struct *me = current;
- WARN_ON_ONCE(!irq_fpu_usable());
- preempt_disable();
if (__thread_has_fpu(me)) {
__save_init_fpu(me);
__thread_clear_has_fpu(me);
- /* We do 'stts()' in kernel_fpu_end() */
- } else {
+ /* We do 'stts()' in __kernel_fpu_end() */
+ } else if (!use_eager_fpu()) {
this_cpu_write(fpu_owner_task, NULL);
clts();
}
}
-EXPORT_SYMBOL(kernel_fpu_begin);
+EXPORT_SYMBOL(__kernel_fpu_begin);
-void kernel_fpu_end(void)
+void __kernel_fpu_end(void)
{
- stts();
- preempt_enable();
+ if (use_eager_fpu())
+ math_state_restore();
+ else
+ stts();
}
-EXPORT_SYMBOL(kernel_fpu_end);
+EXPORT_SYMBOL(__kernel_fpu_end);
void unlazy_fpu(struct task_struct *tsk)
{
@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk)
}
EXPORT_SYMBOL(unlazy_fpu);
-#ifdef CONFIG_MATH_EMULATION
-# define HAVE_HWFP (boot_cpu_data.hard_math)
-#else
-# define HAVE_HWFP 1
-#endif
-
-static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
-unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;
static void __cpuinit mxcsr_feature_mask_init(void)
{
unsigned long mask = 0;
- clts();
if (cpu_has_fxsr) {
memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
mask = 0x0000ffbf;
}
mxcsr_feature_mask &= mask;
- stts();
}
static void __cpuinit init_thread_xstate(void)
@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void)
init_thread_xstate();
mxcsr_feature_mask_init();
- /* clean state in init */
- current_thread_info()->status = 0;
- clear_used_math();
+ xsave_init();
+ eager_fpu_init();
}
void fpu_finit(struct fpu *fpu)
@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu)
}
if (cpu_has_fxsr) {
- struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-
- memset(fx, 0, xstate_size);
- fx->cwd = 0x37f;
- if (cpu_has_xmm)
- fx->mxcsr = MXCSR_DEFAULT;
+ fx_finit(&fpu->state->fxsave);
} else {
struct i387_fsave_struct *fp = &fpu->state->fsave;
memset(fp, 0, xstate_size);
@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
* FXSR floating point environment conversions.
*/
-static void
+void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
memcpy(&to[i], &from[i], sizeof(to[0]));
}
-static void convert_to_fxsr(struct task_struct *tsk,
- const struct user_i387_ia32_struct *env)
+void convert_to_fxsr(struct task_struct *tsk,
+ const struct user_i387_ia32_struct *env)
{
struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
}
/*
- * Signal frame handlers.
- */
-
-static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
-{
- struct task_struct *tsk = current;
- struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave;
-
- fp->status = fp->swd;
- if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
- return -1;
- return 1;
-}
-
-static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
-{
- struct task_struct *tsk = current;
- struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
- struct user_i387_ia32_struct env;
- int err = 0;
-
- convert_from_fxsr(&env, tsk);
- if (__copy_to_user(buf, &env, sizeof(env)))
- return -1;
-
- err |= __put_user(fx->swd, &buf->status);
- err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
- if (err)
- return -1;
-
- if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
- return -1;
- return 1;
-}
-
-static int save_i387_xsave(void __user *buf)
-{
- struct task_struct *tsk = current;
- struct _fpstate_ia32 __user *fx = buf;
- int err = 0;
-
-
- sanitize_i387_state(tsk);
-
- /*
- * For legacy compatible, we always set FP/SSE bits in the bit
- * vector while saving the state to the user context.
- * This will enable us capturing any changes(during sigreturn) to
- * the FP/SSE bits by the legacy applications which don't touch
- * xstate_bv in the xsave header.
- *
- * xsave aware applications can change the xstate_bv in the xsave
- * header as well as change any contents in the memory layout.
- * xrestore as part of sigreturn will capture all the changes.
- */
- tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
-
- if (save_i387_fxsave(fx) < 0)
- return -1;
-
- err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
- sizeof(struct _fpx_sw_bytes));
- err |= __put_user(FP_XSTATE_MAGIC2,
- (__u32 __user *) (buf + sig_xstate_ia32_size
- - FP_XSTATE_MAGIC2_SIZE));
- if (err)
- return -1;
-
- return 1;
-}
-
-int save_i387_xstate_ia32(void __user *buf)
-{
- struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
- struct task_struct *tsk = current;
-
- if (!used_math())
- return 0;
-
- if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
- return -EACCES;
- /*
- * This will cause a "finit" to be triggered by the next
- * attempted FPU operation by the 'current' process.
- */
- clear_used_math();
-
- if (!HAVE_HWFP) {
- return fpregs_soft_get(current, NULL,
- 0, sizeof(struct user_i387_ia32_struct),
- NULL, fp) ? -1 : 1;
- }
-
- unlazy_fpu(tsk);
-
- if (cpu_has_xsave)
- return save_i387_xsave(fp);
- if (cpu_has_fxsr)
- return save_i387_fxsave(fp);
- else
- return save_i387_fsave(fp);
-}
-
-static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
-{
- struct task_struct *tsk = current;
-
- return __copy_from_user(&tsk->thread.fpu.state->fsave, buf,
- sizeof(struct i387_fsave_struct));
-}
-
-static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
- unsigned int size)
-{
- struct task_struct *tsk = current;
- struct user_i387_ia32_struct env;
- int err;
-
- err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0],
- size);
- /* mxcsr reserved bits must be masked to zero for security reasons */
- tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
- if (err || __copy_from_user(&env, buf, sizeof(env)))
- return 1;
- convert_to_fxsr(tsk, &env);
-
- return 0;
-}
-
-static int restore_i387_xsave(void __user *buf)
-{
- struct _fpx_sw_bytes fx_sw_user;
- struct _fpstate_ia32 __user *fx_user =
- ((struct _fpstate_ia32 __user *) buf);
- struct i387_fxsave_struct __user *fx =
- (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
- struct xsave_hdr_struct *xsave_hdr =
- &current->thread.fpu.state->xsave.xsave_hdr;
- u64 mask;
- int err;
-
- if (check_for_xstate(fx, buf, &fx_sw_user))
- goto fx_only;
-
- mask = fx_sw_user.xstate_bv;
-
- err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
-
- xsave_hdr->xstate_bv &= pcntxt_mask;
- /*
- * These bits must be zero.
- */
- xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
-
- /*
- * Init the state that is not present in the memory layout
- * and enabled by the OS.
- */
- mask = ~(pcntxt_mask & ~mask);
- xsave_hdr->xstate_bv &= mask;
-
- return err;
-fx_only:
- /*
- * Couldn't find the extended state information in the memory
- * layout. Restore the FP/SSE and init the other extended state
- * enabled by the OS.
- */
- xsave_hdr->xstate_bv = XSTATE_FPSSE;
- return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
-}
-
-int restore_i387_xstate_ia32(void __user *buf)
-{
- int err;
- struct task_struct *tsk = current;
- struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
-
- if (HAVE_HWFP)
- clear_fpu(tsk);
-
- if (!buf) {
- if (used_math()) {
- clear_fpu(tsk);
- clear_used_math();
- }
-
- return 0;
- } else
- if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
- return -EACCES;
-
- if (!used_math()) {
- err = init_fpu(tsk);
- if (err)
- return err;
- }
-
- if (HAVE_HWFP) {
- if (cpu_has_xsave)
- err = restore_i387_xsave(buf);
- else if (cpu_has_fxsr)
- err = restore_i387_fxsave(fp, sizeof(struct
- i387_fxsave_struct));
- else
- err = restore_i387_fsave(fp);
- } else {
- err = fpregs_soft_set(current, NULL,
- 0, sizeof(struct user_i387_ia32_struct),
- NULL, fp) != 0;
- }
- set_used_math();
-
- return err;
-}
-
-/*
* FPU state for core dumps.
* This is only used for a.out dumps now.
* It is declared generically using elf_fpregset_t (which is
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 36d1853..9a5c460 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -263,7 +263,7 @@ static void i8259A_shutdown(void)
* out of.
*/
outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
- outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */
+ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
}
static struct syscore_ops i8259_syscore_ops = {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d44f782..e4595f1 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+ seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
+ irq_stats(j)->irq_tlb_count);
seq_printf(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
sum += irq_stats(cpu)->irq_call_count;
- sum += irq_stats(cpu)->irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
sum += irq_stats(cpu)->irq_thermal_count;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e2f751e..57916c0 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
return 1;
}
+#ifdef KPROBES_CAN_USE_FTRACE
+/*
+ * Complete a hit of kprobe @p without single-stepping anything.
+ *
+ * Sets regs->ip to p->addr + MCOUNT_INSN_SIZE, runs @p's post_handler (if
+ * one is set) with kcb->kprobe_status = KPROBE_HIT_SSDONE, and finally
+ * clears current_kprobe.
+ */
+static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+}
+#endif
+
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled throughout this function.
@@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
+#ifdef KPROBES_CAN_USE_FTRACE
+ if (kprobe_ftrace(p)) {
+ skip_singlestep(p, regs, kcb);
+ return 1;
+ }
+#endif
setup_singlestep(p, regs, kcb, 0);
return 1;
}
@@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
+#ifdef KPROBES_CAN_USE_FTRACE
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+ regs->ip = ip + sizeof(kprobe_opcode_t);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ skip_singlestep(p, regs, kcb);
+ /*
+ * If pre_handler returns !0, it sets regs->ip and
+ * resets current kprobe.
+ */
+ }
+end:
+ local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
+#endif
+
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 82746f9..7720ff5 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -75,20 +75,113 @@ struct microcode_amd {
static struct equiv_cpu_entry *equiv_cpu_table;
-/* page-sized ucode patch buffer */
-void *patch;
+struct ucode_patch {
+ struct list_head plist;
+ void *data;
+ u32 patch_id;
+ u16 equiv_cpu;
+};
+
+static LIST_HEAD(pcache);
+
+static u16 find_equiv_id(unsigned int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ int i = 0;
+
+ if (!equiv_cpu_table)
+ return 0;
+
+ while (equiv_cpu_table[i].installed_cpu != 0) {
+ if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
+ return equiv_cpu_table[i].equiv_cpu;
+
+ i++;
+ }
+ return 0;
+}
+
+static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
+{
+ int i = 0;
+
+ BUG_ON(!equiv_cpu_table);
+
+ while (equiv_cpu_table[i].equiv_cpu != 0) {
+ if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
+ return equiv_cpu_table[i].installed_cpu;
+ i++;
+ }
+ return 0;
+}
+
+/*
+ * a small, trivial cache of per-family ucode patches
+ */
+static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
+{
+ struct ucode_patch *p;
+
+ list_for_each_entry(p, &pcache, plist)
+ if (p->equiv_cpu == equiv_cpu)
+ return p;
+ return NULL;
+}
+
+static void update_cache(struct ucode_patch *new_patch)
+{
+ struct ucode_patch *p;
+
+ list_for_each_entry(p, &pcache, plist) {
+ if (p->equiv_cpu == new_patch->equiv_cpu) {
+ if (p->patch_id >= new_patch->patch_id)
+ /* we already have the latest patch */
+ return;
+
+ list_replace(&p->plist, &new_patch->plist);
+ kfree(p->data);
+ kfree(p);
+ return;
+ }
+ }
+ /* no patch found, add it */
+ list_add_tail(&new_patch->plist, &pcache);
+}
+
+static void free_cache(void)
+{
+ struct ucode_patch *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, &pcache, plist) {
+ __list_del(p->plist.prev, p->plist.next);
+ kfree(p->data);
+ kfree(p);
+ }
+}
+
+static struct ucode_patch *find_patch(unsigned int cpu)
+{
+ u16 equiv_id;
+
+ equiv_id = find_equiv_id(cpu);
+ if (!equiv_id)
+ return NULL;
+
+ return cache_find_patch(equiv_id);
+}
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ csig->sig = cpuid_eax(0x00000001);
csig->rev = c->microcode;
pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
return 0;
}
-static unsigned int verify_ucode_size(int cpu, u32 patch_size,
+static unsigned int verify_patch_size(int cpu, u32 patch_size,
unsigned int size)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
return patch_size;
}
-static u16 find_equiv_id(void)
+static int apply_microcode_amd(int cpu)
{
- unsigned int current_cpu_id, i = 0;
-
- BUG_ON(equiv_cpu_table == NULL);
-
- current_cpu_id = cpuid_eax(0x00000001);
-
- while (equiv_cpu_table[i].installed_cpu != 0) {
- if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
- return equiv_cpu_table[i].equiv_cpu;
-
- i++;
- }
- return 0;
-}
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct microcode_amd *mc_amd;
+ struct ucode_cpu_info *uci;
+ struct ucode_patch *p;
+ u32 rev, dummy;
-/*
- * we signal a good patch is found by returning its size > 0
- */
-static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
- unsigned int leftover_size, int rev,
- unsigned int *current_size)
-{
- struct microcode_header_amd *mc_hdr;
- unsigned int actual_size, patch_size;
- u16 equiv_cpu_id;
+ BUG_ON(raw_smp_processor_id() != cpu);
- /* size of the current patch we're staring at */
- patch_size = *(u32 *)(ucode_ptr + 4);
- *current_size = patch_size + SECTION_HDR_SIZE;
+ uci = ucode_cpu_info + cpu;
- equiv_cpu_id = find_equiv_id();
- if (!equiv_cpu_id)
+ p = find_patch(cpu);
+ if (!p)
return 0;
- /*
- * let's look at the patch header itself now
- */
- mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
+ mc_amd = p->data;
+ uci->mc = p->data;
- if (mc_hdr->processor_rev_id != equiv_cpu_id)
- return 0;
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- /* ucode might be chipset specific -- currently we don't support this */
- if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
- pr_err("CPU%d: chipset specific code not yet supported\n",
- cpu);
+ /* need to apply patch? */
+ if (rev >= mc_amd->hdr.patch_id) {
+ c->microcode = rev;
return 0;
}
- if (mc_hdr->patch_id <= rev)
- return 0;
-
- /*
- * now that the header looks sane, verify its size
- */
- actual_size = verify_ucode_size(cpu, patch_size, leftover_size);
- if (!actual_size)
- return 0;
-
- /* clear the patch buffer */
- memset(patch, 0, PAGE_SIZE);
-
- /* all looks ok, get the binary patch */
- get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
-
- return actual_size;
-}
-
-static int apply_microcode_amd(int cpu)
-{
- u32 rev, dummy;
- int cpu_num = raw_smp_processor_id();
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- struct microcode_amd *mc_amd = uci->mc;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- /* We should bind the task to the CPU */
- BUG_ON(cpu_num != cpu);
-
- if (mc_amd == NULL)
- return 0;
-
wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
- /* get patch id after patching */
- rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- /* check current patch id and patch's id for match */
+ /* verify patch application was successful */
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
if (rev != mc_amd->hdr.patch_id) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
@@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf)
return -ENOMEM;
}
- get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
+ memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
/* add header length */
return size + CONTAINER_HDR_SZ;
@@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void)
equiv_cpu_table = NULL;
}
-static enum ucode_state
-generic_load_microcode(int cpu, const u8 *data, size_t size)
+static void cleanup(void)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- struct microcode_header_amd *mc_hdr = NULL;
- unsigned int mc_size, leftover, current_size = 0;
+ free_equiv_cpu_table();
+ free_cache();
+}
+
+/*
+ * We return the current size even if some of the checks failed so that
+ * we can skip over the next patch. If we return a negative value, we
+ * signal a grave error like a memory allocation has failed and the
+ * driver cannot continue functioning normally. In such cases, we tear
+ * down everything we've used up so far and exit.
+ */
+static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
+{
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct microcode_header_amd *mc_hdr;
+ struct ucode_patch *patch;
+ unsigned int patch_size, crnt_size, ret;
+ u32 proc_fam;
+ u16 proc_id;
+
+ patch_size = *(u32 *)(fw + 4);
+ crnt_size = patch_size + SECTION_HDR_SIZE;
+ mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
+ proc_id = mc_hdr->processor_rev_id;
+
+ proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
+ if (!proc_fam) {
+ pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
+ return crnt_size;
+ }
+
+ /* check if patch is for the current family */
+ proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
+ if (proc_fam != c->x86)
+ return crnt_size;
+
+ if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+ pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
+ mc_hdr->patch_id);
+ return crnt_size;
+ }
+
+ ret = verify_patch_size(cpu, patch_size, leftover);
+ if (!ret) {
+ pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
+ return crnt_size;
+ }
+
+ patch = kzalloc(sizeof(*patch), GFP_KERNEL);
+ if (!patch) {
+ pr_err("Patch allocation failure.\n");
+ return -EINVAL;
+ }
+
+ patch->data = kzalloc(patch_size, GFP_KERNEL);
+ if (!patch->data) {
+ pr_err("Patch data allocation failure.\n");
+ kfree(patch);
+ return -EINVAL;
+ }
+
+ /* All looks ok, copy patch... */
+ memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
+ INIT_LIST_HEAD(&patch->plist);
+ patch->patch_id = mc_hdr->patch_id;
+ patch->equiv_cpu = proc_id;
+
+ /* ... and add to cache. */
+ update_cache(patch);
+
+ return crnt_size;
+}
+
+static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+{
+ enum ucode_state ret = UCODE_ERROR;
+ unsigned int leftover;
+ u8 *fw = (u8 *)data;
+ int crnt_size = 0;
int offset;
- const u8 *ucode_ptr = data;
- void *new_mc = NULL;
- unsigned int new_rev = uci->cpu_sig.rev;
- enum ucode_state state = UCODE_ERROR;
- offset = install_equiv_cpu_table(ucode_ptr);
+ offset = install_equiv_cpu_table(data);
if (offset < 0) {
pr_err("failed to create equivalent cpu table\n");
- goto out;
+ return ret;
}
- ucode_ptr += offset;
+ fw += offset;
leftover = size - offset;
- if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
+ if (*(u32 *)fw != UCODE_UCODE_TYPE) {
pr_err("invalid type field in container file section header\n");
- goto free_table;
+ free_equiv_cpu_table();
+ return ret;
}
while (leftover) {
- mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
- new_rev, &current_size);
- if (mc_size) {
- mc_hdr = patch;
- new_mc = patch;
- new_rev = mc_hdr->patch_id;
- goto out_ok;
- }
-
- ucode_ptr += current_size;
- leftover -= current_size;
- }
+ crnt_size = verify_and_add_patch(cpu, fw, leftover);
+ if (crnt_size < 0)
+ return ret;
- if (!new_mc) {
- state = UCODE_NFOUND;
- goto free_table;
+ fw += crnt_size;
+ leftover -= crnt_size;
}
-out_ok:
- uci->mc = new_mc;
- state = UCODE_OK;
- pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
- cpu, uci->cpu_sig.rev, new_rev);
-
-free_table:
- free_equiv_cpu_table();
-
-out:
- return state;
+ return UCODE_OK;
}
/*
@@ -315,7 +402,7 @@ out:
*
* This legacy file is always smaller than 2K in size.
*
- * Starting at family 15h they are in family specific firmware files:
+ * Beginning with family 15h, they are in family-specific firmware files:
*
* amd-ucode/microcode_amd_fam15h.bin
* amd-ucode/microcode_amd_fam16h.bin
@@ -323,12 +410,17 @@ out:
*
* These might be larger than 2K.
*/
-static enum ucode_state request_microcode_amd(int cpu, struct device *device)
+static enum ucode_state request_microcode_amd(int cpu, struct device *device,
+ bool refresh_fw)
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
- const struct firmware *fw;
- enum ucode_state ret = UCODE_NFOUND;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ enum ucode_state ret = UCODE_NFOUND;
+ const struct firmware *fw;
+
+ /* reload ucode container only on the boot cpu */
+ if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+ return UCODE_OK;
if (c->x86 >= 0x15)
snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
@@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device)
goto fw_release;
}
- ret = generic_load_microcode(cpu, fw->data, fw->size);
+ /* free old equiv table */
+ free_equiv_cpu_table();
+
+ ret = load_microcode_amd(cpu, fw->data, fw->size);
+ if (ret != UCODE_OK)
+ cleanup();
-fw_release:
+ fw_release:
release_firmware(fw);
-out:
+ out:
return ret;
}
@@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void)
return NULL;
}
- patch = (void *)get_zeroed_page(GFP_KERNEL);
- if (!patch)
- return NULL;
-
return &microcode_amd_ops;
}
void __exit exit_amd_microcode(void)
{
- free_page((unsigned long)patch);
+ cleanup();
}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 4873e62..3a04b22 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
if (do_microcode_update(buf, len) == 0)
ret = (ssize_t)len;
+ if (ret > 0)
+ perf_check_microcode();
+
mutex_unlock(&microcode_mutex);
put_online_cpus();
@@ -276,19 +279,18 @@ static struct platform_device *microcode_pdev;
static int reload_for_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ enum ucode_state ustate;
int err = 0;
- if (uci->valid) {
- enum ucode_state ustate;
-
- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
- if (ustate == UCODE_OK)
- apply_microcode_on_target(cpu);
- else
- if (ustate == UCODE_ERROR)
- err = -EINVAL;
- }
+ if (!uci->valid)
+ return err;
+ ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
+ if (ustate == UCODE_OK)
+ apply_microcode_on_target(cpu);
+ else
+ if (ustate == UCODE_ERROR)
+ err = -EINVAL;
return err;
}
@@ -370,18 +372,15 @@ static void microcode_fini_cpu(int cpu)
static enum ucode_state microcode_resume_cpu(int cpu)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
- if (!uci->mc)
- return UCODE_NFOUND;
-
pr_debug("CPU%d updated upon resume\n", cpu);
- apply_microcode_on_target(cpu);
+
+ if (apply_microcode_on_target(cpu))
+ return UCODE_ERROR;
return UCODE_OK;
}
-static enum ucode_state microcode_init_cpu(int cpu)
+static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
{
enum ucode_state ustate;
@@ -392,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu)
if (system_state != SYSTEM_RUNNING)
return UCODE_NFOUND;
- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+ ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
+ refresh_fw);
if (ustate == UCODE_OK) {
pr_debug("CPU%d updated upon init\n", cpu);
@@ -405,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu)
static enum ucode_state microcode_update_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- enum ucode_state ustate;
if (uci->valid)
- ustate = microcode_resume_cpu(cpu);
- else
- ustate = microcode_init_cpu(cpu);
+ return microcode_resume_cpu(cpu);
- return ustate;
+ return microcode_init_cpu(cpu, false);
}
static int mc_device_add(struct device *dev, struct subsys_interface *sif)
@@ -428,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
if (err)
return err;
- if (microcode_init_cpu(cpu) == UCODE_ERROR)
+ if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
return -EINVAL;
return err;
@@ -477,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
struct device *dev;
dev = get_cpu_device(cpu);
- switch (action) {
+
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
microcode_update_cpu(cpu);
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
pr_debug("CPU%d added\n", cpu);
+ /*
+ * "break" is missing on purpose here because we want to fall
+ * through in order to create the sysfs group.
+ */
+
+ case CPU_DOWN_FAILED:
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
break;
+
case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&dev->kobj, &mc_attr_group);
pr_debug("CPU%d removed\n", cpu);
break;
/*
+ * case CPU_DEAD:
+ *
* When a CPU goes offline, don't free up or invalidate the copy of
* the microcode in kernel memory, so that we can reuse it when the
* CPU comes back online without unnecessarily requesting the userspace
* for it again.
*/
- case CPU_UP_CANCELED_FROZEN:
- /* The CPU refused to come up during a system resume */
- microcode_fini_cpu(cpu);
- break;
}
+
+ /* The CPU refused to come up during a system resume */
+ if (action == CPU_UP_CANCELED_FROZEN)
+ microcode_fini_cpu(cpu);
+
return NOTIFY_OK;
}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0327e2b..3544aed 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
return 0;
}
-static enum ucode_state request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device,
+ bool refresh_fw)
{
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index eb11369..a7c5661 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -257,12 +257,14 @@ static int __init msr_init(void)
goto out_chrdev;
}
msr_class->devnode = msr_devnode;
+ get_online_cpus();
for_each_online_cpu(i) {
err = msr_device_create(i);
if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&msr_class_cpu_notifier);
+ put_online_cpus();
err = 0;
goto out;
@@ -271,6 +273,7 @@ out_class:
i = 0;
for_each_online_cpu(i)
msr_device_destroy(i);
+ put_online_cpus();
class_destroy(msr_class);
out_chrdev:
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -281,11 +284,13 @@ out:
static void __exit msr_exit(void)
{
int cpu = 0;
+ get_online_cpus();
for_each_online_cpu(cpu)
msr_device_destroy(cpu);
class_destroy(msr_class);
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
unregister_hotcpu_notifier(&msr_class_cpu_notifier);
+ put_online_cpus();
}
module_init(msr_init);
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
new file mode 100644
index 0000000..e309cc5
--- /dev/null
+++ b/arch/x86/kernel/perf_regs.c
@@ -0,0 +1,105 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <linux/stddef.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_X86_32
+#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
+#else
+#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
+#endif
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
+ PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
+ PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
+ PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
+ PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
+ PT_REGS_OFFSET(PERF_REG_X86_SI, si),
+ PT_REGS_OFFSET(PERF_REG_X86_DI, di),
+ PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
+ PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
+ PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
+ PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
+ PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
+ PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
+#ifdef CONFIG_X86_32
+ PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
+ PT_REGS_OFFSET(PERF_REG_X86_ES, es),
+ PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
+ PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
+#else
+ /*
+ * The pt_regs struct does not store
+ * ds, es, fs, gs in 64 bit mode.
+ */
+ (unsigned int) -1,
+ (unsigned int) -1,
+ (unsigned int) -1,
+ (unsigned int) -1,
+#endif
+#ifdef CONFIG_X86_64
+ PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
+ PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
+ PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
+ PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
+ PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
+ PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
+ PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
+ PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
+#endif
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
+ return 0;
+
+ return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
+
+#ifdef CONFIG_X86_32
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ return PERF_SAMPLE_REGS_ABI_32;
+}
+#else /* CONFIG_X86_64 */
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
+ (1ULL << PERF_REG_X86_ES) | \
+ (1ULL << PERF_REG_X86_FS) | \
+ (1ULL << PERF_REG_X86_GS))
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ if (mask & REG_NOSUPPORT)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ if (test_tsk_thread_flag(task, TIF_IA32))
+ return PERF_SAMPLE_REGS_ABI_32;
+ else
+ return PERF_SAMPLE_REGS_ABI_64;
+}
+#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 0bc72e2..d5f15c3 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev)
return oprom;
}
-void *pci_map_biosrom(struct pci_dev *pdev)
+void __iomem *pci_map_biosrom(struct pci_dev *pdev)
{
struct resource *oprom = find_oprom(pdev);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ef6a845..dc3567e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
int ret;
- unlazy_fpu(src);
-
*dst = *src;
if (fpu_allocated(&src->thread.fpu)) {
memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
ret = fpu_alloc(&dst->thread.fpu);
if (ret)
return ret;
- fpu_copy(&dst->thread.fpu, &src->thread.fpu);
+ fpu_copy(dst, src);
}
return 0;
}
@@ -97,16 +95,6 @@ void arch_task_cache_init(void)
SLAB_PANIC | SLAB_NOTRACK, NULL);
}
-static inline void drop_fpu(struct task_struct *tsk)
-{
- /*
- * Forget coprocessor state..
- */
- tsk->fpu_counter = 0;
- clear_fpu(tsk);
- clear_used_math();
-}
-
/*
* Free current thread data structures etc..
*/
@@ -163,7 +151,13 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- drop_fpu(tsk);
+ drop_init_fpu(tsk);
+ /*
+ * Free the FPU state for non xsave platforms. They get reallocated
+ * lazily at the first use.
+ */
+ if (!use_eager_fpu())
+ free_thread_xstate(tsk);
}
static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 516fa18..b9ff83c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
- /*
- * Free the old FP and other extended state
- */
- free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9..8a6d20c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
- /*
- * Free the old FP and other extended state
- */
- free_thread_xstate(current);
}
void
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4c6a5c..b00b33a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
#include <linux/signal.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = {
#define genregs32_get genregs_get
#define genregs32_set genregs_set
-#define user_i387_ia32_struct user_i387_struct
-#define user32_fxsr_struct user_fxsr_struct
-
#endif /* CONFIG_X86_64 */
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
+ rcu_user_exit();
+
/*
* If we stepped into a sysenter/syscall insn, it trapped in
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs)
!test_thread_flag(TIF_SYSCALL_EMU);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
+
+ rcu_user_enter();
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80..4f16547 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -961,9 +961,7 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init();
#endif
- x86_init.paging.pagetable_setup_start(swapper_pg_dir);
- paging_init();
- x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+ x86_init.paging.pagetable_init();
if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908..3160c26 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(buf, &sc->fpstate);
- err |= restore_i387_xstate(buf);
+ err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
@@ -206,35 +206,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
void __user **fpstate)
{
/* Default to using normal stack */
+ unsigned long math_size = 0;
unsigned long sp = regs->sp;
+ unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp);
-#ifdef CONFIG_X86_64
/* redzone */
- sp -= 128;
-#endif /* CONFIG_X86_64 */
+ if (config_enabled(CONFIG_X86_64))
+ sp -= 128;
if (!onsigstack) {
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (current->sas_ss_size)
sp = current->sas_ss_sp + current->sas_ss_size;
- } else {
-#ifdef CONFIG_X86_32
- /* This is the legacy signal stack switching. */
- if ((regs->ss & 0xffff) != __USER_DS &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer)
+ } else if (config_enabled(CONFIG_X86_32) &&
+ (regs->ss & 0xffff) != __USER_DS &&
+ !(ka->sa.sa_flags & SA_RESTORER) &&
+ ka->sa.sa_restorer) {
+ /* This is the legacy signal stack switching. */
sp = (unsigned long) ka->sa.sa_restorer;
-#endif /* CONFIG_X86_32 */
}
}
if (used_math()) {
- sp -= sig_xstate_size;
-#ifdef CONFIG_X86_64
- sp = round_down(sp, 64);
-#endif /* CONFIG_X86_64 */
+ sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+ &buf_fx, &math_size);
*fpstate = (void __user *)sp;
}
@@ -247,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (onsigstack && !likely(on_sig_stack(sp)))
return (void __user *)-1L;
- /* save i387 state */
- if (used_math() && save_i387_xstate(*fpstate) < 0)
+ /* save i387 and extended state */
+ if (used_math() &&
+ save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L;
return (void __user *)sp;
@@ -474,6 +472,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
#endif /* CONFIG_X86_32 */
+static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
+ siginfo_t *info, compat_sigset_t *set,
+ struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_X32_ABI
+ struct rt_sigframe_x32 __user *frame;
+ void __user *restorer;
+ int err = 0;
+ void __user *fpstate = NULL;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ return -EFAULT;
+
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (copy_siginfo_to_user32(&frame->info, info))
+ return -EFAULT;
+ }
+
+ put_user_try {
+ /* Create the ucontext. */
+ if (cpu_has_xsave)
+ put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(0, &frame->uc.uc_link);
+ put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ put_user_ex(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ put_user_ex(0, &frame->uc.uc__pad0);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ restorer = ka->sa.sa_restorer;
+ } else {
+ /* could use a vstub here */
+ restorer = NULL;
+ err |= -EFAULT;
+ }
+ put_user_ex(restorer, &frame->pretcode);
+ } put_user_catch(err);
+
+ if (err)
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->sp = (unsigned long) frame;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
+
+ /* We use the x32 calling convention here... */
+ regs->di = sig;
+ regs->si = (unsigned long) &frame->info;
+ regs->dx = (unsigned long) &frame->uc;
+
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+
+ regs->cs = __USER_CS;
+ regs->ss = __USER_DS;
+#endif /* CONFIG_X86_X32_ABI */
+
+ return 0;
+}
+
#ifdef CONFIG_X86_32
/*
* Atomically swap in the new signal mask, and wait for a signal.
@@ -612,55 +678,22 @@ static int signr_convert(int sig)
return sig;
}
-#ifdef CONFIG_X86_32
-
-#define is_ia32 1
-#define ia32_setup_frame __setup_frame
-#define ia32_setup_rt_frame __setup_rt_frame
-
-#else /* !CONFIG_X86_32 */
-
-#ifdef CONFIG_IA32_EMULATION
-#define is_ia32 test_thread_flag(TIF_IA32)
-#else /* !CONFIG_IA32_EMULATION */
-#define is_ia32 0
-#endif /* CONFIG_IA32_EMULATION */
-
-#ifdef CONFIG_X86_X32_ABI
-#define is_x32 test_thread_flag(TIF_X32)
-
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
- siginfo_t *info, compat_sigset_t *set,
- struct pt_regs *regs);
-#else /* !CONFIG_X86_X32_ABI */
-#define is_x32 0
-#endif /* CONFIG_X86_X32_ABI */
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs *regs);
-
-#endif /* CONFIG_X86_32 */
-
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct pt_regs *regs)
{
int usig = signr_convert(sig);
sigset_t *set = sigmask_to_save();
+ compat_sigset_t *cset = (compat_sigset_t *) set;
/* Set up the stack frame */
- if (is_ia32) {
+ if (is_ia32_frame()) {
if (ka->sa.sa_flags & SA_SIGINFO)
- return ia32_setup_rt_frame(usig, ka, info, set, regs);
+ return ia32_setup_rt_frame(usig, ka, info, cset, regs);
else
- return ia32_setup_frame(usig, ka, set, regs);
-#ifdef CONFIG_X86_X32_ABI
- } else if (is_x32) {
- return x32_setup_rt_frame(usig, ka, info,
- (compat_sigset_t *)set, regs);
-#endif
+ return ia32_setup_frame(usig, ka, cset, regs);
+ } else if (is_x32_frame()) {
+ return x32_setup_rt_frame(usig, ka, info, cset, regs);
} else {
return __setup_rt_frame(sig, ka, info, set, regs);
}
@@ -779,6 +812,8 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
+ rcu_user_exit();
+
#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -804,6 +839,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
+
+ rcu_user_enter();
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
@@ -824,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
}
#ifdef CONFIG_X86_X32_ABI
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
- siginfo_t *info, compat_sigset_t *set,
- struct pt_regs *regs)
-{
- struct rt_sigframe_x32 __user *frame;
- void __user *restorer;
- int err = 0;
- void __user *fpstate = NULL;
-
- frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
-
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- return -EFAULT;
-
- if (ka->sa.sa_flags & SA_SIGINFO) {
- if (copy_siginfo_to_user32(&frame->info, info))
- return -EFAULT;
- }
-
- put_user_try {
- /* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
- put_user_ex(0, &frame->uc.uc_link);
- put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- put_user_ex(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- put_user_ex(0, &frame->uc.uc__pad0);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
- if (ka->sa.sa_flags & SA_RESTORER) {
- restorer = ka->sa.sa_restorer;
- } else {
- /* could use a vstub here */
- restorer = NULL;
- err |= -EFAULT;
- }
- put_user_ex(restorer, &frame->pretcode);
- } put_user_catch(err);
-
- if (err)
- return -EFAULT;
-
- /* Set up registers for signal handler */
- regs->sp = (unsigned long) frame;
- regs->ip = (unsigned long) ka->sa.sa_handler;
-
- /* We use the x32 calling convention here... */
- regs->di = sig;
- regs->si = (unsigned long) &frame->info;
- regs->dx = (unsigned long) &frame->uc;
-
- loadsegment(ds, __USER_DS);
- loadsegment(es, __USER_DS);
-
- regs->cs = __USER_CS;
- regs->ss = __USER_DS;
-
- return 0;
-}
-
asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe_x32 __user *frame;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7c5a8c3..c80a33b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long boot_error = 0;
int timeout;
- alternatives_smp_switch(1);
+ /* Just in case we booted with a single CPU. */
+ alternatives_enable_smp();
idle->thread.sp = (unsigned long) (((struct pt_regs *)
(THREAD_SIZE + task_stack_page(idle))) - 1);
@@ -1053,20 +1054,6 @@ out:
preempt_enable();
}
-void arch_disable_nonboot_cpus_begin(void)
-{
- /*
- * Avoid the smp alternatives switch during the disable_nonboot_cpus().
- * In the suspend path, we will be back in the SMP mode shortly anyways.
- */
- skip_smp_alternatives = true;
-}
-
-void arch_disable_nonboot_cpus_end(void)
-{
- skip_smp_alternatives = false;
-}
-
void arch_enable_nonboot_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu)
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
if (system_state == SYSTEM_RUNNING)
pr_info("CPU %u is now offline\n", cpu);
-
- if (1 == num_online_cpus())
- alternatives_smp_switch(0);
return;
}
msleep(100);
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index c346d11..cd3b243 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child)
return 1;
}
+/*
+ * Turn block stepping on or off for 'task'.
+ *
+ * With local interrupts disabled, read the debugctl MSR value, set or
+ * clear DEBUGCTLMSR_BTF in it and set or clear the task's TIF_BLOCKSTEP
+ * flag to match.  The modified value is written back to the MSR only
+ * when 'task' is current; for any other task only the thread flag changes.
+ *
+ * NOTE(review): interrupts are re-enabled unconditionally on exit, so this
+ * presumably must be called with interrupts enabled -- confirm with callers.
+ */
+void set_task_blockstep(struct task_struct *task, bool on)
+{
+ unsigned long debugctl;
+
+ /*
+ * Ensure irq/preemption can't change debugctl in between.
+ * Note also that both TIF_BLOCKSTEP and debugctl should
+ * be changed atomically wrt preemption.
+ * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
+ * wrong if task != current, SIGKILL can wakeup the stopped
+ * tracee and set/clear can play with the running task, this
+ * can confuse the next __switch_to_xtra().
+ */
+ local_irq_disable();
+ debugctl = get_debugctlmsr();
+ if (on) {
+ debugctl |= DEBUGCTLMSR_BTF;
+ set_tsk_thread_flag(task, TIF_BLOCKSTEP);
+ } else {
+ debugctl &= ~DEBUGCTLMSR_BTF;
+ clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
+ }
+ /* Only touch the hardware MSR when the change is for the running task. */
+ if (task == current)
+ update_debugctlmsr(debugctl);
+ local_irq_enable();
+}
+
/*
* Enable single or block step.
*/
@@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block)
* So no one should try to use debugger block stepping in a program
* that uses user-mode single stepping itself.
*/
- if (enable_single_step(child) && block) {
- unsigned long debugctl = get_debugctlmsr();
-
- debugctl |= DEBUGCTLMSR_BTF;
- update_debugctlmsr(debugctl);
- set_tsk_thread_flag(child, TIF_BLOCKSTEP);
- } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
- unsigned long debugctl = get_debugctlmsr();
-
- debugctl &= ~DEBUGCTLMSR_BTF;
- update_debugctlmsr(debugctl);
- clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
- }
+ if (enable_single_step(child) && block)
+ set_task_blockstep(child, true);
+ else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
+ set_task_blockstep(child, false);
}
void user_enable_single_step(struct task_struct *child)
@@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child)
/*
* Make sure block stepping (BTF) is disabled.
*/
- if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
- unsigned long debugctl = get_debugctlmsr();
-
- debugctl &= ~DEBUGCTLMSR_BTF;
- update_debugctlmsr(debugctl);
- clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
- }
+ if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
+ set_task_blockstep(child, false);
/* Always clear TIF_SINGLESTEP... */
clear_tsk_thread_flag(child, TIF_SINGLESTEP);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341..8276dc6 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,6 +55,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
+#include <asm/rcu.h>
#include <asm/mach_traps.h>
@@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
dec_preempt_count();
}
-static void __kprobes
-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
- long error_code, siginfo_t *info)
+static int __kprobes
+do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
+ struct pt_regs *regs, long error_code)
{
- struct task_struct *tsk = current;
-
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
/*
- * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
+ * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
* On nmi (interrupt 2), do_trap should not be called.
*/
- if (trapnr < X86_TRAP_UD)
- goto vm86_trap;
- goto trap_signal;
+ if (trapnr < X86_TRAP_UD) {
+ if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, trapnr))
+ return 0;
+ }
+ return -1;
}
#endif
+ if (!user_mode(regs)) {
+ if (!fixup_exception(regs)) {
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_nr = trapnr;
+ die(str, regs, error_code);
+ }
+ return 0;
+ }
- if (!user_mode(regs))
- goto kernel_trap;
+ return -1;
+}
-#ifdef CONFIG_X86_32
-trap_signal:
-#endif
+static void __kprobes
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+ long error_code, siginfo_t *info)
+{
+ struct task_struct *tsk = current;
+
+
+ if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
+ return;
/*
* We want error_code and trap_nr set for userspace faults and
* kernelspace faults which result in die(), but not
@@ -158,33 +174,20 @@ trap_signal:
force_sig_info(signr, info, tsk);
else
force_sig(signr, tsk);
- return;
-
-kernel_trap:
- if (!fixup_exception(regs)) {
- tsk->thread.error_code = error_code;
- tsk->thread.trap_nr = trapnr;
- die(str, regs, error_code);
- }
- return;
-
-#ifdef CONFIG_X86_32
-vm86_trap:
- if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
- error_code, trapnr))
- goto trap_signal;
- return;
-#endif
}
#define DO_ERROR(trapnr, signr, str, name) \
dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
+ exception_enter(regs); \
+ if (notify_die(DIE_TRAP, str, regs, error_code, \
+ trapnr, signr) == NOTIFY_STOP) { \
+ exception_exit(regs); \
return; \
+ } \
conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
+ exception_exit(regs); \
}
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
@@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
info.si_errno = 0; \
info.si_code = sicode; \
info.si_addr = (void __user *)siaddr; \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
+ exception_enter(regs); \
+ if (notify_die(DIE_TRAP, str, regs, error_code, \
+ trapnr, signr) == NOTIFY_STOP) { \
+ exception_exit(regs); \
return; \
+ } \
conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, &info); \
+ exception_exit(regs); \
}
DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
/* Runs on IST stack */
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
{
+ exception_enter(regs);
if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
- X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
- return;
- preempt_conditional_sti(regs);
- do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+ preempt_conditional_sti(regs);
+ do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+ preempt_conditional_cli(regs);
+ }
+ exception_exit(regs);
}
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
static const char str[] = "double fault";
struct task_struct *tsk = current;
+ exception_enter(regs);
 /* Return not checked because a double fault cannot be ignored */
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk;
+ exception_enter(regs);
conditional_sti(regs);
#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK)
- goto gp_in_vm86;
+ if (regs->flags & X86_VM_MASK) {
+ local_irq_enable();
+ handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+ goto exit;
+ }
#endif
tsk = current;
- if (!user_mode(regs))
- goto gp_in_kernel;
+ if (!user_mode(regs)) {
+ if (fixup_exception(regs))
+ goto exit;
+
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_nr = X86_TRAP_GP;
+ if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
+ X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
+ die("general protection fault", regs, error_code);
+ goto exit;
+ }
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
@@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
}
force_sig(SIGSEGV, tsk);
- return;
-
-#ifdef CONFIG_X86_32
-gp_in_vm86:
- local_irq_enable();
- handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
- return;
-#endif
-
-gp_in_kernel:
- if (fixup_exception(regs))
- return;
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_nr = X86_TRAP_GP;
- if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
- X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
- return;
- die("general protection fault", regs, error_code);
+exit:
+ exception_exit(regs);
}
/* May run on IST stack. */
@@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
ftrace_int3_handler(regs))
return;
#endif
+ exception_enter(regs);
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
- return;
+ goto exit;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
- return;
+ goto exit;
/*
* Let others (NMI) know that the debug stack is in use
@@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
+exit:
+ exception_exit(regs);
}
#ifdef CONFIG_X86_64
@@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
unsigned long dr6;
int si_code;
+ exception_enter(regs);
+
get_debugreg(dr6, 6);
/* Filter out all the reserved bits which are preset to 1 */
@@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
/* Catch kmemcheck conditions first of all! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
- return;
+ goto exit;
/* DR6 may or may not be cleared by the CPU */
set_debugreg(0, 6);
@@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
SIGTRAP) == NOTIFY_STOP)
- return;
+ goto exit;
/*
* Let others (NMI) know that the debug stack is in use
@@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
X86_TRAP_DB);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
- return;
+ goto exit;
}
/*
@@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
preempt_conditional_cli(regs);
debug_stack_usage_dec();
- return;
+exit:
+ exception_exit(regs);
}
/*
@@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
ignore_fpu_irq = 1;
#endif
-
+ exception_enter(regs);
math_error(regs, error_code, X86_TRAP_MF);
+ exception_exit(regs);
}
dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
+ exception_enter(regs);
math_error(regs, error_code, X86_TRAP_XF);
+ exception_exit(regs);
}
dotraplinkage void
@@ -613,11 +628,12 @@ void math_state_restore(void)
}
__thread_fpu_begin(tsk);
+
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
- __thread_fpu_end(tsk);
+ drop_init_fpu(tsk);
force_sig(SIGSEGV, tsk);
return;
}
@@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore);
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
+ exception_enter(regs);
+ BUG_ON(use_eager_fpu());
+
#ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
@@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
info.regs = regs;
math_emulate(&info);
+ exception_exit(regs);
return;
}
#endif
@@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
conditional_sti(regs);
#endif
+ exception_exit(regs);
}
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
siginfo_t info;
+
+ exception_enter(regs);
local_irq_enable();
info.si_signo = SIGILL;
@@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
info.si_code = ILL_BADSTK;
info.si_addr = NULL;
if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
- X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
- return;
- do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
- &info);
+ X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
+ do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
+ &info);
+ }
+ exception_exit(regs);
}
#endif
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 36fd420..9538f00 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,6 +41,9 @@
/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL 0x2
+/* Instruction will modify TF, don't change it */
+#define UPROBE_FIX_SETF 0x4
+
#define UPROBE_FIX_RIP_AX 0x8000
#define UPROBE_FIX_RIP_CX 0x4000
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
insn_get_opcode(insn); /* should be a nop */
switch (OPCODE1(insn)) {
+ case 0x9d:
+ /* popf */
+ auprobe->fixups |= UPROBE_FIX_SETF;
+ break;
case 0xc3: /* ret/lret */
case 0xcb:
case 0xc2:
@@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* Skip these instructions as per the currently known x86 ISA.
* 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
*/
-bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
int i;
@@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
return false;
}
+
+/*
+ * Wrapper around __skip_sstep(): when the instruction was skipped and
+ * X86_EFLAGS_TF is set in regs->flags, send SIGTRAP to the current task.
+ * Returns the result of __skip_sstep() unchanged.
+ */
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ bool ret = __skip_sstep(auprobe, regs);
+ /*
+ * NOTE(review): presumably this stands in for the single-step trap the
+ * skipped instruction would otherwise have raised -- confirm.
+ */
+ if (ret && (regs->flags & X86_EFLAGS_TF))
+ send_sig(SIGTRAP, current, 0);
+ return ret;
+}
+
+/*
+ * Arm single-stepping for the current task: record in autask->saved_tf
+ * whether X86_EFLAGS_TF was already set in the task's saved registers,
+ * then set TF and switch block stepping off if TIF_BLOCKSTEP is set.
+ * The 'auprobe' argument is not used by this function.
+ */
+void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
+{
+ struct task_struct *task = current;
+ struct arch_uprobe_task *autask = &task->utask->autask;
+ struct pt_regs *regs = task_pt_regs(task);
+
+ /* Remember the pre-existing TF state; arch_uprobe_disable_step() reads it. */
+ autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
+
+ regs->flags |= X86_EFLAGS_TF;
+ if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
+ set_task_blockstep(task, false);
+}
+
+/*
+ * Undo arch_uprobe_enable_step() for the current task.
+ *
+ * If the task's uprobe state is UTASK_SSTEP_TRAPPED, TF is cleared unless
+ * it was already set before stepping (autask->saved_tf).  Otherwise, a
+ * task that had TF set beforehand is sent SIGTRAP, and a task that did
+ * not has TF cleared unless the probed instruction carries
+ * UPROBE_FIX_SETF (set for popf, which modifies TF itself).
+ */
+void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
+{
+ struct task_struct *task = current;
+ struct arch_uprobe_task *autask = &task->utask->autask;
+ bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
+ struct pt_regs *regs = task_pt_regs(task);
+ /*
+ * The state of TIF_BLOCKSTEP was not saved so we can get an extra
+ * SIGTRAP if we do not clear TF. We need to examine the opcode to
+ * make it right.
+ */
+ if (unlikely(trapped)) {
+ if (!autask->saved_tf)
+ regs->flags &= ~X86_EFLAGS_TF;
+ } else {
+ if (autask->saved_tf)
+ send_sig(SIGTRAP, task, 0);
+ else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+ regs->flags &= ~X86_EFLAGS_TF;
+ }
+}
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 6020f6f..1330dd1 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -13,9 +13,13 @@
#include <asm/ftrace.h>
#ifdef CONFIG_FUNCTION_TRACER
-/* mcount is defined in assembly */
+/* mcount and __fentry__ are defined in assembly */
+#ifdef CC_USING_FENTRY
+EXPORT_SYMBOL(__fentry__);
+#else
EXPORT_SYMBOL(mcount);
#endif
+#endif
EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 9f3167e..7a3d075 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,6 @@
void __cpuinit x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_noop(pgd_t *unused) { }
int __init iommu_init_noop(void) { return 0; }
void iommu_shutdown_noop(void) { }
@@ -68,8 +67,7 @@ struct x86_init_ops x86_init __initdata = {
},
.paging = {
- .pagetable_setup_start = native_pagetable_setup_start,
- .pagetable_setup_done = native_pagetable_setup_done,
+ .pagetable_init = native_pagetable_init,
},
.timers = {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 3d3e207..4e89b3d 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -10,9 +10,7 @@
#include <linux/compat.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
-#ifdef CONFIG_IA32_EMULATION
-#include <asm/sigcontext32.h>
-#endif
+#include <asm/sigframe.h>
#include <asm/xcr.h>
/*
@@ -23,13 +21,9 @@ u64 pcntxt_mask;
/*
* Represents init state for the supported extended state.
*/
-static struct xsave_struct *init_xstate_buf;
-
-struct _fpx_sw_bytes fx_sw_reserved;
-#ifdef CONFIG_IA32_EMULATION
-struct _fpx_sw_bytes fx_sw_reserved_ia32;
-#endif
+struct xsave_struct *init_xstate_buf;
+static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
/*
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
*/
void __sanitize_i387_state(struct task_struct *tsk)
{
- u64 xstate_bv;
- int feature_bit = 0x2;
struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
+ int feature_bit = 0x2;
+ u64 xstate_bv;
if (!fx)
return;
@@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk)
* Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext.
*/
-int check_for_xstate(struct i387_fxsave_struct __user *buf,
- void __user *fpstate,
- struct _fpx_sw_bytes *fx_sw_user)
+static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
+ void __user *fpstate,
+ struct _fpx_sw_bytes *fx_sw)
{
int min_xstate_size = sizeof(struct i387_fxsave_struct) +
sizeof(struct xsave_hdr_struct);
unsigned int magic2;
- int err;
- err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
- sizeof(struct _fpx_sw_bytes));
- if (err)
- return -EFAULT;
+ if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
+ return -1;
- /*
- * First Magic check failed.
- */
- if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
- return -EINVAL;
+ /* Check for the first magic field and other error scenarios. */
+ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
+ fx_sw->xstate_size < min_xstate_size ||
+ fx_sw->xstate_size > xstate_size ||
+ fx_sw->xstate_size > fx_sw->extended_size)
+ return -1;
/*
- * Check for error scenarios.
- */
- if (fx_sw_user->xstate_size < min_xstate_size ||
- fx_sw_user->xstate_size > xstate_size ||
- fx_sw_user->xstate_size > fx_sw_user->extended_size)
- return -EINVAL;
-
- err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
- fx_sw_user->extended_size -
- FP_XSTATE_MAGIC2_SIZE));
- if (err)
- return err;
- /*
* Check for the presence of second magic word at the end of memory
* layout. This detects the case where the user just copied the legacy
 * fpstate layout without copying the extended state information
* in the memory layout.
*/
- if (magic2 != FP_XSTATE_MAGIC2)
- return -EFAULT;
+ if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
+ || magic2 != FP_XSTATE_MAGIC2)
+ return -1;
return 0;
}
-#ifdef CONFIG_X86_64
/*
* Signal frame handlers.
*/
-
-int save_i387_xstate(void __user *buf)
+static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
{
- struct task_struct *tsk = current;
- int err = 0;
-
- if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
- return -EACCES;
+ if (use_fxsr()) {
+ struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+ struct user_i387_ia32_struct env;
+ struct _fpstate_ia32 __user *fp = buf;
- BUG_ON(sig_xstate_size < xstate_size);
+ convert_from_fxsr(&env, tsk);
- if ((unsigned long)buf % 64)
- pr_err("%s: bad fpstate %p\n", __func__, buf);
-
- if (!used_math())
- return 0;
-
- if (user_has_fpu()) {
- if (use_xsave())
- err = xsave_user(buf);
- else
- err = fxsave_user(buf);
-
- if (err)
- return err;
- user_fpu_end();
+ if (__copy_to_user(buf, &env, sizeof(env)) ||
+ __put_user(xsave->i387.swd, &fp->status) ||
+ __put_user(X86_FXSR_MAGIC, &fp->magic))
+ return -1;
} else {
- sanitize_i387_state(tsk);
- if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
- xstate_size))
+ struct i387_fsave_struct __user *fp = buf;
+ u32 swd;
+ if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
return -1;
}
- clear_used_math(); /* trigger finit */
+ return 0;
+}
- if (use_xsave()) {
- struct _fpstate __user *fx = buf;
- struct _xstate __user *x = buf;
- u64 xstate_bv;
+static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
+{
+ struct xsave_struct __user *x = buf;
+ struct _fpx_sw_bytes *sw_bytes;
+ u32 xstate_bv;
+ int err;
- err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
- sizeof(struct _fpx_sw_bytes));
+ /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
+ sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
+ err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
- err |= __put_user(FP_XSTATE_MAGIC2,
- (__u32 __user *) (buf + sig_xstate_size
- - FP_XSTATE_MAGIC2_SIZE));
+ if (!use_xsave())
+ return err;
- /*
- * Read the xstate_bv which we copied (directly from the cpu or
- * from the state in task struct) to the user buffers and
- * set the FP/SSE bits.
- */
- err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
+ err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
- /*
- * For legacy compatible, we always set FP/SSE bits in the bit
- * vector while saving the state to the user context. This will
- * enable us capturing any changes(during sigreturn) to
- * the FP/SSE bits by the legacy applications which don't touch
- * xstate_bv in the xsave header.
- *
- * xsave aware apps can change the xstate_bv in the xsave
- * header as well as change any contents in the memory layout.
- * xrestore as part of sigreturn will capture all the changes.
- */
- xstate_bv |= XSTATE_FPSSE;
+ /*
+ * Read the xstate_bv which we copied (directly from the cpu or
+ * from the state in task struct) to the user buffers.
+ */
+ err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
- err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);
+ /*
+ * For legacy compatibility, we always set FP/SSE bits in the bit
+ * vector while saving the state to the user context. This will
+ * enable us to capture any changes (during sigreturn) to
+ * the FP/SSE bits by the legacy applications which don't touch
+ * xstate_bv in the xsave header.
+ *
+ * xsave aware apps can change the xstate_bv in the xsave
+ * header as well as change any contents in the memory layout.
+ * xrestore as part of sigreturn will capture all the changes.
+ */
+ xstate_bv |= XSTATE_FPSSE;
- if (err)
- return err;
- }
+ err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
- return 1;
+ return err;
+}
+
+/*
+ * Save the register state to the user buffer 'buf', using xsave_user()
+ * when use_xsave() is true, else fxsave_user() when use_fxsr() is true,
+ * else fsave_user().
+ *
+ * Returns the save routine's result.  If the save failed, xstate_size
+ * bytes of the user buffer are cleared; if that clearing fails as well,
+ * the return value becomes -EFAULT.
+ */
+static inline int save_user_xstate(struct xsave_struct __user *buf)
+{
+ int err;
+
+ if (use_xsave())
+ err = xsave_user(buf);
+ else if (use_fxsr())
+ err = fxsave_user((struct i387_fxsave_struct __user *) buf);
+ else
+ err = fsave_user((struct i387_fsave_struct __user *) buf);
+
+ /* __clear_user() is attempted only after a failed save. */
+ if (unlikely(err) && __clear_user(buf, xstate_size))
+ err = -EFAULT;
+ return err;
+}
/*
- * Restore the extended state if present. Otherwise, restore the FP/SSE
- * state.
+ * Save the fpu, extended register state to the user signal frame.
+ *
+ * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
+ * state is copied.
+ * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
+ *
+ * buf == buf_fx for 64-bit frames and 32-bit fsave frame.
+ * buf != buf_fx for 32-bit frames with fxstate.
+ *
+ * If the fpu, extended register state is live, save the state directly
+ * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
+ * copy the thread's fpu state to the user frame starting at 'buf_fx'.
+ *
+ * If this is a 32-bit frame with fxstate, put a fsave header before
+ * the aligned state at 'buf_fx'.
+ *
+ * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
+ * indicating the absence/presence of the extended state to the user.
*/
-static int restore_user_xstate(void __user *buf)
+int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
{
- struct _fpx_sw_bytes fx_sw_user;
- u64 mask;
- int err;
+ struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
+ struct task_struct *tsk = current;
+ int ia32_fxstate = (buf != buf_fx);
- if (((unsigned long)buf % 64) ||
- check_for_xstate(buf, buf, &fx_sw_user))
- goto fx_only;
+ ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+ config_enabled(CONFIG_IA32_EMULATION));
- mask = fx_sw_user.xstate_bv;
+ if (!access_ok(VERIFY_WRITE, buf, size))
+ return -EACCES;
- /*
- * restore the state passed by the user.
- */
- err = xrestore_user(buf, mask);
- if (err)
- return err;
+ if (!HAVE_HWFP)
+ return fpregs_soft_get(current, NULL, 0,
+ sizeof(struct user_i387_ia32_struct), NULL,
+ (struct _fpstate_ia32 __user *) buf) ? -1 : 1;
- /*
- * init the state skipped by the user.
- */
- mask = pcntxt_mask & ~mask;
- if (unlikely(mask))
- xrstor_state(init_xstate_buf, mask);
+ if (user_has_fpu()) {
+ /* Save the live register state to the user directly. */
+ if (save_user_xstate(buf_fx))
+ return -1;
+ /* Update the thread's fxstate to save the fsave header. */
+ if (ia32_fxstate)
+ fpu_fxsave(&tsk->thread.fpu);
+ } else {
+ sanitize_i387_state(tsk);
+ if (__copy_to_user(buf_fx, xsave, xstate_size))
+ return -1;
+ }
+
+ /* Save the fsave header for the 32-bit frames. */
+ if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
+ return -1;
+
+ if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
+ return -1;
+
+ drop_init_fpu(tsk); /* trigger finit */
return 0;
+}
-fx_only:
- /*
- * couldn't find the extended state information in the
- * memory layout. Restore just the FP/SSE and init all
- * the other extended state.
- */
- xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
- return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
+/*
+ * Sanitize the state held in tsk's xsave area.
+ *
+ * With use_xsave(): zero reserved1[0] and reserved1[1] of the xsave header
+ * and limit its xstate_bv to XSTATE_FPSSE when 'fx_only' is set, otherwise
+ * to the bits present in both pcntxt_mask and the caller's 'xstate_bv'.
+ * With use_fxsr(): mask mxcsr with mxcsr_feature_mask and then call
+ * convert_to_fxsr() with 'ia32_env'.
+ */
+static inline void
+sanitize_restored_xstate(struct task_struct *tsk,
+ struct user_i387_ia32_struct *ia32_env,
+ u64 xstate_bv, int fx_only)
+{
+ struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+ struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
+
+ if (use_xsave()) {
+ /* These bits must be zero. */
+ xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+
+ /*
+ * Init the state that is not present in the memory
+ * layout and not enabled by the OS.
+ */
+ if (fx_only)
+ xsave_hdr->xstate_bv = XSTATE_FPSSE;
+ else
+ xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
+ }
+
+ if (use_fxsr()) {
+ /*
+ * mxcsr reserved bits must be masked to zero for security
+ * reasons.
+ */
+ xsave->i387.mxcsr &= mxcsr_feature_mask;
+
+ convert_to_fxsr(tsk, ia32_env);
+ }
+}
/*
- * This restores directly out of user space. Exceptions are handled.
+ * Restore the extended state if present. Otherwise, restore the FP/SSE state.
*/
-int restore_i387_xstate(void __user *buf)
+static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
 {
+ if (use_xsave()) {
+ /*
+ * Buffer not 64-byte aligned, or caller asked for FP/SSE only:
+ * load every feature except FP/SSE from init_xstate_buf, then
+ * restore FP/SSE from the user buffer with fxrstor_checking().
+ */
+ if ((unsigned long)buf % 64 || fx_only) {
+ u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
+ xrstor_state(init_xstate_buf, init_bv);
+ return fxrstor_checking((__force void *) buf);
+ } else {
+ /*
+ * Load the features in pcntxt_mask that 'xbv' does not
+ * cover from init_xstate_buf, then restore the features
+ * in 'xbv' from the user buffer.
+ */
+ u64 init_bv = pcntxt_mask & ~xbv;
+ if (unlikely(init_bv))
+ xrstor_state(init_xstate_buf, init_bv);
+ return xrestore_user(buf, xbv);
+ }
+ } else if (use_fxsr()) {
+ return fxrstor_checking((__force void *) buf);
+ } else
+ return frstor_checking((__force void *) buf);
+}
+
+int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
+{
+ int ia32_fxstate = (buf != buf_fx);
struct task_struct *tsk = current;
- int err = 0;
+ int state_size = xstate_size;
+ u64 xstate_bv = 0;
+ int fx_only = 0;
+
+ ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+ config_enabled(CONFIG_IA32_EMULATION));
if (!buf) {
- if (used_math())
- goto clear;
+ drop_init_fpu(tsk);
return 0;
- } else
- if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
- return -EACCES;
+ }
- if (!used_math()) {
- err = init_fpu(tsk);
- if (err)
- return err;
+ if (!access_ok(VERIFY_READ, buf, size))
+ return -EACCES;
+
+ if (!used_math() && init_fpu(tsk))
+ return -1;
+
+ if (!HAVE_HWFP) {
+ return fpregs_soft_set(current, NULL,
+ 0, sizeof(struct user_i387_ia32_struct),
+ NULL, buf) != 0;
}
- user_fpu_begin();
- if (use_xsave())
- err = restore_user_xstate(buf);
- else
- err = fxrstor_checking((__force struct i387_fxsave_struct *)
- buf);
- if (unlikely(err)) {
+ if (use_xsave()) {
+ struct _fpx_sw_bytes fx_sw_user;
+ if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
+ /*
+ * Couldn't find the extended state information in the
+ * memory layout. Restore just the FP/SSE and init all
+ * the other extended state.
+ */
+ state_size = sizeof(struct i387_fxsave_struct);
+ fx_only = 1;
+ } else {
+ state_size = fx_sw_user.xstate_size;
+ xstate_bv = fx_sw_user.xstate_bv;
+ }
+ }
+
+ if (ia32_fxstate) {
+ /*
+ * For 32-bit frames with fxstate, copy the user state to the
+ * thread's fpu state, reconstruct fxstate from the fsave
+ * header. Sanitize the copied state etc.
+ */
+ struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+ struct user_i387_ia32_struct env;
+ int err = 0;
+
+ /*
+ * Drop the current fpu which clears used_math(). This ensures
+ * that any context-switch during the copy of the new state,
+ * avoids the intermediate state from getting restored/saved.
+ * Thus avoiding the new restored state from getting corrupted.
+ * We will be ready to restore/save the state only after
+ * set_used_math() is again set.
+ */
+ drop_fpu(tsk);
+
+ if (__copy_from_user(xsave, buf_fx, state_size) ||
+ __copy_from_user(&env, buf, sizeof(env))) {
+ err = -1;
+ } else {
+ sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
+ set_used_math();
+ }
+
+ if (use_eager_fpu())
+ math_state_restore();
+
+ return err;
+ } else {
/*
- * Encountered an error while doing the restore from the
- * user buffer, clear the fpu state.
+ * For 64-bit frames and 32-bit fsave frames, restore the user
+ * state to the registers directly (with exceptions handled).
*/
-clear:
- clear_fpu(tsk);
- clear_used_math();
+ user_fpu_begin();
+ if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
+ drop_init_fpu(tsk);
+ return -1;
+ }
}
- return err;
+
+ return 0;
}
-#endif
/*
* Prepare the SW reserved portion of the fxsave memory layout, indicating
@@ -321,31 +428,22 @@ clear:
*/
static void prepare_fx_sw_frame(void)
{
- int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
- FP_XSTATE_MAGIC2_SIZE;
+ int fsave_header_size = sizeof(struct i387_fsave_struct);
+ int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
- sig_xstate_size = sizeof(struct _fpstate) + size_extended;
-
-#ifdef CONFIG_IA32_EMULATION
- sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
-#endif
-
- memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
+ if (config_enabled(CONFIG_X86_32))
+ size += fsave_header_size;
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
- fx_sw_reserved.extended_size = sig_xstate_size;
+ fx_sw_reserved.extended_size = size;
fx_sw_reserved.xstate_bv = pcntxt_mask;
fx_sw_reserved.xstate_size = xstate_size;
-#ifdef CONFIG_IA32_EMULATION
- memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
- sizeof(struct _fpx_sw_bytes));
- fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
-#endif
-}
-#ifdef CONFIG_X86_64
-unsigned int sig_xstate_size = sizeof(struct _fpstate);
-#endif
+ if (config_enabled(CONFIG_IA32_EMULATION)) {
+ fx_sw_reserved_ia32 = fx_sw_reserved;
+ fx_sw_reserved_ia32.extended_size += fsave_header_size;
+ }
+}
/*
* Enable the extended processor state save/restore feature
@@ -384,19 +482,21 @@ static void __init setup_xstate_features(void)
/*
* setup the xstate image representing the init state
*/
-static void __init setup_xstate_init(void)
+static void __init setup_init_fpu_buf(void)
{
- setup_xstate_features();
-
/*
* Setup init_xstate_buf to represent the init state of
* all the features managed by the xsave
*/
init_xstate_buf = alloc_bootmem_align(xstate_size,
__alignof__(struct xsave_struct));
- init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
+ fx_finit(&init_xstate_buf->i387);
+
+ if (!cpu_has_xsave)
+ return;
+
+ setup_xstate_features();
- clts();
/*
* Init all the features state with header_bv being 0x0
*/
@@ -406,9 +506,21 @@ static void __init setup_xstate_init(void)
* of any feature which is not represented by all zero's.
*/
xsave_state(init_xstate_buf, -1);
- stts();
}
+static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
+static int __init eager_fpu_setup(char *s)
+{
+ if (!strcmp(s, "on"))
+ eagerfpu = ENABLE;
+ else if (!strcmp(s, "off"))
+ eagerfpu = DISABLE;
+ else if (!strcmp(s, "auto"))
+ eagerfpu = AUTO;
+ return 1;
+}
+__setup("eagerfpu=", eager_fpu_setup);
+
/*
* Enable and initialize the xsave feature.
*/
@@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void)
update_regset_xstate_info(xstate_size, pcntxt_mask);
prepare_fx_sw_frame();
+ setup_init_fpu_buf();
- setup_xstate_init();
+ /* Auto enable eagerfpu for xsaveopt */
+ if (cpu_has_xsaveopt && eagerfpu != DISABLE)
+ eagerfpu = ENABLE;
pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
pcntxt_mask, xstate_size);
@@ -471,3 +586,43 @@ void __cpuinit xsave_init(void)
next_func = xstate_enable;
this_func();
}
+
+static inline void __init eager_fpu_init_bp(void)
+{
+ current->thread.fpu.state =
+ alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
+ if (!init_xstate_buf)
+ setup_init_fpu_buf();
+}
+
+void __cpuinit eager_fpu_init(void)
+{
+ static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
+
+ clear_used_math();
+ current_thread_info()->status = 0;
+
+ if (eagerfpu == ENABLE)
+ setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+
+ if (!cpu_has_eager_fpu) {
+ stts();
+ return;
+ }
+
+ if (boot_func) {
+ boot_func();
+ boot_func = NULL;
+ }
+
+ /*
+ * This is the same as math_state_restore(), but use_xsave() is
+ * not yet patched, so math_state_restore() cannot be used here.
+ */
+ init_fpu(current);
+ __thread_fpu_begin(current);
+ if (cpu_has_xsave)
+ xrstor_state(init_xstate_buf, -1);
+ else
+ fxrstor_checking(&init_xstate_buf->i387);
+}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index e498b18..9fc9aa7 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -318,7 +318,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
if (val & 0x10) {
u8 edge_irr = s->irr & ~s->elcr;
int i;
- bool found;
+ bool found = false;
struct kvm_vcpu *vcpu;
s->init4 = val & 1;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a71faf7..bca63f0 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic,
#define KVM_ISA_VMX 1
#define KVM_ISA_SVM 2
-#define VMX_EXIT_REASONS \
- { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
- { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
- { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
- { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
- { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
- { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
- { EXIT_REASON_CPUID, "CPUID" }, \
- { EXIT_REASON_HLT, "HLT" }, \
- { EXIT_REASON_INVLPG, "INVLPG" }, \
- { EXIT_REASON_RDPMC, "RDPMC" }, \
- { EXIT_REASON_RDTSC, "RDTSC" }, \
- { EXIT_REASON_VMCALL, "VMCALL" }, \
- { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
- { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
- { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
- { EXIT_REASON_VMPTRST, "VMPTRST" }, \
- { EXIT_REASON_VMREAD, "VMREAD" }, \
- { EXIT_REASON_VMRESUME, "VMRESUME" }, \
- { EXIT_REASON_VMWRITE, "VMWRITE" }, \
- { EXIT_REASON_VMOFF, "VMOFF" }, \
- { EXIT_REASON_VMON, "VMON" }, \
- { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
- { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
- { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
- { EXIT_REASON_MSR_READ, "MSR_READ" }, \
- { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
- { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
- { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
- { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
- { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
- { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
- { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
- { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
- { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
- { EXIT_REASON_WBINVD, "WBINVD" }
-
-#define SVM_EXIT_REASONS \
- { SVM_EXIT_READ_CR0, "read_cr0" }, \
- { SVM_EXIT_READ_CR3, "read_cr3" }, \
- { SVM_EXIT_READ_CR4, "read_cr4" }, \
- { SVM_EXIT_READ_CR8, "read_cr8" }, \
- { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
- { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
- { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
- { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
- { SVM_EXIT_READ_DR0, "read_dr0" }, \
- { SVM_EXIT_READ_DR1, "read_dr1" }, \
- { SVM_EXIT_READ_DR2, "read_dr2" }, \
- { SVM_EXIT_READ_DR3, "read_dr3" }, \
- { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
- { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
- { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
- { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
- { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
- { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
- { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
- { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
- { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
- { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
- { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
- { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
- { SVM_EXIT_INTR, "interrupt" }, \
- { SVM_EXIT_NMI, "nmi" }, \
- { SVM_EXIT_SMI, "smi" }, \
- { SVM_EXIT_INIT, "init" }, \
- { SVM_EXIT_VINTR, "vintr" }, \
- { SVM_EXIT_CPUID, "cpuid" }, \
- { SVM_EXIT_INVD, "invd" }, \
- { SVM_EXIT_HLT, "hlt" }, \
- { SVM_EXIT_INVLPG, "invlpg" }, \
- { SVM_EXIT_INVLPGA, "invlpga" }, \
- { SVM_EXIT_IOIO, "io" }, \
- { SVM_EXIT_MSR, "msr" }, \
- { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
- { SVM_EXIT_SHUTDOWN, "shutdown" }, \
- { SVM_EXIT_VMRUN, "vmrun" }, \
- { SVM_EXIT_VMMCALL, "hypercall" }, \
- { SVM_EXIT_VMLOAD, "vmload" }, \
- { SVM_EXIT_VMSAVE, "vmsave" }, \
- { SVM_EXIT_STGI, "stgi" }, \
- { SVM_EXIT_CLGI, "clgi" }, \
- { SVM_EXIT_SKINIT, "skinit" }, \
- { SVM_EXIT_WBINVD, "wbinvd" }, \
- { SVM_EXIT_MONITOR, "monitor" }, \
- { SVM_EXIT_MWAIT, "mwait" }, \
- { SVM_EXIT_XSETBV, "xsetbv" }, \
- { SVM_EXIT_NPF, "npf" }
-
/*
* Tracepoint for kvm guest exit:
*/
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c00f03d..851aa7c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
- if (user_has_fpu())
- clts();
+ /*
+ * If the FPU is not active (through the host task or
+ * the guest vcpu), then restore the cr0.TS bit.
+ */
+ if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
+ stts();
load_gdt(&__get_cpu_var(host_gdt));
}
@@ -3619,6 +3623,7 @@ static void seg_setup(int seg)
static int alloc_apic_access_page(struct kvm *kvm)
{
+ struct page *page;
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
@@ -3633,7 +3638,13 @@ static int alloc_apic_access_page(struct kvm *kvm)
if (r)
goto out;
- kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
+ page = gfn_to_page(kvm, 0xfee00);
+ if (is_error_page(page)) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ kvm->arch.apic_access_page = page;
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -3641,6 +3652,7 @@ out:
static int alloc_identity_pagetable(struct kvm *kvm)
{
+ struct page *page;
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
@@ -3656,8 +3668,13 @@ static int alloc_identity_pagetable(struct kvm *kvm)
if (r)
goto out;
- kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
- kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
+ page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
+ if (is_error_page(page)) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ kvm->arch.ept_identity_pagetable = page;
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -3730,7 +3747,7 @@ static void vmx_set_constant_host_state(void)
unsigned long tmpl;
struct desc_ptr dt;
- vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */
+ vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */
vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
@@ -4530,7 +4547,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
return 0;
}
- };
+ }
break;
case 2: /* clts */
handle_clts(vcpu);
@@ -6575,7 +6592,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
/* Exposing INVPCID only when PCID is exposed */
best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
if (vmx_invpcid_supported() &&
- best && (best->ecx & bit(X86_FEATURE_INVPCID)) &&
+ best && (best->ebx & bit(X86_FEATURE_INVPCID)) &&
guest_cpuid_has_pcid(vcpu)) {
exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
@@ -6585,7 +6602,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
exec_control);
if (best)
- best->ecx &= ~bit(X86_FEATURE_INVPCID);
+ best->ebx &= ~bit(X86_FEATURE_INVPCID);
}
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 148ed66..1f09552 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5113,17 +5113,20 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
!kvm_event_needs_reinjection(vcpu);
}
-static void vapic_enter(struct kvm_vcpu *vcpu)
+static int vapic_enter(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct page *page;
if (!apic || !apic->vapic_addr)
- return;
+ return 0;
page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
+ if (is_error_page(page))
+ return -EFAULT;
vcpu->arch.apic->vapic_page = page;
+ return 0;
}
static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -5430,7 +5433,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- vapic_enter(vcpu);
+ r = vapic_enter(vcpu);
+ if (r) {
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ return r;
+ }
r = 1;
while (r > 0) {
@@ -5972,7 +5979,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
*/
kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1;
- unlazy_fpu(current);
+ __kernel_fpu_begin();
fpu_restore_checking(&vcpu->arch.guest_fpu);
trace_kvm_fpu(1);
}
@@ -5986,6 +5993,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
vcpu->guest_fpu_loaded = 0;
fpu_save_init(&vcpu->arch.guest_fpu);
+ __kernel_fpu_end();
++vcpu->stat.fpu_reload;
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
trace_kvm_fpu(0);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 76dcd9d..7dde46d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */
+#include <asm/rcu.h> /* exception_enter(), ... */
/*
* Page fault error code bits:
@@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address)
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
-dotraplinkage void __kprobes
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void __kprobes
+__do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
struct vm_area_struct *vma;
struct task_struct *tsk;
@@ -1209,3 +1210,11 @@ good_area:
up_read(&mm->mmap_sem);
}
+
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ exception_enter(regs);
+ __do_page_fault(regs, error_code);
+ exception_exit(regs);
+}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index e0e6990..ab1f6a9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -319,7 +319,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
*/
int devmem_is_allowed(unsigned long pagenr)
{
- if (pagenr <= 256)
+ if (pagenr < 256)
return 1;
if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
return 0;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 575d86f..4f04db1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -445,10 +445,10 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
}
#endif /* CONFIG_HIGHMEM */
-void __init native_pagetable_setup_start(pgd_t *base)
+void __init native_pagetable_init(void)
{
unsigned long pfn, va;
- pgd_t *pgd;
+ pgd_t *pgd, *base = swapper_pg_dir;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -475,10 +475,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
pte_clear(NULL, va, pte);
}
paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
-}
-
-void __init native_pagetable_setup_done(pgd_t *base)
-{
+ paging_init();
}
/*
@@ -493,7 +490,7 @@ void __init native_pagetable_setup_done(pgd_t *base)
* If we're booting paravirtualized under a hypervisor, then there are
* more options: we may already be running PAE, and the pagetable may
* or may not be based in swapper_pg_dir. In any case,
- * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * paravirt_pagetable_init() will set up swapper_pg_dir
* appropriately for the rest of the initialization to work.
*
* In general, pagetable_init() assumes that the pagetable may already
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 613cd83..0777f04 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -98,6 +98,8 @@ static void flush_tlb_func(void *info)
{
struct flush_tlb_info *f = info;
+ inc_irq_stat(irq_tlb_count);
+
if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
return;
@@ -320,7 +322,7 @@ static ssize_t tlbflush_write_file(struct file *file,
if (kstrtos8(buf, 0, &shift))
return -EINVAL;
- if (shift > 64)
+ if (shift < -1 || shift >= BITS_PER_LONG)
return -EINVAL;
tlb_flushall_shift = shift;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 937bcec..704b9ec 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
while (i >= sizeof(struct acpi_mcfg_allocation)) {
entries++;
i -= sizeof(struct acpi_mcfg_allocation);
- };
+ }
if (entries == 0) {
pr_err(PREFIX "MMCONFIG has no entries\n");
return -ENODEV;
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 73b8be0..6db1cc4 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1 +1,2 @@
obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
+obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
new file mode 100644
index 0000000..f6a0c1b
--- /dev/null
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2012 Intel Corporation
+ * Author: Josh Triplett <josh@joshtriplett.org>
+ *
+ * Based on the bgrt driver:
+ * Copyright 2012 Red Hat, Inc <mjg@redhat.com>
+ * Author: Matthew Garrett
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/efi-bgrt.h>
+
+struct acpi_table_bgrt *bgrt_tab;
+void *bgrt_image;
+size_t bgrt_image_size;
+
+struct bmp_header {
+ u16 id;
+ u32 size;
+} __packed;
+
+void efi_bgrt_init(void)
+{
+ acpi_status status;
+ void __iomem *image;
+ bool ioremapped = false;
+ struct bmp_header bmp_header;
+
+ if (acpi_disabled)
+ return;
+
+ status = acpi_get_table("BGRT", 0,
+ (struct acpi_table_header **)&bgrt_tab);
+ if (ACPI_FAILURE(status))
+ return;
+
+ if (bgrt_tab->version != 1)
+ return;
+ if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
+ return;
+
+ image = efi_lookup_mapped_addr(bgrt_tab->image_address);
+ if (!image) {
+ image = ioremap(bgrt_tab->image_address, sizeof(bmp_header));
+ ioremapped = true;
+ if (!image)
+ return;
+ }
+
+ memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
+ if (ioremapped)
+ iounmap(image);
+ bgrt_image_size = bmp_header.size;
+
+ bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
+ if (!bgrt_image)
+ return;
+
+ if (ioremapped) {
+ image = ioremap(bgrt_tab->image_address, bmp_header.size);
+ if (!image) {
+ kfree(bgrt_image);
+ bgrt_image = NULL;
+ return;
+ }
+ }
+
+ memcpy_fromio(bgrt_image, image, bgrt_image_size);
+ if (ioremapped)
+ iounmap(image);
+}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 92660eda..aded2a9 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/efi.h>
+#include <linux/efi-bgrt.h>
#include <linux/export.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
@@ -419,10 +420,21 @@ void __init efi_reserve_boot_services(void)
}
}
-static void __init efi_free_boot_services(void)
+static void __init efi_unmap_memmap(void)
+{
+ if (memmap.map) {
+ early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
+ memmap.map = NULL;
+ }
+}
+
+void __init efi_free_boot_services(void)
{
void *p;
+ if (!efi_native)
+ return;
+
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
efi_memory_desc_t *md = p;
unsigned long long start = md->phys_addr;
@@ -438,6 +450,8 @@ static void __init efi_free_boot_services(void)
free_bootmem_late(start, size);
}
+
+ efi_unmap_memmap();
}
static int __init efi_systab_init(void *phys)
@@ -732,6 +746,11 @@ void __init efi_init(void)
#endif
}
+void __init efi_late_init(void)
+{
+ efi_bgrt_init();
+}
+
void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
{
u64 addr, npages;
@@ -764,6 +783,34 @@ static void __init runtime_code_page_mkexec(void)
}
/*
+ * We can't ioremap data in EFI boot services RAM, because we've already mapped
+ * it as RAM. So, look it up in the existing EFI memory map instead. Only
+ * callable after efi_enter_virtual_mode and before efi_free_boot_services.
+ */
+void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
+{
+ void *p;
+ if (WARN_ON(!memmap.map))
+ return NULL;
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ efi_memory_desc_t *md = p;
+ u64 size = md->num_pages << EFI_PAGE_SHIFT;
+ u64 end = md->phys_addr + size;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+ md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+ continue;
+ if (!md->virt_addr)
+ continue;
+ if (phys_addr >= md->phys_addr && phys_addr < end) {
+ phys_addr += md->virt_addr - md->phys_addr;
+ return (__force void __iomem *)(unsigned long)phys_addr;
+ }
+ }
+ return NULL;
+}
+
+/*
* This function will switch the EFI runtime services to virtual mode.
* Essentially, look through the EFI memmap and map every region that
* has the runtime attribute bit set in its memory descriptor and update
@@ -787,8 +834,10 @@ void __init efi_enter_virtual_mode(void)
* non-native EFI
*/
- if (!efi_native)
- goto out;
+ if (!efi_native) {
+ efi_unmap_memmap();
+ return;
+ }
/* Merge contiguous regions of the same type and attribute */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -878,18 +927,12 @@ void __init efi_enter_virtual_mode(void)
}
/*
- * Thankfully, it does seem that no runtime services other than
- * SetVirtualAddressMap() will touch boot services code, so we can
- * get rid of it all at this point
- */
- efi_free_boot_services();
-
- /*
* Now that EFI is in virtual mode, update the function
* pointers in the runtime service table to the new virtual addresses.
*
* Call EFI services through wrapper functions.
*/
+ efi.runtime_version = efi_systab.fw_revision;
efi.get_time = virt_efi_get_time;
efi.set_time = virt_efi_set_time;
efi.get_wakeup_time = virt_efi_get_wakeup_time;
@@ -906,9 +949,6 @@ void __init efi_enter_virtual_mode(void)
if (__supported_pte_mask & _PAGE_NX)
runtime_code_page_mkexec();
-out:
- early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
- memmap.map = NULL;
kfree(new_memmap);
}
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 9926e11..aeaff8b 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -21,6 +21,7 @@ config 64BIT
config X86_32
def_bool !64BIT
select HAVE_AOUT
+ select ARCH_WANT_IPC_PARSE_VERSION
config X86_64
def_bool 64BIT
diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
index 5868526..46a9df9 100644
--- a/arch/x86/um/shared/sysdep/kernel-offsets.h
+++ b/arch/x86/um/shared/sysdep/kernel-offsets.h
@@ -7,9 +7,6 @@
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-#define STR(x) #x
-#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : )
-
#define BLANK() asm volatile("\n->" : : )
#define OFFSET(sym, str, mem) \
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index bd9a89b..ca255a8 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,3 +1,5 @@
+extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
+ void __user *parent_tid, void __user *child_tid);
#ifdef __i386__
#include "syscalls_32.h"
#else
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index a508cea1..ba7363e 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
PT_REGS_AX(regs) = (unsigned long) sig;
PT_REGS_DX(regs) = (unsigned long) 0;
PT_REGS_CX(regs) = (unsigned long) 0;
-
- if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
- ptrace_notify(SIGTRAP);
return 0;
}
@@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
PT_REGS_AX(regs) = (unsigned long) sig;
PT_REGS_DX(regs) = (unsigned long) &frame->info;
PT_REGS_CX(regs) = (unsigned long) &frame->uc;
-
- if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
- ptrace_notify(SIGTRAP);
return 0;
}
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 68d1dc9..b5408ce 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -28,7 +28,7 @@
#define ptregs_execve sys_execve
#define ptregs_iopl sys_iopl
#define ptregs_vm86old sys_vm86old
-#define ptregs_clone sys_clone
+#define ptregs_clone i386_clone
#define ptregs_vm86 sys_vm86
#define ptregs_sigaltstack sys_sigaltstack
#define ptregs_vfork sys_vfork
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index b853e86..db444c7 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -3,37 +3,24 @@
* Licensed under the GPL
*/
-#include "linux/sched.h"
-#include "linux/shm.h"
-#include "linux/ipc.h"
-#include "linux/syscalls.h"
-#include "asm/mman.h"
-#include "asm/uaccess.h"
-#include "asm/unistd.h"
+#include <linux/syscalls.h>
+#include <sysdep/syscalls.h>
/*
* The prototype on i386 is:
*
- * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr)
+ * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr)
*
* and the "newtls" arg. on i386 is read by copy_thread directly from the
* register saved on the stack.
*/
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
- int __user *parent_tid, void *newtls, int __user *child_tid)
+long i386_clone(unsigned long clone_flags, unsigned long newsp,
+ int __user *parent_tid, void *newtls, int __user *child_tid)
{
- long ret;
-
- if (!newsp)
- newsp = UPT_SP(&current->thread.regs.regs);
-
- current->thread.forking = 1;
- ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
- child_tid);
- current->thread.forking = 0;
- return ret;
+ return sys_clone(clone_flags, newsp, parent_tid, child_tid);
}
+
long sys_sigaction(int sig, const struct old_sigaction __user *act,
struct old_sigaction __user *oact)
{
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index f3d82bb..adb08eb 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -5,12 +5,9 @@
* Licensed under the GPL
*/
-#include "linux/linkage.h"
-#include "linux/personality.h"
-#include "linux/utsname.h"
-#include "asm/prctl.h" /* XXX This should get the constants from libc */
-#include "asm/uaccess.h"
-#include "os.h"
+#include <linux/sched.h>
+#include <asm/prctl.h> /* XXX This should get the constants from libc */
+#include <os.h>
long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
{
@@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr)
return arch_prctl(current, code, (unsigned long __user *) addr);
}
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
- void __user *parent_tid, void __user *child_tid)
-{
- long ret;
-
- if (!newsp)
- newsp = UPT_SP(&current->thread.regs.regs);
- current->thread.forking = 1;
- ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
- child_tid);
- current->thread.forking = 0;
- return ret;
-}
-
void arch_switch_to(struct task_struct *to)
{
if ((to->thread.arch.fs == 0) || (to->mm == NULL))
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9642d4a..1fbe75a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1452,6 +1452,10 @@ asmlinkage void __init xen_start_kernel(void)
pci_request_acs();
xen_acpi_sleep_register();
+
+ /* Avoid searching for BIOS MP tables */
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.mpparse.get_smp_config = x86_init_uint_noop;
}
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b65a761..7a769b7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1174,8 +1174,13 @@ static void xen_exit_mmap(struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
}
-static void __init xen_pagetable_setup_start(pgd_t *base)
+static void xen_post_allocator_init(void);
+
+static void __init xen_pagetable_init(void)
{
+ paging_init();
+ xen_setup_shared_info();
+ xen_post_allocator_init();
}
static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
@@ -1192,14 +1197,6 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
}
}
-static void xen_post_allocator_init(void);
-
-static void __init xen_pagetable_setup_done(pgd_t *base)
-{
- xen_setup_shared_info();
- xen_post_allocator_init();
-}
-
static void xen_write_cr2(unsigned long cr2)
{
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1283,7 +1280,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
- if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
+ if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
args->op.cmd = MMUEXT_INVLPG_MULTI;
args->op.arg1.linear_addr = start;
}
@@ -2068,8 +2065,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
void __init xen_init_mmu_ops(void)
{
x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
- x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
- x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
+ x86_init.paging.pagetable_init = xen_pagetable_init;
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index d4b2554..72213da 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -599,7 +599,7 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_
if (p2m_index(set_pfn))
return false;
- for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
+ for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
topidx = p2m_top_index(pfn);
if (!p2m_top[topidx])
@@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
- /* let's use dev_bus_addr to record the old mfn instead */
- kmap_op->dev_bus_addr = page->index;
- page->index = (unsigned long) kmap_op;
}
spin_lock_irqsave(&m2p_override_lock, flags);
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
@@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page,
return 0;
}
EXPORT_SYMBOL_GPL(m2p_add_override);
-int m2p_remove_override(struct page *page, bool clear_pte)
+int m2p_remove_override(struct page *page,
+ struct gnttab_map_grant_ref *kmap_op)
{
unsigned long flags;
unsigned long mfn;
@@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte)
WARN_ON(!PagePrivate(page));
ClearPagePrivate(page);
- if (clear_pte) {
- struct gnttab_map_grant_ref *map_op =
- (struct gnttab_map_grant_ref *) page->index;
- set_phys_to_machine(pfn, map_op->dev_bus_addr);
+ set_phys_to_machine(pfn, page->index);
+ if (kmap_op != NULL) {
if (!PageHighMem(page)) {
struct multicall_space mcs;
struct gnttab_unmap_grant_ref *unmap_op;
@@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte)
* issued. In this case handle is going to -1 because
* it hasn't been modified yet.
*/
- if (map_op->handle == -1)
+ if (kmap_op->handle == -1)
xen_mc_flush();
/*
- * Now if map_op->handle is negative it means that the
+ * Now if kmap_op->handle is negative it means that the
* hypercall actually returned an error.
*/
- if (map_op->handle == GNTST_general_error) {
+ if (kmap_op->handle == GNTST_general_error) {
printk(KERN_WARNING "m2p_remove_override: "
"pfn %lx mfn %lx, failed to modify kernel mappings",
pfn, mfn);
@@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte)
mcs = xen_mc_entry(
sizeof(struct gnttab_unmap_grant_ref));
unmap_op = mcs.args;
- unmap_op->host_addr = map_op->host_addr;
- unmap_op->handle = map_op->handle;
+ unmap_op->host_addr = kmap_op->host_addr;
+ unmap_op->handle = kmap_op->handle;
unmap_op->dev_bus_addr = 0;
MULTI_grant_table_op(mcs.mc,
@@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte)
set_pte_at(&init_mm, address, ptep,
pfn_pte(pfn, PAGE_KERNEL));
__flush_tlb_single(address);
- map_op->host_addr = 0;
+ kmap_op->host_addr = 0;
}
- } else
- set_phys_to_machine(pfn, page->index);
+ }
/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
* somewhere in this domain, even before being added to the
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d11ca11..e2d62d6 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -17,6 +17,7 @@
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
+#include <asm/numa.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
@@ -544,4 +545,7 @@ void __init xen_arch_setup(void)
disable_cpufreq();
WARN_ON(set_pm_idle_to_default());
fiddle_vdso();
+#ifdef CONFIG_NUMA
+ numa_off = 1;
+#endif
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index f58dca7..353c50f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
return rc;
if (num_online_cpus() == 1)
- alternatives_smp_switch(1);
+ /* Just in case we booted with a single CPU. */
+ alternatives_enable_smp();
rc = xen_smp_intr_init(cpu);
if (rc)
@@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu)
unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
-
- if (num_online_cpus() == 1)
- alternatives_smp_switch(0);
}
static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 2c8d6a3..bc44311 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -31,6 +31,7 @@
#include <linux/mqueue.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
@@ -110,8 +111,10 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
platform_idle();
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
OpenPOWER on IntegriCloud