summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/.gitignore1
-rw-r--r--arch/x86/Kconfig68
-rw-r--r--arch/x86/Kconfig.cpu11
-rw-r--r--arch/x86/Kconfig.debug25
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/Makefile5
-rw-r--r--arch/x86/boot/boot.h2
-rw-r--r--arch/x86/boot/compressed/Makefile11
-rw-r--r--arch/x86/boot/compressed/eboot.c1022
-rw-r--r--arch/x86/boot/compressed/eboot.h61
-rw-r--r--arch/x86/boot/compressed/efi_stub_32.S86
-rw-r--r--arch/x86/boot/compressed/efi_stub_64.S1
-rw-r--r--arch/x86/boot/compressed/head_32.S22
-rw-r--r--arch/x86/boot/compressed/head_64.S20
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c11
-rw-r--r--arch/x86/boot/compressed/relocs.c6
-rw-r--r--arch/x86/boot/compressed/string.c9
-rw-r--r--arch/x86/boot/header.S158
-rw-r--r--arch/x86/boot/string.c35
-rw-r--r--arch/x86/boot/tools/build.c61
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c36
-rw-r--r--arch/x86/crypto/blowfish_glue.c191
-rw-r--r--arch/x86/crypto/camellia-x86_64-asm_64.S520
-rw-r--r--arch/x86/crypto/camellia_glue.c1952
-rw-r--r--arch/x86/crypto/crc32c-intel.c11
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c12
-rw-r--r--arch/x86/crypto/serpent-sse2-i586-asm_32.S29
-rw-r--r--arch/x86/crypto/serpent-sse2-x86_64-asm_64.S29
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c394
-rw-r--r--arch/x86/crypto/twofish_glue.c2
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c265
-rw-r--r--arch/x86/ia32/Makefile1
-rw-r--r--arch/x86/ia32/ia32_aout.c18
-rw-r--r--arch/x86/ia32/ia32_signal.c1
-rw-r--r--arch/x86/ia32/ia32entry.S373
-rw-r--r--arch/x86/ia32/nosyscall.c7
-rw-r--r--arch/x86/ia32/syscall_ia32.c25
-rw-r--r--arch/x86/include/asm/Kbuild5
-rw-r--r--arch/x86/include/asm/alternative.h6
-rw-r--r--arch/x86/include/asm/apic.h6
-rw-r--r--arch/x86/include/asm/atomic64_32.h148
-rw-r--r--arch/x86/include/asm/bootparam.h2
-rw-r--r--arch/x86/include/asm/cmpxchg.h6
-rw-r--r--arch/x86/include/asm/cpu_device_id.h13
-rw-r--r--arch/x86/include/asm/cpufeature.h5
-rw-r--r--arch/x86/include/asm/debugreg.h22
-rw-r--r--arch/x86/include/asm/desc.h12
-rw-r--r--arch/x86/include/asm/efi.h6
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/fpu-internal.h520
-rw-r--r--arch/x86/include/asm/hardirq.h1
-rw-r--r--arch/x86/include/asm/highmem.h2
-rw-r--r--arch/x86/include/asm/i387.h411
-rw-r--r--arch/x86/include/asm/ia32_unistd.h13
-rw-r--r--arch/x86/include/asm/inat.h5
-rw-r--r--arch/x86/include/asm/init.h2
-rw-r--r--arch/x86/include/asm/insn.h18
-rw-r--r--arch/x86/include/asm/irq_controller.h12
-rw-r--r--arch/x86/include/asm/jump_label.h6
-rw-r--r--arch/x86/include/asm/kvm_emulate.h16
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mrst.h4
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/paravirt.h6
-rw-r--r--arch/x86/include/asm/perf_event.h10
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/prom.h10
-rw-r--r--arch/x86/include/asm/setup.h2
-rw-r--r--arch/x86/include/asm/smp.h6
-rw-r--r--arch/x86/include/asm/spinlock.h4
-rw-r--r--arch/x86/include/asm/spinlock_types.h1
-rw-r--r--arch/x86/include/asm/syscall.h1
-rw-r--r--arch/x86/include/asm/thread_info.h6
-rw-r--r--arch/x86/include/asm/timer.h8
-rw-r--r--arch/x86/include/asm/unistd.h55
-rw-r--r--arch/x86/include/asm/unistd_32.h401
-rw-r--r--arch/x86/include/asm/unistd_64.h732
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h107
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h4
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c1
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c10
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c1
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c40
-rw-r--r--arch/x86/kernel/apic/numaq_32.c1
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/summit_32.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c8
-rw-r--r--arch/x86/kernel/apm_32.c27
-rw-r--r--arch/x86/kernel/asm-offsets.c2
-rw-r--r--arch/x86/kernel/asm-offsets_32.c8
-rw-r--r--arch/x86/kernel/asm-offsets_64.c19
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/amd.c3
-rw-r--r--arch/x86/kernel/cpu/common.c42
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c44
-rw-r--r--arch/x86/kernel/cpu/match.c91
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c205
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c21
-rw-r--r--arch/x86/kernel/cpu/perf_event.c167
-rw-r--r--arch/x86/kernel/cpu/perf_event.h58
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c40
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c158
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c23
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c528
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/crash_dump_32.c6
-rw-r--r--arch/x86/kernel/devicetree.c101
-rw-r--r--arch/x86/kernel/dumpstack.c3
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c8
-rw-r--r--arch/x86/kernel/e820.c63
-rw-r--r--arch/x86/kernel/early_printk.c2
-rw-r--r--arch/x86/kernel/entry_32.S47
-rw-r--r--arch/x86/kernel/entry_64.S254
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/i387.c83
-rw-r--r--arch/x86/kernel/irq_32.c16
-rw-r--r--arch/x86/kernel/irq_64.c35
-rw-r--r--arch/x86/kernel/kprobes-common.h102
-rw-r--r--arch/x86/kernel/kprobes-opt.c512
-rw-r--r--arch/x86/kernel/kprobes.c664
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/microcode_amd.c25
-rw-r--r--arch/x86/kernel/microcode_core.c15
-rw-r--r--arch/x86/kernel/nmi.c102
-rw-r--r--arch/x86/kernel/nmi_selftest.c181
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/probe_roms.c1
-rw-r--r--arch/x86/kernel/process.c25
-rw-r--r--arch/x86/kernel/process_32.c31
-rw-r--r--arch/x86/kernel/process_64.c36
-rw-r--r--arch/x86/kernel/ptrace.c26
-rw-r--r--arch/x86/kernel/reboot.c36
-rw-r--r--arch/x86/kernel/setup.c15
-rw-r--r--arch/x86/kernel/signal.c1
-rw-r--r--arch/x86/kernel/smp.c72
-rw-r--r--arch/x86/kernel/smpboot.c39
-rw-r--r--arch/x86/kernel/sys_x86_64.c34
-rw-r--r--arch/x86/kernel/syscall_32.c25
-rw-r--r--arch/x86/kernel/syscall_64.c20
-rw-r--r--arch/x86/kernel/syscall_table_32.S350
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/traps.c64
-rw-r--r--arch/x86/kernel/tsc.c37
-rw-r--r--arch/x86/kernel/tsc_sync.c29
-rw-r--r--arch/x86/kernel/vm86_32.c8
-rw-r--r--arch/x86/kernel/xsave.c13
-rw-r--r--arch/x86/kvm/emulate.c51
-rw-r--r--arch/x86/kvm/lapic.c8
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/mmu_audit.c8
-rw-r--r--arch/x86/kvm/paging_tmpl.h4
-rw-r--r--arch/x86/kvm/svm.c5
-rw-r--r--arch/x86/kvm/vmx.c20
-rw-r--r--arch/x86/kvm/x86.c58
-rw-r--r--arch/x86/lguest/boot.c21
-rw-r--r--arch/x86/lib/atomic64_32.c59
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S29
-rw-r--r--arch/x86/lib/copy_page_64.S12
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/lib/inat.c36
-rw-r--r--arch/x86/lib/insn.c13
-rw-r--r--arch/x86/lib/memcpy_64.S44
-rw-r--r--arch/x86/lib/memset_64.S33
-rw-r--r--arch/x86/lib/usercopy_32.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt8
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/highmem_32.c4
-rw-r--r--arch/x86/mm/hugetlbpage.c30
-rw-r--r--arch/x86/mm/init.c23
-rw-r--r--arch/x86/mm/init_32.c29
-rw-r--r--arch/x86/mm/init_64.c11
-rw-r--r--arch/x86/mm/mmap.c4
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/mm/numa.c12
-rw-r--r--arch/x86/mm/numa_emulation.c4
-rw-r--r--arch/x86/mm/pageattr.c6
-rw-r--r--arch/x86/mm/srat.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c44
-rw-r--r--arch/x86/pci/Makefile5
-rw-r--r--arch/x86/pci/acpi.c22
-rw-r--r--arch/x86/pci/xen.c2
-rw-r--r--arch/x86/platform/efi/efi.c377
-rw-r--r--arch/x86/platform/geode/Makefile1
-rw-r--r--arch/x86/platform/geode/alix.c78
-rw-r--r--arch/x86/platform/geode/net5501.c154
-rw-r--r--arch/x86/platform/iris/iris.c2
-rw-r--r--arch/x86/platform/mrst/Makefile7
-rw-r--r--arch/x86/platform/mrst/mrst.c90
-rw-r--r--arch/x86/platform/mrst/pmu.c817
-rw-r--r--arch/x86/platform/mrst/pmu.h234
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c72
-rw-r--r--arch/x86/platform/scx200/scx200_32.c24
-rw-r--r--arch/x86/platform/uv/tlb_uv.c390
-rw-r--r--arch/x86/platform/uv/uv_irq.c2
-rw-r--r--arch/x86/platform/uv/uv_time.c6
-rw-r--r--arch/x86/power/cpu.c1
-rw-r--r--arch/x86/syscalls/Makefile43
-rw-r--r--arch/x86/syscalls/syscall_32.tbl357
-rw-r--r--arch/x86/syscalls/syscall_64.tbl320
-rw-r--r--arch/x86/syscalls/syscallhdr.sh27
-rw-r--r--arch/x86/syscalls/syscalltbl.sh15
-rw-r--r--arch/x86/um/Kconfig8
-rw-r--r--arch/x86/um/Makefile3
-rw-r--r--arch/x86/um/shared/sysdep/ptrace.h10
-rw-r--r--arch/x86/um/sys_call_table_32.S26
-rw-r--r--arch/x86/um/sys_call_table_32.c55
-rw-r--r--arch/x86/um/sys_call_table_64.c33
-rw-r--r--arch/x86/um/user-offsets.c15
-rw-r--r--arch/x86/xen/enlighten.c6
-rw-r--r--arch/x86/xen/mmu.c8
-rw-r--r--arch/x86/xen/smp.c7
-rw-r--r--arch/x86/xen/spinlock.c27
223 files changed, 10694 insertions, 5891 deletions
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index 0280790..7cab8c0 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,3 +1,4 @@
boot/compressed/vmlinux
tools/test_get_len
+tools/insn_sanity
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a31254..9019523 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,9 @@ config X86
select PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
select ANON_INODES
+ select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
+ select HAVE_CMPXCHG_LOCAL if !M386
+ select HAVE_CMPXCHG_DOUBLE
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
select ARCH_BINFMT_ELF_RANDOMIZE_PIE
@@ -79,6 +82,7 @@ config X86
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
+ select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
@@ -122,16 +126,6 @@ config HAVE_LATENCYTOP_SUPPORT
config MMU
def_bool y
-config ZONE_DMA
- bool "DMA memory allocation support" if EXPERT
- default y
- help
- DMA memory allocation support allows devices with less than 32-bit
- addressing to allocate within the first 16MB of address space.
- Disable if no such devices will be used.
-
- If unsure, say Y.
-
config SBUS
bool
@@ -186,6 +180,9 @@ config ARCH_HAS_DEFAULT_IDLE
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
+config ARCH_HAS_CPU_AUTOPROBE
+ def_bool y
+
config HAVE_SETUP_PER_CPU_AREA
def_bool y
@@ -252,6 +249,16 @@ source "kernel/Kconfig.freezer"
menu "Processor type and features"
+config ZONE_DMA
+ bool "DMA memory allocation support" if EXPERT
+ default y
+ help
+ DMA memory allocation support allows devices with less than 32-bit
+ addressing to allocate within the first 16MB of address space.
+ Disable if no such devices will be used.
+
+ If unsure, say Y.
+
source "kernel/time/Kconfig"
config SMP
@@ -357,7 +364,6 @@ config X86_NUMACHIP
depends on NUMA
depends on SMP
depends on X86_X2APIC
- depends on !EDAC_AMD64
---help---
Adds support for Numascale NumaChip large-SMP systems. Needed to
enable more than ~168 cores.
@@ -396,6 +402,7 @@ config X86_INTEL_CE
select X86_REBOOTFIXUPS
select OF
select OF_EARLY_FLATTREE
+ select IRQ_DOMAIN
---help---
Select for the Intel CE media processor (CE4100) SOC.
This option compiles in support for the CE4100 SOC for settop
@@ -415,23 +422,25 @@ if X86_WANT_INTEL_MID
config X86_INTEL_MID
bool
-config X86_MRST
- bool "Moorestown MID platform"
+config X86_MDFLD
+ bool "Medfield MID platform"
depends on PCI
depends on PCI_GOANY
depends on X86_IO_APIC
+ select X86_INTEL_MID
+ select SFI
+ select DW_APB_TIMER
select APB_TIMER
select I2C
select SPI
select INTEL_SCU_IPC
select X86_PLATFORM_DEVICES
- select X86_INTEL_MID
+ select MFD_INTEL_MSIC
---help---
- Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
- Internet Device(MID) platform. Moorestown consists of two chips:
- Lincroft (CPU core, graphics, and memory controller) and Langwell IOH.
- Unlike standard x86 PCs, Moorestown does not have many legacy devices
- nor standard legacy replacement devices/features. e.g. Moorestown does
+ Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
+ Internet Device(MID) platform.
+ Unlike standard x86 PCs, Medfield does not have many legacy devices
+ nor standard legacy replacement devices/features. e.g. Medfield does
not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
endif
@@ -631,7 +640,7 @@ config X86_SUMMIT_NUMA
config X86_CYCLONE_TIMER
def_bool y
- depends on X86_32_NON_STANDARD
+ depends on X86_SUMMIT
source "arch/x86/Kconfig.cpu"
@@ -659,9 +668,10 @@ config HPET_EMULATE_RTC
depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
config APB_TIMER
- def_bool y if MRST
- prompt "Langwell APB Timer Support" if X86_MRST
+ def_bool y if X86_INTEL_MID
+ prompt "Intel MID APB Timer Support" if X86_INTEL_MID
select DW_APB_TIMER
+ depends on X86_INTEL_MID && SFI
help
APB timer is the replacement for 8254, HPET on X86 MID platforms.
The APBT provides a stable time base on SMP
@@ -1489,6 +1499,13 @@ config EFI
resultant kernel should continue to boot on existing non-EFI
platforms.
+config EFI_STUB
+ bool "EFI stub support"
+ depends on EFI
+ ---help---
+ This kernel feature allows a bzImage to be loaded directly
+ by EFI firmware without the use of a bootloader.
+
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
@@ -2044,6 +2061,7 @@ config OLPC
select GPIOLIB
select OF
select OF_PROMTREE
+ select IRQ_DOMAIN
---help---
Add support for detecting the unique features of the OLPC
XO hardware.
@@ -2101,6 +2119,12 @@ config ALIX
Note: You have to set alix.force=1 for boards with Award BIOS.
+config NET5501
+ bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)"
+ select GPIOLIB
+ ---help---
+ This option enables system support for the Soekris Engineering net5501.
+
endif # X86_32
config AMD_NB
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e3ca7e0..706e12e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -303,18 +303,11 @@ config X86_GENERIC
config X86_INTERNODE_CACHE_SHIFT
int
default "12" if X86_VSMP
- default "7" if NUMA
default X86_L1_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)
-config CMPXCHG_LOCAL
- def_bool X86_64 || (X86_32 && !M386)
-
-config CMPXCHG_DOUBLE
- def_bool y
-
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
@@ -447,7 +440,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
- depends on !64BIT
+ depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for Cyrix processors
@@ -501,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
- depends on !64BIT
+ depends on M386 || M486 || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for UMC processors
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bf56e17..e46c21473 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,9 +43,9 @@ config EARLY_PRINTK
with klogd/syslogd or the X server. You should normally N here,
unless you want to debug such a crash.
-config EARLY_PRINTK_MRST
- bool "Early printk for MRST platform support"
- depends on EARLY_PRINTK && X86_MRST
+config EARLY_PRINTK_INTEL_MID
+ bool "Early printk for Intel MID platform support"
+ depends on EARLY_PRINTK && X86_INTEL_MID
config EARLY_PRINTK_DBGP
bool "Early printk via EHCI debug port"
@@ -63,8 +63,11 @@ config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
---help---
- This option will cause messages to be printed if free stack space
- drops below a certain limit.
+ Say Y here if you want to check the overflows of kernel, IRQ
+ and exception stacks. This option will cause messages of the
+ stacks in detail when free stack space drops below a certain
+ limit.
+ If in doubt, say "N".
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
@@ -284,4 +287,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS
If unsure, or if you run an older (pre 4.4) gcc, say N.
+config DEBUG_NMI_SELFTEST
+ bool "NMI Selftest"
+ depends on DEBUG_KERNEL && X86_LOCAL_APIC
+ ---help---
+ Enabling this option turns on a quick NMI selftest to verify
+ that the NMI behaves correctly.
+
+ This might help diagnose strange hangs that rely on NMI to
+ function properly.
+
+ If unsure, say N.
+
endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b02e509..209ba12 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -118,6 +118,12 @@ KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
###
+# Syscall table generation
+
+archheaders:
+ $(Q)$(MAKE) $(build)=arch/x86/syscalls all
+
+###
# Kernel objects
head-y := arch/x86/kernel/head_$(BITS).o
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 95365a8..3e02148 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,8 +37,9 @@ setup-y += video-bios.o
targets += $(setup-y)
hostprogs-y := mkcpustr tools/build
-HOST_EXTRACFLAGS += $(LINUXINCLUDE)
-
+HOSTCFLAGS_mkcpustr.o := -I$(srctree)/arch/$(SRCARCH)/include
+HOST_EXTRACFLAGS += -I$(objtree)/include -I$(srctree)/tools/include \
+ -include $(srctree)/include/linux/kconfig.h
$(obj)/cpu.o: $(obj)/cpustr.h
quiet_cmd_cpustr = CPUSTR $@
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index c7093bd..18997e5 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -67,7 +67,7 @@ static inline void outl(u32 v, u16 port)
{
asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
}
-static inline u32 inl(u32 port)
+static inline u32 inl(u16 port)
{
u32 v;
asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 09664ef..fd55a2f 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,8 +22,17 @@ LDFLAGS := -m elf_$(UTS_MACHINE)
LDFLAGS_vmlinux := -T
hostprogs-y := mkpiggy
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o $(obj)/piggy.o FORCE
+VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
+ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
+ $(obj)/piggy.o
+
+ifeq ($(CONFIG_EFI_STUB), y)
+ VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
+endif
+
+$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
$(call if_changed,ld)
@:
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
new file mode 100644
index 0000000..0cdfc0d
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.c
@@ -0,0 +1,1022 @@
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+
+#include "eboot.h"
+
+static efi_system_table_t *sys_table;
+
+static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
+ unsigned long *desc_size)
+{
+ efi_memory_desc_t *m = NULL;
+ efi_status_t status;
+ unsigned long key;
+ u32 desc_version;
+
+ *map_size = sizeof(*m) * 32;
+again:
+ /*
+ * Add an additional efi_memory_desc_t because we're doing an
+ * allocation which may be in a new descriptor region.
+ */
+ *map_size += sizeof(*m);
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, *map_size, (void **)&m);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ status = efi_call_phys5(sys_table->boottime->get_memory_map, map_size,
+ m, &key, desc_size, &desc_version);
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ efi_call_phys1(sys_table->boottime->free_pool, m);
+ goto again;
+ }
+
+ if (status != EFI_SUCCESS)
+ efi_call_phys1(sys_table->boottime->free_pool, m);
+
+fail:
+ *map = m;
+ return status;
+}
+
+/*
+ * Allocate at the highest possible address that is not above 'max'.
+ */
+static efi_status_t high_alloc(unsigned long size, unsigned long align,
+ unsigned long *addr, unsigned long max)
+{
+ unsigned long map_size, desc_size;
+ efi_memory_desc_t *map;
+ efi_status_t status;
+ unsigned long nr_pages;
+ u64 max_addr = 0;
+ int i;
+
+ status = __get_map(&map, &map_size, &desc_size);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+again:
+ for (i = 0; i < map_size / desc_size; i++) {
+ efi_memory_desc_t *desc;
+ unsigned long m = (unsigned long)map;
+ u64 start, end;
+
+ desc = (efi_memory_desc_t *)(m + (i * desc_size));
+ if (desc->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (desc->num_pages < nr_pages)
+ continue;
+
+ start = desc->phys_addr;
+ end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+ if ((start + size) > end || (start + size) > max)
+ continue;
+
+ if (end - size > max)
+ end = max;
+
+ if (round_down(end - size, align) < start)
+ continue;
+
+ start = round_down(end - size, align);
+
+ /*
+ * Don't allocate at 0x0. It will confuse code that
+ * checks pointers against NULL.
+ */
+ if (start == 0x0)
+ continue;
+
+ if (start > max_addr)
+ max_addr = start;
+ }
+
+ if (!max_addr)
+ status = EFI_NOT_FOUND;
+ else {
+ status = efi_call_phys4(sys_table->boottime->allocate_pages,
+ EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+ nr_pages, &max_addr);
+ if (status != EFI_SUCCESS) {
+ max = max_addr;
+ max_addr = 0;
+ goto again;
+ }
+
+ *addr = max_addr;
+ }
+
+free_pool:
+ efi_call_phys1(sys_table->boottime->free_pool, map);
+
+fail:
+ return status;
+}
+
+/*
+ * Allocate at the lowest possible address.
+ */
+static efi_status_t low_alloc(unsigned long size, unsigned long align,
+ unsigned long *addr)
+{
+ unsigned long map_size, desc_size;
+ efi_memory_desc_t *map;
+ efi_status_t status;
+ unsigned long nr_pages;
+ int i;
+
+ status = __get_map(&map, &map_size, &desc_size);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ for (i = 0; i < map_size / desc_size; i++) {
+ efi_memory_desc_t *desc;
+ unsigned long m = (unsigned long)map;
+ u64 start, end;
+
+ desc = (efi_memory_desc_t *)(m + (i * desc_size));
+
+ if (desc->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (desc->num_pages < nr_pages)
+ continue;
+
+ start = desc->phys_addr;
+ end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+ /*
+ * Don't allocate at 0x0. It will confuse code that
+ * checks pointers against NULL. Skip the first 8
+ * bytes so we start at a nice even number.
+ */
+ if (start == 0x0)
+ start += 8;
+
+ start = round_up(start, align);
+ if ((start + size) > end)
+ continue;
+
+ status = efi_call_phys4(sys_table->boottime->allocate_pages,
+ EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+ nr_pages, &start);
+ if (status == EFI_SUCCESS) {
+ *addr = start;
+ break;
+ }
+ }
+
+ if (i == map_size / desc_size)
+ status = EFI_NOT_FOUND;
+
+free_pool:
+ efi_call_phys1(sys_table->boottime->free_pool, map);
+fail:
+ return status;
+}
+
+static void low_free(unsigned long size, unsigned long addr)
+{
+ unsigned long nr_pages;
+
+ nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ efi_call_phys2(sys_table->boottime->free_pages, addr, size);
+}
+
+static void find_bits(unsigned long mask, u8 *pos, u8 *size)
+{
+ u8 first, len;
+
+ first = 0;
+ len = 0;
+
+ if (mask) {
+ while (!(mask & 0x1)) {
+ mask = mask >> 1;
+ first++;
+ }
+
+ while (mask & 0x1) {
+ mask = mask >> 1;
+ len++;
+ }
+ }
+
+ *pos = first;
+ *size = len;
+}
+
+/*
+ * See if we have Graphics Output Protocol
+ */
+static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
+ unsigned long size)
+{
+ struct efi_graphics_output_protocol *gop, *first_gop;
+ struct efi_pixel_bitmask pixel_info;
+ unsigned long nr_gops;
+ efi_status_t status;
+ void **gop_handle;
+ u16 width, height;
+ u32 fb_base, fb_size;
+ u32 pixels_per_scan_line;
+ int pixel_format;
+ int i;
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, size, &gop_handle);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ status = efi_call_phys5(sys_table->boottime->locate_handle,
+ EFI_LOCATE_BY_PROTOCOL, proto,
+ NULL, &size, gop_handle);
+ if (status != EFI_SUCCESS)
+ goto free_handle;
+
+ first_gop = NULL;
+
+ nr_gops = size / sizeof(void *);
+ for (i = 0; i < nr_gops; i++) {
+ struct efi_graphics_output_mode_info *info;
+ efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+ void *pciio;
+ void *h = gop_handle[i];
+
+ status = efi_call_phys3(sys_table->boottime->handle_protocol,
+ h, proto, &gop);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ efi_call_phys3(sys_table->boottime->handle_protocol,
+ h, &pciio_proto, &pciio);
+
+ status = efi_call_phys4(gop->query_mode, gop,
+ gop->mode->mode, &size, &info);
+ if (status == EFI_SUCCESS && (!first_gop || pciio)) {
+ /*
+ * Apple provide GOPs that are not backed by
+ * real hardware (they're used to handle
+ * multiple displays). The workaround is to
+ * search for a GOP implementing the PCIIO
+ * protocol, and if one isn't found, to just
+ * fallback to the first GOP.
+ */
+ width = info->horizontal_resolution;
+ height = info->vertical_resolution;
+ fb_base = gop->mode->frame_buffer_base;
+ fb_size = gop->mode->frame_buffer_size;
+ pixel_format = info->pixel_format;
+ pixel_info = info->pixel_information;
+ pixels_per_scan_line = info->pixels_per_scan_line;
+
+ /*
+ * Once we've found a GOP supporting PCIIO,
+ * don't bother looking any further.
+ */
+ if (pciio)
+ break;
+
+ first_gop = gop;
+ }
+ }
+
+ /* Did we find any GOPs? */
+ if (!first_gop)
+ goto free_handle;
+
+ /* EFI framebuffer */
+ si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+ si->lfb_width = width;
+ si->lfb_height = height;
+ si->lfb_base = fb_base;
+ si->lfb_size = fb_size;
+ si->pages = 1;
+
+ if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
+ si->lfb_depth = 32;
+ si->lfb_linelength = pixels_per_scan_line * 4;
+ si->red_size = 8;
+ si->red_pos = 0;
+ si->green_size = 8;
+ si->green_pos = 8;
+ si->blue_size = 8;
+ si->blue_pos = 16;
+ si->rsvd_size = 8;
+ si->rsvd_pos = 24;
+ } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
+ si->lfb_depth = 32;
+ si->lfb_linelength = pixels_per_scan_line * 4;
+ si->red_size = 8;
+ si->red_pos = 16;
+ si->green_size = 8;
+ si->green_pos = 8;
+ si->blue_size = 8;
+ si->blue_pos = 0;
+ si->rsvd_size = 8;
+ si->rsvd_pos = 24;
+ } else if (pixel_format == PIXEL_BIT_MASK) {
+ find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
+ find_bits(pixel_info.green_mask, &si->green_pos,
+ &si->green_size);
+ find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
+ find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
+ &si->rsvd_size);
+ si->lfb_depth = si->red_size + si->green_size +
+ si->blue_size + si->rsvd_size;
+ si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
+ } else {
+ si->lfb_depth = 4;
+ si->lfb_linelength = si->lfb_width / 2;
+ si->red_size = 0;
+ si->red_pos = 0;
+ si->green_size = 0;
+ si->green_pos = 0;
+ si->blue_size = 0;
+ si->blue_pos = 0;
+ si->rsvd_size = 0;
+ si->rsvd_pos = 0;
+ }
+
+free_handle:
+ efi_call_phys1(sys_table->boottime->free_pool, gop_handle);
+ return status;
+}
+
+/*
+ * See if we have Universal Graphics Adapter (UGA) protocol
+ */
+static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto,
+ unsigned long size)
+{
+ struct efi_uga_draw_protocol *uga, *first_uga;
+ unsigned long nr_ugas;
+ efi_status_t status;
+ u32 width, height;
+ void **uga_handle = NULL;
+ int i;
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, size, &uga_handle);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ status = efi_call_phys5(sys_table->boottime->locate_handle,
+ EFI_LOCATE_BY_PROTOCOL, uga_proto,
+ NULL, &size, uga_handle);
+ if (status != EFI_SUCCESS)
+ goto free_handle;
+
+ first_uga = NULL;
+
+ nr_ugas = size / sizeof(void *);
+ for (i = 0; i < nr_ugas; i++) {
+ efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
+ void *handle = uga_handle[i];
+ u32 w, h, depth, refresh;
+ void *pciio;
+
+ status = efi_call_phys3(sys_table->boottime->handle_protocol,
+ handle, uga_proto, &uga);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ efi_call_phys3(sys_table->boottime->handle_protocol,
+ handle, &pciio_proto, &pciio);
+
+ status = efi_call_phys5(uga->get_mode, uga, &w, &h,
+ &depth, &refresh);
+ if (status == EFI_SUCCESS && (!first_uga || pciio)) {
+ width = w;
+ height = h;
+
+ /*
+ * Once we've found a UGA supporting PCIIO,
+ * don't bother looking any further.
+ */
+ if (pciio)
+ break;
+
+ first_uga = uga;
+ }
+ }
+
+ if (!first_uga)
+ goto free_handle;
+
+ /* EFI framebuffer */
+ si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+ si->lfb_depth = 32;
+ si->lfb_width = width;
+ si->lfb_height = height;
+
+ si->red_size = 8;
+ si->red_pos = 16;
+ si->green_size = 8;
+ si->green_pos = 8;
+ si->blue_size = 8;
+ si->blue_pos = 0;
+ si->rsvd_size = 8;
+ si->rsvd_pos = 24;
+
+
+free_handle:
+ efi_call_phys1(sys_table->boottime->free_pool, uga_handle);
+ return status;
+}
+
+void setup_graphics(struct boot_params *boot_params)
+{
+ efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ struct screen_info *si;
+ efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
+ efi_status_t status;
+ unsigned long size;
+ void **gop_handle = NULL;
+ void **uga_handle = NULL;
+
+ si = &boot_params->screen_info;
+ memset(si, 0, sizeof(*si));
+
+ size = 0;
+ status = efi_call_phys5(sys_table->boottime->locate_handle,
+ EFI_LOCATE_BY_PROTOCOL, &graphics_proto,
+ NULL, &size, gop_handle);
+ if (status == EFI_BUFFER_TOO_SMALL)
+ status = setup_gop(si, &graphics_proto, size);
+
+ if (status != EFI_SUCCESS) {
+ size = 0;
+ status = efi_call_phys5(sys_table->boottime->locate_handle,
+ EFI_LOCATE_BY_PROTOCOL, &uga_proto,
+ NULL, &size, uga_handle);
+ if (status == EFI_BUFFER_TOO_SMALL)
+ setup_uga(si, &uga_proto, size);
+ }
+}
+
+struct initrd {
+ efi_file_handle_t *handle;
+ u64 size;
+};
+
+/*
+ * Check the cmdline for a LILO-style initrd= arguments.
+ *
+ * We only support loading an initrd from the same filesystem as the
+ * kernel image.
+ */
+static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
+ struct setup_header *hdr)
+{
+ struct initrd *initrds;
+ unsigned long initrd_addr;
+ efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
+ u64 initrd_total;
+ efi_file_io_interface_t *io;
+ efi_file_handle_t *fh;
+ efi_status_t status;
+ int nr_initrds;
+ char *str;
+ int i, j, k;
+
+ initrd_addr = 0;
+ initrd_total = 0;
+
+ str = (char *)(unsigned long)hdr->cmd_line_ptr;
+
+ j = 0; /* See close_handles */
+
+ if (!str || !*str)
+ return EFI_SUCCESS;
+
+ for (nr_initrds = 0; *str; nr_initrds++) {
+ str = strstr(str, "initrd=");
+ if (!str)
+ break;
+
+ str += 7;
+
+ /* Skip any leading slashes */
+ while (*str == '/' || *str == '\\')
+ str++;
+
+ while (*str && *str != ' ' && *str != '\n')
+ str++;
+ }
+
+ if (!nr_initrds)
+ return EFI_SUCCESS;
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA,
+ nr_initrds * sizeof(*initrds),
+ &initrds);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ str = (char *)(unsigned long)hdr->cmd_line_ptr;
+ for (i = 0; i < nr_initrds; i++) {
+ struct initrd *initrd;
+ efi_file_handle_t *h;
+ efi_file_info_t *info;
+ efi_char16_t filename_16[256];
+ unsigned long info_sz;
+ efi_guid_t info_guid = EFI_FILE_INFO_ID;
+ efi_char16_t *p;
+ u64 file_sz;
+
+ str = strstr(str, "initrd=");
+ if (!str)
+ break;
+
+ str += 7;
+
+ initrd = &initrds[i];
+ p = filename_16;
+
+ /* Skip any leading slashes */
+ while (*str == '/' || *str == '\\')
+ str++;
+
+ while (*str && *str != ' ' && *str != '\n') {
+ if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16))
+ break;
+
+ *p++ = *str++;
+ }
+
+ *p = '\0';
+
+ /* Only open the volume once. */
+ if (!i) {
+ efi_boot_services_t *boottime;
+
+ boottime = sys_table->boottime;
+
+ status = efi_call_phys3(boottime->handle_protocol,
+ image->device_handle, &fs_proto, &io);
+ if (status != EFI_SUCCESS)
+ goto free_initrds;
+
+ status = efi_call_phys2(io->open_volume, io, &fh);
+ if (status != EFI_SUCCESS)
+ goto free_initrds;
+ }
+
+ status = efi_call_phys5(fh->open, fh, &h, filename_16,
+ EFI_FILE_MODE_READ, (u64)0);
+ if (status != EFI_SUCCESS)
+ goto close_handles;
+
+ initrd->handle = h;
+
+ info_sz = 0;
+ status = efi_call_phys4(h->get_info, h, &info_guid,
+ &info_sz, NULL);
+ if (status != EFI_BUFFER_TOO_SMALL)
+ goto close_handles;
+
+grow:
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, info_sz, &info);
+ if (status != EFI_SUCCESS)
+ goto close_handles;
+
+ status = efi_call_phys4(h->get_info, h, &info_guid,
+ &info_sz, info);
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ efi_call_phys1(sys_table->boottime->free_pool, info);
+ goto grow;
+ }
+
+ file_sz = info->file_size;
+ efi_call_phys1(sys_table->boottime->free_pool, info);
+
+ if (status != EFI_SUCCESS)
+ goto close_handles;
+
+ initrd->size = file_sz;
+ initrd_total += file_sz;
+ }
+
+ if (initrd_total) {
+ unsigned long addr;
+
+ /*
+ * Multiple initrd's need to be at consecutive
+ * addresses in memory, so allocate enough memory for
+ * all the initrd's.
+ */
+ status = high_alloc(initrd_total, 0x1000,
+ &initrd_addr, hdr->initrd_addr_max);
+ if (status != EFI_SUCCESS)
+ goto close_handles;
+
+ /* We've run out of free low memory. */
+ if (initrd_addr > hdr->initrd_addr_max) {
+ status = EFI_INVALID_PARAMETER;
+ goto free_initrd_total;
+ }
+
+ addr = initrd_addr;
+ for (j = 0; j < nr_initrds; j++) {
+ u64 size;
+
+ size = initrds[j].size;
+ while (size) {
+ u64 chunksize;
+ if (size > EFI_READ_CHUNK_SIZE)
+ chunksize = EFI_READ_CHUNK_SIZE;
+ else
+ chunksize = size;
+ status = efi_call_phys3(fh->read,
+ initrds[j].handle,
+ &chunksize, addr);
+ if (status != EFI_SUCCESS)
+ goto free_initrd_total;
+ addr += chunksize;
+ size -= chunksize;
+ }
+
+ efi_call_phys1(fh->close, initrds[j].handle);
+ }
+
+ }
+
+ efi_call_phys1(sys_table->boottime->free_pool, initrds);
+
+ hdr->ramdisk_image = initrd_addr;
+ hdr->ramdisk_size = initrd_total;
+
+ return status;
+
+free_initrd_total:
+ low_free(initrd_total, initrd_addr);
+
+close_handles:
+ for (k = j; k < nr_initrds; k++)
+ efi_call_phys1(fh->close, initrds[k].handle);
+free_initrds:
+ efi_call_phys1(sys_table->boottime->free_pool, initrds);
+fail:
+ hdr->ramdisk_image = 0;
+ hdr->ramdisk_size = 0;
+
+ return status;
+}
+
+/*
+ * Because the x86 boot code expects to be passed a boot_params we
+ * need to create one ourselves (usually the bootloader would create
+ * one for us).
+ */
+static efi_status_t make_boot_params(struct boot_params *boot_params,
+ efi_loaded_image_t *image,
+ void *handle)
+{
+ struct efi_info *efi = &boot_params->efi_info;
+ struct apm_bios_info *bi = &boot_params->apm_bios_info;
+ struct sys_desc_table *sdt = &boot_params->sys_desc_table;
+ struct e820entry *e820_map = &boot_params->e820_map[0];
+ struct e820entry *prev = NULL;
+ struct setup_header *hdr = &boot_params->hdr;
+ unsigned long size, key, desc_size, _size;
+ efi_memory_desc_t *mem_map;
+ void *options = image->load_options;
+ u32 load_options_size = image->load_options_size / 2; /* ASCII */
+ int options_size = 0;
+ efi_status_t status;
+ __u32 desc_version;
+ unsigned long cmdline;
+ u8 nr_entries;
+ u16 *s2;
+ u8 *s1;
+ int i;
+
+ hdr->type_of_loader = 0x21;
+
+ /* Convert unicode cmdline to ascii */
+ cmdline = 0;
+ s2 = (u16 *)options;
+
+ if (s2) {
+ while (*s2 && *s2 != '\n' && options_size < load_options_size) {
+ s2++;
+ options_size++;
+ }
+
+ if (options_size) {
+ if (options_size > hdr->cmdline_size)
+ options_size = hdr->cmdline_size;
+
+ options_size++; /* NUL termination */
+
+ status = low_alloc(options_size, 1, &cmdline);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ s1 = (u8 *)(unsigned long)cmdline;
+ s2 = (u16 *)options;
+
+ for (i = 0; i < options_size - 1; i++)
+ *s1++ = *s2++;
+
+ *s1 = '\0';
+ }
+ }
+
+ hdr->cmd_line_ptr = cmdline;
+
+ hdr->ramdisk_image = 0;
+ hdr->ramdisk_size = 0;
+
+ status = handle_ramdisks(image, hdr);
+ if (status != EFI_SUCCESS)
+ goto free_cmdline;
+
+ setup_graphics(boot_params);
+
+ /* Clear APM BIOS info */
+ memset(bi, 0, sizeof(*bi));
+
+ memset(sdt, 0, sizeof(*sdt));
+
+ memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+
+ size = sizeof(*mem_map) * 32;
+
+again:
+ size += sizeof(*mem_map);
+ _size = size;
+ status = low_alloc(size, 1, (unsigned long *)&mem_map);
+ if (status != EFI_SUCCESS)
+ goto free_cmdline;
+
+ status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
+ mem_map, &key, &desc_size, &desc_version);
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ low_free(_size, (unsigned long)mem_map);
+ goto again;
+ }
+
+ if (status != EFI_SUCCESS)
+ goto free_mem_map;
+
+ efi->efi_systab = (unsigned long)sys_table;
+ efi->efi_memdesc_size = desc_size;
+ efi->efi_memdesc_version = desc_version;
+ efi->efi_memmap = (unsigned long)mem_map;
+ efi->efi_memmap_size = size;
+
+#ifdef CONFIG_X86_64
+ efi->efi_systab_hi = (unsigned long)sys_table >> 32;
+ efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
+#endif
+
+ /* Might as well exit boot services now */
+ status = efi_call_phys2(sys_table->boottime->exit_boot_services,
+ handle, key);
+ if (status != EFI_SUCCESS)
+ goto free_mem_map;
+
+ /* Historic? */
+ boot_params->alt_mem_k = 32 * 1024;
+
+ /*
+ * Convert the EFI memory map to E820.
+ */
+ nr_entries = 0;
+ for (i = 0; i < size / desc_size; i++) {
+ efi_memory_desc_t *d;
+ unsigned int e820_type = 0;
+ unsigned long m = (unsigned long)mem_map;
+
+ d = (efi_memory_desc_t *)(m + (i * desc_size));
+ switch (d->type) {
+ case EFI_RESERVED_TYPE:
+ case EFI_RUNTIME_SERVICES_CODE:
+ case EFI_RUNTIME_SERVICES_DATA:
+ case EFI_MEMORY_MAPPED_IO:
+ case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+ case EFI_PAL_CODE:
+ e820_type = E820_RESERVED;
+ break;
+
+ case EFI_UNUSABLE_MEMORY:
+ e820_type = E820_UNUSABLE;
+ break;
+
+ case EFI_ACPI_RECLAIM_MEMORY:
+ e820_type = E820_ACPI;
+ break;
+
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ e820_type = E820_RAM;
+ break;
+
+ case EFI_ACPI_MEMORY_NVS:
+ e820_type = E820_NVS;
+ break;
+
+ default:
+ continue;
+ }
+
+ /* Merge adjacent mappings */
+ if (prev && prev->type == e820_type &&
+ (prev->addr + prev->size) == d->phys_addr)
+ prev->size += d->num_pages << 12;
+ else {
+ e820_map->addr = d->phys_addr;
+ e820_map->size = d->num_pages << 12;
+ e820_map->type = e820_type;
+ prev = e820_map++;
+ nr_entries++;
+ }
+ }
+
+ boot_params->e820_entries = nr_entries;
+
+ return EFI_SUCCESS;
+
+free_mem_map:
+ low_free(_size, (unsigned long)mem_map);
+free_cmdline:
+ if (options_size)
+ low_free(options_size, hdr->cmd_line_ptr);
+fail:
+ return status;
+}
+
+/*
+ * On success we return a pointer to a boot_params structure, and NULL
+ * on failure.
+ */
+struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
+{
+ struct boot_params *boot_params;
+ unsigned long start, nr_pages;
+ struct desc_ptr *gdt, *idt;
+ efi_loaded_image_t *image;
+ struct setup_header *hdr;
+ efi_status_t status;
+ efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
+ struct desc_struct *desc;
+
+ sys_table = _table;
+
+ /* Check if we were booted by the EFI firmware */
+ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ goto fail;
+
+ status = efi_call_phys3(sys_table->boottime->handle_protocol,
+ handle, &proto, (void *)&image);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ memset(boot_params, 0x0, 0x4000);
+
+ /* Copy first two sectors to boot_params */
+ memcpy(boot_params, image->image_base, 1024);
+
+ hdr = &boot_params->hdr;
+
+ /*
+ * The EFI firmware loader could have placed the kernel image
+ * anywhere in memory, but the kernel has various restrictions
+ * on the max physical address it can run at. Attempt to move
+ * the kernel to boot_params.pref_address, or as low as
+ * possible.
+ */
+ start = hdr->pref_address;
+ nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+
+ status = efi_call_phys4(sys_table->boottime->allocate_pages,
+ EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+ nr_pages, &start);
+ if (status != EFI_SUCCESS) {
+ status = low_alloc(hdr->init_size, hdr->kernel_alignment,
+ &start);
+ if (status != EFI_SUCCESS)
+ goto fail;
+ }
+
+ hdr->code32_start = (__u32)start;
+ hdr->pref_address = (__u64)(unsigned long)image->image_base;
+
+ memcpy((void *)start, image->image_base, image->image_size);
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, sizeof(*gdt),
+ (void **)&gdt);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ gdt->size = 0x800;
+ status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, sizeof(*idt),
+ (void **)&idt);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ idt->size = 0;
+ idt->address = 0;
+
+ status = make_boot_params(boot_params, image, handle);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ memset((char *)gdt->address, 0x0, gdt->size);
+ desc = (struct desc_struct *)gdt->address;
+
+ /* The first GDT is a dummy and the second is unused. */
+ desc += 2;
+
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+
+ desc++;
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+
+#ifdef CONFIG_X86_64
+ /* Task segment value */
+ desc++;
+ desc->limit0 = 0x0000;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_TSS;
+ desc->s = 0;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit = 0x0;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = 0;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+#endif /* CONFIG_X86_64 */
+
+ asm volatile ("lidt %0" : : "m" (*idt));
+ asm volatile ("lgdt %0" : : "m" (*gdt));
+
+ asm volatile("cli");
+
+ return boot_params;
+fail:
+ return NULL;
+}
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
new file mode 100644
index 0000000..3925166
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.h
@@ -0,0 +1,61 @@
+#ifndef BOOT_COMPRESSED_EBOOT_H
+#define BOOT_COMPRESSED_EBOOT_H
+
+#define SEG_TYPE_DATA (0 << 3)
+#define SEG_TYPE_READ_WRITE (1 << 1)
+#define SEG_TYPE_CODE (1 << 3)
+#define SEG_TYPE_EXEC_READ (1 << 1)
+#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
+#define SEG_OP_SIZE_32BIT (1 << 0)
+#define SEG_GRANULARITY_4KB (1 << 0)
+
+#define DESC_TYPE_CODE_DATA (1 << 0)
+
+#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
+#define EFI_READ_CHUNK_SIZE (1024 * 1024)
+
+#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
+#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
+#define PIXEL_BIT_MASK 2
+#define PIXEL_BLT_ONLY 3
+#define PIXEL_FORMAT_MAX 4
+
+struct efi_pixel_bitmask {
+ u32 red_mask;
+ u32 green_mask;
+ u32 blue_mask;
+ u32 reserved_mask;
+};
+
+struct efi_graphics_output_mode_info {
+ u32 version;
+ u32 horizontal_resolution;
+ u32 vertical_resolution;
+ int pixel_format;
+ struct efi_pixel_bitmask pixel_information;
+ u32 pixels_per_scan_line;
+} __packed;
+
+struct efi_graphics_output_protocol_mode {
+ u32 max_mode;
+ u32 mode;
+ unsigned long info;
+ unsigned long size_of_info;
+ u64 frame_buffer_base;
+ unsigned long frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol {
+ void *query_mode;
+ unsigned long set_mode;
+ unsigned long blt;
+ struct efi_graphics_output_protocol_mode *mode;
+};
+
+struct efi_uga_draw_protocol {
+ void *get_mode;
+ void *set_mode;
+ void *blt;
+};
+
+#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
new file mode 100644
index 0000000..a53440e
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_32.S
@@ -0,0 +1,86 @@
+/*
+ * EFI call stub for IA32.
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off. Note that this implementation is different from the one in
+ * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
+ * mode at this point.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page_types.h>
+
+/*
+ * efi_call_phys(void *, ...) is a function with variable parameters.
+ * All the callers of this function assure that all the parameters are 4-bytes.
+ */
+
+/*
+ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
+ * So we'd better save all of them at the beginning of this function and restore
+ * at the end no matter how many we use, because we can not assure EFI runtime
+ * service functions will comply with gcc calling convention, too.
+ */
+
+.text
+ENTRY(efi_call_phys)
+ /*
+ * 0. The function can only be called in Linux kernel. So CS has been
+ * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
+ * the values of these registers are the same. And, the corresponding
+ * GDT entries are identical. So I will do nothing about segment reg
+ * and GDT, but change GDT base register in prelog and epilog.
+ */
+
+ /*
+ * 1. Because we haven't been relocated by this point we need to
+ * use relative addressing.
+ */
+ call 1f
+1: popl %edx
+ subl $1b, %edx
+
+ /*
+ * 2. Now on the top of stack is the return
+ * address in the caller of efi_call_phys(), then parameter 1,
+ * parameter 2, ..., param n. To make things easy, we save the return
+ * address of efi_call_phys in a global variable.
+ */
+ popl %ecx
+ movl %ecx, saved_return_addr(%edx)
+ /* get the function pointer into ECX*/
+ popl %ecx
+ movl %ecx, efi_rt_function_ptr(%edx)
+
+ /*
+ * 3. Call the physical function.
+ */
+ call *%ecx
+
+ /*
+ * 4. Balance the stack. And because EAX contain the return value,
+ * we'd better not clobber it. We need to calculate our address
+ * again because %ecx and %edx are not preserved across EFI function
+ * calls.
+ */
+ call 1f
+1: popl %edx
+ subl $1b, %edx
+
+ movl efi_rt_function_ptr(%edx), %ecx
+ pushl %ecx
+
+ /*
+ * 10. Push the saved return address onto the stack and return.
+ */
+ movl saved_return_addr(%edx), %ecx
+ pushl %ecx
+ ret
+ENDPROC(efi_call_phys)
+.previous
+
+.data
+saved_return_addr:
+ .long 0
+efi_rt_function_ptr:
+ .long 0
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
new file mode 100644
index 0000000..cedc60d
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -0,0 +1 @@
+#include "../../platform/efi/efi_stub_64.S"
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 67a655a..a055993 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -32,6 +32,28 @@
__HEAD
ENTRY(startup_32)
+#ifdef CONFIG_EFI_STUB
+ /*
+ * We don't need the return address, so set up the stack so
+ * efi_main() can find its arugments.
+ */
+ add $0x4, %esp
+
+ call efi_main
+ cmpl $0, %eax
+ je preferred_addr
+ movl %eax, %esi
+ call 1f
+1:
+ popl %eax
+ subl $1b, %eax
+ subl BP_pref_address(%esi), %eax
+ add BP_code32_start(%esi), %eax
+ leal preferred_addr(%eax), %eax
+ jmp *%eax
+
+preferred_addr:
+#endif
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 35af09d..558d76c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -199,6 +199,26 @@ ENTRY(startup_64)
* an identity mapped page table being provied that maps our
* entire text+data+bss and hopefully all of memory.
*/
+#ifdef CONFIG_EFI_STUB
+ pushq %rsi
+ mov %rcx, %rdi
+ mov %rdx, %rsi
+ call efi_main
+ popq %rsi
+ cmpq $0,%rax
+ je preferred_addr
+ movq %rax,%rsi
+ call 1f
+1:
+ popq %rax
+ subq $1b, %rax
+ subq BP_pref_address(%rsi), %rax
+ add BP_code32_start(%esi), %eax
+ leaq preferred_addr(%rax), %rax
+ jmp *%rax
+
+preferred_addr:
+#endif
/* Setup data segments. */
xorl %eax, %eax
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 3a19d04..7116dcb 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -321,6 +321,8 @@ static void parse_elf(void *output)
default: /* Ignore other PT_* */ break;
}
}
+
+ free(phdrs);
}
asmlinkage void decompress_kernel(void *rmode, memptr heap,
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 46a8238..958a641 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -29,14 +29,7 @@
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
-
-static uint32_t getle32(const void *p)
-{
- const uint8_t *cp = p;
-
- return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) +
- ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24);
-}
+#include <tools/le_byteshift.h>
int main(int argc, char *argv[])
{
@@ -69,7 +62,7 @@ int main(int argc, char *argv[])
}
ilen = ftell(f);
- olen = getle32(&olen);
+ olen = get_unaligned_le32(&olen);
fclose(f);
/*
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 89bbf4e..d3c0b02 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -10,6 +10,7 @@
#define USE_BSD
#include <endian.h>
#include <regex.h>
+#include <tools/le_byteshift.h>
static void die(char *fmt, ...);
@@ -605,10 +606,7 @@ static void emit_relocs(int as_text)
fwrite("\0\0\0\0", 4, 1, stdout);
/* Now print each relocation */
for (i = 0; i < reloc_count; i++) {
- buf[0] = (relocs[i] >> 0) & 0xff;
- buf[1] = (relocs[i] >> 8) & 0xff;
- buf[2] = (relocs[i] >> 16) & 0xff;
- buf[3] = (relocs[i] >> 24) & 0xff;
+ put_unaligned_le32(relocs[i], buf);
fwrite(buf, 4, 1, stdout);
}
}
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 19b3e69..ffb9c5c 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -1,2 +1,11 @@
#include "misc.h"
+
+int memcmp(const void *s1, const void *s2, size_t len)
+{
+ u8 diff;
+ asm("repe; cmpsb; setnz %0"
+ : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ return diff;
+}
+
#include "../string.c"
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index bdb4d45..f1bbeeb 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -45,6 +45,11 @@ SYSSEG = 0x1000 /* historical load address >> 4 */
.global bootsect_start
bootsect_start:
+#ifdef CONFIG_EFI_STUB
+ # "MZ", MS-DOS header
+ .byte 0x4d
+ .byte 0x5a
+#endif
# Normalize the start address
ljmp $BOOTSEG, $start2
@@ -79,6 +84,14 @@ bs_die:
# invoke the BIOS reset code...
ljmp $0xf000,$0xfff0
+#ifdef CONFIG_EFI_STUB
+ .org 0x3c
+ #
+ # Offset to the PE header.
+ #
+ .long pe_header
+#endif /* CONFIG_EFI_STUB */
+
.section ".bsdata", "a"
bugger_off_msg:
.ascii "Direct booting from floppy is no longer supported.\r\n"
@@ -87,6 +100,141 @@ bugger_off_msg:
.ascii "Remove disk and press any key to reboot . . .\r\n"
.byte 0
+#ifdef CONFIG_EFI_STUB
+pe_header:
+ .ascii "PE"
+ .word 0
+
+coff_header:
+#ifdef CONFIG_X86_32
+ .word 0x14c # i386
+#else
+ .word 0x8664 # x86-64
+#endif
+ .word 2 # nr_sections
+ .long 0 # TimeDateStamp
+ .long 0 # PointerToSymbolTable
+ .long 1 # NumberOfSymbols
+ .word section_table - optional_header # SizeOfOptionalHeader
+#ifdef CONFIG_X86_32
+ .word 0x306 # Characteristics.
+ # IMAGE_FILE_32BIT_MACHINE |
+ # IMAGE_FILE_DEBUG_STRIPPED |
+ # IMAGE_FILE_EXECUTABLE_IMAGE |
+ # IMAGE_FILE_LINE_NUMS_STRIPPED
+#else
+ .word 0x206 # Characteristics
+ # IMAGE_FILE_DEBUG_STRIPPED |
+ # IMAGE_FILE_EXECUTABLE_IMAGE |
+ # IMAGE_FILE_LINE_NUMS_STRIPPED
+#endif
+
+optional_header:
+#ifdef CONFIG_X86_32
+ .word 0x10b # PE32 format
+#else
+ .word 0x20b # PE32+ format
+#endif
+ .byte 0x02 # MajorLinkerVersion
+ .byte 0x14 # MinorLinkerVersion
+
+ # Filled in by build.c
+ .long 0 # SizeOfCode
+
+ .long 0 # SizeOfInitializedData
+ .long 0 # SizeOfUninitializedData
+
+ # Filled in by build.c
+ .long 0x0000 # AddressOfEntryPoint
+
+ .long 0x0000 # BaseOfCode
+#ifdef CONFIG_X86_32
+ .long 0 # data
+#endif
+
+extra_header_fields:
+#ifdef CONFIG_X86_32
+ .long 0 # ImageBase
+#else
+ .quad 0 # ImageBase
+#endif
+ .long 0x1000 # SectionAlignment
+ .long 0x200 # FileAlignment
+ .word 0 # MajorOperatingSystemVersion
+ .word 0 # MinorOperatingSystemVersion
+ .word 0 # MajorImageVersion
+ .word 0 # MinorImageVersion
+ .word 0 # MajorSubsystemVersion
+ .word 0 # MinorSubsystemVersion
+ .long 0 # Win32VersionValue
+
+ #
+ # The size of the bzImage is written in tools/build.c
+ #
+ .long 0 # SizeOfImage
+
+ .long 0x200 # SizeOfHeaders
+ .long 0 # CheckSum
+ .word 0xa # Subsystem (EFI application)
+ .word 0 # DllCharacteristics
+#ifdef CONFIG_X86_32
+ .long 0 # SizeOfStackReserve
+ .long 0 # SizeOfStackCommit
+ .long 0 # SizeOfHeapReserve
+ .long 0 # SizeOfHeapCommit
+#else
+ .quad 0 # SizeOfStackReserve
+ .quad 0 # SizeOfStackCommit
+ .quad 0 # SizeOfHeapReserve
+ .quad 0 # SizeOfHeapCommit
+#endif
+ .long 0 # LoaderFlags
+ .long 0x1 # NumberOfRvaAndSizes
+
+ .quad 0 # ExportTable
+ .quad 0 # ImportTable
+ .quad 0 # ResourceTable
+ .quad 0 # ExceptionTable
+ .quad 0 # CertificationTable
+ .quad 0 # BaseRelocationTable
+
+ # Section table
+section_table:
+ .ascii ".text"
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0x0 # startup_{32,64}
+ .long 0 # Size of initialized data
+ # on disk
+ .long 0x0 # startup_{32,64}
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long 0x60500020 # Characteristics (section flags)
+
+ #
+ # The EFI application loader requires a relocation section
+ # because EFI applications are relocatable and not having
+ # this section seems to confuse it. But since we don't need
+ # the loader to fixup any relocs for us just fill it with a
+ # single dummy reloc.
+ #
+ .ascii ".reloc"
+ .byte 0
+ .byte 0
+ .long reloc_end - reloc_start
+ .long reloc_start
+ .long reloc_end - reloc_start # SizeOfRawData
+ .long reloc_start # PointerToRawData
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long 0x42100040 # Characteristics (section flags)
+#endif /* CONFIG_EFI_STUB */
# Kernel attributes; used by setup. This is part 1 of the
# header, from the old boot sector.
@@ -318,3 +466,13 @@ die:
setup_corrupt:
.byte 7
.string "No setup signature found...\n"
+
+ .data
+dummy: .long 0
+
+ .section .reloc
+reloc_start:
+ .long dummy - reloc_start
+ .long 10
+ .word 0
+reloc_end:
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 3cbc405..574dedf 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -111,3 +111,38 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas
return result;
}
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char *strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index fdc60a0..ed54976 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -29,18 +29,18 @@
#include <stdarg.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/sysmacros.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
-#include <asm/boot.h>
+#include <tools/le_byteshift.h>
typedef unsigned char u8;
typedef unsigned short u16;
-typedef unsigned long u32;
+typedef unsigned int u32;
#define DEFAULT_MAJOR_ROOT 0
#define DEFAULT_MINOR_ROOT 0
+#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
/* Minimal number of setup sectors */
#define SETUP_SECT_MIN 5
@@ -135,6 +135,9 @@ static void usage(void)
int main(int argc, char ** argv)
{
+#ifdef CONFIG_EFI_STUB
+ unsigned int file_sz, pe_header;
+#endif
unsigned int i, sz, setup_sectors;
int c;
u32 sys_size;
@@ -156,7 +159,7 @@ int main(int argc, char ** argv)
die("read-error on `setup'");
if (c < 1024)
die("The setup must be at least 1024 bytes");
- if (buf[510] != 0x55 || buf[511] != 0xaa)
+ if (get_unaligned_le16(&buf[510]) != 0xAA55)
die("Boot block hasn't got boot flag (0xAA55)");
fclose(file);
@@ -168,8 +171,7 @@ int main(int argc, char ** argv)
memset(buf+c, 0, i-c);
/* Set the default root device */
- buf[508] = DEFAULT_MINOR_ROOT;
- buf[509] = DEFAULT_MAJOR_ROOT;
+ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i);
@@ -189,10 +191,44 @@ int main(int argc, char ** argv)
/* Patch the setup code with the appropriate size parameters */
buf[0x1f1] = setup_sectors-1;
- buf[0x1f4] = sys_size;
- buf[0x1f5] = sys_size >> 8;
- buf[0x1f6] = sys_size >> 16;
- buf[0x1f7] = sys_size >> 24;
+ put_unaligned_le32(sys_size, &buf[0x1f4]);
+
+#ifdef CONFIG_EFI_STUB
+ file_sz = sz + i + ((sys_size * 16) - sz);
+
+ pe_header = get_unaligned_le32(&buf[0x3c]);
+
+ /* Size of code */
+ put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
+
+ /* Size of image */
+ put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
+
+#ifdef CONFIG_X86_32
+ /* Address of entry point */
+ put_unaligned_le32(i, &buf[pe_header + 0x28]);
+
+ /* .text size */
+ put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
+
+ /* .text size of initialised data */
+ put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
+#else
+ /*
+ * Address of entry point. startup_32 is at the beginning and
+ * the 64-bit entry point (startup_64) is always 512 bytes
+ * after.
+ */
+ put_unaligned_le32(i + 512, &buf[pe_header + 0x28]);
+
+ /* .text size */
+ put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
+
+ /* .text size of initialised data */
+ put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
+
+#endif /* CONFIG_X86_32 */
+#endif /* CONFIG_EFI_STUB */
crc = partial_crc32(buf, i, crc);
if (fwrite(buf, 1, i, stdout) != i)
@@ -211,8 +247,9 @@ int main(int argc, char ** argv)
}
/* Write the CRC */
- fprintf(stderr, "CRC %lx\n", crc);
- if (fwrite(&crc, 1, 4, stdout) != 4)
+ fprintf(stderr, "CRC %x\n", crc);
+ put_unaligned_le32(crc, buf);
+ if (fwrite(buf, 1, 4, stdout) != 4)
die("Writing CRC failed");
close(fd);
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 2b0b963..e191ac0 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -25,6 +26,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 545d0ce..c799352 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -28,6 +28,7 @@
#include <crypto/aes.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
+#include <asm/cpu_device_id.h>
#include <asm/i387.h>
#include <asm/aes.h>
#include <crypto/scatterwalk.h>
@@ -1107,12 +1108,12 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
scatterwalk_start(&assoc_sg_walk, req->assoc);
- src = scatterwalk_map(&src_sg_walk, 0);
- assoc = scatterwalk_map(&assoc_sg_walk, 0);
+ src = scatterwalk_map(&src_sg_walk);
+ assoc = scatterwalk_map(&assoc_sg_walk);
dst = src;
if (unlikely(req->src != req->dst)) {
scatterwalk_start(&dst_sg_walk, req->dst);
- dst = scatterwalk_map(&dst_sg_walk, 0);
+ dst = scatterwalk_map(&dst_sg_walk);
}
} else {
@@ -1136,11 +1137,11 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
* back to the packet. */
if (one_entry_in_sg) {
if (unlikely(req->src != req->dst)) {
- scatterwalk_unmap(dst, 0);
+ scatterwalk_unmap(dst);
scatterwalk_done(&dst_sg_walk, 0, 0);
}
- scatterwalk_unmap(src, 0);
- scatterwalk_unmap(assoc, 0);
+ scatterwalk_unmap(src);
+ scatterwalk_unmap(assoc);
scatterwalk_done(&src_sg_walk, 0, 0);
scatterwalk_done(&assoc_sg_walk, 0, 0);
} else {
@@ -1189,12 +1190,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
scatterwalk_start(&assoc_sg_walk, req->assoc);
- src = scatterwalk_map(&src_sg_walk, 0);
- assoc = scatterwalk_map(&assoc_sg_walk, 0);
+ src = scatterwalk_map(&src_sg_walk);
+ assoc = scatterwalk_map(&assoc_sg_walk);
dst = src;
if (unlikely(req->src != req->dst)) {
scatterwalk_start(&dst_sg_walk, req->dst);
- dst = scatterwalk_map(&dst_sg_walk, 0);
+ dst = scatterwalk_map(&dst_sg_walk);
}
} else {
@@ -1219,11 +1220,11 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
if (one_entry_in_sg) {
if (unlikely(req->src != req->dst)) {
- scatterwalk_unmap(dst, 0);
+ scatterwalk_unmap(dst);
scatterwalk_done(&dst_sg_walk, 0, 0);
}
- scatterwalk_unmap(src, 0);
- scatterwalk_unmap(assoc, 0);
+ scatterwalk_unmap(src);
+ scatterwalk_unmap(assoc);
scatterwalk_done(&src_sg_walk, 0, 0);
scatterwalk_done(&assoc_sg_walk, 0, 0);
} else {
@@ -1253,14 +1254,19 @@ static struct crypto_alg __rfc4106_alg = {
};
#endif
+
+static const struct x86_cpu_id aesni_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_AES),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
static int __init aesni_init(void)
{
int err;
- if (!cpu_has_aes) {
- printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
+ if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
- }
if ((err = crypto_fpu_init()))
goto fpu_err;
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index b05aa16..7967474 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -25,6 +25,7 @@
*
*/
+#include <asm/processor.h>
#include <crypto/blowfish.h>
#include <linux/crypto.h>
#include <linux/init.h>
@@ -76,27 +77,6 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
-static struct crypto_alg bf_alg = {
- .cra_name = "blowfish",
- .cra_driver_name = "blowfish-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 3,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(bf_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = BF_MIN_KEY_SIZE,
- .cia_max_keysize = BF_MAX_KEY_SIZE,
- .cia_setkey = blowfish_setkey,
- .cia_encrypt = blowfish_encrypt,
- .cia_decrypt = blowfish_decrypt,
- }
- }
-};
-
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
void (*fn)(struct bf_ctx *, u8 *, const u8 *),
void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
@@ -160,28 +140,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
}
-static struct crypto_alg blk_ecb_alg = {
- .cra_name = "ecb(blowfish)",
- .cra_driver_name = "ecb-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-};
-
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
@@ -307,29 +265,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_cbc_alg = {
- .cra_name = "cbc(blowfish)",
- .cra_driver_name = "cbc-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-};
-
static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
@@ -423,7 +358,67 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_ctr_alg = {
+static struct crypto_alg bf_algs[4] = { {
+ .cra_name = "blowfish",
+ .cra_driver_name = "blowfish-asm",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = BF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct bf_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list),
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = BF_MIN_KEY_SIZE,
+ .cia_max_keysize = BF_MAX_KEY_SIZE,
+ .cia_setkey = blowfish_setkey,
+ .cia_encrypt = blowfish_encrypt,
+ .cia_decrypt = blowfish_decrypt,
+ }
+ }
+}, {
+ .cra_name = "ecb(blowfish)",
+ .cra_driver_name = "ecb-blowfish-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = BF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct bf_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = BF_MIN_KEY_SIZE,
+ .max_keysize = BF_MAX_KEY_SIZE,
+ .setkey = blowfish_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(blowfish)",
+ .cra_driver_name = "cbc-blowfish-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = BF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct bf_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = BF_MIN_KEY_SIZE,
+ .max_keysize = BF_MAX_KEY_SIZE,
+ .ivsize = BF_BLOCK_SIZE,
+ .setkey = blowfish_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
.cra_name = "ctr(blowfish)",
.cra_driver_name = "ctr-blowfish-asm",
.cra_priority = 300,
@@ -433,7 +428,7 @@ static struct crypto_alg blk_ctr_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
+ .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
@@ -444,43 +439,45 @@ static struct crypto_alg blk_ctr_alg = {
.decrypt = ctr_crypt,
},
},
-};
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ if (boot_cpu_data.x86 == 0x0f) {
+ /*
+ * On Pentium 4, blowfish-x86_64 is slower than generic C
+ * implementation because use of 64bit rotates (which are really
+ * slow on P4). Therefore blacklist P4s.
+ */
+ return true;
+ }
+
+ return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
{
- int err;
+ if (!force && is_blacklisted_cpu()) {
+ printk(KERN_INFO
+ "blowfish-x86_64: performance on this CPU "
+ "would be suboptimal: disabling "
+ "blowfish-x86_64.\n");
+ return -ENODEV;
+ }
- err = crypto_register_alg(&bf_alg);
- if (err)
- goto bf_err;
- err = crypto_register_alg(&blk_ecb_alg);
- if (err)
- goto ecb_err;
- err = crypto_register_alg(&blk_cbc_alg);
- if (err)
- goto cbc_err;
- err = crypto_register_alg(&blk_ctr_alg);
- if (err)
- goto ctr_err;
-
- return 0;
-
-ctr_err:
- crypto_unregister_alg(&blk_cbc_alg);
-cbc_err:
- crypto_unregister_alg(&blk_ecb_alg);
-ecb_err:
- crypto_unregister_alg(&bf_alg);
-bf_err:
- return err;
+ return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
static void __exit fini(void)
{
- crypto_unregister_alg(&blk_ctr_alg);
- crypto_unregister_alg(&blk_cbc_alg);
- crypto_unregister_alg(&blk_ecb_alg);
- crypto_unregister_alg(&bf_alg);
+ crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
module_init(init);
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
new file mode 100644
index 0000000..0b33743
--- /dev/null
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -0,0 +1,520 @@
+/*
+ * Camellia Cipher Algorithm (x86_64)
+ *
+ * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+.file "camellia-x86_64-asm_64.S"
+.text
+
+.extern camellia_sp10011110;
+.extern camellia_sp22000222;
+.extern camellia_sp03303033;
+.extern camellia_sp00444404;
+.extern camellia_sp02220222;
+.extern camellia_sp30333033;
+.extern camellia_sp44044404;
+.extern camellia_sp11101110;
+
+#define sp10011110 camellia_sp10011110
+#define sp22000222 camellia_sp22000222
+#define sp03303033 camellia_sp03303033
+#define sp00444404 camellia_sp00444404
+#define sp02220222 camellia_sp02220222
+#define sp30333033 camellia_sp30333033
+#define sp44044404 camellia_sp44044404
+#define sp11101110 camellia_sp11101110
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct camellia_ctx: */
+#define key_table 0
+#define key_length CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+#define RIOd %esi
+
+#define RAB0 %rax
+#define RCD0 %rcx
+#define RAB1 %rbx
+#define RCD1 %rdx
+
+#define RAB0d %eax
+#define RCD0d %ecx
+#define RAB1d %ebx
+#define RCD1d %edx
+
+#define RAB0bl %al
+#define RCD0bl %cl
+#define RAB1bl %bl
+#define RCD1bl %dl
+
+#define RAB0bh %ah
+#define RCD0bh %ch
+#define RAB1bh %bh
+#define RCD1bh %dh
+
+#define RT0 %rsi
+#define RT1 %rbp
+#define RT2 %r8
+
+#define RT0d %esi
+#define RT1d %ebp
+#define RT2d %r8d
+
+#define RT2bl %r8b
+
+#define RXOR %r9
+#define RRBP %r10
+#define RDST %r11
+
+#define RXORd %r9d
+#define RXORbl %r9b
+
+#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
+ movzbl ab ## bl, tmp2 ## d; \
+ movzbl ab ## bh, tmp1 ## d; \
+ rorq $16, ab; \
+ xorq T0(, tmp2, 8), dst; \
+ xorq T1(, tmp1, 8), dst;
+
+/**********************************************************************
+ 1-way camellia
+ **********************************************************************/
+#define roundsm(ab, subkey, cd) \
+ movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
+ \
+ xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
+ xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
+ xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
+ xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
+ \
+ xorq RT2, cd ## 0;
+
+#define fls(l, r, kl, kr) \
+ movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
+ andl l ## 0d, RT0d; \
+ roll $1, RT0d; \
+ shlq $32, RT0; \
+ xorq RT0, l ## 0; \
+ movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
+ orq r ## 0, RT1; \
+ shrq $32, RT1; \
+ xorq RT1, r ## 0; \
+ \
+ movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
+ orq l ## 0, RT2; \
+ shrq $32, RT2; \
+ xorq RT2, l ## 0; \
+ movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
+ andl r ## 0d, RT0d; \
+ roll $1, RT0d; \
+ shlq $32, RT0; \
+ xorq RT0, r ## 0;
+
+#define enc_rounds(i) \
+ roundsm(RAB, i + 2, RCD); \
+ roundsm(RCD, i + 3, RAB); \
+ roundsm(RAB, i + 4, RCD); \
+ roundsm(RCD, i + 5, RAB); \
+ roundsm(RAB, i + 6, RCD); \
+ roundsm(RCD, i + 7, RAB);
+
+#define enc_fls(i) \
+ fls(RAB, RCD, i + 0, i + 1);
+
+#define enc_inpack() \
+ movq (RIO), RAB0; \
+ bswapq RAB0; \
+ rolq $32, RAB0; \
+ movq 4*2(RIO), RCD0; \
+ bswapq RCD0; \
+ rorq $32, RCD0; \
+ xorq key_table(CTX), RAB0;
+
+#define enc_outunpack(op, max) \
+ xorq key_table(CTX, max, 8), RCD0; \
+ rorq $32, RCD0; \
+ bswapq RCD0; \
+ op ## q RCD0, (RIO); \
+ rolq $32, RAB0; \
+ bswapq RAB0; \
+ op ## q RAB0, 4*2(RIO);
+
+#define dec_rounds(i) \
+ roundsm(RAB, i + 7, RCD); \
+ roundsm(RCD, i + 6, RAB); \
+ roundsm(RAB, i + 5, RCD); \
+ roundsm(RCD, i + 4, RAB); \
+ roundsm(RAB, i + 3, RCD); \
+ roundsm(RCD, i + 2, RAB);
+
+#define dec_fls(i) \
+ fls(RAB, RCD, i + 1, i + 0);
+
+#define dec_inpack(max) \
+ movq (RIO), RAB0; \
+ bswapq RAB0; \
+ rolq $32, RAB0; \
+ movq 4*2(RIO), RCD0; \
+ bswapq RCD0; \
+ rorq $32, RCD0; \
+ xorq key_table(CTX, max, 8), RAB0;
+
+#define dec_outunpack() \
+ xorq key_table(CTX), RCD0; \
+ rorq $32, RCD0; \
+ bswapq RCD0; \
+ movq RCD0, (RIO); \
+ rolq $32, RAB0; \
+ bswapq RAB0; \
+ movq RAB0, 4*2(RIO);
+
+.global __camellia_enc_blk;
+.type __camellia_enc_blk,@function;
+
+__camellia_enc_blk:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: bool xor
+ */
+ movq %rbp, RRBP;
+
+ movq %rcx, RXOR;
+ movq %rsi, RDST;
+ movq %rdx, RIO;
+
+ enc_inpack();
+
+ enc_rounds(0);
+ enc_fls(8);
+ enc_rounds(8);
+ enc_fls(16);
+ enc_rounds(16);
+ movl $24, RT1d; /* max */
+
+ cmpb $16, key_length(CTX);
+ je __enc_done;
+
+ enc_fls(24);
+ enc_rounds(24);
+ movl $32, RT1d; /* max */
+
+__enc_done:
+ testb RXORbl, RXORbl;
+ movq RDST, RIO;
+
+ jnz __enc_xor;
+
+ enc_outunpack(mov, RT1);
+
+ movq RRBP, %rbp;
+ ret;
+
+__enc_xor:
+ enc_outunpack(xor, RT1);
+
+ movq RRBP, %rbp;
+ ret;
+
+.global camellia_dec_blk;
+.type camellia_dec_blk,@function;
+
+camellia_dec_blk:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ cmpl $16, key_length(CTX);
+ movl $32, RT2d;
+ movl $24, RXORd;
+ cmovel RXORd, RT2d; /* max */
+
+ movq %rbp, RRBP;
+ movq %rsi, RDST;
+ movq %rdx, RIO;
+
+ dec_inpack(RT2);
+
+ cmpb $24, RT2bl;
+ je __dec_rounds16;
+
+ dec_rounds(24);
+ dec_fls(24);
+
+__dec_rounds16:
+ dec_rounds(16);
+ dec_fls(16);
+ dec_rounds(8);
+ dec_fls(8);
+ dec_rounds(0);
+
+ movq RDST, RIO;
+
+ dec_outunpack();
+
+ movq RRBP, %rbp;
+ ret;
+
+/**********************************************************************
+ 2-way camellia
+ **********************************************************************/
+#define roundsm2(ab, subkey, cd) \
+ movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
+ xorq RT2, cd ## 1; \
+ \
+ xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
+ xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
+ xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
+ xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
+ \
+ xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
+ xorq RT2, cd ## 0; \
+ xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
+ xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
+ xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
+
+#define fls2(l, r, kl, kr) \
+ movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
+ andl l ## 0d, RT0d; \
+ roll $1, RT0d; \
+ shlq $32, RT0; \
+ xorq RT0, l ## 0; \
+ movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
+ orq r ## 0, RT1; \
+ shrq $32, RT1; \
+ xorq RT1, r ## 0; \
+ \
+ movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
+ andl l ## 1d, RT2d; \
+ roll $1, RT2d; \
+ shlq $32, RT2; \
+ xorq RT2, l ## 1; \
+ movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
+ orq r ## 1, RT0; \
+ shrq $32, RT0; \
+ xorq RT0, r ## 1; \
+ \
+ movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
+ orq l ## 0, RT1; \
+ shrq $32, RT1; \
+ xorq RT1, l ## 0; \
+ movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
+ andl r ## 0d, RT2d; \
+ roll $1, RT2d; \
+ shlq $32, RT2; \
+ xorq RT2, r ## 0; \
+ \
+ movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
+ orq l ## 1, RT0; \
+ shrq $32, RT0; \
+ xorq RT0, l ## 1; \
+ movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
+ andl r ## 1d, RT1d; \
+ roll $1, RT1d; \
+ shlq $32, RT1; \
+ xorq RT1, r ## 1;
+
+#define enc_rounds2(i) \
+ roundsm2(RAB, i + 2, RCD); \
+ roundsm2(RCD, i + 3, RAB); \
+ roundsm2(RAB, i + 4, RCD); \
+ roundsm2(RCD, i + 5, RAB); \
+ roundsm2(RAB, i + 6, RCD); \
+ roundsm2(RCD, i + 7, RAB);
+
+#define enc_fls2(i) \
+ fls2(RAB, RCD, i + 0, i + 1);
+
+#define enc_inpack2() \
+ movq (RIO), RAB0; \
+ bswapq RAB0; \
+ rorq $32, RAB0; \
+ movq 4*2(RIO), RCD0; \
+ bswapq RCD0; \
+ rolq $32, RCD0; \
+ xorq key_table(CTX), RAB0; \
+ \
+ movq 8*2(RIO), RAB1; \
+ bswapq RAB1; \
+ rorq $32, RAB1; \
+ movq 12*2(RIO), RCD1; \
+ bswapq RCD1; \
+ rolq $32, RCD1; \
+ xorq key_table(CTX), RAB1;
+
+#define enc_outunpack2(op, max) \
+ xorq key_table(CTX, max, 8), RCD0; \
+ rolq $32, RCD0; \
+ bswapq RCD0; \
+ op ## q RCD0, (RIO); \
+ rorq $32, RAB0; \
+ bswapq RAB0; \
+ op ## q RAB0, 4*2(RIO); \
+ \
+ xorq key_table(CTX, max, 8), RCD1; \
+ rolq $32, RCD1; \
+ bswapq RCD1; \
+ op ## q RCD1, 8*2(RIO); \
+ rorq $32, RAB1; \
+ bswapq RAB1; \
+ op ## q RAB1, 12*2(RIO);
+
+#define dec_rounds2(i) \
+ roundsm2(RAB, i + 7, RCD); \
+ roundsm2(RCD, i + 6, RAB); \
+ roundsm2(RAB, i + 5, RCD); \
+ roundsm2(RCD, i + 4, RAB); \
+ roundsm2(RAB, i + 3, RCD); \
+ roundsm2(RCD, i + 2, RAB);
+
+#define dec_fls2(i) \
+ fls2(RAB, RCD, i + 1, i + 0);
+
+#define dec_inpack2(max) \
+ movq (RIO), RAB0; \
+ bswapq RAB0; \
+ rorq $32, RAB0; \
+ movq 4*2(RIO), RCD0; \
+ bswapq RCD0; \
+ rolq $32, RCD0; \
+ xorq key_table(CTX, max, 8), RAB0; \
+ \
+ movq 8*2(RIO), RAB1; \
+ bswapq RAB1; \
+ rorq $32, RAB1; \
+ movq 12*2(RIO), RCD1; \
+ bswapq RCD1; \
+ rolq $32, RCD1; \
+ xorq key_table(CTX, max, 8), RAB1;
+
+#define dec_outunpack2() \
+ xorq key_table(CTX), RCD0; \
+ rolq $32, RCD0; \
+ bswapq RCD0; \
+ movq RCD0, (RIO); \
+ rorq $32, RAB0; \
+ bswapq RAB0; \
+ movq RAB0, 4*2(RIO); \
+ \
+ xorq key_table(CTX), RCD1; \
+ rolq $32, RCD1; \
+ bswapq RCD1; \
+ movq RCD1, 8*2(RIO); \
+ rorq $32, RAB1; \
+ bswapq RAB1; \
+ movq RAB1, 12*2(RIO);
+
+.global __camellia_enc_blk_2way;
+.type __camellia_enc_blk_2way,@function;
+
+__camellia_enc_blk_2way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: bool xor
+ */
+ pushq %rbx;
+
+ movq %rbp, RRBP;
+ movq %rcx, RXOR;
+ movq %rsi, RDST;
+ movq %rdx, RIO;
+
+ enc_inpack2();
+
+ enc_rounds2(0);
+ enc_fls2(8);
+ enc_rounds2(8);
+ enc_fls2(16);
+ enc_rounds2(16);
+ movl $24, RT2d; /* max */
+
+ cmpb $16, key_length(CTX);
+ je __enc2_done;
+
+ enc_fls2(24);
+ enc_rounds2(24);
+ movl $32, RT2d; /* max */
+
+__enc2_done:
+ test RXORbl, RXORbl;
+ movq RDST, RIO;
+ jnz __enc2_xor;
+
+ enc_outunpack2(mov, RT2);
+
+ movq RRBP, %rbp;
+ popq %rbx;
+ ret;
+
+__enc2_xor:
+ enc_outunpack2(xor, RT2);
+
+ movq RRBP, %rbp;
+ popq %rbx;
+ ret;
+
+.global camellia_dec_blk_2way;
+.type camellia_dec_blk_2way,@function;
+
+camellia_dec_blk_2way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ cmpl $16, key_length(CTX);
+ movl $32, RT2d;
+ movl $24, RXORd;
+ cmovel RXORd, RT2d; /* max */
+
+ movq %rbx, RXOR;
+ movq %rbp, RRBP;
+ movq %rsi, RDST;
+ movq %rdx, RIO;
+
+ dec_inpack2(RT2);
+
+ cmpb $24, RT2bl;
+ je __dec2_rounds16;
+
+ dec_rounds2(24);
+ dec_fls2(24);
+
+__dec2_rounds16:
+ dec_rounds2(16);
+ dec_fls2(16);
+ dec_rounds2(8);
+ dec_fls2(8);
+ dec_rounds2(0);
+
+ movq RDST, RIO;
+
+ dec_outunpack2();
+
+ movq RRBP, %rbp;
+ movq RXOR, %rbx;
+ ret;
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
new file mode 100644
index 0000000..1ca36a9
--- /dev/null
+++ b/arch/x86/crypto/camellia_glue.c
@@ -0,0 +1,1952 @@
+/*
+ * Glue Code for assembler optimized version of Camellia
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Camellia parts based on code by:
+ * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/unaligned.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+
+#define CAMELLIA_MIN_KEY_SIZE 16
+#define CAMELLIA_MAX_KEY_SIZE 32
+#define CAMELLIA_BLOCK_SIZE 16
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+struct camellia_ctx {
+ u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+ u32 key_length;
+};
+
+/* regular block cipher functions */
+asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+/* 2-way parallel cipher functions */
+asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk(ctx, dst, src, true);
+}
+
+static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk_2way(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk_2way(ctx, dst, src, true);
+}
+
+static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+/* camellia sboxes */
+const u64 camellia_sp10011110[256] = {
+ 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
+ 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
+ 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
+ 0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
+ 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
+ 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
+ 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
+ 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
+ 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
+ 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
+ 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
+ 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
+ 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
+ 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
+ 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
+ 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
+ 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
+ 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
+ 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
+ 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
+ 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
+ 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
+ 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
+ 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
+ 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
+ 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100,
+ 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00,
+ 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200,
+ 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600,
+ 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700,
+ 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100,
+ 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700,
+ 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00,
+ 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00,
+ 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200,
+ 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600,
+ 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200,
+ 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400,
+ 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300,
+ 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100,
+ 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800,
+ 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00,
+ 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00,
+ 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00,
+ 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200,
+ 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00,
+ 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000,
+ 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200,
+ 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000,
+ 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700,
+ 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00,
+ 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00,
+ 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00,
+ 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00,
+ 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00,
+ 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300,
+ 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600,
+ 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00,
+ 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200,
+ 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600,
+ 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600,
+ 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600,
+ 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300,
+ 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00,
+ 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700,
+ 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00,
+ 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900,
+ 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600,
+ 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400,
+ 0x5900005959595900, 0x7800007878787800, 0x9800009898989800,
+ 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700,
+ 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00,
+ 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00,
+ 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200,
+ 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200,
+ 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500,
+ 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00,
+ 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900,
+ 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00,
+ 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400,
+ 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300,
+ 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900,
+ 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500,
+ 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400,
+ 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000,
+ 0x9e00009e9e9e9e00,
+};
+
+const u64 camellia_sp22000222[256] = {
+ 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858,
+ 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e,
+ 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9,
+ 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a,
+ 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d,
+ 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf,
+ 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a,
+ 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242,
+ 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e,
+ 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca,
+ 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d,
+ 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f,
+ 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e,
+ 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060,
+ 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc,
+ 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434,
+ 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272,
+ 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e,
+ 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3,
+ 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad,
+ 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8,
+ 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a,
+ 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7,
+ 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a,
+ 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656,
+ 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363,
+ 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf,
+ 0x9898000000989898, 0x9797000000979797, 0x8585000000858585,
+ 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec,
+ 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f,
+ 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3,
+ 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf,
+ 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474,
+ 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636,
+ 0x2222000000222222, 0x3838000000383838, 0x6464000000646464,
+ 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c,
+ 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5,
+ 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888,
+ 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787,
+ 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323,
+ 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1,
+ 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9,
+ 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555,
+ 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa,
+ 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4,
+ 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6,
+ 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1,
+ 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5,
+ 0x2020000000202020, 0x8989000000898989, 0x0000000000000000,
+ 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef,
+ 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515,
+ 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5,
+ 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb,
+ 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8,
+ 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b,
+ 0x9494000000949494, 0x2121000000212121, 0x6666000000666666,
+ 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed,
+ 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe,
+ 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4,
+ 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c,
+ 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d,
+ 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d,
+ 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626,
+ 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c,
+ 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f,
+ 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc,
+ 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252,
+ 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d,
+ 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969,
+ 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131,
+ 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf,
+ 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575,
+ 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757,
+ 0x8484000000848484, 0x1111000000111111, 0x4545000000454545,
+ 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4,
+ 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa,
+ 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959,
+ 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292,
+ 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878,
+ 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949,
+ 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7,
+ 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393,
+ 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a,
+ 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9,
+ 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101,
+ 0x3d3d0000003d3d3d,
+};
+
+const u64 camellia_sp03303033[256] = {
+ 0x0038380038003838, 0x0041410041004141, 0x0016160016001616,
+ 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393,
+ 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272,
+ 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a,
+ 0x0075750075007575, 0x0006060006000606, 0x0057570057005757,
+ 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7,
+ 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2,
+ 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090,
+ 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7,
+ 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2,
+ 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343,
+ 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7,
+ 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f,
+ 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818,
+ 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f,
+ 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d,
+ 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c,
+ 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3,
+ 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec,
+ 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b,
+ 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636,
+ 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686,
+ 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd,
+ 0x0066660066006666, 0x0058580058005858, 0x0096960096009696,
+ 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595,
+ 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8,
+ 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef,
+ 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161,
+ 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b,
+ 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb,
+ 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8,
+ 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb,
+ 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d,
+ 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d,
+ 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919,
+ 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b,
+ 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979,
+ 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222,
+ 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1,
+ 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8,
+ 0x0012120012001212, 0x0004040004000404, 0x0074740074007474,
+ 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e,
+ 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555,
+ 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe,
+ 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131,
+ 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad,
+ 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070,
+ 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969,
+ 0x0008080008000808, 0x0062620062006262, 0x0000000000000000,
+ 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb,
+ 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545,
+ 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d,
+ 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee,
+ 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e,
+ 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6,
+ 0x0025250025002525, 0x0048480048004848, 0x0099990099009999,
+ 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b,
+ 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf,
+ 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929,
+ 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313,
+ 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363,
+ 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b,
+ 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989,
+ 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717,
+ 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3,
+ 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737,
+ 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494,
+ 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b,
+ 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a,
+ 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c,
+ 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3,
+ 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d,
+ 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5,
+ 0x0021210021002121, 0x0044440044004444, 0x0051510051005151,
+ 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939,
+ 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa,
+ 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656,
+ 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4,
+ 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e,
+ 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252,
+ 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9,
+ 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4,
+ 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a,
+ 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a,
+ 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040,
+ 0x004f4f004f004f4f,
+};
+
+const u64 camellia_sp00444404[256] = {
+ 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3,
+ 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057,
+ 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023,
+ 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5,
+ 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d,
+ 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af,
+ 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e,
+ 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b,
+ 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5,
+ 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a,
+ 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b,
+ 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0,
+ 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0,
+ 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb,
+ 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d,
+ 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004,
+ 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de,
+ 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c,
+ 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe,
+ 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a,
+ 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060,
+ 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0,
+ 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054,
+ 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064,
+ 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3,
+ 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6,
+ 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087,
+ 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090,
+ 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d,
+ 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8,
+ 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081,
+ 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063,
+ 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f,
+ 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9,
+ 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078,
+ 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071,
+ 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088,
+ 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9,
+ 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036,
+ 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1,
+ 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb,
+ 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad,
+ 0x0000777777770077, 0x0000808080800080, 0x0000828282820082,
+ 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5,
+ 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c,
+ 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093,
+ 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e,
+ 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd,
+ 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb,
+ 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f,
+ 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1,
+ 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d,
+ 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056,
+ 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066,
+ 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012,
+ 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099,
+ 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e,
+ 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031,
+ 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058,
+ 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c,
+ 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018,
+ 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2,
+ 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008,
+ 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050,
+ 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089,
+ 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095,
+ 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4,
+ 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db,
+ 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f,
+ 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002,
+ 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067,
+ 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2,
+ 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037,
+ 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b,
+ 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079,
+ 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e,
+ 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd,
+ 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a,
+ 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025,
+ 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa,
+ 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee,
+ 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068,
+ 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028,
+ 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1,
+ 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7,
+ 0x00009e9e9e9e009e,
+};
+
+const u64 camellia_sp02220222[256] = {
+ 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858,
+ 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e,
+ 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9,
+ 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a,
+ 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d,
+ 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf,
+ 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a,
+ 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242,
+ 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e,
+ 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca,
+ 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d,
+ 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f,
+ 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e,
+ 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060,
+ 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc,
+ 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434,
+ 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272,
+ 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e,
+ 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3,
+ 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad,
+ 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8,
+ 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a,
+ 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7,
+ 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a,
+ 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656,
+ 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363,
+ 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf,
+ 0x0098989800989898, 0x0097979700979797, 0x0085858500858585,
+ 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec,
+ 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f,
+ 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3,
+ 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf,
+ 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474,
+ 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636,
+ 0x0022222200222222, 0x0038383800383838, 0x0064646400646464,
+ 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c,
+ 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5,
+ 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888,
+ 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787,
+ 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323,
+ 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1,
+ 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9,
+ 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555,
+ 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa,
+ 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4,
+ 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6,
+ 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1,
+ 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5,
+ 0x0020202000202020, 0x0089898900898989, 0x0000000000000000,
+ 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef,
+ 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515,
+ 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5,
+ 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb,
+ 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8,
+ 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b,
+ 0x0094949400949494, 0x0021212100212121, 0x0066666600666666,
+ 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed,
+ 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe,
+ 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4,
+ 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c,
+ 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d,
+ 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d,
+ 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626,
+ 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c,
+ 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f,
+ 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc,
+ 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252,
+ 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d,
+ 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969,
+ 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131,
+ 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf,
+ 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575,
+ 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757,
+ 0x0084848400848484, 0x0011111100111111, 0x0045454500454545,
+ 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4,
+ 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa,
+ 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959,
+ 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292,
+ 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878,
+ 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949,
+ 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7,
+ 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393,
+ 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a,
+ 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9,
+ 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101,
+ 0x003d3d3d003d3d3d,
+};
+
+const u64 camellia_sp30333033[256] = {
+ 0x3800383838003838, 0x4100414141004141, 0x1600161616001616,
+ 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393,
+ 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272,
+ 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a,
+ 0x7500757575007575, 0x0600060606000606, 0x5700575757005757,
+ 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7,
+ 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2,
+ 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090,
+ 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7,
+ 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2,
+ 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343,
+ 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7,
+ 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f,
+ 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818,
+ 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f,
+ 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d,
+ 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c,
+ 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3,
+ 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec,
+ 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b,
+ 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636,
+ 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686,
+ 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd,
+ 0x6600666666006666, 0x5800585858005858, 0x9600969696009696,
+ 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595,
+ 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8,
+ 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef,
+ 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161,
+ 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b,
+ 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb,
+ 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8,
+ 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb,
+ 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d,
+ 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d,
+ 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919,
+ 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b,
+ 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979,
+ 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222,
+ 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1,
+ 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8,
+ 0x1200121212001212, 0x0400040404000404, 0x7400747474007474,
+ 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e,
+ 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555,
+ 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe,
+ 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131,
+ 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad,
+ 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070,
+ 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969,
+ 0x0800080808000808, 0x6200626262006262, 0x0000000000000000,
+ 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb,
+ 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545,
+ 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d,
+ 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee,
+ 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e,
+ 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6,
+ 0x2500252525002525, 0x4800484848004848, 0x9900999999009999,
+ 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b,
+ 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf,
+ 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929,
+ 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313,
+ 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363,
+ 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b,
+ 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989,
+ 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717,
+ 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3,
+ 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737,
+ 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494,
+ 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b,
+ 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a,
+ 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c,
+ 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3,
+ 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d,
+ 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5,
+ 0x2100212121002121, 0x4400444444004444, 0x5100515151005151,
+ 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939,
+ 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa,
+ 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656,
+ 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4,
+ 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e,
+ 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252,
+ 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9,
+ 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4,
+ 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a,
+ 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a,
+ 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040,
+ 0x4f004f4f4f004f4f,
+};
+
+const u64 camellia_sp44044404[256] = {
+ 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3,
+ 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057,
+ 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023,
+ 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5,
+ 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d,
+ 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af,
+ 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e,
+ 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b,
+ 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5,
+ 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a,
+ 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b,
+ 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0,
+ 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0,
+ 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb,
+ 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d,
+ 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004,
+ 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de,
+ 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c,
+ 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe,
+ 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a,
+ 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060,
+ 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0,
+ 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054,
+ 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064,
+ 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3,
+ 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6,
+ 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087,
+ 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090,
+ 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d,
+ 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8,
+ 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081,
+ 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063,
+ 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f,
+ 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9,
+ 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078,
+ 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071,
+ 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088,
+ 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9,
+ 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036,
+ 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1,
+ 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb,
+ 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad,
+ 0x7777007777770077, 0x8080008080800080, 0x8282008282820082,
+ 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5,
+ 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c,
+ 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093,
+ 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e,
+ 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd,
+ 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb,
+ 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f,
+ 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1,
+ 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d,
+ 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056,
+ 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066,
+ 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012,
+ 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099,
+ 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e,
+ 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031,
+ 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058,
+ 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c,
+ 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018,
+ 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2,
+ 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008,
+ 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050,
+ 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089,
+ 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095,
+ 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4,
+ 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db,
+ 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f,
+ 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002,
+ 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067,
+ 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2,
+ 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037,
+ 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b,
+ 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079,
+ 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e,
+ 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd,
+ 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a,
+ 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025,
+ 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa,
+ 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee,
+ 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068,
+ 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028,
+ 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1,
+ 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7,
+ 0x9e9e009e9e9e009e,
+};
+
+const u64 camellia_sp11101110[256] = {
+ 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00,
+ 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700,
+ 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400,
+ 0x8585850085858500, 0x5757570057575700, 0x3535350035353500,
+ 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00,
+ 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00,
+ 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500,
+ 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100,
+ 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00,
+ 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500,
+ 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600,
+ 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00,
+ 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00,
+ 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000,
+ 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00,
+ 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00,
+ 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900,
+ 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700,
+ 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900,
+ 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600,
+ 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00,
+ 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00,
+ 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00,
+ 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00,
+ 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00,
+ 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100,
+ 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00,
+ 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200,
+ 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600,
+ 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700,
+ 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100,
+ 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700,
+ 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00,
+ 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00,
+ 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200,
+ 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600,
+ 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200,
+ 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400,
+ 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300,
+ 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100,
+ 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800,
+ 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00,
+ 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00,
+ 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00,
+ 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200,
+ 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00,
+ 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000,
+ 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200,
+ 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000,
+ 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700,
+ 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00,
+ 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00,
+ 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00,
+ 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00,
+ 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00,
+ 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300,
+ 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600,
+ 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00,
+ 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200,
+ 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600,
+ 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600,
+ 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600,
+ 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300,
+ 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00,
+ 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700,
+ 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00,
+ 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900,
+ 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600,
+ 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400,
+ 0x5959590059595900, 0x7878780078787800, 0x9898980098989800,
+ 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700,
+ 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00,
+ 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00,
+ 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200,
+ 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200,
+ 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500,
+ 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00,
+ 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900,
+ 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00,
+ 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400,
+ 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300,
+ 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900,
+ 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500,
+ 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400,
+ 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000,
+ 0x9e9e9e009e9e9e00,
+};
+
+/* key constants */
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/* macros */
+#define ROLDQ(l, r, bits) ({ \
+ u64 t = l; \
+ l = (l << bits) | (r >> (64 - bits)); \
+ r = (r << bits) | (t >> (64 - bits)); \
+})
+
+#define CAMELLIA_F(x, kl, kr, y) ({ \
+ u64 ii = x ^ (((u64)kl << 32) | kr); \
+ y = camellia_sp11101110[(uint8_t)ii]; \
+ y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \
+ ii >>= 16; \
+ y ^= camellia_sp30333033[(uint8_t)ii]; \
+ y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \
+ ii >>= 16; \
+ y ^= camellia_sp00444404[(uint8_t)ii]; \
+ y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \
+ ii >>= 16; \
+ y ^= camellia_sp22000222[(uint8_t)ii]; \
+ y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \
+ y = ror64(y, 32); \
+})
+
+#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32))
+
+static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
+{
+ u64 kw4, tt;
+ u32 dw, tl, tr;
+
+ /* absorb kw2 to other subkeys */
+ /* round 2 */
+ subRL[3] ^= subRL[1];
+ /* round 4 */
+ subRL[5] ^= subRL[1];
+ /* round 6 */
+ subRL[7] ^= subRL[1];
+
+ subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
+ /* modified for FLinv(kl2) */
+ dw = (subRL[1] & subRL[9]) >> 32,
+ subRL[1] ^= rol32(dw, 1);
+
+ /* round 8 */
+ subRL[11] ^= subRL[1];
+ /* round 10 */
+ subRL[13] ^= subRL[1];
+ /* round 12 */
+ subRL[15] ^= subRL[1];
+
+ subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
+ /* modified for FLinv(kl4) */
+ dw = (subRL[1] & subRL[17]) >> 32,
+ subRL[1] ^= rol32(dw, 1);
+
+ /* round 14 */
+ subRL[19] ^= subRL[1];
+ /* round 16 */
+ subRL[21] ^= subRL[1];
+ /* round 18 */
+ subRL[23] ^= subRL[1];
+
+ if (max == 24) {
+ /* kw3 */
+ subRL[24] ^= subRL[1];
+
+ /* absorb kw4 to other subkeys */
+ kw4 = subRL[25];
+ } else {
+ subRL[1] ^= (subRL[1] & ~subRL[25]) << 32;
+ /* modified for FLinv(kl6) */
+ dw = (subRL[1] & subRL[25]) >> 32,
+ subRL[1] ^= rol32(dw, 1);
+
+ /* round 20 */
+ subRL[27] ^= subRL[1];
+ /* round 22 */
+ subRL[29] ^= subRL[1];
+ /* round 24 */
+ subRL[31] ^= subRL[1];
+ /* kw3 */
+ subRL[32] ^= subRL[1];
+
+ /* absorb kw4 to other subkeys */
+ kw4 = subRL[33];
+ /* round 23 */
+ subRL[30] ^= kw4;
+ /* round 21 */
+ subRL[28] ^= kw4;
+ /* round 19 */
+ subRL[26] ^= kw4;
+
+ kw4 ^= (kw4 & ~subRL[24]) << 32;
+ /* modified for FL(kl5) */
+ dw = (kw4 & subRL[24]) >> 32,
+ kw4 ^= rol32(dw, 1);
+ }
+
+ /* round 17 */
+ subRL[22] ^= kw4;
+ /* round 15 */
+ subRL[20] ^= kw4;
+ /* round 13 */
+ subRL[18] ^= kw4;
+
+ kw4 ^= (kw4 & ~subRL[16]) << 32;
+ /* modified for FL(kl3) */
+ dw = (kw4 & subRL[16]) >> 32,
+ kw4 ^= rol32(dw, 1);
+
+ /* round 11 */
+ subRL[14] ^= kw4;
+ /* round 9 */
+ subRL[12] ^= kw4;
+ /* round 7 */
+ subRL[10] ^= kw4;
+
+ kw4 ^= (kw4 & ~subRL[8]) << 32;
+ /* modified for FL(kl1) */
+ dw = (kw4 & subRL[8]) >> 32,
+ kw4 ^= rol32(dw, 1);
+
+ /* round 5 */
+ subRL[6] ^= kw4;
+ /* round 3 */
+ subRL[4] ^= kw4;
+ /* round 1 */
+ subRL[2] ^= kw4;
+ /* kw1 */
+ subRL[0] ^= kw4;
+
+ /* key XOR is end of F-function */
+ SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */
+ SET_SUBKEY_LR(2, subRL[3]); /* round 1 */
+ SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */
+ SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */
+ SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */
+ SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */
+
+ tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
+ dw = tl & (subRL[8] >> 32), /* FL(kl1) */
+ tr = subRL[10] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */
+ SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */
+ SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */
+
+ tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
+ dw = tl & (subRL[9] >> 32), /* FLinv(kl2) */
+ tr = subRL[7] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */
+ SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */
+ SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */
+ SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */
+ SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */
+
+ tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
+ dw = tl & (subRL[16] >> 32), /* FL(kl3) */
+ tr = subRL[18] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */
+ SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */
+ SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */
+
+ tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
+ dw = tl & (subRL[17] >> 32), /* FLinv(kl4) */
+ tr = subRL[15] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */
+ SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */
+ SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */
+ SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */
+ SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */
+
+ if (max == 24) {
+ SET_SUBKEY_LR(23, subRL[22]); /* round 18 */
+ SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */
+ } else {
+ tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]);
+ dw = tl & (subRL[24] >> 32), /* FL(kl5) */
+ tr = subRL[26] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */
+ SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */
+ SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */
+
+ tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
+ dw = tl & (subRL[25] >> 32), /* FLinv(kl6) */
+ tr = subRL[23] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */
+ SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */
+ SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */
+ SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */
+ SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */
+ SET_SUBKEY_LR(31, subRL[30]); /* round 24 */
+ SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */
+ }
+}
+
+static void camellia_setup128(const unsigned char *key, u64 *subkey)
+{
+ u64 kl, kr, ww;
+ u64 subRL[26];
+
+ /**
+ * k == kl || kr (|| is concatenation)
+ */
+ kl = get_unaligned_be64(key);
+ kr = get_unaligned_be64(key + 8);
+
+ /* generate KL dependent subkeys */
+ /* kw1 */
+ subRL[0] = kl;
+ /* kw2 */
+ subRL[1] = kr;
+
+ /* rotation left shift 15bit */
+ ROLDQ(kl, kr, 15);
+
+ /* k3 */
+ subRL[4] = kl;
+ /* k4 */
+ subRL[5] = kr;
+
+ /* rotation left shift 15+30bit */
+ ROLDQ(kl, kr, 30);
+
+ /* k7 */
+ subRL[10] = kl;
+ /* k8 */
+ subRL[11] = kr;
+
+ /* rotation left shift 15+30+15bit */
+ ROLDQ(kl, kr, 15);
+
+ /* k10 */
+ subRL[13] = kr;
+ /* rotation left shift 15+30+15+17 bit */
+ ROLDQ(kl, kr, 17);
+
+ /* kl3 */
+ subRL[16] = kl;
+ /* kl4 */
+ subRL[17] = kr;
+
+ /* rotation left shift 15+30+15+17+17 bit */
+ ROLDQ(kl, kr, 17);
+
+ /* k13 */
+ subRL[18] = kl;
+ /* k14 */
+ subRL[19] = kr;
+
+ /* rotation left shift 15+30+15+17+17+17 bit */
+ ROLDQ(kl, kr, 17);
+
+ /* k17 */
+ subRL[22] = kl;
+ /* k18 */
+ subRL[23] = kr;
+
+ /* generate KA */
+ kl = subRL[0];
+ kr = subRL[1];
+ CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
+ kr ^= ww;
+ CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
+
+ /* current status == (kll, klr, w0, w1) */
+ CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
+ kr ^= ww;
+ CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
+ kl ^= ww;
+
+ /* generate KA dependent subkeys */
+ /* k1, k2 */
+ subRL[2] = kl;
+ subRL[3] = kr;
+ ROLDQ(kl, kr, 15);
+ /* k5,k6 */
+ subRL[6] = kl;
+ subRL[7] = kr;
+ ROLDQ(kl, kr, 15);
+ /* kl1, kl2 */
+ subRL[8] = kl;
+ subRL[9] = kr;
+ ROLDQ(kl, kr, 15);
+ /* k9 */
+ subRL[12] = kl;
+ ROLDQ(kl, kr, 15);
+ /* k11, k12 */
+ subRL[14] = kl;
+ subRL[15] = kr;
+ ROLDQ(kl, kr, 34);
+ /* k15, k16 */
+ subRL[20] = kl;
+ subRL[21] = kr;
+ ROLDQ(kl, kr, 17);
+ /* kw3, kw4 */
+ subRL[24] = kl;
+ subRL[25] = kr;
+
+ camellia_setup_tail(subkey, subRL, 24);
+}
+
+static void camellia_setup256(const unsigned char *key, u64 *subkey)
+{
+ u64 kl, kr; /* left half of key */
+ u64 krl, krr; /* right half of key */
+ u64 ww; /* temporary variables */
+ u64 subRL[34];
+
+ /**
+ * key = (kl || kr || krl || krr) (|| is concatenation)
+ */
+ kl = get_unaligned_be64(key);
+ kr = get_unaligned_be64(key + 8);
+ krl = get_unaligned_be64(key + 16);
+ krr = get_unaligned_be64(key + 24);
+
+ /* generate KL dependent subkeys */
+ /* kw1 */
+ subRL[0] = kl;
+ /* kw2 */
+ subRL[1] = kr;
+ ROLDQ(kl, kr, 45);
+ /* k9 */
+ subRL[12] = kl;
+ /* k10 */
+ subRL[13] = kr;
+ ROLDQ(kl, kr, 15);
+ /* kl3 */
+ subRL[16] = kl;
+ /* kl4 */
+ subRL[17] = kr;
+ ROLDQ(kl, kr, 17);
+ /* k17 */
+ subRL[22] = kl;
+ /* k18 */
+ subRL[23] = kr;
+ ROLDQ(kl, kr, 34);
+ /* k23 */
+ subRL[30] = kl;
+ /* k24 */
+ subRL[31] = kr;
+
+ /* generate KR dependent subkeys */
+ ROLDQ(krl, krr, 15);
+ /* k3 */
+ subRL[4] = krl;
+ /* k4 */
+ subRL[5] = krr;
+ ROLDQ(krl, krr, 15);
+ /* kl1 */
+ subRL[8] = krl;
+ /* kl2 */
+ subRL[9] = krr;
+ ROLDQ(krl, krr, 30);
+ /* k13 */
+ subRL[18] = krl;
+ /* k14 */
+ subRL[19] = krr;
+ ROLDQ(krl, krr, 34);
+ /* k19 */
+ subRL[26] = krl;
+ /* k20 */
+ subRL[27] = krr;
+ ROLDQ(krl, krr, 34);
+
+ /* generate KA */
+ kl = subRL[0] ^ krl;
+ kr = subRL[1] ^ krr;
+
+ CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
+ kr ^= ww;
+ CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
+ kl ^= krl;
+ CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
+ kr ^= ww ^ krr;
+ CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
+ kl ^= ww;
+
+ /* generate KB */
+ krl ^= kl;
+ krr ^= kr;
+ CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
+ krr ^= ww;
+ CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
+ krl ^= ww;
+
+ /* generate KA dependent subkeys */
+ ROLDQ(kl, kr, 15);
+ /* k5 */
+ subRL[6] = kl;
+ /* k6 */
+ subRL[7] = kr;
+ ROLDQ(kl, kr, 30);
+ /* k11 */
+ subRL[14] = kl;
+ /* k12 */
+ subRL[15] = kr;
+ /* rotation left shift 32bit */
+ ROLDQ(kl, kr, 32);
+ /* kl5 */
+ subRL[24] = kl;
+ /* kl6 */
+ subRL[25] = kr;
+ /* rotation left shift 17 from k11,k12 -> k21,k22 */
+ ROLDQ(kl, kr, 17);
+ /* k21 */
+ subRL[28] = kl;
+ /* k22 */
+ subRL[29] = kr;
+
+ /* generate KB dependent subkeys */
+ /* k1 */
+ subRL[2] = krl;
+ /* k2 */
+ subRL[3] = krr;
+ ROLDQ(krl, krr, 30);
+ /* k7 */
+ subRL[10] = krl;
+ /* k8 */
+ subRL[11] = krr;
+ ROLDQ(krl, krr, 30);
+ /* k15 */
+ subRL[20] = krl;
+ /* k16 */
+ subRL[21] = krr;
+ ROLDQ(krl, krr, 51);
+ /* kw3 */
+ subRL[32] = krl;
+ /* kw4 */
+ subRL[33] = krr;
+
+ camellia_setup_tail(subkey, subRL, 32);
+}
+
+static void camellia_setup192(const unsigned char *key, u64 *subkey)
+{
+ unsigned char kk[32];
+ u64 krl, krr;
+
+ memcpy(kk, key, 24);
+ memcpy((unsigned char *)&krl, key+16, 8);
+ krr = ~krl;
+ memcpy(kk+24, (unsigned char *)&krr, 8);
+ camellia_setup256(kk, subkey);
+}
+
+static int __camellia_setkey(struct camellia_ctx *cctx,
+ const unsigned char *key,
+ unsigned int key_len, u32 *flags)
+{
+ if (key_len != 16 && key_len != 24 && key_len != 32) {
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ cctx->key_length = key_len;
+
+ switch (key_len) {
+ case 16:
+ camellia_setup128(key, cctx->key_table);
+ break;
+ case 24:
+ camellia_setup192(key, cctx->key_table);
+ break;
+ case 32:
+ camellia_setup256(key, cctx->key_table);
+ break;
+ }
+
+ return 0;
+}
+
+static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
+ &tfm->crt_flags);
+}
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+ void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
+ void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ unsigned int nbytes;
+ int err;
+
+ err = blkcipher_walk_virt(desc, walk);
+
+ while ((nbytes = walk->nbytes)) {
+ u8 *wsrc = walk->src.virt.addr;
+ u8 *wdst = walk->dst.virt.addr;
+
+ /* Process two block batch */
+ if (nbytes >= bsize * 2) {
+ do {
+ fn_2way(ctx, wdst, wsrc);
+
+ wsrc += bsize * 2;
+ wdst += bsize * 2;
+ nbytes -= bsize * 2;
+ } while (nbytes >= bsize * 2);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+
+ /* Handle leftovers */
+ do {
+ fn(ctx, wdst, wsrc);
+
+ wsrc += bsize;
+ wdst += bsize;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+done:
+ err = blkcipher_walk_done(desc, walk, nbytes);
+ }
+
+ return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
+}
+
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 *iv = (u128 *)walk->iv;
+
+ do {
+ u128_xor(dst, src, iv);
+ camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
+ iv = dst;
+
+ src += 1;
+ dst += 1;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+ u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+ return nbytes;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ nbytes = __cbc_encrypt(desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 ivs[2 - 1];
+ u128 last_iv;
+
+ /* Start of the last block. */
+ src += nbytes / bsize - 1;
+ dst += nbytes / bsize - 1;
+
+ last_iv = *src;
+
+ /* Process two block batch */
+ if (nbytes >= bsize * 2) {
+ do {
+ nbytes -= bsize * (2 - 1);
+ src -= 2 - 1;
+ dst -= 2 - 1;
+
+ ivs[0] = src[0];
+
+ camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
+
+ u128_xor(dst + 1, dst + 1, ivs + 0);
+
+ nbytes -= bsize;
+ if (nbytes < bsize)
+ goto done;
+
+ u128_xor(dst, dst, src - 1);
+ src -= 1;
+ dst -= 1;
+ } while (nbytes >= bsize * 2);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+
+ /* Handle leftovers */
+ for (;;) {
+ camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
+
+ nbytes -= bsize;
+ if (nbytes < bsize)
+ break;
+
+ u128_xor(dst, dst, src - 1);
+ src -= 1;
+ dst -= 1;
+ }
+
+done:
+ u128_xor(dst, dst, (u128 *)walk->iv);
+ *(u128 *)walk->iv = last_iv;
+
+ return nbytes;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ nbytes = __cbc_decrypt(desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ return err;
+}
+
+static inline void u128_to_be128(be128 *dst, const u128 *src)
+{
+ dst->a = cpu_to_be64(src->a);
+ dst->b = cpu_to_be64(src->b);
+}
+
+static inline void be128_to_u128(u128 *dst, const be128 *src)
+{
+ dst->a = be64_to_cpu(src->a);
+ dst->b = be64_to_cpu(src->b);
+}
+
+static inline void u128_inc(u128 *i)
+{
+ i->b++;
+ if (!i->b)
+ i->a++;
+}
+
+static void ctr_crypt_final(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ u8 keystream[CAMELLIA_BLOCK_SIZE];
+ u8 *src = walk->src.virt.addr;
+ u8 *dst = walk->dst.virt.addr;
+ unsigned int nbytes = walk->nbytes;
+ u128 ctrblk;
+
+ memcpy(keystream, src, nbytes);
+ camellia_enc_blk_xor(ctx, keystream, walk->iv);
+ memcpy(dst, keystream, nbytes);
+
+ be128_to_u128(&ctrblk, (be128 *)walk->iv);
+ u128_inc(&ctrblk);
+ u128_to_be128((be128 *)walk->iv, &ctrblk);
+}
+
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 ctrblk;
+ be128 ctrblocks[2];
+
+ be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+ /* Process two block batch */
+ if (nbytes >= bsize * 2) {
+ do {
+ if (dst != src) {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ }
+
+ /* create ctrblks for parallel encrypt */
+ u128_to_be128(&ctrblocks[0], &ctrblk);
+ u128_inc(&ctrblk);
+ u128_to_be128(&ctrblocks[1], &ctrblk);
+ u128_inc(&ctrblk);
+
+ camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
+ (u8 *)ctrblocks);
+
+ src += 2;
+ dst += 2;
+ nbytes -= bsize * 2;
+ } while (nbytes >= bsize * 2);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+
+ /* Handle leftovers */
+ do {
+ if (dst != src)
+ *dst = *src;
+
+ u128_to_be128(&ctrblocks[0], &ctrblk);
+ u128_inc(&ctrblk);
+
+ camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
+
+ src += 1;
+ dst += 1;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+done:
+ u128_to_be128((be128 *)walk->iv, &ctrblk);
+ return nbytes;
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
+
+ while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
+ nbytes = __ctr_crypt(desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ if (walk.nbytes) {
+ ctr_crypt_final(desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+
+ return err;
+}
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ struct camellia_ctx *ctx = priv;
+ int i;
+
+ while (nbytes >= 2 * bsize) {
+ camellia_enc_blk_2way(ctx, srcdst, srcdst);
+ srcdst += bsize * 2;
+ nbytes -= bsize * 2;
+ }
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ camellia_enc_blk(ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ struct camellia_ctx *ctx = priv;
+ int i;
+
+ while (nbytes >= 2 * bsize) {
+ camellia_dec_blk_2way(ctx, srcdst, srcdst);
+ srcdst += bsize * 2;
+ nbytes -= bsize * 2;
+ }
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ camellia_dec_blk(ctx, srcdst, srcdst);
+}
+
+struct camellia_lrw_ctx {
+ struct lrw_table_ctx lrw_table;
+ struct camellia_ctx camellia_ctx;
+};
+
+static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+ int err;
+
+ err = __camellia_setkey(&ctx->camellia_ctx, key,
+ keylen - CAMELLIA_BLOCK_SIZE,
+ &tfm->crt_flags);
+ if (err)
+ return err;
+
+ return lrw_init_table(&ctx->lrw_table,
+ key + keylen - CAMELLIA_BLOCK_SIZE);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[2 * 4];
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &ctx->camellia_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+
+ return lrw_crypt(desc, dst, src, nbytes, &req);
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[2 * 4];
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &ctx->camellia_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+
+ return lrw_crypt(desc, dst, src, nbytes, &req);
+}
+
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ lrw_free_table(&ctx->lrw_table);
+}
+
+struct camellia_xts_ctx {
+ struct camellia_ctx tweak_ctx;
+ struct camellia_ctx crypt_ctx;
+};
+
+static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ u32 *flags = &tfm->crt_flags;
+ int err;
+
+ /* key consists of keys of equal size concatenated, therefore
+ * the length must be even
+ */
+ if (keylen % 2) {
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ /* first half of xts-key is for crypt */
+ err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+ if (err)
+ return err;
+
+ /* second half of xts-key is for tweak */
+ return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+ flags);
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[2 * 4];
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
+ .crypt_ctx = &ctx->crypt_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+
+ return xts_crypt(desc, dst, src, nbytes, &req);
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[2 * 4];
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
+ .crypt_ctx = &ctx->crypt_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+
+ return xts_crypt(desc, dst, src, nbytes, &req);
+}
+
+static struct crypto_alg camellia_algs[6] = { {
+ .cra_name = "camellia",
+ .cra_driver_name = "camellia-asm",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list),
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .cia_setkey = camellia_setkey,
+ .cia_encrypt = camellia_encrypt,
+ .cia_decrypt = camellia_decrypt
+ }
+ }
+}, {
+ .cra_name = "ecb(camellia)",
+ .cra_driver_name = "ecb-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(camellia)",
+ .cra_driver_name = "cbc-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(camellia)",
+ .cra_driver_name = "ctr-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "lrw(camellia)",
+ .cra_driver_name = "lrw-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list),
+ .cra_exit = lrw_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE +
+ CAMELLIA_BLOCK_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE +
+ CAMELLIA_BLOCK_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = lrw_camellia_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
+ .cra_name = "xts(camellia)",
+ .cra_driver_name = "xts-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_xts_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = xts_camellia_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+ },
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ if (boot_cpu_data.x86 == 0x0f) {
+ /*
+ * On Pentium 4, camellia-asm is slower than original assembler
+ * implementation because excessive uses of 64bit rotate and
+ * left-shifts (which are really slow on P4) needed to store and
+ * handle 128bit block in two 64bit registers.
+ */
+ return true;
+ }
+
+ return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
+
+int __init init(void)
+{
+ if (!force && is_blacklisted_cpu()) {
+ printk(KERN_INFO
+ "camellia-x86_64: performance on this CPU "
+ "would be suboptimal: disabling "
+ "camellia-x86_64.\n");
+ return -ENODEV;
+ }
+
+ return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+}
+
+void __exit fini(void)
+{
+ crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
+MODULE_ALIAS("camellia");
+MODULE_ALIAS("camellia-asm");
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c
index b9d0026..493f959 100644
--- a/arch/x86/crypto/crc32c-intel.c
+++ b/arch/x86/crypto/crc32c-intel.c
@@ -31,6 +31,7 @@
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
@@ -173,13 +174,17 @@ static struct shash_alg alg = {
}
};
+static const struct x86_cpu_id crc32c_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
static int __init crc32c_intel_mod_init(void)
{
- if (cpu_has_xmm4_2)
- return crypto_register_shash(&alg);
- else
+ if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
+ return crypto_register_shash(&alg);
}
static void __exit crc32c_intel_mod_fini(void)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 976aa64..b4bf0a6 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -20,6 +20,7 @@
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
#include <asm/i387.h>
+#include <asm/cpu_device_id.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
@@ -294,15 +295,18 @@ static struct ahash_alg ghash_async_alg = {
},
};
+static const struct x86_cpu_id pcmul_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
+
static int __init ghash_pclmulqdqni_mod_init(void)
{
int err;
- if (!cpu_has_pclmulqdq) {
- printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not"
- " detected.\n");
+ if (!x86_match_cpu(pcmul_cpu_id))
return -ENODEV;
- }
err = crypto_register_shash(&ghash_alg);
if (err)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index 4e37677..c00053d 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -463,23 +463,20 @@
pand x0, x4; \
pxor x2, x4;
-#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
- movdqa x2, t3; \
- movdqa x0, t1; \
- unpcklps x3, t3; \
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
movdqa x0, t2; \
- unpcklps x1, t1; \
- unpckhps x1, t2; \
- movdqa t3, x1; \
- unpckhps x3, x2; \
- movdqa t1, x0; \
- movhlps t1, x1; \
- movdqa t2, t1; \
- movlhps t3, x0; \
- movlhps x2, t1; \
- movhlps t2, x2; \
- movdqa x2, x3; \
- movdqa t1, x2;
+ punpckldq x1, x0; \
+ punpckhdq x1, t2; \
+ movdqa x2, t1; \
+ punpckhdq x3, x2; \
+ punpckldq x3, t1; \
+ movdqa x0, x1; \
+ punpcklqdq t1, x0; \
+ punpckhqdq t1, x1; \
+ movdqa t2, x3; \
+ punpcklqdq x2, t2; \
+ punpckhqdq x2, x3; \
+ movdqa t2, x2;
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
movdqu (0*4*4)(in), x0; \
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 7f24a15..3ee1ff0 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -585,23 +585,20 @@
get_key(i, 1, RK1); \
SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
-#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
- movdqa x2, t3; \
- movdqa x0, t1; \
- unpcklps x3, t3; \
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
movdqa x0, t2; \
- unpcklps x1, t1; \
- unpckhps x1, t2; \
- movdqa t3, x1; \
- unpckhps x3, x2; \
- movdqa t1, x0; \
- movhlps t1, x1; \
- movdqa t2, t1; \
- movlhps t3, x0; \
- movlhps x2, t1; \
- movhlps t2, x2; \
- movdqa x2, x3; \
- movdqa t1, x2;
+ punpckldq x1, x0; \
+ punpckhdq x1, t2; \
+ movdqa x2, t1; \
+ punpckhdq x3, x2; \
+ punpckldq x3, t1; \
+ movdqa x0, x1; \
+ punpcklqdq t1, x0; \
+ punpckhqdq t1, x1; \
+ movdqa t2, x3; \
+ punpcklqdq x2, t2; \
+ punpckhqdq x2, x3; \
+ movdqa t2, x2;
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
movdqu (0*4*4)(in), x0; \
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 7955a9b..4b21be8 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -145,28 +145,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ecb_crypt(desc, &walk, false);
}
-static struct crypto_alg blk_ecb_alg = {
- .cra_name = "__ecb-serpent-sse2",
- .cra_driver_name = "__driver-ecb-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-};
-
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
@@ -295,28 +273,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_cbc_alg = {
- .cra_name = "__cbc-serpent-sse2",
- .cra_driver_name = "__driver-cbc-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-};
-
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
dst->a = cpu_to_be64(src->a);
@@ -439,29 +395,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_ctr_alg = {
- .cra_name = "__ctr-serpent-sse2",
- .cra_driver_name = "__driver-ctr-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-};
-
struct crypt_priv {
struct serpent_ctx *ctx;
bool fpu_enabled;
@@ -580,32 +513,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
lrw_free_table(&ctx->lrw_table);
}
-static struct crypto_alg blk_lrw_alg = {
- .cra_name = "__lrw-serpent-sse2",
- .cra_driver_name = "__driver-lrw-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list),
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = lrw_serpent_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-};
-
struct serpent_xts_ctx {
struct serpent_ctx tweak_ctx;
struct serpent_ctx crypt_ctx;
@@ -689,29 +596,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ret;
}
-static struct crypto_alg blk_xts_alg = {
- .cra_name = "__xts-serpent-sse2",
- .cra_driver_name = "__driver-xts-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = xts_serpent_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-};
-
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len)
{
@@ -792,28 +676,133 @@ static void ablk_exit(struct crypto_tfm *tfm)
cryptd_free_ablkcipher(ctx->cryptd_tfm);
}
-static void ablk_init_common(struct crypto_tfm *tfm,
- struct cryptd_ablkcipher *cryptd_tfm)
+static int ablk_init(struct crypto_tfm *tfm)
{
struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct cryptd_ablkcipher *cryptd_tfm;
+ char drv_name[CRYPTO_MAX_ALG_NAME];
+
+ snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+ crypto_tfm_alg_driver_name(tfm));
+
+ cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;
tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-}
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
return 0;
}
-static struct crypto_alg ablk_ecb_alg = {
+static struct crypto_alg serpent_algs[10] = { {
+ .cra_name = "__ecb-serpent-sse2",
+ .cra_driver_name = "__driver-ecb-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__cbc-serpent-sse2",
+ .cra_driver_name = "__driver-cbc-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__ctr-serpent-sse2",
+ .cra_driver_name = "__driver-ctr-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "__lrw-serpent-sse2",
+ .cra_driver_name = "__driver-lrw-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list),
+ .cra_exit = lrw_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE +
+ SERPENT_BLOCK_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE +
+ SERPENT_BLOCK_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = lrw_serpent_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__xts-serpent-sse2",
+ .cra_driver_name = "__driver-xts-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_xts_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
+ .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = xts_serpent_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+ },
+}, {
.cra_name = "ecb(serpent)",
.cra_driver_name = "ecb-serpent-sse2",
.cra_priority = 400,
@@ -823,8 +812,8 @@ static struct crypto_alg ablk_ecb_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
- .cra_init = ablk_ecb_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -835,20 +824,7 @@ static struct crypto_alg ablk_ecb_alg = {
.decrypt = ablk_decrypt,
},
},
-};
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_cbc_alg = {
+}, {
.cra_name = "cbc(serpent)",
.cra_driver_name = "cbc-serpent-sse2",
.cra_priority = 400,
@@ -858,8 +834,8 @@ static struct crypto_alg ablk_cbc_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
- .cra_init = ablk_cbc_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -871,20 +847,7 @@ static struct crypto_alg ablk_cbc_alg = {
.decrypt = ablk_decrypt,
},
},
-};
-
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_ctr_alg = {
+}, {
.cra_name = "ctr(serpent)",
.cra_driver_name = "ctr-serpent-sse2",
.cra_priority = 400,
@@ -894,8 +857,8 @@ static struct crypto_alg ablk_ctr_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
- .cra_init = ablk_ctr_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -908,20 +871,7 @@ static struct crypto_alg ablk_ctr_alg = {
.geniv = "chainiv",
},
},
-};
-
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_lrw_alg = {
+}, {
.cra_name = "lrw(serpent)",
.cra_driver_name = "lrw-serpent-sse2",
.cra_priority = 400,
@@ -931,8 +881,8 @@ static struct crypto_alg ablk_lrw_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
- .cra_init = ablk_lrw_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -946,20 +896,7 @@ static struct crypto_alg ablk_lrw_alg = {
.decrypt = ablk_decrypt,
},
},
-};
-
-static int ablk_xts_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_xts_alg = {
+}, {
.cra_name = "xts(serpent)",
.cra_driver_name = "xts-serpent-sse2",
.cra_priority = 400,
@@ -969,8 +906,8 @@ static struct crypto_alg ablk_xts_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list),
- .cra_init = ablk_xts_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -982,84 +919,21 @@ static struct crypto_alg ablk_xts_alg = {
.decrypt = ablk_decrypt,
},
},
-};
+} };
static int __init serpent_sse2_init(void)
{
- int err;
-
if (!cpu_has_xmm2) {
printk(KERN_INFO "SSE2 instructions are not detected.\n");
return -ENODEV;
}
- err = crypto_register_alg(&blk_ecb_alg);
- if (err)
- goto blk_ecb_err;
- err = crypto_register_alg(&blk_cbc_alg);
- if (err)
- goto blk_cbc_err;
- err = crypto_register_alg(&blk_ctr_alg);
- if (err)
- goto blk_ctr_err;
- err = crypto_register_alg(&ablk_ecb_alg);
- if (err)
- goto ablk_ecb_err;
- err = crypto_register_alg(&ablk_cbc_alg);
- if (err)
- goto ablk_cbc_err;
- err = crypto_register_alg(&ablk_ctr_alg);
- if (err)
- goto ablk_ctr_err;
- err = crypto_register_alg(&blk_lrw_alg);
- if (err)
- goto blk_lrw_err;
- err = crypto_register_alg(&ablk_lrw_alg);
- if (err)
- goto ablk_lrw_err;
- err = crypto_register_alg(&blk_xts_alg);
- if (err)
- goto blk_xts_err;
- err = crypto_register_alg(&ablk_xts_alg);
- if (err)
- goto ablk_xts_err;
- return err;
-
- crypto_unregister_alg(&ablk_xts_alg);
-ablk_xts_err:
- crypto_unregister_alg(&blk_xts_alg);
-blk_xts_err:
- crypto_unregister_alg(&ablk_lrw_alg);
-ablk_lrw_err:
- crypto_unregister_alg(&blk_lrw_alg);
-blk_lrw_err:
- crypto_unregister_alg(&ablk_ctr_alg);
-ablk_ctr_err:
- crypto_unregister_alg(&ablk_cbc_alg);
-ablk_cbc_err:
- crypto_unregister_alg(&ablk_ecb_alg);
-ablk_ecb_err:
- crypto_unregister_alg(&blk_ctr_alg);
-blk_ctr_err:
- crypto_unregister_alg(&blk_cbc_alg);
-blk_cbc_err:
- crypto_unregister_alg(&blk_ecb_alg);
-blk_ecb_err:
- return err;
+ return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
}
static void __exit serpent_sse2_exit(void)
{
- crypto_unregister_alg(&ablk_xts_alg);
- crypto_unregister_alg(&blk_xts_alg);
- crypto_unregister_alg(&ablk_lrw_alg);
- crypto_unregister_alg(&blk_lrw_alg);
- crypto_unregister_alg(&ablk_ctr_alg);
- crypto_unregister_alg(&ablk_cbc_alg);
- crypto_unregister_alg(&ablk_ecb_alg);
- crypto_unregister_alg(&blk_ctr_alg);
- crypto_unregister_alg(&blk_cbc_alg);
- crypto_unregister_alg(&blk_ecb_alg);
+ crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
}
module_init(serpent_sse2_init);
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index dc6b3fb..359ae08 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -68,7 +68,7 @@ static struct crypto_alg alg = {
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 3,
+ .cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(alg.cra_list),
.cra_u = {
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 7fee8c1..408fc0c 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -25,6 +25,7 @@
*
*/
+#include <asm/processor.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -122,28 +123,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
}
-static struct crypto_alg blk_ecb_alg = {
- .cra_name = "ecb(twofish)",
- .cra_driver_name = "ecb-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-};
-
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
@@ -267,29 +246,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_cbc_alg = {
- .cra_name = "cbc(twofish)",
- .cra_driver_name = "cbc-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-};
-
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
dst->a = cpu_to_be64(src->a);
@@ -411,29 +367,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_ctr_alg = {
- .cra_name = "ctr(twofish)",
- .cra_driver_name = "ctr-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-};
-
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = TF_BLOCK_SIZE;
@@ -524,30 +457,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
lrw_free_table(&ctx->lrw_table);
}
-static struct crypto_alg blk_lrw_alg = {
- .cra_name = "lrw(twofish)",
- .cra_driver_name = "lrw-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list),
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = lrw_twofish_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-};
-
struct twofish_xts_ctx {
struct twofish_ctx tweak_ctx;
struct twofish_ctx crypt_ctx;
@@ -614,7 +523,91 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return xts_crypt(desc, dst, src, nbytes, &req);
}
-static struct crypto_alg blk_xts_alg = {
+static struct crypto_alg tf_algs[5] = { {
+ .cra_name = "ecb(twofish)",
+ .cra_driver_name = "ecb-twofish-3way",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(twofish)",
+ .cra_driver_name = "cbc-twofish-3way",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(twofish)",
+ .cra_driver_name = "ctr-twofish-3way",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "lrw(twofish)",
+ .cra_driver_name = "lrw-twofish-3way",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
+ .cra_exit = lrw_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = lrw_twofish_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
.cra_name = "xts(twofish)",
.cra_driver_name = "xts-twofish-3way",
.cra_priority = 300,
@@ -624,7 +617,7 @@ static struct crypto_alg blk_xts_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list),
+ .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE * 2,
@@ -635,50 +628,62 @@ static struct crypto_alg blk_xts_alg = {
.decrypt = xts_decrypt,
},
},
-};
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ if (boot_cpu_data.x86 == 0x06 &&
+ (boot_cpu_data.x86_model == 0x1c ||
+ boot_cpu_data.x86_model == 0x26 ||
+ boot_cpu_data.x86_model == 0x36)) {
+ /*
+ * On Atom, twofish-3way is slower than original assembler
+ * implementation. Twofish-3way trades off some performance in
+ * storing blocks in 64bit registers to allow three blocks to
+ * be processed parallel. Parallel operation then allows gaining
+ * more performance than was trade off, on out-of-order CPUs.
+ * However Atom does not benefit from this parallellism and
+ * should be blacklisted.
+ */
+ return true;
+ }
+
+ if (boot_cpu_data.x86 == 0x0f) {
+ /*
+ * On Pentium 4, twofish-3way is slower than original assembler
+ * implementation because excessive uses of 64bit rotate and
+ * left-shifts (which are really slow on P4) needed to store and
+ * handle 128bit block in two 64bit registers.
+ */
+ return true;
+ }
+
+ return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
int __init init(void)
{
- int err;
+ if (!force && is_blacklisted_cpu()) {
+ printk(KERN_INFO
+ "twofish-x86_64-3way: performance on this CPU "
+ "would be suboptimal: disabling "
+ "twofish-x86_64-3way.\n");
+ return -ENODEV;
+ }
- err = crypto_register_alg(&blk_ecb_alg);
- if (err)
- goto ecb_err;
- err = crypto_register_alg(&blk_cbc_alg);
- if (err)
- goto cbc_err;
- err = crypto_register_alg(&blk_ctr_alg);
- if (err)
- goto ctr_err;
- err = crypto_register_alg(&blk_lrw_alg);
- if (err)
- goto blk_lrw_err;
- err = crypto_register_alg(&blk_xts_alg);
- if (err)
- goto blk_xts_err;
-
- return 0;
-
- crypto_unregister_alg(&blk_xts_alg);
-blk_xts_err:
- crypto_unregister_alg(&blk_lrw_alg);
-blk_lrw_err:
- crypto_unregister_alg(&blk_ctr_alg);
-ctr_err:
- crypto_unregister_alg(&blk_cbc_alg);
-cbc_err:
- crypto_unregister_alg(&blk_ecb_alg);
-ecb_err:
- return err;
+ return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
void __exit fini(void)
{
- crypto_unregister_alg(&blk_xts_alg);
- crypto_unregister_alg(&blk_lrw_alg);
- crypto_unregister_alg(&blk_ctr_alg);
- crypto_unregister_alg(&blk_cbc_alg);
- crypto_unregister_alg(&blk_ecb_alg);
+ crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
module_init(init);
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index 52d0ccf..455646e 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,6 +3,7 @@
#
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
+obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
sysv-$(CONFIG_SYSVIPC) := ipc32.o
obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index fd84387..4c2e59a 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -315,8 +315,14 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
current->mm->free_area_cache = TASK_UNMAPPED_BASE;
current->mm->cached_hole_size = 0;
+ retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
+ if (retval < 0) {
+ /* Someone check-me: is this error path enough? */
+ send_sig(SIGKILL, current, 0);
+ return retval;
+ }
+
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
if (N_MAGIC(ex) == OMAGIC) {
unsigned long text_addr, map_size;
@@ -410,13 +416,6 @@ beyond_if:
set_brk(current->mm->start_brk, current->mm->brk);
- retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0) {
- /* Someone check-me: is this error path enough? */
- send_sig(SIGKILL, current, 0);
- return retval;
- }
-
current->mm->start_stack =
(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
/* start thread */
@@ -519,7 +518,8 @@ out:
static int __init init_aout_binfmt(void)
{
- return register_binfmt(&aout_format);
+ register_binfmt(&aout_format);
+ return 0;
}
static void __exit exit_aout_binfmt(void)
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 6557769..5563ba1 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,6 +24,7 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/ptrace.h>
#include <asm/ia32_unistd.h>
#include <asm/user32.h>
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3e27456..e3e7340 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -14,6 +14,7 @@
#include <asm/segment.h>
#include <asm/irqflags.h>
#include <linux/linkage.h>
+#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -27,8 +28,6 @@
.section .entry.text, "ax"
-#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
-
.macro IA32_ARG_FIXUP noebp=0
movl %edi,%r8d
.if \noebp
@@ -191,7 +190,7 @@ sysexit_from_sys_call:
movl %ebx,%edx /* 3rd arg: 1st syscall arg */
movl %eax,%esi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
@@ -208,12 +207,13 @@ sysexit_from_sys_call:
TRACE_IRQS_ON
sti
movl %eax,%esi /* second arg, syscall return value */
- cmpl $0,%eax /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
+ cmpl $-MAX_ERRNO,%eax /* is it an error ? */
+ jbe 1f
+ movslq %eax, %rsi /* if error sign extend to 64 bits */
+1: setbe %al /* 1 if error, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
- inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
- movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
+ call __audit_syscall_exit
+ movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
cli
TRACE_IRQS_OFF
@@ -447,9 +447,6 @@ ia32_badsys:
movq $-ENOSYS,%rax
jmp ia32_sysret
-quiet_ni_syscall:
- movq $-ENOSYS,%rax
- ret
CFI_ENDPROC
.macro PTREGSCALL label, func, arg
@@ -494,357 +491,3 @@ ia32_ptregs_common:
jmp ia32_sysret /* misbalances the return cache */
CFI_ENDPROC
END(ia32_ptregs_common)
-
- .section .rodata,"a"
- .align 8
-ia32_sys_call_table:
- .quad sys_restart_syscall
- .quad sys_exit
- .quad stub32_fork
- .quad sys_read
- .quad sys_write
- .quad compat_sys_open /* 5 */
- .quad sys_close
- .quad sys32_waitpid
- .quad sys_creat
- .quad sys_link
- .quad sys_unlink /* 10 */
- .quad stub32_execve
- .quad sys_chdir
- .quad compat_sys_time
- .quad sys_mknod
- .quad sys_chmod /* 15 */
- .quad sys_lchown16
- .quad quiet_ni_syscall /* old break syscall holder */
- .quad sys_stat
- .quad sys32_lseek
- .quad sys_getpid /* 20 */
- .quad compat_sys_mount /* mount */
- .quad sys_oldumount /* old_umount */
- .quad sys_setuid16
- .quad sys_getuid16
- .quad compat_sys_stime /* stime */ /* 25 */
- .quad compat_sys_ptrace /* ptrace */
- .quad sys_alarm
- .quad sys_fstat /* (old)fstat */
- .quad sys_pause
- .quad compat_sys_utime /* 30 */
- .quad quiet_ni_syscall /* old stty syscall holder */
- .quad quiet_ni_syscall /* old gtty syscall holder */
- .quad sys_access
- .quad sys_nice
- .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */
- .quad sys_sync
- .quad sys32_kill
- .quad sys_rename
- .quad sys_mkdir
- .quad sys_rmdir /* 40 */
- .quad sys_dup
- .quad sys_pipe
- .quad compat_sys_times
- .quad quiet_ni_syscall /* old prof syscall holder */
- .quad sys_brk /* 45 */
- .quad sys_setgid16
- .quad sys_getgid16
- .quad sys_signal
- .quad sys_geteuid16
- .quad sys_getegid16 /* 50 */
- .quad sys_acct
- .quad sys_umount /* new_umount */
- .quad quiet_ni_syscall /* old lock syscall holder */
- .quad compat_sys_ioctl
- .quad compat_sys_fcntl64 /* 55 */
- .quad quiet_ni_syscall /* old mpx syscall holder */
- .quad sys_setpgid
- .quad quiet_ni_syscall /* old ulimit syscall holder */
- .quad sys_olduname
- .quad sys_umask /* 60 */
- .quad sys_chroot
- .quad compat_sys_ustat
- .quad sys_dup2
- .quad sys_getppid
- .quad sys_getpgrp /* 65 */
- .quad sys_setsid
- .quad sys32_sigaction
- .quad sys_sgetmask
- .quad sys_ssetmask
- .quad sys_setreuid16 /* 70 */
- .quad sys_setregid16
- .quad sys32_sigsuspend
- .quad compat_sys_sigpending
- .quad sys_sethostname
- .quad compat_sys_setrlimit /* 75 */
- .quad compat_sys_old_getrlimit /* old_getrlimit */
- .quad compat_sys_getrusage
- .quad compat_sys_gettimeofday
- .quad compat_sys_settimeofday
- .quad sys_getgroups16 /* 80 */
- .quad sys_setgroups16
- .quad compat_sys_old_select
- .quad sys_symlink
- .quad sys_lstat
- .quad sys_readlink /* 85 */
- .quad sys_uselib
- .quad sys_swapon
- .quad sys_reboot
- .quad compat_sys_old_readdir
- .quad sys32_mmap /* 90 */
- .quad sys_munmap
- .quad sys_truncate
- .quad sys_ftruncate
- .quad sys_fchmod
- .quad sys_fchown16 /* 95 */
- .quad sys_getpriority
- .quad sys_setpriority
- .quad quiet_ni_syscall /* old profil syscall holder */
- .quad compat_sys_statfs
- .quad compat_sys_fstatfs /* 100 */
- .quad sys_ioperm
- .quad compat_sys_socketcall
- .quad sys_syslog
- .quad compat_sys_setitimer
- .quad compat_sys_getitimer /* 105 */
- .quad compat_sys_newstat
- .quad compat_sys_newlstat
- .quad compat_sys_newfstat
- .quad sys_uname
- .quad stub32_iopl /* 110 */
- .quad sys_vhangup
- .quad quiet_ni_syscall /* old "idle" system call */
- .quad sys32_vm86_warning /* vm86old */
- .quad compat_sys_wait4
- .quad sys_swapoff /* 115 */
- .quad compat_sys_sysinfo
- .quad sys32_ipc
- .quad sys_fsync
- .quad stub32_sigreturn
- .quad stub32_clone /* 120 */
- .quad sys_setdomainname
- .quad sys_newuname
- .quad sys_modify_ldt
- .quad compat_sys_adjtimex
- .quad sys32_mprotect /* 125 */
- .quad compat_sys_sigprocmask
- .quad quiet_ni_syscall /* create_module */
- .quad sys_init_module
- .quad sys_delete_module
- .quad quiet_ni_syscall /* 130 get_kernel_syms */
- .quad sys32_quotactl
- .quad sys_getpgid
- .quad sys_fchdir
- .quad quiet_ni_syscall /* bdflush */
- .quad sys_sysfs /* 135 */
- .quad sys_personality
- .quad quiet_ni_syscall /* for afs_syscall */
- .quad sys_setfsuid16
- .quad sys_setfsgid16
- .quad sys_llseek /* 140 */
- .quad compat_sys_getdents
- .quad compat_sys_select
- .quad sys_flock
- .quad sys_msync
- .quad compat_sys_readv /* 145 */
- .quad compat_sys_writev
- .quad sys_getsid
- .quad sys_fdatasync
- .quad compat_sys_sysctl /* sysctl */
- .quad sys_mlock /* 150 */
- .quad sys_munlock
- .quad sys_mlockall
- .quad sys_munlockall
- .quad sys_sched_setparam
- .quad sys_sched_getparam /* 155 */
- .quad sys_sched_setscheduler
- .quad sys_sched_getscheduler
- .quad sys_sched_yield
- .quad sys_sched_get_priority_max
- .quad sys_sched_get_priority_min /* 160 */
- .quad sys32_sched_rr_get_interval
- .quad compat_sys_nanosleep
- .quad sys_mremap
- .quad sys_setresuid16
- .quad sys_getresuid16 /* 165 */
- .quad sys32_vm86_warning /* vm86 */
- .quad quiet_ni_syscall /* query_module */
- .quad sys_poll
- .quad quiet_ni_syscall /* old nfsservctl */
- .quad sys_setresgid16 /* 170 */
- .quad sys_getresgid16
- .quad sys_prctl
- .quad stub32_rt_sigreturn
- .quad sys32_rt_sigaction
- .quad sys32_rt_sigprocmask /* 175 */
- .quad sys32_rt_sigpending
- .quad compat_sys_rt_sigtimedwait
- .quad sys32_rt_sigqueueinfo
- .quad sys_rt_sigsuspend
- .quad sys32_pread /* 180 */
- .quad sys32_pwrite
- .quad sys_chown16
- .quad sys_getcwd
- .quad sys_capget
- .quad sys_capset
- .quad stub32_sigaltstack
- .quad sys32_sendfile
- .quad quiet_ni_syscall /* streams1 */
- .quad quiet_ni_syscall /* streams2 */
- .quad stub32_vfork /* 190 */
- .quad compat_sys_getrlimit
- .quad sys_mmap_pgoff
- .quad sys32_truncate64
- .quad sys32_ftruncate64
- .quad sys32_stat64 /* 195 */
- .quad sys32_lstat64
- .quad sys32_fstat64
- .quad sys_lchown
- .quad sys_getuid
- .quad sys_getgid /* 200 */
- .quad sys_geteuid
- .quad sys_getegid
- .quad sys_setreuid
- .quad sys_setregid
- .quad sys_getgroups /* 205 */
- .quad sys_setgroups
- .quad sys_fchown
- .quad sys_setresuid
- .quad sys_getresuid
- .quad sys_setresgid /* 210 */
- .quad sys_getresgid
- .quad sys_chown
- .quad sys_setuid
- .quad sys_setgid
- .quad sys_setfsuid /* 215 */
- .quad sys_setfsgid
- .quad sys_pivot_root
- .quad sys_mincore
- .quad sys_madvise
- .quad compat_sys_getdents64 /* 220 getdents64 */
- .quad compat_sys_fcntl64
- .quad quiet_ni_syscall /* tux */
- .quad quiet_ni_syscall /* security */
- .quad sys_gettid
- .quad sys32_readahead /* 225 */
- .quad sys_setxattr
- .quad sys_lsetxattr
- .quad sys_fsetxattr
- .quad sys_getxattr
- .quad sys_lgetxattr /* 230 */
- .quad sys_fgetxattr
- .quad sys_listxattr
- .quad sys_llistxattr
- .quad sys_flistxattr
- .quad sys_removexattr /* 235 */
- .quad sys_lremovexattr
- .quad sys_fremovexattr
- .quad sys_tkill
- .quad sys_sendfile64
- .quad compat_sys_futex /* 240 */
- .quad compat_sys_sched_setaffinity
- .quad compat_sys_sched_getaffinity
- .quad sys_set_thread_area
- .quad sys_get_thread_area
- .quad compat_sys_io_setup /* 245 */
- .quad sys_io_destroy
- .quad compat_sys_io_getevents
- .quad compat_sys_io_submit
- .quad sys_io_cancel
- .quad sys32_fadvise64 /* 250 */
- .quad quiet_ni_syscall /* free_huge_pages */
- .quad sys_exit_group
- .quad sys32_lookup_dcookie
- .quad sys_epoll_create
- .quad sys_epoll_ctl /* 255 */
- .quad sys_epoll_wait
- .quad sys_remap_file_pages
- .quad sys_set_tid_address
- .quad compat_sys_timer_create
- .quad compat_sys_timer_settime /* 260 */
- .quad compat_sys_timer_gettime
- .quad sys_timer_getoverrun
- .quad sys_timer_delete
- .quad compat_sys_clock_settime
- .quad compat_sys_clock_gettime /* 265 */
- .quad compat_sys_clock_getres
- .quad compat_sys_clock_nanosleep
- .quad compat_sys_statfs64
- .quad compat_sys_fstatfs64
- .quad sys_tgkill /* 270 */
- .quad compat_sys_utimes
- .quad sys32_fadvise64_64
- .quad quiet_ni_syscall /* sys_vserver */
- .quad sys_mbind
- .quad compat_sys_get_mempolicy /* 275 */
- .quad sys_set_mempolicy
- .quad compat_sys_mq_open
- .quad sys_mq_unlink
- .quad compat_sys_mq_timedsend
- .quad compat_sys_mq_timedreceive /* 280 */
- .quad compat_sys_mq_notify
- .quad compat_sys_mq_getsetattr
- .quad compat_sys_kexec_load /* reserved for kexec */
- .quad compat_sys_waitid
- .quad quiet_ni_syscall /* 285: sys_altroot */
- .quad sys_add_key
- .quad sys_request_key
- .quad sys_keyctl
- .quad sys_ioprio_set
- .quad sys_ioprio_get /* 290 */
- .quad sys_inotify_init
- .quad sys_inotify_add_watch
- .quad sys_inotify_rm_watch
- .quad sys_migrate_pages
- .quad compat_sys_openat /* 295 */
- .quad sys_mkdirat
- .quad sys_mknodat
- .quad sys_fchownat
- .quad compat_sys_futimesat
- .quad sys32_fstatat /* 300 */
- .quad sys_unlinkat
- .quad sys_renameat
- .quad sys_linkat
- .quad sys_symlinkat
- .quad sys_readlinkat /* 305 */
- .quad sys_fchmodat
- .quad sys_faccessat
- .quad compat_sys_pselect6
- .quad compat_sys_ppoll
- .quad sys_unshare /* 310 */
- .quad compat_sys_set_robust_list
- .quad compat_sys_get_robust_list
- .quad sys_splice
- .quad sys32_sync_file_range
- .quad sys_tee /* 315 */
- .quad compat_sys_vmsplice
- .quad compat_sys_move_pages
- .quad sys_getcpu
- .quad sys_epoll_pwait
- .quad compat_sys_utimensat /* 320 */
- .quad compat_sys_signalfd
- .quad sys_timerfd_create
- .quad sys_eventfd
- .quad sys32_fallocate
- .quad compat_sys_timerfd_settime /* 325 */
- .quad compat_sys_timerfd_gettime
- .quad compat_sys_signalfd4
- .quad sys_eventfd2
- .quad sys_epoll_create1
- .quad sys_dup3 /* 330 */
- .quad sys_pipe2
- .quad sys_inotify_init1
- .quad compat_sys_preadv
- .quad compat_sys_pwritev
- .quad compat_sys_rt_tgsigqueueinfo /* 335 */
- .quad sys_perf_event_open
- .quad compat_sys_recvmmsg
- .quad sys_fanotify_init
- .quad sys32_fanotify_mark
- .quad sys_prlimit64 /* 340 */
- .quad sys_name_to_handle_at
- .quad compat_sys_open_by_handle_at
- .quad compat_sys_clock_adjtime
- .quad sys_syncfs
- .quad compat_sys_sendmmsg /* 345 */
- .quad sys_setns
- .quad compat_sys_process_vm_readv
- .quad compat_sys_process_vm_writev
-ia32_syscall_end:
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c
new file mode 100644
index 0000000..51ecd5b
--- /dev/null
+++ b/arch/x86/ia32/nosyscall.c
@@ -0,0 +1,7 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+long compat_ni_syscall(void)
+{
+ return -ENOSYS;
+}
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c
new file mode 100644
index 0000000..4754ba0
--- /dev/null
+++ b/arch/x86/ia32/syscall_ia32.c
@@ -0,0 +1,25 @@
+/* System call table for ia32 emulation. */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <linux/cache.h>
+#include <asm/asm-offsets.h>
+
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
+#include <asm/syscalls_32.h>
+#undef __SYSCALL_I386
+
+#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
+
+typedef void (*sys_call_ptr_t)(void);
+
+extern void compat_ni_syscall(void);
+
+const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
+#include <asm/syscalls_32.h>
+};
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 6fa90a8..b57e6a4 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -19,7 +19,8 @@ header-y += processor-flags.h
header-y += ptrace-abi.h
header-y += sigcontext32.h
header-y += ucontext.h
-header-y += unistd_32.h
-header-y += unistd_64.h
header-y += vm86.h
header-y += vsyscall.h
+
+genhdr-y += unistd_32.h
+genhdr-y += unistd_64.h
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 37ad100..49331be 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -145,6 +145,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define ASM_OUTPUT2(a...) a
+/*
+ * use this macro if you need clobbers but no inputs in
+ * alternative_{input,io,call}()
+ */
+#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+
struct paravirt_patch_site;
#ifdef CONFIG_PARAVIRT
void apply_paravirt(struct paravirt_patch_site *start,
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3ab9bdd..a9371c9 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -288,6 +288,7 @@ struct apic {
int (*probe)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
+ int (*apic_id_valid)(int apicid);
int (*apic_id_registered)(void);
u32 irq_delivery_mode;
@@ -532,6 +533,11 @@ static inline unsigned int read_apic_id(void)
return apic->get_apic_id(reg);
}
+static inline int default_apic_id_valid(int apicid)
+{
+ return x2apic_mode || (apicid < 255);
+}
+
extern void default_setup_apic_routing(void);
extern struct apic apic_noop;
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 24098aa..1981199 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,13 +14,52 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) }
+#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
+#ifndef ATOMIC64_EXPORT
+#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
+#else
+#define ATOMIC64_DECL_ONE(sym) __ATOMIC64_DECL(sym); \
+ ATOMIC64_EXPORT(atomic64_##sym)
+#endif
+
#ifdef CONFIG_X86_CMPXCHG64
-#define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8"
+#define __alternative_atomic64(f, g, out, in...) \
+ asm volatile("call %P[func]" \
+ : out : [func] "i" (atomic64_##g##_cx8), ## in)
+
+#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
-#define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8)
+#define __alternative_atomic64(f, g, out, in...) \
+ alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
+ X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
+
+#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
+ ATOMIC64_DECL_ONE(sym##_386)
+
+ATOMIC64_DECL_ONE(add_386);
+ATOMIC64_DECL_ONE(sub_386);
+ATOMIC64_DECL_ONE(inc_386);
+ATOMIC64_DECL_ONE(dec_386);
#endif
-#define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f)
+#define alternative_atomic64(f, out, in...) \
+ __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
+
+ATOMIC64_DECL(read);
+ATOMIC64_DECL(set);
+ATOMIC64_DECL(xchg);
+ATOMIC64_DECL(add_return);
+ATOMIC64_DECL(sub_return);
+ATOMIC64_DECL(inc_return);
+ATOMIC64_DECL(dec_return);
+ATOMIC64_DECL(dec_if_positive);
+ATOMIC64_DECL(inc_not_zero);
+ATOMIC64_DECL(add_unless);
+
+#undef ATOMIC64_DECL
+#undef ATOMIC64_DECL_ONE
+#undef __ATOMIC64_DECL
+#undef ATOMIC64_EXPORT
/**
* atomic64_cmpxchg - cmpxchg atomic64 variable
@@ -50,11 +89,9 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
long long o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
- asm volatile(ATOMIC64_ALTERNATIVE(xchg)
- : "=A" (o), "+b" (low), "+c" (high)
- : "S" (v)
- : "memory"
- );
+ alternative_atomic64(xchg, "=&A" (o),
+ "S" (v), "b" (low), "c" (high)
+ : "memory");
return o;
}
@@ -69,11 +106,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
- asm volatile(ATOMIC64_ALTERNATIVE(set)
- : "+b" (low), "+c" (high)
- : "S" (v)
- : "eax", "edx", "memory"
- );
+ alternative_atomic64(set, /* no output */,
+ "S" (v), "b" (low), "c" (high)
+ : "eax", "edx", "memory");
}
/**
@@ -82,13 +117,10 @@ static inline void atomic64_set(atomic64_t *v, long long i)
*
* Atomically reads the value of @v and returns it.
*/
-static inline long long atomic64_read(atomic64_t *v)
+static inline long long atomic64_read(const atomic64_t *v)
{
long long r;
- asm volatile(ATOMIC64_ALTERNATIVE(read)
- : "=A" (r), "+c" (v)
- : : "memory"
- );
+ alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
return r;
}
@@ -101,10 +133,9 @@ static inline long long atomic64_read(atomic64_t *v)
*/
static inline long long atomic64_add_return(long long i, atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE(add_return)
- : "+A" (i), "+c" (v)
- : : "memory"
- );
+ alternative_atomic64(add_return,
+ ASM_OUTPUT2("+A" (i), "+c" (v)),
+ ASM_NO_INPUT_CLOBBER("memory"));
return i;
}
@@ -113,32 +144,25 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
*/
static inline long long atomic64_sub_return(long long i, atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE(sub_return)
- : "+A" (i), "+c" (v)
- : : "memory"
- );
+ alternative_atomic64(sub_return,
+ ASM_OUTPUT2("+A" (i), "+c" (v)),
+ ASM_NO_INPUT_CLOBBER("memory"));
return i;
}
static inline long long atomic64_inc_return(atomic64_t *v)
{
long long a;
- asm volatile(ATOMIC64_ALTERNATIVE(inc_return)
- : "=A" (a)
- : "S" (v)
- : "memory", "ecx"
- );
+ alternative_atomic64(inc_return, "=&A" (a),
+ "S" (v) : "memory", "ecx");
return a;
}
static inline long long atomic64_dec_return(atomic64_t *v)
{
long long a;
- asm volatile(ATOMIC64_ALTERNATIVE(dec_return)
- : "=A" (a)
- : "S" (v)
- : "memory", "ecx"
- );
+ alternative_atomic64(dec_return, "=&A" (a),
+ "S" (v) : "memory", "ecx");
return a;
}
@@ -151,10 +175,9 @@ static inline long long atomic64_dec_return(atomic64_t *v)
*/
static inline long long atomic64_add(long long i, atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return)
- : "+A" (i), "+c" (v)
- : : "memory"
- );
+ __alternative_atomic64(add, add_return,
+ ASM_OUTPUT2("+A" (i), "+c" (v)),
+ ASM_NO_INPUT_CLOBBER("memory"));
return i;
}
@@ -167,10 +190,9 @@ static inline long long atomic64_add(long long i, atomic64_t *v)
*/
static inline long long atomic64_sub(long long i, atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return)
- : "+A" (i), "+c" (v)
- : : "memory"
- );
+ __alternative_atomic64(sub, sub_return,
+ ASM_OUTPUT2("+A" (i), "+c" (v)),
+ ASM_NO_INPUT_CLOBBER("memory"));
return i;
}
@@ -196,10 +218,8 @@ static inline int atomic64_sub_and_test(long long i, atomic64_t *v)
*/
static inline void atomic64_inc(atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return)
- : : "S" (v)
- : "memory", "eax", "ecx", "edx"
- );
+ __alternative_atomic64(inc, inc_return, /* no output */,
+ "S" (v) : "memory", "eax", "ecx", "edx");
}
/**
@@ -210,10 +230,8 @@ static inline void atomic64_inc(atomic64_t *v)
*/
static inline void atomic64_dec(atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return)
- : : "S" (v)
- : "memory", "eax", "ecx", "edx"
- );
+ __alternative_atomic64(dec, dec_return, /* no output */,
+ "S" (v) : "memory", "eax", "ecx", "edx");
}
/**
@@ -263,15 +281,15 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v)
* @u: ...unless v is equal to u.
*
* Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
+ * Returns non-zero if the add was done, zero otherwise.
*/
static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
{
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
- asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t"
- : "+A" (a), "+c" (v), "+S" (low), "+D" (high)
- : : "memory");
+ alternative_atomic64(add_unless,
+ ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
+ "S" (v) : "memory");
return (int)a;
}
@@ -279,26 +297,20 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
static inline int atomic64_inc_not_zero(atomic64_t *v)
{
int r;
- asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero)
- : "=a" (r)
- : "S" (v)
- : "ecx", "edx", "memory"
- );
+ alternative_atomic64(inc_not_zero, "=&a" (r),
+ "S" (v) : "ecx", "edx", "memory");
return r;
}
static inline long long atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
- asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive)
- : "=A" (r)
- : "S" (v)
- : "ecx", "memory"
- );
+ alternative_atomic64(dec_if_positive, "=&A" (r),
+ "S" (v) : "ecx", "memory");
return r;
}
-#undef ATOMIC64_ALTERNATIVE
-#undef ATOMIC64_ALTERNATIVE_
+#undef alternative_atomic64
+#undef __alternative_atomic64
#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index e020d88..2f90c51 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -64,6 +64,8 @@ struct setup_header {
__u32 payload_offset;
__u32 payload_length;
__u64 setup_data;
+ __u64 pref_address;
+ __u32 init_size;
} __attribute__((packed));
struct sys_desc_table {
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 0c9fa27..b3b7332 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -145,13 +145,13 @@ extern void __add_wrong_size(void)
#ifdef __HAVE_ARCH_CMPXCHG
#define cmpxchg(ptr, old, new) \
- __cmpxchg((ptr), (old), (new), sizeof(*ptr))
+ __cmpxchg(ptr, old, new, sizeof(*(ptr)))
#define sync_cmpxchg(ptr, old, new) \
- __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
+ __sync_cmpxchg(ptr, old, new, sizeof(*(ptr)))
#define cmpxchg_local(ptr, old, new) \
- __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
+ __cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
#endif
/*
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
new file mode 100644
index 0000000..ff501e5
--- /dev/null
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -0,0 +1,13 @@
+#ifndef _CPU_DEVICE_ID
+#define _CPU_DEVICE_ID 1
+
+/*
+ * Declare drivers belonging to specific x86 CPUs
+ * Similar in spirit to pci_device_id and related PCI functions
+ */
+
+#include <linux/mod_devicetable.h>
+
+extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 17c5d4b..340ee49 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -159,6 +159,7 @@
#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE (6*32+17) /* translation cache extension */
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
@@ -176,6 +177,7 @@
#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
+#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
@@ -198,10 +200,13 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 078ad0c..b903d5e 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
extern void hw_breakpoint_restore(void);
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(int, debug_stack_usage);
+static inline void debug_stack_usage_inc(void)
+{
+ __get_cpu_var(debug_stack_usage)++;
+}
+static inline void debug_stack_usage_dec(void)
+{
+ __get_cpu_var(debug_stack_usage)--;
+}
+int is_debug_stack(unsigned long addr);
+void debug_stack_set_zero(void);
+void debug_stack_reset(void);
+#else /* !X86_64 */
+static inline int is_debug_stack(unsigned long addr) { return 0; }
+static inline void debug_stack_set_zero(void) { }
+static inline void debug_stack_reset(void) { }
+static inline void debug_stack_usage_inc(void) { }
+static inline void debug_stack_usage_dec(void) { }
+#endif /* X86_64 */
+
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 41935fa..e95822d 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
desc->limit = (limit >> 16) & 0xf;
}
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+ gate_desc s;
+
+ pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+ write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
+
static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg)
{
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 7093e4a..c9dcc18 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,6 +3,8 @@
#ifdef CONFIG_X86_32
+#define EFI_LOADER_SIGNATURE "EL32"
+
extern unsigned long asmlinkage efi_call_phys(void *, ...);
#define efi_call_phys0(f) efi_call_phys(f)
@@ -37,6 +39,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
#else /* !CONFIG_X86_32 */
+#define EFI_LOADER_SIGNATURE "EL64"
+
extern u64 efi_call0(void *fp);
extern u64 efi_call1(void *fp, u64 arg1);
extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
@@ -91,7 +95,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
extern int add_efi_memmap;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern void efi_memblock_x86_reserve_range(void);
+extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 460c74e..4da3c0c 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -117,7 +117,7 @@ enum fixed_addresses {
#endif
FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
-#ifdef CONFIG_X86_MRST
+#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
__end_of_permanent_fixed_addresses,
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
new file mode 100644
index 0000000..4fa8815
--- /dev/null
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -0,0 +1,520 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _FPU_INTERNAL_H
+#define _FPU_INTERNAL_H
+
+#include <linux/kernel_stat.h>
+#include <linux/regset.h>
+#include <linux/slab.h>
+#include <asm/asm.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/uaccess.h>
+#include <asm/xsave.h>
+
+extern unsigned int sig_xstate_size;
+extern void fpu_init(void);
+
+DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
+
+extern user_regset_active_fn fpregs_active, xfpregs_active;
+extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
+ xstateregs_get;
+extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
+ xstateregs_set;
+
+
+/*
+ * xstateregs_active == fpregs_active. Please refer to the comment
+ * at the definition of fpregs_active.
+ */
+#define xstateregs_active fpregs_active
+
+extern struct _fpx_sw_bytes fx_sw_reserved;
+#ifdef CONFIG_IA32_EMULATION
+extern unsigned int sig_xstate_ia32_size;
+extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
+struct _fpstate_ia32;
+struct _xstate_ia32;
+extern int save_i387_xstate_ia32(void __user *buf);
+extern int restore_i387_xstate_ia32(void __user *buf);
+#endif
+
+#ifdef CONFIG_MATH_EMULATION
+extern void finit_soft_fpu(struct i387_soft_struct *soft);
+#else
+static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
+#endif
+
+#define X87_FSW_ES (1 << 7) /* Exception Summary */
+
+static __always_inline __pure bool use_xsaveopt(void)
+{
+ return static_cpu_has(X86_FEATURE_XSAVEOPT);
+}
+
+static __always_inline __pure bool use_xsave(void)
+{
+ return static_cpu_has(X86_FEATURE_XSAVE);
+}
+
+static __always_inline __pure bool use_fxsr(void)
+{
+ return static_cpu_has(X86_FEATURE_FXSR);
+}
+
+extern void __sanitize_i387_state(struct task_struct *);
+
+static inline void sanitize_i387_state(struct task_struct *tsk)
+{
+ if (!use_xsaveopt())
+ return;
+ __sanitize_i387_state(tsk);
+}
+
+#ifdef CONFIG_X86_64
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+{
+ int err;
+
+ /* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+ asm volatile("1: fxrstorq %[fx]\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [err] "=r" (err)
+ : [fx] "m" (*fx), "0" (0));
+#else
+ asm volatile("1: rex64/fxrstor (%[fx])\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [err] "=r" (err)
+ : [fx] "R" (fx), "m" (*fx), "0" (0));
+#endif
+ return err;
+}
+
+static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
+{
+ int err;
+
+ /*
+ * Clear the bytes not touched by the fxsave and reserved
+ * for the SW usage.
+ */
+ err = __clear_user(&fx->sw_reserved,
+ sizeof(struct _fpx_sw_bytes));
+ if (unlikely(err))
+ return -EFAULT;
+
+ /* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+ asm volatile("1: fxsaveq %[fx]\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [err] "=r" (err), [fx] "=m" (*fx)
+ : "0" (0));
+#else
+ asm volatile("1: rex64/fxsave (%[fx])\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [err] "=r" (err), "=m" (*fx)
+ : [fx] "R" (fx), "0" (0));
+#endif
+ if (unlikely(err) &&
+ __clear_user(fx, sizeof(struct i387_fxsave_struct)))
+ err = -EFAULT;
+ /* No need to clear here because the caller clears USED_MATH */
+ return err;
+}
+
+static inline void fpu_fxsave(struct fpu *fpu)
+{
+ /* Using "rex64; fxsave %0" is broken because, if the memory operand
+ uses any extended registers for addressing, a second REX prefix
+ will be generated (to the assembler, rex64 followed by semicolon
+ is a separate instruction), and hence the 64-bitness is lost. */
+
+#ifdef CONFIG_AS_FXSAVEQ
+ /* Using "fxsaveq %0" would be the ideal choice, but is only supported
+ starting with gas 2.16. */
+ __asm__ __volatile__("fxsaveq %0"
+ : "=m" (fpu->state->fxsave));
+#else
+ /* Using, as a workaround, the properly prefixed form below isn't
+ accepted by any binutils version so far released, complaining that
+ the same type of prefix is used twice if an extended register is
+ needed for addressing (fix submitted to mainline 2005-11-21).
+ asm volatile("rex64/fxsave %0"
+ : "=m" (fpu->state->fxsave));
+ This, however, we can work around by forcing the compiler to select
+ an addressing mode that doesn't require extended registers. */
+ asm volatile("rex64/fxsave (%[fx])"
+ : "=m" (fpu->state->fxsave)
+ : [fx] "R" (&fpu->state->fxsave));
+#endif
+}
+
+#else /* CONFIG_X86_32 */
+
+/* perform fxrstor iff the processor has extended states, otherwise frstor */
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+{
+ /*
+ * The "nop" is needed to make the instructions the same
+ * length.
+ */
+ alternative_input(
+ "nop ; frstor %1",
+ "fxrstor %1",
+ X86_FEATURE_FXSR,
+ "m" (*fx));
+
+ return 0;
+}
+
+static inline void fpu_fxsave(struct fpu *fpu)
+{
+ asm volatile("fxsave %[fx]"
+ : [fx] "=m" (fpu->state->fxsave));
+}
+
+#endif /* CONFIG_X86_64 */
+
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
+ */
+static inline int fpu_save_init(struct fpu *fpu)
+{
+ if (use_xsave()) {
+ fpu_xsave(fpu);
+
+ /*
+ * xsave header may indicate the init state of the FP.
+ */
+ if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
+ return 1;
+ } else if (use_fxsr()) {
+ fpu_fxsave(fpu);
+ } else {
+ asm volatile("fnsave %[fx]; fwait"
+ : [fx] "=m" (fpu->state->fsave));
+ return 0;
+ }
+
+ /*
+ * If exceptions are pending, we need to clear them so
+ * that we don't randomly get exceptions later.
+ *
+ * FIXME! Is this perhaps only true for the old-style
+ * irq13 case? Maybe we could leave the x87 state
+ * intact otherwise?
+ */
+ if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
+ asm volatile("fnclex");
+ return 0;
+ }
+ return 1;
+}
+
+static inline int __save_init_fpu(struct task_struct *tsk)
+{
+ return fpu_save_init(&tsk->thread.fpu);
+}
+
+static inline int fpu_fxrstor_checking(struct fpu *fpu)
+{
+ return fxrstor_checking(&fpu->state->fxsave);
+}
+
+static inline int fpu_restore_checking(struct fpu *fpu)
+{
+ if (use_xsave())
+ return fpu_xrstor_checking(fpu);
+ else
+ return fpu_fxrstor_checking(fpu);
+}
+
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. "m" is a random variable that should be in L1 */
+ alternative_input(
+ ASM_NOP8 ASM_NOP2,
+ "emms\n\t" /* clear stack tags */
+ "fildl %P[addr]", /* set F?P to defined value */
+ X86_FEATURE_FXSAVE_LEAK,
+ [addr] "m" (tsk->thread.fpu.has_fpu));
+
+ return fpu_restore_checking(&tsk->thread.fpu);
+}
+
+/*
+ * Software FPU state helpers. Careful: these need to
+ * be preemption protection *and* they need to be
+ * properly paired with the CR0.TS changes!
+ */
+static inline int __thread_has_fpu(struct task_struct *tsk)
+{
+ return tsk->thread.fpu.has_fpu;
+}
+
+/* Must be paired with an 'stts' after! */
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
+{
+ tsk->thread.fpu.has_fpu = 0;
+ percpu_write(fpu_owner_task, NULL);
+}
+
+/* Must be paired with a 'clts' before! */
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
+{
+ tsk->thread.fpu.has_fpu = 1;
+ percpu_write(fpu_owner_task, tsk);
+}
+
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+static inline void __thread_fpu_end(struct task_struct *tsk)
+{
+ __thread_clear_has_fpu(tsk);
+ stts();
+}
+
+static inline void __thread_fpu_begin(struct task_struct *tsk)
+{
+ clts();
+ __thread_set_has_fpu(tsk);
+}
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ * - switch_fpu_prepare() saves the old state and
+ * sets the new state of the CR0.TS bit. This is
+ * done within the context of the old process.
+ *
+ * - switch_fpu_finish() restores the new state as
+ * necessary.
+ */
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+ return new == percpu_read_stable(fpu_owner_task) &&
+ cpu == new->thread.fpu.last_cpu;
+}
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
+{
+ fpu_switch_t fpu;
+
+ fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+ if (__thread_has_fpu(old)) {
+ if (!__save_init_fpu(old))
+ cpu = ~0;
+ old->thread.fpu.last_cpu = cpu;
+ old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
+
+ /* Don't change CR0.TS if we just switch! */
+ if (fpu.preload) {
+ new->fpu_counter++;
+ __thread_set_has_fpu(new);
+ prefetch(new->thread.fpu.state);
+ } else
+ stts();
+ } else {
+ old->fpu_counter = 0;
+ old->thread.fpu.last_cpu = ~0;
+ if (fpu.preload) {
+ new->fpu_counter++;
+ if (fpu_lazy_restore(new, cpu))
+ fpu.preload = 0;
+ else
+ prefetch(new->thread.fpu.state);
+ __thread_fpu_begin(new);
+ }
+ }
+ return fpu;
+}
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
+{
+ if (fpu.preload) {
+ if (unlikely(restore_fpu_checking(new)))
+ __thread_fpu_end(new);
+ }
+}
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
+static inline void __clear_fpu(struct task_struct *tsk)
+{
+ if (__thread_has_fpu(tsk)) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ __thread_fpu_end(tsk);
+ }
+}
+
+/*
+ * The actual user_fpu_begin/end() functions
+ * need to be preemption-safe.
+ *
+ * NOTE! user_fpu_end() must be used only after you
+ * have saved the FP state, and user_fpu_begin() must
+ * be used only immediately before restoring it.
+ * These functions do not do any save/restore on
+ * their own.
+ */
+static inline void user_fpu_end(void)
+{
+ preempt_disable();
+ __thread_fpu_end(current);
+ preempt_enable();
+}
+
+static inline void user_fpu_begin(void)
+{
+ preempt_disable();
+ if (!user_has_fpu())
+ __thread_fpu_begin(current);
+ preempt_enable();
+}
+
+/*
+ * These disable preemption on their own and are safe
+ */
+static inline void save_init_fpu(struct task_struct *tsk)
+{
+ WARN_ON_ONCE(!__thread_has_fpu(tsk));
+ preempt_disable();
+ __save_init_fpu(tsk);
+ __thread_fpu_end(tsk);
+ preempt_enable();
+}
+
+static inline void clear_fpu(struct task_struct *tsk)
+{
+ preempt_disable();
+ __clear_fpu(tsk);
+ preempt_enable();
+}
+
+/*
+ * i387 state interaction
+ */
+static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
+{
+ if (cpu_has_fxsr) {
+ return tsk->thread.fpu.state->fxsave.cwd;
+ } else {
+ return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
+ }
+}
+
+static inline unsigned short get_fpu_swd(struct task_struct *tsk)
+{
+ if (cpu_has_fxsr) {
+ return tsk->thread.fpu.state->fxsave.swd;
+ } else {
+ return (unsigned short)tsk->thread.fpu.state->fsave.swd;
+ }
+}
+
+static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
+{
+ if (cpu_has_xmm) {
+ return tsk->thread.fpu.state->fxsave.mxcsr;
+ } else {
+ return MXCSR_DEFAULT;
+ }
+}
+
+static bool fpu_allocated(struct fpu *fpu)
+{
+ return fpu->state != NULL;
+}
+
+static inline int fpu_alloc(struct fpu *fpu)
+{
+ if (fpu_allocated(fpu))
+ return 0;
+ fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
+ if (!fpu->state)
+ return -ENOMEM;
+ WARN_ON((unsigned long)fpu->state & 15);
+ return 0;
+}
+
+static inline void fpu_free(struct fpu *fpu)
+{
+ if (fpu->state) {
+ kmem_cache_free(task_xstate_cachep, fpu->state);
+ fpu->state = NULL;
+ }
+}
+
+static inline void fpu_copy(struct fpu *dst, struct fpu *src)
+{
+ memcpy(dst->state, src->state, xstate_size);
+}
+
+extern void fpu_finit(struct fpu *fpu);
+
+#endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index da0b3ca..382f75d 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -7,7 +7,6 @@
typedef struct {
unsigned int __softirq_pending;
unsigned int __nmi_count; /* arch dependent */
- unsigned int irq0_irqs;
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int irq_spurious_count;
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 3bd0402..302a323 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -61,7 +61,7 @@ void *kmap(struct page *page);
void kunmap(struct page *page);
void *kmap_atomic_prot(struct page *page, pgprot_t prot);
-void *__kmap_atomic(struct page *page);
+void *kmap_atomic(struct page *page);
void __kunmap_atomic(void *kvaddr);
void *kmap_atomic_pfn(unsigned long pfn);
void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 6919e93..7ce0798 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -13,323 +13,19 @@
#ifndef __ASSEMBLY__
#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/regset.h>
#include <linux/hardirq.h>
-#include <linux/slab.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/uaccess.h>
-#include <asm/xsave.h>
+#include <asm/system.h>
+
+struct pt_regs;
+struct user_i387_struct;
-extern unsigned int sig_xstate_size;
-extern void fpu_init(void);
-extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
-extern void __math_state_restore(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
+extern void math_state_restore(void);
-extern user_regset_active_fn fpregs_active, xfpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
- xstateregs_get;
-extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
- xstateregs_set;
-
-/*
- * xstateregs_active == fpregs_active. Please refer to the comment
- * at the definition of fpregs_active.
- */
-#define xstateregs_active fpregs_active
-
-extern struct _fpx_sw_bytes fx_sw_reserved;
-#ifdef CONFIG_IA32_EMULATION
-extern unsigned int sig_xstate_ia32_size;
-extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
-struct _fpstate_ia32;
-struct _xstate_ia32;
-extern int save_i387_xstate_ia32(void __user *buf);
-extern int restore_i387_xstate_ia32(void __user *buf);
-#endif
-
-#ifdef CONFIG_MATH_EMULATION
-extern void finit_soft_fpu(struct i387_soft_struct *soft);
-#else
-static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
-#endif
-
-#define X87_FSW_ES (1 << 7) /* Exception Summary */
-
-static __always_inline __pure bool use_xsaveopt(void)
-{
- return static_cpu_has(X86_FEATURE_XSAVEOPT);
-}
-
-static __always_inline __pure bool use_xsave(void)
-{
- return static_cpu_has(X86_FEATURE_XSAVE);
-}
-
-static __always_inline __pure bool use_fxsr(void)
-{
- return static_cpu_has(X86_FEATURE_FXSR);
-}
-
-extern void __sanitize_i387_state(struct task_struct *);
-
-static inline void sanitize_i387_state(struct task_struct *tsk)
-{
- if (!use_xsaveopt())
- return;
- __sanitize_i387_state(tsk);
-}
-
-#ifdef CONFIG_X86_64
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
-{
- int err;
-
- /* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
- asm volatile("1: fxrstorq %[fx]\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err)
- : [fx] "m" (*fx), "0" (0));
-#else
- asm volatile("1: rex64/fxrstor (%[fx])\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err)
- : [fx] "R" (fx), "m" (*fx), "0" (0));
-#endif
- return err;
-}
-
-static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
-{
- int err;
-
- /*
- * Clear the bytes not touched by the fxsave and reserved
- * for the SW usage.
- */
- err = __clear_user(&fx->sw_reserved,
- sizeof(struct _fpx_sw_bytes));
- if (unlikely(err))
- return -EFAULT;
-
- /* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
- asm volatile("1: fxsaveq %[fx]\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err), [fx] "=m" (*fx)
- : "0" (0));
-#else
- asm volatile("1: rex64/fxsave (%[fx])\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err), "=m" (*fx)
- : [fx] "R" (fx), "0" (0));
-#endif
- if (unlikely(err) &&
- __clear_user(fx, sizeof(struct i387_fxsave_struct)))
- err = -EFAULT;
- /* No need to clear here because the caller clears USED_MATH */
- return err;
-}
-
-static inline void fpu_fxsave(struct fpu *fpu)
-{
- /* Using "rex64; fxsave %0" is broken because, if the memory operand
- uses any extended registers for addressing, a second REX prefix
- will be generated (to the assembler, rex64 followed by semicolon
- is a separate instruction), and hence the 64-bitness is lost. */
-
-#ifdef CONFIG_AS_FXSAVEQ
- /* Using "fxsaveq %0" would be the ideal choice, but is only supported
- starting with gas 2.16. */
- __asm__ __volatile__("fxsaveq %0"
- : "=m" (fpu->state->fxsave));
-#else
- /* Using, as a workaround, the properly prefixed form below isn't
- accepted by any binutils version so far released, complaining that
- the same type of prefix is used twice if an extended register is
- needed for addressing (fix submitted to mainline 2005-11-21).
- asm volatile("rex64/fxsave %0"
- : "=m" (fpu->state->fxsave));
- This, however, we can work around by forcing the compiler to select
- an addressing mode that doesn't require extended registers. */
- asm volatile("rex64/fxsave (%[fx])"
- : "=m" (fpu->state->fxsave)
- : [fx] "R" (&fpu->state->fxsave));
-#endif
-}
-
-#else /* CONFIG_X86_32 */
-
-/* perform fxrstor iff the processor has extended states, otherwise frstor */
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
-{
- /*
- * The "nop" is needed to make the instructions the same
- * length.
- */
- alternative_input(
- "nop ; frstor %1",
- "fxrstor %1",
- X86_FEATURE_FXSR,
- "m" (*fx));
-
- return 0;
-}
-
-static inline void fpu_fxsave(struct fpu *fpu)
-{
- asm volatile("fxsave %[fx]"
- : [fx] "=m" (fpu->state->fxsave));
-}
-
-#endif /* CONFIG_X86_64 */
-
-/* We need a safe address that is cheap to find and that is already
- in L1 during context switch. The best choices are unfortunately
- different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
-#endif
-
-/*
- * These must be called with preempt disabled
- */
-static inline void fpu_save_init(struct fpu *fpu)
-{
- if (use_xsave()) {
- fpu_xsave(fpu);
-
- /*
- * xsave header may indicate the init state of the FP.
- */
- if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
- return;
- } else if (use_fxsr()) {
- fpu_fxsave(fpu);
- } else {
- asm volatile("fnsave %[fx]; fwait"
- : [fx] "=m" (fpu->state->fsave));
- return;
- }
-
- if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
- asm volatile("fnclex");
-
- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
- is pending. Clear the x87 state here by setting it to fixed
- values. safe_address is a random variable that should be in L1 */
- alternative_input(
- ASM_NOP8 ASM_NOP2,
- "emms\n\t" /* clear stack tags */
- "fildl %P[addr]", /* set F?P to defined value */
- X86_FEATURE_FXSAVE_LEAK,
- [addr] "m" (safe_address));
-}
-
-static inline void __save_init_fpu(struct task_struct *tsk)
-{
- fpu_save_init(&tsk->thread.fpu);
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
-}
-
-static inline int fpu_fxrstor_checking(struct fpu *fpu)
-{
- return fxrstor_checking(&fpu->state->fxsave);
-}
-
-static inline int fpu_restore_checking(struct fpu *fpu)
-{
- if (use_xsave())
- return fpu_xrstor_checking(fpu);
- else
- return fpu_fxrstor_checking(fpu);
-}
-
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
- return fpu_restore_checking(&tsk->thread.fpu);
-}
-
-/*
- * Signal frame handlers...
- */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
-
-static inline void __unlazy_fpu(struct task_struct *tsk)
-{
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
- __save_init_fpu(tsk);
- stts();
- } else
- tsk->fpu_counter = 0;
-}
-
-static inline void __clear_fpu(struct task_struct *tsk)
-{
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
- }
-}
-
-static inline void kernel_fpu_begin(void)
-{
- struct thread_info *me = current_thread_info();
- preempt_disable();
- if (me->status & TS_USEDFPU)
- __save_init_fpu(me->task);
- else
- clts();
-}
-
-static inline void kernel_fpu_end(void)
-{
- stts();
- preempt_enable();
-}
-
-static inline bool irq_fpu_usable(void)
-{
- struct pt_regs *regs;
-
- return !in_interrupt() || !(regs = get_irq_regs()) || \
- user_mode(regs) || (read_cr0() & X86_CR0_TS);
-}
+extern bool irq_fpu_usable(void);
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
/*
* Some instructions like VIA's padlock instructions generate a spurious
@@ -363,90 +59,21 @@ static inline void irq_ts_restore(int TS_state)
}
/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
- preempt_disable();
- __save_init_fpu(tsk);
- stts();
- preempt_enable();
-}
-
-static inline void unlazy_fpu(struct task_struct *tsk)
-{
- preempt_disable();
- __unlazy_fpu(tsk);
- preempt_enable();
-}
-
-static inline void clear_fpu(struct task_struct *tsk)
-{
- preempt_disable();
- __clear_fpu(tsk);
- preempt_enable();
-}
-
-/*
- * i387 state interaction
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
*/
-static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- return tsk->thread.fpu.state->fxsave.cwd;
- } else {
- return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
- }
-}
-
-static inline unsigned short get_fpu_swd(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- return tsk->thread.fpu.state->fxsave.swd;
- } else {
- return (unsigned short)tsk->thread.fpu.state->fsave.swd;
- }
-}
-
-static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
-{
- if (cpu_has_xmm) {
- return tsk->thread.fpu.state->fxsave.mxcsr;
- } else {
- return MXCSR_DEFAULT;
- }
-}
-
-static bool fpu_allocated(struct fpu *fpu)
-{
- return fpu->state != NULL;
-}
-
-static inline int fpu_alloc(struct fpu *fpu)
-{
- if (fpu_allocated(fpu))
- return 0;
- fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
- if (!fpu->state)
- return -ENOMEM;
- WARN_ON((unsigned long)fpu->state & 15);
- return 0;
-}
-
-static inline void fpu_free(struct fpu *fpu)
-{
- if (fpu->state) {
- kmem_cache_free(task_xstate_cachep, fpu->state);
- fpu->state = NULL;
- }
-}
-
-static inline void fpu_copy(struct fpu *dst, struct fpu *src)
+static inline int user_has_fpu(void)
{
- memcpy(dst->state, src->state, xstate_size);
+ return current->thread.fpu.has_fpu;
}
-extern void fpu_finit(struct fpu *fpu);
+extern void unlazy_fpu(struct task_struct *tsk);
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h
index 976f6ec..b0d5716 100644
--- a/arch/x86/include/asm/ia32_unistd.h
+++ b/arch/x86/include/asm/ia32_unistd.h
@@ -2,17 +2,10 @@
#define _ASM_X86_IA32_UNISTD_H
/*
- * This file contains the system call numbers of the ia32 port,
+ * This file contains the system call numbers of the ia32 compat ABI,
* this is for the kernel only.
- * Only add syscalls here where some part of the kernel needs to know
- * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK
*/
-
-#define __NR_ia32_restart_syscall 0
-#define __NR_ia32_exit 1
-#define __NR_ia32_read 3
-#define __NR_ia32_write 4
-#define __NR_ia32_sigreturn 119
-#define __NR_ia32_rt_sigreturn 173
+#define __SYSCALL_ia32_NR(x) (x)
+#include <asm/unistd_32_ia32.h>
#endif /* _ASM_X86_IA32_UNISTD_H */
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 205b063..74a2e31 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,11 +97,12 @@
/* Attribute search APIs */
extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
- insn_byte_t last_pfx,
+ int lpfx_id,
insn_attr_t esc_attr);
extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
- insn_byte_t last_pfx,
+ int lpfx_id,
insn_attr_t esc_attr);
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 8dbe353..adcc0ae 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -5,6 +5,8 @@
extern void __init early_ioremap_page_table_range_init(void);
#endif
+extern void __init zone_sizes_init(void);
+
extern unsigned long __init
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 74df3f1..48eb30a 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -96,12 +96,6 @@ struct insn {
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
-/* The last prefix is needed for two-byte and three-byte opcodes */
-static inline insn_byte_t insn_last_prefix(struct insn *insn)
-{
- return insn->prefixes.bytes[3];
-}
-
extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
extern void insn_get_prefixes(struct insn *insn);
extern void insn_get_opcode(struct insn *insn);
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
return X86_VEX_P(insn->vex_prefix.bytes[2]);
}
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+ if (insn_is_avx(insn))
+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+
+ if (insn->prefixes.bytes[3])
+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+ return 0;
+}
+
/* Offset of each field from kaddr */
static inline int insn_offset_rex_prefix(struct insn *insn)
{
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
deleted file mode 100644
index 423bbbd..0000000
--- a/arch/x86/include/asm/irq_controller.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __IRQ_CONTROLLER__
-#define __IRQ_CONTROLLER__
-
-struct irq_domain {
- int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize,
- u32 *out_hwirq, u32 *out_type);
- void *priv;
- struct device_node *controller;
- struct list_head l;
-};
-
-#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18c..3a16c14 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -9,12 +9,12 @@
#define JUMP_LABEL_NOP_SIZE 5
-#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("1:"
- JUMP_LABEL_INITIAL_NOP
+ STATIC_KEY_INITIAL_NOP
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index ab4092e..7b9cfc4 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -190,6 +190,9 @@ struct x86_emulate_ops {
int (*intercept)(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
+
+ bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -298,6 +301,19 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
X86EMUL_MODE_PROT64)
+/* CPUID vendors */
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
+
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273
+
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69
+
enum x86_intercept_stage {
X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */
X86_ICPT_PRE_EXCEPT,
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index f35ce43..441520e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct device, mce_device);
+DECLARE_PER_CPU(struct device *, mce_device);
/*
* Maximum banks number.
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 0a0a954..fc18bf3 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -26,8 +26,8 @@ extern struct sfi_rtc_table_entry sfi_mrtc_array[];
* identified via MSRs.
*/
enum mrst_cpu_type {
- MRST_CPU_CHIP_LINCROFT = 1,
- MRST_CPU_CHIP_PENWELL,
+ /* 1 was Moorestown */
+ MRST_CPU_CHIP_PENWELL = 2,
};
extern enum mrst_cpu_type __mrst_cpu_chip;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9..ccb8059 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
+#define MSR_LBR_SELECT 0x000001c8
+#define MSR_LBR_TOS 0x000001c9
+#define MSR_LBR_NHM_FROM 0x00000680
+#define MSR_LBR_NHM_TO 0x000006c0
+#define MSR_LBR_CORE_FROM 0x00000040
+#define MSR_LBR_CORE_TO 0x00000060
+
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9..c0180fd 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void)
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
static inline u64 paravirt_steal_clock(int cpu)
{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 096c975..e8fb2c7 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
-#define PERF_EVENT_INDEX_OFFSET 0
-
/*
* Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
* This flag is otherwise unused and ABI specified to be 0, so nobody should
@@ -242,4 +240,12 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
static inline void perf_events_lapic_init(void) { }
#endif
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+ extern void amd_pmu_enable_virt(void);
+ extern void amd_pmu_disable_virt(void);
+#else
+ static inline void amd_pmu_enable_virt(void) { }
+ static inline void amd_pmu_disable_virt(void) { }
+#endif
+
#endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index aa9088c..95da14f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -162,6 +162,7 @@ extern void early_cpu_init(void);
extern void identify_boot_cpu(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
+void print_cpu_msr(struct cpuinfo_x86 *);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern unsigned short num_cache_leaves;
@@ -374,6 +375,8 @@ union thread_xstate {
};
struct fpu {
+ unsigned int last_cpu;
+ unsigned int has_fpu;
union thread_xstate *state;
};
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 644dd885..60bef66 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -21,7 +21,6 @@
#include <asm/irq.h>
#include <linux/atomic.h>
#include <asm/setup.h>
-#include <asm/irq_controller.h>
#ifdef CONFIG_OF
extern int of_ioapic;
@@ -43,15 +42,6 @@ extern char cmd_line[COMMAND_LINE_SIZE];
#define pci_address_to_pio pci_address_to_pio
unsigned long pci_address_to_pio(phys_addr_t addr);
-/**
- * irq_dispose_mapping - Unmap an interrupt
- * @virq: linux virq number of the interrupt to unmap
- *
- * FIXME: We really should implement proper virq handling like power,
- * but that's going to be major surgery.
- */
-static inline void irq_dispose_mapping(unsigned int virq) { }
-
#define HAVE_ARCH_DEVTREE_FIXUPS
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 9756551..d0f19f9 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -47,7 +47,7 @@ extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern void setup_default_timer_irq(void);
-#ifdef CONFIG_X86_MRST
+#ifdef CONFIG_X86_INTEL_MID
extern void x86_mrst_early_setup(void);
#else
static inline void x86_mrst_early_setup(void) { }
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 73b11bc..0434c40 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -225,5 +225,11 @@ extern int hard_smp_processor_id(void);
#endif /* CONFIG_X86_LOCAL_APIC */
+#ifdef CONFIG_DEBUG_NMI_SELFTEST
+extern void nmi_selftest(void);
+#else
+#define nmi_selftest() do { } while (0)
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a82c2bf..76bfa2c 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
- return !!(tmp.tail ^ tmp.head);
+ return tmp.tail != tmp.head;
}
static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
- return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
+ return (__ticket_t)(tmp.tail - tmp.head) > 1;
}
#ifndef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 8ebd5df..ad0ad07 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
#endif
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
-#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
typedef struct arch_spinlock {
union {
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index c4a348f..d962e56 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/err.h>
+#include <asm/asm-offsets.h> /* For NR_syscalls */
extern const unsigned long sys_call_table[];
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 7404715..cfd8144 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,8 +40,8 @@ struct thread_info {
*/
__u8 supervisor_stack[0];
#endif
- int sig_on_uaccess_error:1;
- int uaccess_err:1; /* uaccess failed */
+ unsigned int sig_on_uaccess_error:1;
+ unsigned int uaccess_err:1; /* uaccess failed */
};
#define INIT_THREAD_INFO(tsk) \
@@ -247,8 +247,6 @@ static inline struct thread_info *current_thread_info(void)
* ever touches our thread-synchronous status, so we don't
* have to worry about atomic accesses.
*/
-#define TS_USEDFPU 0x0001 /* FPU was used by this task
- this quantum (SMP) */
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
#define TS_POLLING 0x0004 /* idle task polling need_resched,
skip sending interrupt */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 431793e..34baa0e 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
- unsigned long long quot;
- unsigned long long rem;
int cpu = smp_processor_id();
unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
- quot = (cyc >> CYC2NS_SCALE_FACTOR);
- rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
- ns += quot * per_cpu(cyc2ns, cpu) +
- ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
+ ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+ (1UL << CYC2NS_SCALE_FACTOR));
return ns;
}
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 2a58ed3..21f77b8 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -1,13 +1,60 @@
+#ifndef _ASM_X86_UNISTD_H
+#define _ASM_X86_UNISTD_H 1
+
#ifdef __KERNEL__
# ifdef CONFIG_X86_32
-# include "unistd_32.h"
+
+# include <asm/unistd_32.h>
+# define __ARCH_WANT_IPC_PARSE_VERSION
+# define __ARCH_WANT_STAT64
+# define __ARCH_WANT_SYS_IPC
+# define __ARCH_WANT_SYS_OLD_MMAP
+# define __ARCH_WANT_SYS_OLD_SELECT
+
# else
-# include "unistd_64.h"
+
+# include <asm/unistd_64.h>
+# define __ARCH_WANT_COMPAT_SYS_TIME
+
# endif
+
+# define __ARCH_WANT_OLD_READDIR
+# define __ARCH_WANT_OLD_STAT
+# define __ARCH_WANT_SYS_ALARM
+# define __ARCH_WANT_SYS_FADVISE64
+# define __ARCH_WANT_SYS_GETHOSTNAME
+# define __ARCH_WANT_SYS_GETPGRP
+# define __ARCH_WANT_SYS_LLSEEK
+# define __ARCH_WANT_SYS_NICE
+# define __ARCH_WANT_SYS_OLDUMOUNT
+# define __ARCH_WANT_SYS_OLD_GETRLIMIT
+# define __ARCH_WANT_SYS_OLD_UNAME
+# define __ARCH_WANT_SYS_PAUSE
+# define __ARCH_WANT_SYS_RT_SIGACTION
+# define __ARCH_WANT_SYS_RT_SIGSUSPEND
+# define __ARCH_WANT_SYS_SGETMASK
+# define __ARCH_WANT_SYS_SIGNAL
+# define __ARCH_WANT_SYS_SIGPENDING
+# define __ARCH_WANT_SYS_SIGPROCMASK
+# define __ARCH_WANT_SYS_SOCKETCALL
+# define __ARCH_WANT_SYS_TIME
+# define __ARCH_WANT_SYS_UTIME
+# define __ARCH_WANT_SYS_WAITPID
+
+/*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+# define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+
#else
# ifdef __i386__
-# include "unistd_32.h"
+# include <asm/unistd_32.h>
# else
-# include "unistd_64.h"
+# include <asm/unistd_64.h>
# endif
#endif
+
+#endif /* _ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
deleted file mode 100644
index 599c77d..0000000
--- a/arch/x86/include/asm/unistd_32.h
+++ /dev/null
@@ -1,401 +0,0 @@
-#ifndef _ASM_X86_UNISTD_32_H
-#define _ASM_X86_UNISTD_32_H
-
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_restart_syscall 0
-#define __NR_exit 1
-#define __NR_fork 2
-#define __NR_read 3
-#define __NR_write 4
-#define __NR_open 5
-#define __NR_close 6
-#define __NR_waitpid 7
-#define __NR_creat 8
-#define __NR_link 9
-#define __NR_unlink 10
-#define __NR_execve 11
-#define __NR_chdir 12
-#define __NR_time 13
-#define __NR_mknod 14
-#define __NR_chmod 15
-#define __NR_lchown 16
-#define __NR_break 17
-#define __NR_oldstat 18
-#define __NR_lseek 19
-#define __NR_getpid 20
-#define __NR_mount 21
-#define __NR_umount 22
-#define __NR_setuid 23
-#define __NR_getuid 24
-#define __NR_stime 25
-#define __NR_ptrace 26
-#define __NR_alarm 27
-#define __NR_oldfstat 28
-#define __NR_pause 29
-#define __NR_utime 30
-#define __NR_stty 31
-#define __NR_gtty 32
-#define __NR_access 33
-#define __NR_nice 34
-#define __NR_ftime 35
-#define __NR_sync 36
-#define __NR_kill 37
-#define __NR_rename 38
-#define __NR_mkdir 39
-#define __NR_rmdir 40
-#define __NR_dup 41
-#define __NR_pipe 42
-#define __NR_times 43
-#define __NR_prof 44
-#define __NR_brk 45
-#define __NR_setgid 46
-#define __NR_getgid 47
-#define __NR_signal 48
-#define __NR_geteuid 49
-#define __NR_getegid 50
-#define __NR_acct 51
-#define __NR_umount2 52
-#define __NR_lock 53
-#define __NR_ioctl 54
-#define __NR_fcntl 55
-#define __NR_mpx 56
-#define __NR_setpgid 57
-#define __NR_ulimit 58
-#define __NR_oldolduname 59
-#define __NR_umask 60
-#define __NR_chroot 61
-#define __NR_ustat 62
-#define __NR_dup2 63
-#define __NR_getppid 64
-#define __NR_getpgrp 65
-#define __NR_setsid 66
-#define __NR_sigaction 67
-#define __NR_sgetmask 68
-#define __NR_ssetmask 69
-#define __NR_setreuid 70
-#define __NR_setregid 71
-#define __NR_sigsuspend 72
-#define __NR_sigpending 73
-#define __NR_sethostname 74
-#define __NR_setrlimit 75
-#define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */
-#define __NR_getrusage 77
-#define __NR_gettimeofday 78
-#define __NR_settimeofday 79
-#define __NR_getgroups 80
-#define __NR_setgroups 81
-#define __NR_select 82
-#define __NR_symlink 83
-#define __NR_oldlstat 84
-#define __NR_readlink 85
-#define __NR_uselib 86
-#define __NR_swapon 87
-#define __NR_reboot 88
-#define __NR_readdir 89
-#define __NR_mmap 90
-#define __NR_munmap 91
-#define __NR_truncate 92
-#define __NR_ftruncate 93
-#define __NR_fchmod 94
-#define __NR_fchown 95
-#define __NR_getpriority 96
-#define __NR_setpriority 97
-#define __NR_profil 98
-#define __NR_statfs 99
-#define __NR_fstatfs 100
-#define __NR_ioperm 101
-#define __NR_socketcall 102
-#define __NR_syslog 103
-#define __NR_setitimer 104
-#define __NR_getitimer 105
-#define __NR_stat 106
-#define __NR_lstat 107
-#define __NR_fstat 108
-#define __NR_olduname 109
-#define __NR_iopl 110
-#define __NR_vhangup 111
-#define __NR_idle 112
-#define __NR_vm86old 113
-#define __NR_wait4 114
-#define __NR_swapoff 115
-#define __NR_sysinfo 116
-#define __NR_ipc 117
-#define __NR_fsync 118
-#define __NR_sigreturn 119
-#define __NR_clone 120
-#define __NR_setdomainname 121
-#define __NR_uname 122
-#define __NR_modify_ldt 123
-#define __NR_adjtimex 124
-#define __NR_mprotect 125
-#define __NR_sigprocmask 126
-#define __NR_create_module 127
-#define __NR_init_module 128
-#define __NR_delete_module 129
-#define __NR_get_kernel_syms 130
-#define __NR_quotactl 131
-#define __NR_getpgid 132
-#define __NR_fchdir 133
-#define __NR_bdflush 134
-#define __NR_sysfs 135
-#define __NR_personality 136
-#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
-#define __NR_setfsuid 138
-#define __NR_setfsgid 139
-#define __NR__llseek 140
-#define __NR_getdents 141
-#define __NR__newselect 142
-#define __NR_flock 143
-#define __NR_msync 144
-#define __NR_readv 145
-#define __NR_writev 146
-#define __NR_getsid 147
-#define __NR_fdatasync 148
-#define __NR__sysctl 149
-#define __NR_mlock 150
-#define __NR_munlock 151
-#define __NR_mlockall 152
-#define __NR_munlockall 153
-#define __NR_sched_setparam 154
-#define __NR_sched_getparam 155
-#define __NR_sched_setscheduler 156
-#define __NR_sched_getscheduler 157
-#define __NR_sched_yield 158
-#define __NR_sched_get_priority_max 159
-#define __NR_sched_get_priority_min 160
-#define __NR_sched_rr_get_interval 161
-#define __NR_nanosleep 162
-#define __NR_mremap 163
-#define __NR_setresuid 164
-#define __NR_getresuid 165
-#define __NR_vm86 166
-#define __NR_query_module 167
-#define __NR_poll 168
-#define __NR_nfsservctl 169
-#define __NR_setresgid 170
-#define __NR_getresgid 171
-#define __NR_prctl 172
-#define __NR_rt_sigreturn 173
-#define __NR_rt_sigaction 174
-#define __NR_rt_sigprocmask 175
-#define __NR_rt_sigpending 176
-#define __NR_rt_sigtimedwait 177
-#define __NR_rt_sigqueueinfo 178
-#define __NR_rt_sigsuspend 179
-#define __NR_pread64 180
-#define __NR_pwrite64 181
-#define __NR_chown 182
-#define __NR_getcwd 183
-#define __NR_capget 184
-#define __NR_capset 185
-#define __NR_sigaltstack 186
-#define __NR_sendfile 187
-#define __NR_getpmsg 188 /* some people actually want streams */
-#define __NR_putpmsg 189 /* some people actually want streams */
-#define __NR_vfork 190
-#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
-#define __NR_mmap2 192
-#define __NR_truncate64 193
-#define __NR_ftruncate64 194
-#define __NR_stat64 195
-#define __NR_lstat64 196
-#define __NR_fstat64 197
-#define __NR_lchown32 198
-#define __NR_getuid32 199
-#define __NR_getgid32 200
-#define __NR_geteuid32 201
-#define __NR_getegid32 202
-#define __NR_setreuid32 203
-#define __NR_setregid32 204
-#define __NR_getgroups32 205
-#define __NR_setgroups32 206
-#define __NR_fchown32 207
-#define __NR_setresuid32 208
-#define __NR_getresuid32 209
-#define __NR_setresgid32 210
-#define __NR_getresgid32 211
-#define __NR_chown32 212
-#define __NR_setuid32 213
-#define __NR_setgid32 214
-#define __NR_setfsuid32 215
-#define __NR_setfsgid32 216
-#define __NR_pivot_root 217
-#define __NR_mincore 218
-#define __NR_madvise 219
-#define __NR_madvise1 219 /* delete when C lib stub is removed */
-#define __NR_getdents64 220
-#define __NR_fcntl64 221
-/* 223 is unused */
-#define __NR_gettid 224
-#define __NR_readahead 225
-#define __NR_setxattr 226
-#define __NR_lsetxattr 227
-#define __NR_fsetxattr 228
-#define __NR_getxattr 229
-#define __NR_lgetxattr 230
-#define __NR_fgetxattr 231
-#define __NR_listxattr 232
-#define __NR_llistxattr 233
-#define __NR_flistxattr 234
-#define __NR_removexattr 235
-#define __NR_lremovexattr 236
-#define __NR_fremovexattr 237
-#define __NR_tkill 238
-#define __NR_sendfile64 239
-#define __NR_futex 240
-#define __NR_sched_setaffinity 241
-#define __NR_sched_getaffinity 242
-#define __NR_set_thread_area 243
-#define __NR_get_thread_area 244
-#define __NR_io_setup 245
-#define __NR_io_destroy 246
-#define __NR_io_getevents 247
-#define __NR_io_submit 248
-#define __NR_io_cancel 249
-#define __NR_fadvise64 250
-/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */
-#define __NR_exit_group 252
-#define __NR_lookup_dcookie 253
-#define __NR_epoll_create 254
-#define __NR_epoll_ctl 255
-#define __NR_epoll_wait 256
-#define __NR_remap_file_pages 257
-#define __NR_set_tid_address 258
-#define __NR_timer_create 259
-#define __NR_timer_settime (__NR_timer_create+1)
-#define __NR_timer_gettime (__NR_timer_create+2)
-#define __NR_timer_getoverrun (__NR_timer_create+3)
-#define __NR_timer_delete (__NR_timer_create+4)
-#define __NR_clock_settime (__NR_timer_create+5)
-#define __NR_clock_gettime (__NR_timer_create+6)
-#define __NR_clock_getres (__NR_timer_create+7)
-#define __NR_clock_nanosleep (__NR_timer_create+8)
-#define __NR_statfs64 268
-#define __NR_fstatfs64 269
-#define __NR_tgkill 270
-#define __NR_utimes 271
-#define __NR_fadvise64_64 272
-#define __NR_vserver 273
-#define __NR_mbind 274
-#define __NR_get_mempolicy 275
-#define __NR_set_mempolicy 276
-#define __NR_mq_open 277
-#define __NR_mq_unlink (__NR_mq_open+1)
-#define __NR_mq_timedsend (__NR_mq_open+2)
-#define __NR_mq_timedreceive (__NR_mq_open+3)
-#define __NR_mq_notify (__NR_mq_open+4)
-#define __NR_mq_getsetattr (__NR_mq_open+5)
-#define __NR_kexec_load 283
-#define __NR_waitid 284
-/* #define __NR_sys_setaltroot 285 */
-#define __NR_add_key 286
-#define __NR_request_key 287
-#define __NR_keyctl 288
-#define __NR_ioprio_set 289
-#define __NR_ioprio_get 290
-#define __NR_inotify_init 291
-#define __NR_inotify_add_watch 292
-#define __NR_inotify_rm_watch 293
-#define __NR_migrate_pages 294
-#define __NR_openat 295
-#define __NR_mkdirat 296
-#define __NR_mknodat 297
-#define __NR_fchownat 298
-#define __NR_futimesat 299
-#define __NR_fstatat64 300
-#define __NR_unlinkat 301
-#define __NR_renameat 302
-#define __NR_linkat 303
-#define __NR_symlinkat 304
-#define __NR_readlinkat 305
-#define __NR_fchmodat 306
-#define __NR_faccessat 307
-#define __NR_pselect6 308
-#define __NR_ppoll 309
-#define __NR_unshare 310
-#define __NR_set_robust_list 311
-#define __NR_get_robust_list 312
-#define __NR_splice 313
-#define __NR_sync_file_range 314
-#define __NR_tee 315
-#define __NR_vmsplice 316
-#define __NR_move_pages 317
-#define __NR_getcpu 318
-#define __NR_epoll_pwait 319
-#define __NR_utimensat 320
-#define __NR_signalfd 321
-#define __NR_timerfd_create 322
-#define __NR_eventfd 323
-#define __NR_fallocate 324
-#define __NR_timerfd_settime 325
-#define __NR_timerfd_gettime 326
-#define __NR_signalfd4 327
-#define __NR_eventfd2 328
-#define __NR_epoll_create1 329
-#define __NR_dup3 330
-#define __NR_pipe2 331
-#define __NR_inotify_init1 332
-#define __NR_preadv 333
-#define __NR_pwritev 334
-#define __NR_rt_tgsigqueueinfo 335
-#define __NR_perf_event_open 336
-#define __NR_recvmmsg 337
-#define __NR_fanotify_init 338
-#define __NR_fanotify_mark 339
-#define __NR_prlimit64 340
-#define __NR_name_to_handle_at 341
-#define __NR_open_by_handle_at 342
-#define __NR_clock_adjtime 343
-#define __NR_syncfs 344
-#define __NR_sendmmsg 345
-#define __NR_setns 346
-#define __NR_process_vm_readv 347
-#define __NR_process_vm_writev 348
-
-#ifdef __KERNEL__
-
-#define NR_syscalls 349
-
-#define __ARCH_WANT_IPC_PARSE_VERSION
-#define __ARCH_WANT_OLD_READDIR
-#define __ARCH_WANT_OLD_STAT
-#define __ARCH_WANT_STAT64
-#define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_GETHOSTNAME
-#define __ARCH_WANT_SYS_IPC
-#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
-#define __ARCH_WANT_SYS_SIGNAL
-#define __ARCH_WANT_SYS_TIME
-#define __ARCH_WANT_SYS_UTIME
-#define __ARCH_WANT_SYS_WAITPID
-#define __ARCH_WANT_SYS_SOCKETCALL
-#define __ARCH_WANT_SYS_FADVISE64
-#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
-#define __ARCH_WANT_SYS_NICE
-#define __ARCH_WANT_SYS_OLD_GETRLIMIT
-#define __ARCH_WANT_SYS_OLD_UNAME
-#define __ARCH_WANT_SYS_OLD_MMAP
-#define __ARCH_WANT_SYS_OLD_SELECT
-#define __ARCH_WANT_SYS_OLDUMOUNT
-#define __ARCH_WANT_SYS_SIGPENDING
-#define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-
-/*
- * "Conditional" syscalls
- *
- * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
- * but it doesn't work on all toolchains, so we just do it by hand
- */
-#ifndef cond_syscall
-#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_X86_UNISTD_32_H */
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
deleted file mode 100644
index 0431f19..0000000
--- a/arch/x86/include/asm/unistd_64.h
+++ /dev/null
@@ -1,732 +0,0 @@
-#ifndef _ASM_X86_UNISTD_64_H
-#define _ASM_X86_UNISTD_64_H
-
-#ifndef __SYSCALL
-#define __SYSCALL(a, b)
-#endif
-
-/*
- * This file contains the system call numbers.
- *
- * Note: holes are not allowed.
- */
-
-/* at least 8 syscall per cacheline */
-#define __NR_read 0
-__SYSCALL(__NR_read, sys_read)
-#define __NR_write 1
-__SYSCALL(__NR_write, sys_write)
-#define __NR_open 2
-__SYSCALL(__NR_open, sys_open)
-#define __NR_close 3
-__SYSCALL(__NR_close, sys_close)
-#define __NR_stat 4
-__SYSCALL(__NR_stat, sys_newstat)
-#define __NR_fstat 5
-__SYSCALL(__NR_fstat, sys_newfstat)
-#define __NR_lstat 6
-__SYSCALL(__NR_lstat, sys_newlstat)
-#define __NR_poll 7
-__SYSCALL(__NR_poll, sys_poll)
-
-#define __NR_lseek 8
-__SYSCALL(__NR_lseek, sys_lseek)
-#define __NR_mmap 9
-__SYSCALL(__NR_mmap, sys_mmap)
-#define __NR_mprotect 10
-__SYSCALL(__NR_mprotect, sys_mprotect)
-#define __NR_munmap 11
-__SYSCALL(__NR_munmap, sys_munmap)
-#define __NR_brk 12
-__SYSCALL(__NR_brk, sys_brk)
-#define __NR_rt_sigaction 13
-__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction)
-#define __NR_rt_sigprocmask 14
-__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask)
-#define __NR_rt_sigreturn 15
-__SYSCALL(__NR_rt_sigreturn, stub_rt_sigreturn)
-
-#define __NR_ioctl 16
-__SYSCALL(__NR_ioctl, sys_ioctl)
-#define __NR_pread64 17
-__SYSCALL(__NR_pread64, sys_pread64)
-#define __NR_pwrite64 18
-__SYSCALL(__NR_pwrite64, sys_pwrite64)
-#define __NR_readv 19
-__SYSCALL(__NR_readv, sys_readv)
-#define __NR_writev 20
-__SYSCALL(__NR_writev, sys_writev)
-#define __NR_access 21
-__SYSCALL(__NR_access, sys_access)
-#define __NR_pipe 22
-__SYSCALL(__NR_pipe, sys_pipe)
-#define __NR_select 23
-__SYSCALL(__NR_select, sys_select)
-
-#define __NR_sched_yield 24
-__SYSCALL(__NR_sched_yield, sys_sched_yield)
-#define __NR_mremap 25
-__SYSCALL(__NR_mremap, sys_mremap)
-#define __NR_msync 26
-__SYSCALL(__NR_msync, sys_msync)
-#define __NR_mincore 27
-__SYSCALL(__NR_mincore, sys_mincore)
-#define __NR_madvise 28
-__SYSCALL(__NR_madvise, sys_madvise)
-#define __NR_shmget 29
-__SYSCALL(__NR_shmget, sys_shmget)
-#define __NR_shmat 30
-__SYSCALL(__NR_shmat, sys_shmat)
-#define __NR_shmctl 31
-__SYSCALL(__NR_shmctl, sys_shmctl)
-
-#define __NR_dup 32
-__SYSCALL(__NR_dup, sys_dup)
-#define __NR_dup2 33
-__SYSCALL(__NR_dup2, sys_dup2)
-#define __NR_pause 34
-__SYSCALL(__NR_pause, sys_pause)
-#define __NR_nanosleep 35
-__SYSCALL(__NR_nanosleep, sys_nanosleep)
-#define __NR_getitimer 36
-__SYSCALL(__NR_getitimer, sys_getitimer)
-#define __NR_alarm 37
-__SYSCALL(__NR_alarm, sys_alarm)
-#define __NR_setitimer 38
-__SYSCALL(__NR_setitimer, sys_setitimer)
-#define __NR_getpid 39
-__SYSCALL(__NR_getpid, sys_getpid)
-
-#define __NR_sendfile 40
-__SYSCALL(__NR_sendfile, sys_sendfile64)
-#define __NR_socket 41
-__SYSCALL(__NR_socket, sys_socket)
-#define __NR_connect 42
-__SYSCALL(__NR_connect, sys_connect)
-#define __NR_accept 43
-__SYSCALL(__NR_accept, sys_accept)
-#define __NR_sendto 44
-__SYSCALL(__NR_sendto, sys_sendto)
-#define __NR_recvfrom 45
-__SYSCALL(__NR_recvfrom, sys_recvfrom)
-#define __NR_sendmsg 46
-__SYSCALL(__NR_sendmsg, sys_sendmsg)
-#define __NR_recvmsg 47
-__SYSCALL(__NR_recvmsg, sys_recvmsg)
-
-#define __NR_shutdown 48
-__SYSCALL(__NR_shutdown, sys_shutdown)
-#define __NR_bind 49
-__SYSCALL(__NR_bind, sys_bind)
-#define __NR_listen 50
-__SYSCALL(__NR_listen, sys_listen)
-#define __NR_getsockname 51
-__SYSCALL(__NR_getsockname, sys_getsockname)
-#define __NR_getpeername 52
-__SYSCALL(__NR_getpeername, sys_getpeername)
-#define __NR_socketpair 53
-__SYSCALL(__NR_socketpair, sys_socketpair)
-#define __NR_setsockopt 54
-__SYSCALL(__NR_setsockopt, sys_setsockopt)
-#define __NR_getsockopt 55
-__SYSCALL(__NR_getsockopt, sys_getsockopt)
-
-#define __NR_clone 56
-__SYSCALL(__NR_clone, stub_clone)
-#define __NR_fork 57
-__SYSCALL(__NR_fork, stub_fork)
-#define __NR_vfork 58
-__SYSCALL(__NR_vfork, stub_vfork)
-#define __NR_execve 59
-__SYSCALL(__NR_execve, stub_execve)
-#define __NR_exit 60
-__SYSCALL(__NR_exit, sys_exit)
-#define __NR_wait4 61
-__SYSCALL(__NR_wait4, sys_wait4)
-#define __NR_kill 62
-__SYSCALL(__NR_kill, sys_kill)
-#define __NR_uname 63
-__SYSCALL(__NR_uname, sys_newuname)
-
-#define __NR_semget 64
-__SYSCALL(__NR_semget, sys_semget)
-#define __NR_semop 65
-__SYSCALL(__NR_semop, sys_semop)
-#define __NR_semctl 66
-__SYSCALL(__NR_semctl, sys_semctl)
-#define __NR_shmdt 67
-__SYSCALL(__NR_shmdt, sys_shmdt)
-#define __NR_msgget 68
-__SYSCALL(__NR_msgget, sys_msgget)
-#define __NR_msgsnd 69
-__SYSCALL(__NR_msgsnd, sys_msgsnd)
-#define __NR_msgrcv 70
-__SYSCALL(__NR_msgrcv, sys_msgrcv)
-#define __NR_msgctl 71
-__SYSCALL(__NR_msgctl, sys_msgctl)
-
-#define __NR_fcntl 72
-__SYSCALL(__NR_fcntl, sys_fcntl)
-#define __NR_flock 73
-__SYSCALL(__NR_flock, sys_flock)
-#define __NR_fsync 74
-__SYSCALL(__NR_fsync, sys_fsync)
-#define __NR_fdatasync 75
-__SYSCALL(__NR_fdatasync, sys_fdatasync)
-#define __NR_truncate 76
-__SYSCALL(__NR_truncate, sys_truncate)
-#define __NR_ftruncate 77
-__SYSCALL(__NR_ftruncate, sys_ftruncate)
-#define __NR_getdents 78
-__SYSCALL(__NR_getdents, sys_getdents)
-#define __NR_getcwd 79
-__SYSCALL(__NR_getcwd, sys_getcwd)
-
-#define __NR_chdir 80
-__SYSCALL(__NR_chdir, sys_chdir)
-#define __NR_fchdir 81
-__SYSCALL(__NR_fchdir, sys_fchdir)
-#define __NR_rename 82
-__SYSCALL(__NR_rename, sys_rename)
-#define __NR_mkdir 83
-__SYSCALL(__NR_mkdir, sys_mkdir)
-#define __NR_rmdir 84
-__SYSCALL(__NR_rmdir, sys_rmdir)
-#define __NR_creat 85
-__SYSCALL(__NR_creat, sys_creat)
-#define __NR_link 86
-__SYSCALL(__NR_link, sys_link)
-#define __NR_unlink 87
-__SYSCALL(__NR_unlink, sys_unlink)
-
-#define __NR_symlink 88
-__SYSCALL(__NR_symlink, sys_symlink)
-#define __NR_readlink 89
-__SYSCALL(__NR_readlink, sys_readlink)
-#define __NR_chmod 90
-__SYSCALL(__NR_chmod, sys_chmod)
-#define __NR_fchmod 91
-__SYSCALL(__NR_fchmod, sys_fchmod)
-#define __NR_chown 92
-__SYSCALL(__NR_chown, sys_chown)
-#define __NR_fchown 93
-__SYSCALL(__NR_fchown, sys_fchown)
-#define __NR_lchown 94
-__SYSCALL(__NR_lchown, sys_lchown)
-#define __NR_umask 95
-__SYSCALL(__NR_umask, sys_umask)
-
-#define __NR_gettimeofday 96
-__SYSCALL(__NR_gettimeofday, sys_gettimeofday)
-#define __NR_getrlimit 97
-__SYSCALL(__NR_getrlimit, sys_getrlimit)
-#define __NR_getrusage 98
-__SYSCALL(__NR_getrusage, sys_getrusage)
-#define __NR_sysinfo 99
-__SYSCALL(__NR_sysinfo, sys_sysinfo)
-#define __NR_times 100
-__SYSCALL(__NR_times, sys_times)
-#define __NR_ptrace 101
-__SYSCALL(__NR_ptrace, sys_ptrace)
-#define __NR_getuid 102
-__SYSCALL(__NR_getuid, sys_getuid)
-#define __NR_syslog 103
-__SYSCALL(__NR_syslog, sys_syslog)
-
-/* at the very end the stuff that never runs during the benchmarks */
-#define __NR_getgid 104
-__SYSCALL(__NR_getgid, sys_getgid)
-#define __NR_setuid 105
-__SYSCALL(__NR_setuid, sys_setuid)
-#define __NR_setgid 106
-__SYSCALL(__NR_setgid, sys_setgid)
-#define __NR_geteuid 107
-__SYSCALL(__NR_geteuid, sys_geteuid)
-#define __NR_getegid 108
-__SYSCALL(__NR_getegid, sys_getegid)
-#define __NR_setpgid 109
-__SYSCALL(__NR_setpgid, sys_setpgid)
-#define __NR_getppid 110
-__SYSCALL(__NR_getppid, sys_getppid)
-#define __NR_getpgrp 111
-__SYSCALL(__NR_getpgrp, sys_getpgrp)
-
-#define __NR_setsid 112
-__SYSCALL(__NR_setsid, sys_setsid)
-#define __NR_setreuid 113
-__SYSCALL(__NR_setreuid, sys_setreuid)
-#define __NR_setregid 114
-__SYSCALL(__NR_setregid, sys_setregid)
-#define __NR_getgroups 115
-__SYSCALL(__NR_getgroups, sys_getgroups)
-#define __NR_setgroups 116
-__SYSCALL(__NR_setgroups, sys_setgroups)
-#define __NR_setresuid 117
-__SYSCALL(__NR_setresuid, sys_setresuid)
-#define __NR_getresuid 118
-__SYSCALL(__NR_getresuid, sys_getresuid)
-#define __NR_setresgid 119
-__SYSCALL(__NR_setresgid, sys_setresgid)
-
-#define __NR_getresgid 120
-__SYSCALL(__NR_getresgid, sys_getresgid)
-#define __NR_getpgid 121
-__SYSCALL(__NR_getpgid, sys_getpgid)
-#define __NR_setfsuid 122
-__SYSCALL(__NR_setfsuid, sys_setfsuid)
-#define __NR_setfsgid 123
-__SYSCALL(__NR_setfsgid, sys_setfsgid)
-#define __NR_getsid 124
-__SYSCALL(__NR_getsid, sys_getsid)
-#define __NR_capget 125
-__SYSCALL(__NR_capget, sys_capget)
-#define __NR_capset 126
-__SYSCALL(__NR_capset, sys_capset)
-
-#define __NR_rt_sigpending 127
-__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending)
-#define __NR_rt_sigtimedwait 128
-__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait)
-#define __NR_rt_sigqueueinfo 129
-__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo)
-#define __NR_rt_sigsuspend 130
-__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend)
-#define __NR_sigaltstack 131
-__SYSCALL(__NR_sigaltstack, stub_sigaltstack)
-#define __NR_utime 132
-__SYSCALL(__NR_utime, sys_utime)
-#define __NR_mknod 133
-__SYSCALL(__NR_mknod, sys_mknod)
-
-/* Only needed for a.out */
-#define __NR_uselib 134
-__SYSCALL(__NR_uselib, sys_ni_syscall)
-#define __NR_personality 135
-__SYSCALL(__NR_personality, sys_personality)
-
-#define __NR_ustat 136
-__SYSCALL(__NR_ustat, sys_ustat)
-#define __NR_statfs 137
-__SYSCALL(__NR_statfs, sys_statfs)
-#define __NR_fstatfs 138
-__SYSCALL(__NR_fstatfs, sys_fstatfs)
-#define __NR_sysfs 139
-__SYSCALL(__NR_sysfs, sys_sysfs)
-
-#define __NR_getpriority 140
-__SYSCALL(__NR_getpriority, sys_getpriority)
-#define __NR_setpriority 141
-__SYSCALL(__NR_setpriority, sys_setpriority)
-#define __NR_sched_setparam 142
-__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
-#define __NR_sched_getparam 143
-__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
-#define __NR_sched_setscheduler 144
-__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
-#define __NR_sched_getscheduler 145
-__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
-#define __NR_sched_get_priority_max 146
-__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
-#define __NR_sched_get_priority_min 147
-__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
-#define __NR_sched_rr_get_interval 148
-__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval)
-
-#define __NR_mlock 149
-__SYSCALL(__NR_mlock, sys_mlock)
-#define __NR_munlock 150
-__SYSCALL(__NR_munlock, sys_munlock)
-#define __NR_mlockall 151
-__SYSCALL(__NR_mlockall, sys_mlockall)
-#define __NR_munlockall 152
-__SYSCALL(__NR_munlockall, sys_munlockall)
-
-#define __NR_vhangup 153
-__SYSCALL(__NR_vhangup, sys_vhangup)
-
-#define __NR_modify_ldt 154
-__SYSCALL(__NR_modify_ldt, sys_modify_ldt)
-
-#define __NR_pivot_root 155
-__SYSCALL(__NR_pivot_root, sys_pivot_root)
-
-#define __NR__sysctl 156
-__SYSCALL(__NR__sysctl, sys_sysctl)
-
-#define __NR_prctl 157
-__SYSCALL(__NR_prctl, sys_prctl)
-#define __NR_arch_prctl 158
-__SYSCALL(__NR_arch_prctl, sys_arch_prctl)
-
-#define __NR_adjtimex 159
-__SYSCALL(__NR_adjtimex, sys_adjtimex)
-
-#define __NR_setrlimit 160
-__SYSCALL(__NR_setrlimit, sys_setrlimit)
-
-#define __NR_chroot 161
-__SYSCALL(__NR_chroot, sys_chroot)
-
-#define __NR_sync 162
-__SYSCALL(__NR_sync, sys_sync)
-
-#define __NR_acct 163
-__SYSCALL(__NR_acct, sys_acct)
-
-#define __NR_settimeofday 164
-__SYSCALL(__NR_settimeofday, sys_settimeofday)
-
-#define __NR_mount 165
-__SYSCALL(__NR_mount, sys_mount)
-#define __NR_umount2 166
-__SYSCALL(__NR_umount2, sys_umount)
-
-#define __NR_swapon 167
-__SYSCALL(__NR_swapon, sys_swapon)
-#define __NR_swapoff 168
-__SYSCALL(__NR_swapoff, sys_swapoff)
-
-#define __NR_reboot 169
-__SYSCALL(__NR_reboot, sys_reboot)
-
-#define __NR_sethostname 170
-__SYSCALL(__NR_sethostname, sys_sethostname)
-#define __NR_setdomainname 171
-__SYSCALL(__NR_setdomainname, sys_setdomainname)
-
-#define __NR_iopl 172
-__SYSCALL(__NR_iopl, stub_iopl)
-#define __NR_ioperm 173
-__SYSCALL(__NR_ioperm, sys_ioperm)
-
-#define __NR_create_module 174
-__SYSCALL(__NR_create_module, sys_ni_syscall)
-#define __NR_init_module 175
-__SYSCALL(__NR_init_module, sys_init_module)
-#define __NR_delete_module 176
-__SYSCALL(__NR_delete_module, sys_delete_module)
-#define __NR_get_kernel_syms 177
-__SYSCALL(__NR_get_kernel_syms, sys_ni_syscall)
-#define __NR_query_module 178
-__SYSCALL(__NR_query_module, sys_ni_syscall)
-
-#define __NR_quotactl 179
-__SYSCALL(__NR_quotactl, sys_quotactl)
-
-#define __NR_nfsservctl 180
-__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
-
-/* reserved for LiS/STREAMS */
-#define __NR_getpmsg 181
-__SYSCALL(__NR_getpmsg, sys_ni_syscall)
-#define __NR_putpmsg 182
-__SYSCALL(__NR_putpmsg, sys_ni_syscall)
-
-/* reserved for AFS */
-#define __NR_afs_syscall 183
-__SYSCALL(__NR_afs_syscall, sys_ni_syscall)
-
-/* reserved for tux */
-#define __NR_tuxcall 184
-__SYSCALL(__NR_tuxcall, sys_ni_syscall)
-
-#define __NR_security 185
-__SYSCALL(__NR_security, sys_ni_syscall)
-
-#define __NR_gettid 186
-__SYSCALL(__NR_gettid, sys_gettid)
-
-#define __NR_readahead 187
-__SYSCALL(__NR_readahead, sys_readahead)
-#define __NR_setxattr 188
-__SYSCALL(__NR_setxattr, sys_setxattr)
-#define __NR_lsetxattr 189
-__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
-#define __NR_fsetxattr 190
-__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
-#define __NR_getxattr 191
-__SYSCALL(__NR_getxattr, sys_getxattr)
-#define __NR_lgetxattr 192
-__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
-#define __NR_fgetxattr 193
-__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
-#define __NR_listxattr 194
-__SYSCALL(__NR_listxattr, sys_listxattr)
-#define __NR_llistxattr 195
-__SYSCALL(__NR_llistxattr, sys_llistxattr)
-#define __NR_flistxattr 196
-__SYSCALL(__NR_flistxattr, sys_flistxattr)
-#define __NR_removexattr 197
-__SYSCALL(__NR_removexattr, sys_removexattr)
-#define __NR_lremovexattr 198
-__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
-#define __NR_fremovexattr 199
-__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
-#define __NR_tkill 200
-__SYSCALL(__NR_tkill, sys_tkill)
-#define __NR_time 201
-__SYSCALL(__NR_time, sys_time)
-#define __NR_futex 202
-__SYSCALL(__NR_futex, sys_futex)
-#define __NR_sched_setaffinity 203
-__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity)
-#define __NR_sched_getaffinity 204
-__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity)
-#define __NR_set_thread_area 205
-__SYSCALL(__NR_set_thread_area, sys_ni_syscall) /* use arch_prctl */
-#define __NR_io_setup 206
-__SYSCALL(__NR_io_setup, sys_io_setup)
-#define __NR_io_destroy 207
-__SYSCALL(__NR_io_destroy, sys_io_destroy)
-#define __NR_io_getevents 208
-__SYSCALL(__NR_io_getevents, sys_io_getevents)
-#define __NR_io_submit 209
-__SYSCALL(__NR_io_submit, sys_io_submit)
-#define __NR_io_cancel 210
-__SYSCALL(__NR_io_cancel, sys_io_cancel)
-#define __NR_get_thread_area 211
-__SYSCALL(__NR_get_thread_area, sys_ni_syscall) /* use arch_prctl */
-#define __NR_lookup_dcookie 212
-__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie)
-#define __NR_epoll_create 213
-__SYSCALL(__NR_epoll_create, sys_epoll_create)
-#define __NR_epoll_ctl_old 214
-__SYSCALL(__NR_epoll_ctl_old, sys_ni_syscall)
-#define __NR_epoll_wait_old 215
-__SYSCALL(__NR_epoll_wait_old, sys_ni_syscall)
-#define __NR_remap_file_pages 216
-__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
-#define __NR_getdents64 217
-__SYSCALL(__NR_getdents64, sys_getdents64)
-#define __NR_set_tid_address 218
-__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
-#define __NR_restart_syscall 219
-__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
-#define __NR_semtimedop 220
-__SYSCALL(__NR_semtimedop, sys_semtimedop)
-#define __NR_fadvise64 221
-__SYSCALL(__NR_fadvise64, sys_fadvise64)
-#define __NR_timer_create 222
-__SYSCALL(__NR_timer_create, sys_timer_create)
-#define __NR_timer_settime 223
-__SYSCALL(__NR_timer_settime, sys_timer_settime)
-#define __NR_timer_gettime 224
-__SYSCALL(__NR_timer_gettime, sys_timer_gettime)
-#define __NR_timer_getoverrun 225
-__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
-#define __NR_timer_delete 226
-__SYSCALL(__NR_timer_delete, sys_timer_delete)
-#define __NR_clock_settime 227
-__SYSCALL(__NR_clock_settime, sys_clock_settime)
-#define __NR_clock_gettime 228
-__SYSCALL(__NR_clock_gettime, sys_clock_gettime)
-#define __NR_clock_getres 229
-__SYSCALL(__NR_clock_getres, sys_clock_getres)
-#define __NR_clock_nanosleep 230
-__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep)
-#define __NR_exit_group 231
-__SYSCALL(__NR_exit_group, sys_exit_group)
-#define __NR_epoll_wait 232
-__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
-#define __NR_epoll_ctl 233
-__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
-#define __NR_tgkill 234
-__SYSCALL(__NR_tgkill, sys_tgkill)
-#define __NR_utimes 235
-__SYSCALL(__NR_utimes, sys_utimes)
-#define __NR_vserver 236
-__SYSCALL(__NR_vserver, sys_ni_syscall)
-#define __NR_mbind 237
-__SYSCALL(__NR_mbind, sys_mbind)
-#define __NR_set_mempolicy 238
-__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
-#define __NR_get_mempolicy 239
-__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
-#define __NR_mq_open 240
-__SYSCALL(__NR_mq_open, sys_mq_open)
-#define __NR_mq_unlink 241
-__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
-#define __NR_mq_timedsend 242
-__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend)
-#define __NR_mq_timedreceive 243
-__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive)
-#define __NR_mq_notify 244
-__SYSCALL(__NR_mq_notify, sys_mq_notify)
-#define __NR_mq_getsetattr 245
-__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
-#define __NR_kexec_load 246
-__SYSCALL(__NR_kexec_load, sys_kexec_load)
-#define __NR_waitid 247
-__SYSCALL(__NR_waitid, sys_waitid)
-#define __NR_add_key 248
-__SYSCALL(__NR_add_key, sys_add_key)
-#define __NR_request_key 249
-__SYSCALL(__NR_request_key, sys_request_key)
-#define __NR_keyctl 250
-__SYSCALL(__NR_keyctl, sys_keyctl)
-#define __NR_ioprio_set 251
-__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
-#define __NR_ioprio_get 252
-__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
-#define __NR_inotify_init 253
-__SYSCALL(__NR_inotify_init, sys_inotify_init)
-#define __NR_inotify_add_watch 254
-__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
-#define __NR_inotify_rm_watch 255
-__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
-#define __NR_migrate_pages 256
-__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
-#define __NR_openat 257
-__SYSCALL(__NR_openat, sys_openat)
-#define __NR_mkdirat 258
-__SYSCALL(__NR_mkdirat, sys_mkdirat)
-#define __NR_mknodat 259
-__SYSCALL(__NR_mknodat, sys_mknodat)
-#define __NR_fchownat 260
-__SYSCALL(__NR_fchownat, sys_fchownat)
-#define __NR_futimesat 261
-__SYSCALL(__NR_futimesat, sys_futimesat)
-#define __NR_newfstatat 262
-__SYSCALL(__NR_newfstatat, sys_newfstatat)
-#define __NR_unlinkat 263
-__SYSCALL(__NR_unlinkat, sys_unlinkat)
-#define __NR_renameat 264
-__SYSCALL(__NR_renameat, sys_renameat)
-#define __NR_linkat 265
-__SYSCALL(__NR_linkat, sys_linkat)
-#define __NR_symlinkat 266
-__SYSCALL(__NR_symlinkat, sys_symlinkat)
-#define __NR_readlinkat 267
-__SYSCALL(__NR_readlinkat, sys_readlinkat)
-#define __NR_fchmodat 268
-__SYSCALL(__NR_fchmodat, sys_fchmodat)
-#define __NR_faccessat 269
-__SYSCALL(__NR_faccessat, sys_faccessat)
-#define __NR_pselect6 270
-__SYSCALL(__NR_pselect6, sys_pselect6)
-#define __NR_ppoll 271
-__SYSCALL(__NR_ppoll, sys_ppoll)
-#define __NR_unshare 272
-__SYSCALL(__NR_unshare, sys_unshare)
-#define __NR_set_robust_list 273
-__SYSCALL(__NR_set_robust_list, sys_set_robust_list)
-#define __NR_get_robust_list 274
-__SYSCALL(__NR_get_robust_list, sys_get_robust_list)
-#define __NR_splice 275
-__SYSCALL(__NR_splice, sys_splice)
-#define __NR_tee 276
-__SYSCALL(__NR_tee, sys_tee)
-#define __NR_sync_file_range 277
-__SYSCALL(__NR_sync_file_range, sys_sync_file_range)
-#define __NR_vmsplice 278
-__SYSCALL(__NR_vmsplice, sys_vmsplice)
-#define __NR_move_pages 279
-__SYSCALL(__NR_move_pages, sys_move_pages)
-#define __NR_utimensat 280
-__SYSCALL(__NR_utimensat, sys_utimensat)
-#define __NR_epoll_pwait 281
-__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait)
-#define __NR_signalfd 282
-__SYSCALL(__NR_signalfd, sys_signalfd)
-#define __NR_timerfd_create 283
-__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
-#define __NR_eventfd 284
-__SYSCALL(__NR_eventfd, sys_eventfd)
-#define __NR_fallocate 285
-__SYSCALL(__NR_fallocate, sys_fallocate)
-#define __NR_timerfd_settime 286
-__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
-#define __NR_timerfd_gettime 287
-__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_accept4 288
-__SYSCALL(__NR_accept4, sys_accept4)
-#define __NR_signalfd4 289
-__SYSCALL(__NR_signalfd4, sys_signalfd4)
-#define __NR_eventfd2 290
-__SYSCALL(__NR_eventfd2, sys_eventfd2)
-#define __NR_epoll_create1 291
-__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
-#define __NR_dup3 292
-__SYSCALL(__NR_dup3, sys_dup3)
-#define __NR_pipe2 293
-__SYSCALL(__NR_pipe2, sys_pipe2)
-#define __NR_inotify_init1 294
-__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
-#define __NR_preadv 295
-__SYSCALL(__NR_preadv, sys_preadv)
-#define __NR_pwritev 296
-__SYSCALL(__NR_pwritev, sys_pwritev)
-#define __NR_rt_tgsigqueueinfo 297
-__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
-#define __NR_perf_event_open 298
-__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
-#define __NR_recvmmsg 299
-__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
-#define __NR_fanotify_init 300
-__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
-#define __NR_fanotify_mark 301
-__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
-#define __NR_prlimit64 302
-__SYSCALL(__NR_prlimit64, sys_prlimit64)
-#define __NR_name_to_handle_at 303
-__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
-#define __NR_open_by_handle_at 304
-__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
-#define __NR_clock_adjtime 305
-__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
-#define __NR_syncfs 306
-__SYSCALL(__NR_syncfs, sys_syncfs)
-#define __NR_sendmmsg 307
-__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
-#define __NR_setns 308
-__SYSCALL(__NR_setns, sys_setns)
-#define __NR_getcpu 309
-__SYSCALL(__NR_getcpu, sys_getcpu)
-#define __NR_process_vm_readv 310
-__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
-#define __NR_process_vm_writev 311
-__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
-
-#ifndef __NO_STUBS
-#define __ARCH_WANT_OLD_READDIR
-#define __ARCH_WANT_OLD_STAT
-#define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_GETHOSTNAME
-#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
-#define __ARCH_WANT_SYS_SIGNAL
-#define __ARCH_WANT_SYS_UTIME
-#define __ARCH_WANT_SYS_WAITPID
-#define __ARCH_WANT_SYS_SOCKETCALL
-#define __ARCH_WANT_SYS_FADVISE64
-#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
-#define __ARCH_WANT_SYS_NICE
-#define __ARCH_WANT_SYS_OLD_GETRLIMIT
-#define __ARCH_WANT_SYS_OLD_UNAME
-#define __ARCH_WANT_SYS_OLDUMOUNT
-#define __ARCH_WANT_SYS_SIGPENDING
-#define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_TIME
-#define __ARCH_WANT_COMPAT_SYS_TIME
-#endif /* __NO_STUBS */
-
-#ifdef __KERNEL__
-
-#ifndef COMPILE_OFFSETS
-#include <asm/asm-offsets.h>
-#define NR_syscalls (__NR_syscall_max + 1)
-#endif
-
-/*
- * "Conditional" syscalls
- *
- * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
- * but it doesn't work on all toolchains, so we just do it by hand
- */
-#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_UNISTD_64_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 8e862aa..becf47b 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -65,7 +65,7 @@
* UV2: Bit 19 selects between
* (0): 10 microsecond timebase and
* (1): 80 microseconds
- * we're using 655us, similar to UV1: 65 units of 10us
+ * we're using 560us, similar to UV1: 65 units of 10us
*/
#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL)
#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL)
@@ -167,6 +167,7 @@
#define FLUSH_RETRY_TIMEOUT 2
#define FLUSH_GIVEUP 3
#define FLUSH_COMPLETE 4
+#define FLUSH_RETRY_BUSYBUG 5
/*
* tuning the action when the numalink network is extremely delayed
@@ -235,10 +236,10 @@ struct bau_msg_payload {
/*
- * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
+ * UV1 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
* see table 4.2.3.0.1 in broacast_assist spec.
*/
-struct bau_msg_header {
+struct uv1_bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
unsigned int base_dest_nasid:15; /* nasid of the first bit */
@@ -318,19 +319,87 @@ struct bau_msg_header {
};
/*
+ * UV2 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
+ * see figure 9-2 of harp_sys.pdf
+ */
+struct uv2_bau_msg_header {
+ unsigned int base_dest_nasid:15; /* nasid of the first bit */
+ /* bits 14:0 */ /* in uvhub map */
+ unsigned int dest_subnodeid:5; /* must be 0x10, for the LB */
+ /* bits 19:15 */
+ unsigned int rsvd_1:1; /* must be zero */
+ /* bit 20 */
+ /* Address bits 59:21 */
+ /* bits 25:2 of address (44:21) are payload */
+ /* these next 24 bits become bytes 12-14 of msg */
+ /* bits 28:21 land in byte 12 */
+ unsigned int replied_to:1; /* sent as 0 by the source to
+ byte 12 */
+ /* bit 21 */
+ unsigned int msg_type:3; /* software type of the
+ message */
+ /* bits 24:22 */
+ unsigned int canceled:1; /* message canceled, resource
+ is to be freed*/
+ /* bit 25 */
+ unsigned int payload_1:3; /* not currently used */
+ /* bits 28:26 */
+
+ /* bits 36:29 land in byte 13 */
+ unsigned int payload_2a:3; /* not currently used */
+ unsigned int payload_2b:5; /* not currently used */
+ /* bits 36:29 */
+
+ /* bits 44:37 land in byte 14 */
+ unsigned int payload_3:8; /* not currently used */
+ /* bits 44:37 */
+
+ unsigned int rsvd_2:7; /* reserved */
+ /* bits 51:45 */
+ unsigned int swack_flag:1; /* software acknowledge flag */
+ /* bit 52 */
+ unsigned int rsvd_3a:3; /* must be zero */
+ unsigned int rsvd_3b:8; /* must be zero */
+ unsigned int rsvd_3c:8; /* must be zero */
+ unsigned int rsvd_3d:3; /* must be zero */
+ /* bits 74:53 */
+ unsigned int fairness:3; /* usually zero */
+ /* bits 77:75 */
+
+ unsigned int sequence:16; /* message sequence number */
+ /* bits 93:78 Suppl_A */
+ unsigned int chaining:1; /* next descriptor is part of
+ this activation*/
+ /* bit 94 */
+ unsigned int multilevel:1; /* multi-level multicast
+ format */
+ /* bit 95 */
+ unsigned int rsvd_4:24; /* ordered / source node /
+ source subnode / aging
+ must be zero */
+ /* bits 119:96 */
+ unsigned int command:8; /* message type */
+ /* bits 127:120 */
+};
+
+/*
* The activation descriptor:
* The format of the message to send, plus all accompanying control
* Should be 64 bytes
*/
struct bau_desc {
- struct pnmask distribution;
+ struct pnmask distribution;
/*
* message template, consisting of header and payload:
*/
- struct bau_msg_header header;
- struct bau_msg_payload payload;
+ union bau_msg_header {
+ struct uv1_bau_msg_header uv1_hdr;
+ struct uv2_bau_msg_header uv2_hdr;
+ } header;
+
+ struct bau_msg_payload payload;
};
-/*
+/* UV1:
* -payload-- ---------header------
* bytes 0-11 bits 41-56 bits 58-81
* A B (2) C (3)
@@ -340,6 +409,16 @@ struct bau_desc {
* bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
* ------------payload queue-----------
*/
+/* UV2:
+ * -payload-- ---------header------
+ * bytes 0-11 bits 70-78 bits 21-44
+ * A B (2) C (3)
+ *
+ * A/B/C are moved to:
+ * A C B
+ * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
+ * ------------payload queue-----------
+ */
/*
* The payload queue on the destination side is an array of these.
@@ -385,7 +464,6 @@ struct bau_pq_entry {
struct msg_desc {
struct bau_pq_entry *msg;
int msg_slot;
- int swack_slot;
struct bau_pq_entry *queue_first;
struct bau_pq_entry *queue_last;
};
@@ -405,6 +483,7 @@ struct ptc_stats {
requests */
unsigned long s_stimeout; /* source side timeouts */
unsigned long s_dtimeout; /* destination side timeouts */
+ unsigned long s_strongnacks; /* number of strong nack's */
unsigned long s_time; /* time spent in sending side */
unsigned long s_retriesok; /* successful retries */
unsigned long s_ntargcpu; /* total number of cpu's
@@ -439,6 +518,9 @@ struct ptc_stats {
unsigned long s_retry_messages; /* retry broadcasts */
unsigned long s_bau_reenabled; /* for bau enable/disable */
unsigned long s_bau_disabled; /* for bau enable/disable */
+ unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
+ unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
+ unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
/* destination statistics */
unsigned long d_alltlb; /* times all tlb's on this
cpu were flushed */
@@ -511,9 +593,12 @@ struct bau_control {
short osnode;
short uvhub_cpu;
short uvhub;
+ short uvhub_version;
short cpus_in_socket;
short cpus_in_uvhub;
short partition_base_pnode;
+ short using_desc; /* an index, like uvhub_cpu */
+ unsigned int inuse_map;
unsigned short message_number;
unsigned short uvhub_quiesce;
short socket_acknowledge_count[DEST_Q_SIZE];
@@ -531,6 +616,7 @@ struct bau_control {
int cong_response_us;
int cong_reps;
int cong_period;
+ unsigned long clocks_per_100_usec;
cycles_t period_time;
long period_requests;
struct hub_and_pnode *thp;
@@ -591,6 +677,11 @@ static inline void write_mmr_sw_ack(unsigned long mr)
uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
}
+static inline void write_gmmr_sw_ack(int pnode, unsigned long mr)
+{
+ write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
+}
+
static inline unsigned long read_mmr_sw_ack(void)
{
return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 54a13aae..21f7385 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -318,13 +318,13 @@ uv_gpa_in_mmr_space(unsigned long gpa)
/* UV global physical address --> socket phys RAM */
static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa)
{
- unsigned long paddr = gpa & uv_hub_info->gpa_mask;
+ unsigned long paddr;
unsigned long remap_base = uv_hub_info->lowmem_remap_base;
unsigned long remap_top = uv_hub_info->lowmem_remap_top;
gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
- gpa = gpa & uv_hub_info->gpa_mask;
+ paddr = gpa & uv_hub_info->gpa_mask;
if (paddr >= remap_base && paddr < remap_base + remap_top)
paddr -= remap_base;
return paddr;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8baca3c..532d2e0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,7 +25,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
-obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
+obj-y += syscall_$(BITS).o
+obj-$(CONFIG_X86_64) += vsyscall_64.o
obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
@@ -68,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
@@ -80,6 +82,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
+obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ce664f3..406ed77 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void)
#ifdef CONFIG_ACPI_HOTPLUG_CPU
#include <acpi/processor.h>
-static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 8c3cdde..359b689 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -180,6 +180,7 @@ static struct apic apic_flat = {
.name = "flat",
.probe = flat_probe,
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
@@ -337,6 +338,7 @@ static struct apic apic_physflat = {
.name = "physical flat",
.probe = physflat_probe,
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 775b82b..634ae6c 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -124,6 +124,7 @@ struct apic apic_noop = {
.probe = noop_probe,
.acpi_madt_oem_check = NULL,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = noop_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 09d3d8c..d9ea5f3 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -56,6 +56,12 @@ static unsigned int read_xapic_id(void)
return get_apic_id(apic_read(APIC_ID));
}
+static int numachip_apic_id_valid(int apicid)
+{
+ /* Trust what bootloader passes in MADT */
+ return 1;
+}
+
static int numachip_apic_id_registered(void)
{
return physid_isset(read_xapic_id(), phys_cpu_present_map);
@@ -223,10 +229,11 @@ static int __init numachip_system_init(void)
}
early_initcall(numachip_system_init);
-static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int __cpuinit numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strncmp(oem_id, "NUMASC", 6)) {
numachip_system = 1;
+ setup_force_cpu_cap(X86_FEATURE_X2APIC);
return 1;
}
@@ -238,6 +245,7 @@ static struct apic apic_numachip __refconst = {
.name = "NumaConnect system",
.probe = numachip_probe,
.acpi_madt_oem_check = numachip_acpi_madt_oem_check,
+ .apic_id_valid = numachip_apic_id_valid,
.apic_id_registered = numachip_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 521bead..0cdec70 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -198,6 +198,7 @@ static struct apic apic_bigsmp = {
.name = "bigsmp",
.probe = probe_bigsmp,
.acpi_madt_oem_check = NULL,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = bigsmp_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 5d513bc..e42d1d3b9 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -625,6 +625,7 @@ static struct apic __refdata apic_es7000_cluster = {
.name = "es7000",
.probe = probe_es7000,
.acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = es7000_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
@@ -690,6 +691,7 @@ static struct apic __refdata apic_es7000 = {
.name = "es7000",
.probe = probe_es7000,
.acpi_madt_oem_check = es7000_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = es7000_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fb07275..6d10a66 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3967,18 +3967,36 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
static __init int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
- "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+ pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+ MAX_IO_APICS, nr_ioapics);
return 1;
}
if (!address) {
- printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
- " found in table, skipping!\n");
+ pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
return 1;
}
return 0;
}
+static __init int bad_ioapic_register(int idx)
+{
+ union IO_APIC_reg_00 reg_00;
+ union IO_APIC_reg_01 reg_01;
+ union IO_APIC_reg_02 reg_02;
+
+ reg_00.raw = io_apic_read(idx, 0);
+ reg_01.raw = io_apic_read(idx, 1);
+ reg_02.raw = io_apic_read(idx, 2);
+
+ if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
+ pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
+ mpc_ioapic_addr(idx));
+ return 1;
+ }
+
+ return 0;
+}
+
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
{
int idx = 0;
@@ -3995,6 +4013,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
ioapics[idx].mp_config.apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+
+ if (bad_ioapic_register(idx)) {
+ clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+ return;
+ }
+
ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
@@ -4015,10 +4039,10 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
if (gsi_cfg->gsi_end >= gsi_top)
gsi_top = gsi_cfg->gsi_end + 1;
- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
- "GSI %d-%d\n", idx, mpc_ioapic_id(idx),
- mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
- gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+ pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
+ idx, mpc_ioapic_id(idx),
+ mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
+ gsi_cfg->gsi_base, gsi_cfg->gsi_end);
nr_ioapics++;
}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index c4a61ca..00d2422 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -478,6 +478,7 @@ static struct apic __refdata apic_numaq = {
.name = "NUMAQ",
.probe = probe_numaq,
.acpi_madt_oem_check = NULL,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = numaq_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0787bb3..ff2c1b9 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -92,6 +92,7 @@ static struct apic apic_default = {
.name = "default",
.probe = probe_default,
.acpi_madt_oem_check = NULL,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = default_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 1911442..fea000b 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -496,6 +496,7 @@ static struct apic apic_summit = {
.name = "summit",
.probe = probe_summit,
.acpi_madt_oem_check = summit_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = summit_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 5007958..9193713 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -213,6 +213,7 @@ static struct apic apic_x2apic_cluster = {
.name = "cluster x2apic",
.probe = x2apic_cluster_probe,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = x2apic_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f5373df..bcd1db6 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -119,6 +119,7 @@ static struct apic apic_x2apic_phys = {
.name = "physical x2apic",
.probe = x2apic_phys_probe,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = x2apic_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 9d59bba..fc47714 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -351,6 +351,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
.name = "UV large system",
.probe = uv_probe,
.acpi_madt_oem_check = uv_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = uv_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
@@ -769,7 +770,12 @@ void __init uv_system_init(void)
for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
uv_possible_blades +=
hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
- printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+ /* uv_num_possible_blades() is really the hub count */
+ printk(KERN_INFO "UV: Found %d blades, %d hubs\n",
+ is_uv1_hub() ? uv_num_possible_blades() :
+ (uv_num_possible_blades() + 1) / 2,
+ uv_num_possible_blades());
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
uv_blade_info = kzalloc(bytes, GFP_KERNEL);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index a46bd38..5d56931 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -383,21 +383,21 @@ static int ignore_sys_suspend;
static int ignore_normal_resume;
static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
-static int debug __read_mostly;
-static int smp __read_mostly;
+static bool debug __read_mostly;
+static bool smp __read_mostly;
static int apm_disabled = -1;
#ifdef CONFIG_SMP
-static int power_off;
+static bool power_off;
#else
-static int power_off = 1;
+static bool power_off = 1;
#endif
-static int realmode_power_off;
+static bool realmode_power_off;
#ifdef CONFIG_APM_ALLOW_INTS
-static int allow_ints = 1;
+static bool allow_ints = 1;
#else
-static int allow_ints;
+static bool allow_ints;
#endif
-static int broken_psr;
+static bool broken_psr;
static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
@@ -1234,8 +1234,7 @@ static int suspend(int vetoable)
struct apm_user *as;
dpm_suspend_start(PMSG_SUSPEND);
-
- dpm_suspend_noirq(PMSG_SUSPEND);
+ dpm_suspend_end(PMSG_SUSPEND);
local_irq_disable();
syscore_suspend();
@@ -1259,9 +1258,9 @@ static int suspend(int vetoable)
syscore_resume();
local_irq_enable();
- dpm_resume_noirq(PMSG_RESUME);
-
+ dpm_resume_start(PMSG_RESUME);
dpm_resume_end(PMSG_RESUME);
+
queue_event(APM_NORMAL_RESUME, NULL);
spin_lock(&user_list_lock);
for (as = user_list; as != NULL; as = as->next) {
@@ -1277,7 +1276,7 @@ static void standby(void)
{
int err;
- dpm_suspend_noirq(PMSG_SUSPEND);
+ dpm_suspend_end(PMSG_SUSPEND);
local_irq_disable();
syscore_suspend();
@@ -1291,7 +1290,7 @@ static void standby(void)
syscore_resume();
local_irq_enable();
- dpm_resume_noirq(PMSG_RESUME);
+ dpm_resume_start(PMSG_RESUME);
}
static apm_event_t get_event(void)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 4f13faf..68de2dc 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -67,4 +67,6 @@ void common(void) {
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version);
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+ OFFSET(BP_pref_address, boot_params, hdr.pref_address);
+ OFFSET(BP_code32_start, boot_params, hdr.code32_start);
}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 395a10e..85d98ab 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -3,6 +3,11 @@
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
+#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+static char syscalls[] = {
+#include <asm/syscalls_32.h>
+};
+
/* workaround for a warning with -Wmissing-prototypes */
void foo(void);
@@ -76,4 +81,7 @@ void foo(void)
OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
#endif
+ BLANK();
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+ DEFINE(NR_syscalls, sizeof(syscalls));
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index e72a119..834e897 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,11 +1,12 @@
#include <asm/ia32.h>
-#define __NO_STUBS 1
-#undef __SYSCALL
-#undef _ASM_X86_UNISTD_64_H
-#define __SYSCALL(nr, sym) [nr] = 1,
-static char syscalls[] = {
-#include <asm/unistd.h>
+#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
+static char syscalls_64[] = {
+#include <asm/syscalls_64.h>
+};
+#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+static char syscalls_ia32[] = {
+#include <asm/syscalls_32.h>
};
int main(void)
@@ -72,7 +73,11 @@ int main(void)
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
BLANK();
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+ DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
+ DEFINE(NR_syscalls, sizeof(syscalls_64));
+
+ DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1);
+ DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
return 0;
}
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 25f24dc..6ab6aa2 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o sched.o mshyperv.o
obj-y += rdrand.o
+obj-y += match.o
obj-$(CONFIG_X86_32) += bugs.o
obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4..0a44b90 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/io.h>
+#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ if (!check_tsc_unstable())
+ sched_clock_stable = 1;
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 850f296..ade9c79 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -28,6 +28,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/mtrr.h>
#include <linux/numa.h>
#include <asm/asm.h>
@@ -933,7 +934,7 @@ static const struct msr_range msr_range_array[] __cpuinitconst = {
{ 0xc0011000, 0xc001103b},
};
-static void __cpuinit print_cpu_msr(void)
+static void __cpuinit __print_cpu_msr(void)
{
unsigned index_min, index_max;
unsigned index;
@@ -997,13 +998,13 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
else
printk(KERN_CONT "\n");
-#ifdef CONFIG_SMP
+ __print_cpu_msr();
+}
+
+void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c)
+{
if (c->cpu_index < show_msr)
- print_cpu_msr();
-#else
- if (show_msr)
- print_cpu_msr();
-#endif
+ __print_cpu_msr();
}
static __init int setup_disablecpuid(char *arg)
@@ -1021,6 +1022,8 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) nmi_idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
@@ -1042,6 +1045,8 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+
/*
* Special IST stacks which the CPU switches to when it calls
* an IST-marked descriptor entry. Up to 7 stacks (hardware
@@ -1085,10 +1090,31 @@ unsigned long kernel_eflags;
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+DEFINE_PER_CPU(int, debug_stack_usage);
+
+int is_debug_stack(unsigned long addr)
+{
+ return __get_cpu_var(debug_stack_usage) ||
+ (addr <= __get_cpu_var(debug_stack_addr) &&
+ addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+}
+
+void debug_stack_set_zero(void)
+{
+ load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+ load_idt((const struct desc_ptr *)&idt_descr);
+}
+
#else /* CONFIG_X86_64 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
@@ -1212,6 +1238,8 @@ void __cpuinit cpu_init(void)
estacks += exception_stack_sizes[v];
oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
+ if (v == DEBUG_STACK-1)
+ per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
}
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b45e5e..73d08ed 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -326,8 +326,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
-static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
- int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
int node;
@@ -725,14 +724,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+
+static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
{
- struct _cpuid4_info *this_leaf, *sibling_leaf;
- unsigned long num_threads_sharing;
- int index_msb, i, sibling;
+ struct _cpuid4_info *this_leaf;
+ int ret, i, sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
+ ret = 0;
+ if (index == 3) {
+ ret = 1;
for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
@@ -743,8 +744,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
- return;
+ } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
+ ret = 1;
+ for_each_cpu(i, cpu_sibling_mask(cpu)) {
+ if (!per_cpu(ici_cpuid4_info, i))
+ continue;
+ this_leaf = CPUID4_INFO_IDX(i, index);
+ for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+ if (!cpu_online(sibling))
+ continue;
+ set_bit(sibling, this_leaf->shared_cpu_map);
+ }
+ }
}
+
+ return ret;
+}
+
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+ struct _cpuid4_info *this_leaf, *sibling_leaf;
+ unsigned long num_threads_sharing;
+ int index_msb, i;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (cache_shared_amd_cpu_map_setup(cpu, index))
+ return;
+ }
+
this_leaf = CPUID4_INFO_IDX(cpu, index);
num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
new file mode 100644
index 0000000..5502b28
--- /dev/null
+++ b/arch/x86/kernel/cpu/match.c
@@ -0,0 +1,91 @@
+#include <asm/cpu_device_id.h>
+#include <asm/processor.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/**
+ * x86_match_cpu - match current CPU again an array of x86_cpu_ids
+ * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
+ * {}.
+ *
+ * Return the entry if the current CPU matches the entries in the
+ * passed x86_cpu_id match table. Otherwise NULL. The match table
+ * contains vendor (X86_VENDOR_*), family, model and feature bits or
+ * respective wildcard entries.
+ *
+ * A typical table entry would be to match a specific CPU
+ * { X86_VENDOR_INTEL, 6, 0x12 }
+ * or to match a specific CPU feature
+ * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
+ * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
+ *
+ * Arrays used to match for this should also be declared using
+ * MODULE_DEVICE_TABLE(x86_cpu, ...)
+ *
+ * This always matches against the boot cpu, assuming models and features are
+ * consistent over all CPUs.
+ */
+const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
+{
+ const struct x86_cpu_id *m;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
+ if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
+ continue;
+ if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
+ continue;
+ if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
+ continue;
+ if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
+ continue;
+ return m;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(x86_match_cpu);
+
+ssize_t arch_print_cpu_modalias(struct device *dev,
+ struct device_attribute *attr,
+ char *bufptr)
+{
+ int size = PAGE_SIZE;
+ int i, n;
+ char *buf = bufptr;
+
+ n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:"
+ "model:%04X:feature:",
+ boot_cpu_data.x86_vendor,
+ boot_cpu_data.x86,
+ boot_cpu_data.x86_model);
+ size -= n;
+ buf += n;
+ size -= 1;
+ for (i = 0; i < NCAPINTS*32; i++) {
+ if (boot_cpu_has(i)) {
+ n = snprintf(buf, size, ",%04X", i);
+ if (n >= size) {
+ WARN(1, "x86 features overflow page\n");
+ break;
+ }
+ size -= n;
+ buf += n;
+ }
+ }
+ *buf++ = '\n';
+ return buf - bufptr;
+}
+
+int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (buf) {
+ arch_print_cpu_modalias(NULL, NULL, buf);
+ add_uevent_var(env, "MODALIAS=%s", buf);
+ kfree(buf);
+ }
+ return 0;
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 7395d5f..0c82091 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -54,7 +54,14 @@ static struct severity {
#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
#define MCACOD 0xffff
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
+#define MCACOD_DATA 0x0134 /* Data Load */
+#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
MCESEV(
NO, "Invalid",
@@ -102,11 +109,24 @@ static struct severity {
SER, BITCLR(MCI_STATUS_S)
),
- /* AR add known MCACODs here */
MCESEV(
PANIC, "Action required with lost events",
SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
),
+
+ /* known AR MCACODs: */
+#ifdef CONFIG_MEMORY_FAILURE
+ MCESEV(
+ KEEP, "HT thread notices Action required: data load error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ MCGMASK(MCG_STATUS_EIPV, 0)
+ ),
+ MCESEV(
+ AR, "Action required: data load error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ USER
+ ),
+#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
@@ -115,11 +135,11 @@ static struct severity {
/* known AO MCACODs: */
MCESEV(
AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
),
MCESEV(
SOME, "Action optional: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f22a9f7..d086a09 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void)
{
unsigned int next, i, prev = 0;
- next = rcu_dereference_check_mce(mcelog.next);
+ next = ACCESS_ONCE(mcelog.next);
do {
struct mce *m;
@@ -540,6 +540,27 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&__get_cpu_var(mce_irq_work));
}
+/*
+ * Read ADDR and MISC registers.
+ */
+static void mce_read_aux(struct mce *m, int i)
+{
+ if (m->status & MCI_STATUS_MISCV)
+ m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+ if (m->status & MCI_STATUS_ADDRV) {
+ m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+
+ /*
+ * Mask the reported address by the reported granularity.
+ */
+ if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
+ u8 shift = MCI_MISC_ADDR_LSB(m->misc);
+ m->addr >>= shift;
+ m->addr <<= shift;
+ }
+ }
+}
+
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
@@ -590,10 +611,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
- if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
- if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ mce_read_aux(&m, i);
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
@@ -917,6 +935,49 @@ static void mce_clear_state(unsigned long *toclear)
}
/*
+ * Need to save faulting physical address associated with a process
+ * in the machine check handler some place where we can grab it back
+ * later in mce_notify_process()
+ */
+#define MCE_INFO_MAX 16
+
+struct mce_info {
+ atomic_t inuse;
+ struct task_struct *t;
+ __u64 paddr;
+} mce_info[MCE_INFO_MAX];
+
+static void mce_save_info(__u64 addr)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
+ if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
+ mi->t = current;
+ mi->paddr = addr;
+ return;
+ }
+ }
+
+ mce_panic("Too many concurrent recoverable errors", NULL, NULL);
+}
+
+static struct mce_info *mce_find_info(void)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
+ if (atomic_read(&mi->inuse) && mi->t == current)
+ return mi;
+ return NULL;
+}
+
+static void mce_clear_info(struct mce_info *mi)
+{
+ atomic_set(&mi->inuse, 0);
+}
+
+/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
*
@@ -969,7 +1030,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
barrier();
/*
- * When no restart IP must always kill or panic.
+ * When no restart IP might need to kill or panic.
+ * Assume the worst for now, but if we find the
+ * severity is MCE_AR_SEVERITY we have other options.
*/
if (!(m.mcgstatus & MCG_STATUS_RIPV))
kill_it = 1;
@@ -1023,16 +1086,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
continue;
}
- /*
- * Kill on action required.
- */
- if (severity == MCE_AR_SEVERITY)
- kill_it = 1;
-
- if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
- if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ mce_read_aux(&m, i);
/*
* Action optional error. Queue address for later processing.
@@ -1052,6 +1106,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
}
+ /* mce_clear_state will clear *final, save locally for use later */
+ m = *final;
+
if (!no_way_out)
mce_clear_state(toclear);
@@ -1063,27 +1120,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
no_way_out = worst >= MCE_PANIC_SEVERITY;
/*
- * If we have decided that we just CAN'T continue, and the user
- * has not set tolerant to an insane level, give up and die.
- *
- * This is mainly used in the case when the system doesn't
- * support MCE broadcasting or it has been disabled.
+ * At insane "tolerant" levels we take no action. Otherwise
+ * we only die if we have no other choice. For less serious
+ * issues we try to recover, or limit damage to the current
+ * process.
*/
- if (no_way_out && tolerant < 3)
- mce_panic("Fatal machine check on current CPU", final, msg);
-
- /*
- * If the error seems to be unrecoverable, something should be
- * done. Try to kill as little as possible. If we can kill just
- * one task, do that. If the user has set the tolerance very
- * high, don't try to do anything at all.
- */
-
- if (kill_it && tolerant < 3)
- force_sig(SIGBUS, current);
-
- /* notify userspace ASAP */
- set_thread_flag(TIF_MCE_NOTIFY);
+ if (tolerant < 3) {
+ if (no_way_out)
+ mce_panic("Fatal machine check on current CPU", &m, msg);
+ if (worst == MCE_AR_SEVERITY) {
+ /* schedule action before return to userland */
+ mce_save_info(m.addr);
+ set_thread_flag(TIF_MCE_NOTIFY);
+ } else if (kill_it) {
+ force_sig(SIGBUS, current);
+ }
+ }
if (worst > 0)
mce_report_event(regs);
@@ -1094,34 +1146,57 @@ out:
}
EXPORT_SYMBOL_GPL(do_machine_check);
-/* dummy to break dependency. actual code is in mm/memory-failure.c */
-void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+#ifndef CONFIG_MEMORY_FAILURE
+int memory_failure(unsigned long pfn, int vector, int flags)
{
- printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
+ /* mce_severity() should not hand us an ACTION_REQUIRED error */
+ BUG_ON(flags & MF_ACTION_REQUIRED);
+ printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
+ "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
+
+ return 0;
}
+#endif
/*
- * Called after mce notification in process context. This code
- * is allowed to sleep. Call the high level VM handler to process
- * any corrupted pages.
- * Assume that the work queue code only calls this one at a time
- * per CPU.
- * Note we don't disable preemption, so this code might run on the wrong
- * CPU. In this case the event is picked up by the scheduled work queue.
- * This is merely a fast path to expedite processing in some common
- * cases.
+ * Called in process context that interrupted by MCE and marked with
+ * TIF_MCE_NOTIFY, just before returning to erroneous userland.
+ * This code is allowed to sleep.
+ * Attempt possible recovery such as calling the high level VM handler to
+ * process any corrupted pages, and kill/signal current process if required.
+ * Action required errors are handled here.
*/
void mce_notify_process(void)
{
unsigned long pfn;
- mce_notify_irq();
- while (mce_ring_get(&pfn))
- memory_failure(pfn, MCE_VECTOR);
+ struct mce_info *mi = mce_find_info();
+
+ if (!mi)
+ mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
+ pfn = mi->paddr >> PAGE_SHIFT;
+
+ clear_thread_flag(TIF_MCE_NOTIFY);
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx",
+ mi->paddr);
+ if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
+ pr_err("Memory error not recovered");
+ force_sig(SIGBUS, current);
+ }
+ mce_clear_info(mi);
}
+/*
+ * Action optional processing happens here (picking up
+ * from the list of faulting pages that do_machine_check()
+ * placed into the "ring").
+ */
static void mce_process_work(struct work_struct *dummy)
{
- mce_notify_process();
+ unsigned long pfn;
+
+ while (mce_ring_get(&pfn))
+ memory_failure(pfn, MCE_VECTOR, 0);
}
#ifdef CONFIG_X86_MCE_INTEL
@@ -1211,8 +1286,6 @@ int mce_notify_irq(void)
/* Not more than two messages every minute */
static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
- clear_thread_flag(TIF_MCE_NOTIFY);
-
if (test_and_clear_bit(0, &mce_need_notify)) {
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
@@ -1541,6 +1614,12 @@ static int __mce_read_apei(char __user **ubuf, size_t usize)
/* Error or no more MCE record */
if (rc <= 0) {
mce_apei_read_done = 1;
+ /*
+ * When ERST is disabled, mce_chrdev_read() should return
+ * "no record" instead of "no device."
+ */
+ if (rc == -ENODEV)
+ return 0;
return rc;
}
rc = -EFAULT;
@@ -1859,7 +1938,7 @@ static struct bus_type mce_subsys = {
.dev_name = "machinecheck",
};
-DEFINE_PER_CPU(struct device, mce_device);
+DEFINE_PER_CPU(struct device *, mce_device);
__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -2001,19 +2080,27 @@ static struct device_attribute *mce_device_attrs[] = {
static cpumask_var_t mce_device_initialized;
+static void mce_device_release(struct device *dev)
+{
+ kfree(dev);
+}
+
/* Per cpu device init. All of the cpus still share the same ctrl bank: */
static __cpuinit int mce_device_create(unsigned int cpu)
{
- struct device *dev = &per_cpu(mce_device, cpu);
+ struct device *dev;
int err;
int i, j;
if (!mce_available(&boot_cpu_data))
return -EIO;
- memset(&dev->kobj, 0, sizeof(struct kobject));
+ dev = kzalloc(sizeof *dev, GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
dev->id = cpu;
dev->bus = &mce_subsys;
+ dev->release = &mce_device_release;
err = device_register(dev);
if (err)
@@ -2030,6 +2117,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
goto error2;
}
cpumask_set_cpu(cpu, mce_device_initialized);
+ per_cpu(mce_device, cpu) = dev;
return 0;
error2:
@@ -2046,7 +2134,7 @@ error:
static __cpuinit void mce_device_remove(unsigned int cpu)
{
- struct device *dev = &per_cpu(mce_device, cpu);
+ struct device *dev = per_cpu(mce_device, cpu);
int i;
if (!cpumask_test_cpu(cpu, mce_device_initialized))
@@ -2060,6 +2148,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
device_unregister(dev);
cpumask_clear_cpu(cpu, mce_device_initialized);
+ per_cpu(mce_device, cpu) = NULL;
}
/* Make sure there are no machine checks on offlined CPUs. */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ba0b94a..99b5717 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -523,10 +523,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
int i, err = 0;
struct threshold_bank *b = NULL;
+ struct device *dev = per_cpu(mce_device, cpu);
char name[32];
sprintf(name, "threshold_bank%i", bank);
+#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
i = cpumask_first(cpu_llc_shared_mask(cpu));
@@ -543,8 +545,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (!b)
goto out;
- err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj,
- b->kobj, name);
+ err = sysfs_create_link(&dev->kobj, b->kobj, name);
if (err)
goto out;
@@ -553,6 +554,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}
+#endif
b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
if (!b) {
@@ -565,7 +567,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}
- b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj);
+ b->kobj = kobject_create_and_add(name, &dev->kobj);
if (!b->kobj)
goto out_free;
@@ -585,8 +587,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (i == cpu)
continue;
- err = sysfs_create_link(&per_cpu(mce_device, i).kobj,
- b->kobj, name);
+ dev = per_cpu(mce_device, i);
+ if (dev)
+ err = sysfs_create_link(&dev->kobj,b->kobj, name);
if (err)
goto out;
@@ -649,6 +652,7 @@ static void deallocate_threshold_block(unsigned int cpu,
static void threshold_remove_bank(unsigned int cpu, int bank)
{
struct threshold_bank *b;
+ struct device *dev;
char name[32];
int i = 0;
@@ -663,7 +667,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#ifdef CONFIG_SMP
/* sibling symlink */
if (shared_bank[bank] && b->blocks->cpu != cpu) {
- sysfs_remove_link(&per_cpu(mce_device, cpu).kobj, name);
+ dev = per_cpu(mce_device, cpu);
+ sysfs_remove_link(&dev->kobj, name);
per_cpu(threshold_banks, cpu)[bank] = NULL;
return;
@@ -675,7 +680,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
if (i == cpu)
continue;
- sysfs_remove_link(&per_cpu(mce_device, i).kobj, name);
+ dev = per_cpu(mce_device, i);
+ if (dev)
+ sysfs_remove_link(&dev->kobj, name);
per_cpu(threshold_banks, i)[bank] = NULL;
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5adce10..0a18d16 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
+#include <linux/device.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -31,6 +32,7 @@
#include <asm/compat.h>
#include <asm/smp.h>
#include <asm/alternative.h>
+#include <asm/timer.h>
#include "perf_event.h"
@@ -351,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
return 0;
}
+/*
+ * check that branch_sample_type is compatible with
+ * settings needed for precise_ip > 1 which implies
+ * using the LBR to capture ALL taken branches at the
+ * priv levels of the measurement
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+ u64 m = event->attr.branch_sample_type;
+ u64 b = 0;
+
+ /* must capture all branches */
+ if (!(m & PERF_SAMPLE_BRANCH_ANY))
+ return 0;
+
+ m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_user)
+ b |= PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_kernel)
+ b |= PERF_SAMPLE_BRANCH_KERNEL;
+
+ /*
+ * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
+ */
+
+ return m == b;
+}
+
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
@@ -367,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
+ /*
+ * check that PEBS LBR correction does not conflict with
+ * whatever the user is asking with attr->branch_sample_type
+ */
+ if (event->attr.precise_ip > 1) {
+ u64 *br_type = &event->attr.branch_sample_type;
+
+ if (has_branch_stack(event)) {
+ if (!precise_br_compat(event))
+ return -EOPNOTSUPP;
+
+ /* branch_sample_type is compatible */
+
+ } else {
+ /*
+ * user did not specify branch_sample_type
+ *
+ * For PEBS fixups, we capture all
+ * the branches at the priv level of the
+ * event.
+ */
+ *br_type = PERF_SAMPLE_BRANCH_ANY;
+
+ if (!event->attr.exclude_user)
+ *br_type |= PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_kernel)
+ *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+ }
+ }
}
/*
@@ -424,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ /* mark not used */
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
return x86_pmu.hw_config(event);
}
@@ -1210,6 +1276,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
case CPU_STARTING:
+ if (x86_pmu.attr_rdpmc)
+ set_in_cr4(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
@@ -1319,6 +1387,8 @@ static int __init init_hw_perf_events(void)
}
}
+ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1542,23 +1612,106 @@ static int x86_pmu_event_init(struct perf_event *event)
return err;
}
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+
+ if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
+ idx -= X86_PMC_IDX_FIXED;
+ idx |= 1 << 30;
+ }
+
+ return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static void change_rdpmc(void *info)
+{
+ bool enable = !!(unsigned long)info;
+
+ if (enable)
+ set_in_cr4(X86_CR4_PCE);
+ else
+ clear_in_cr4(X86_CR4_PCE);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val = simple_strtoul(buf, NULL, 0);
+
+ if (!!val != !!x86_pmu.attr_rdpmc) {
+ x86_pmu.attr_rdpmc = !!val;
+ smp_call_function(change_rdpmc, (void *)val, 1);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+ &dev_attr_rdpmc.attr,
+ NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+ .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+ &x86_pmu_attr_group,
+ NULL,
+};
+
+static void x86_pmu_flush_branch_stack(void)
+{
+ if (x86_pmu.flush_branch_stack)
+ x86_pmu.flush_branch_stack();
+}
+
static struct pmu pmu = {
- .pmu_enable = x86_pmu_enable,
- .pmu_disable = x86_pmu_disable,
+ .pmu_enable = x86_pmu_enable,
+ .pmu_disable = x86_pmu_disable,
+
+ .attr_groups = x86_pmu_attr_groups,
.event_init = x86_pmu_event_init,
- .add = x86_pmu_add,
- .del = x86_pmu_del,
- .start = x86_pmu_start,
- .stop = x86_pmu_stop,
- .read = x86_pmu_read,
+ .add = x86_pmu_add,
+ .del = x86_pmu_del,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
+ .read = x86_pmu_read,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
+
+ .event_idx = x86_pmu_event_idx,
+ .flush_branch_stack = x86_pmu_flush_branch_stack,
};
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+ return;
+
+ userpg->time_mult = this_cpu_read(cyc2ns);
+ userpg->time_shift = CYC2NS_SCALE_FACTOR;
+ userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
/*
* callchain support
*/
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8944062..8484e77 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
+ EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_MAX /* number of entries needed */
};
@@ -130,6 +131,8 @@ struct cpu_hw_events {
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ struct er_account *lbr_sel;
+ u64 br_sel;
/*
* Intel host/guest exclude bits
@@ -147,7 +150,9 @@ struct cpu_hw_events {
/*
* AMD specific bits
*/
- struct amd_nb *amd_nb;
+ struct amd_nb *amd_nb;
+ /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
+ u64 perf_ctr_virt_mask;
void *kfree_on_online;
};
@@ -266,6 +271,29 @@ struct x86_pmu_quirk {
void (*func)(void);
};
+union x86_pmu_config {
+ struct {
+ u64 event:8,
+ umask:8,
+ usr:1,
+ os:1,
+ edge:1,
+ pc:1,
+ interrupt:1,
+ __reserved1:1,
+ en:1,
+ inv:1,
+ cmask:8,
+ event2:4,
+ __reserved2:4,
+ go:1,
+ ho:1;
+ } bits;
+ u64 value;
+};
+
+#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
+
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -307,10 +335,19 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
+ /*
+ * sysfs attrs
+ */
+ int attr_rdpmc;
+
+ /*
+ * CPU Hotplug hooks
+ */
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
+ void (*flush_branch_stack)(void);
/*
* Intel Arch Perfmon v2+
@@ -332,6 +369,8 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
+ u64 lbr_sel_mask; /* LBR_SELECT valid bits */
+ const int *lbr_sel_map; /* lbr_select mappings */
/*
* Extra registers for events
@@ -417,9 +456,11 @@ void x86_pmu_disable_all(void);
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
+ u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
+
if (hwc->extra_reg.reg)
wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
- wrmsrl(hwc->config_base, hwc->config | enable_mask);
+ wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
}
void x86_pmu_enable_all(int added);
@@ -443,6 +484,15 @@ extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+ return ip > PAGE_OFFSET;
+#else
+ return (long)ip < 0;
+#endif
+}
+
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
@@ -523,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
void intel_pmu_lbr_init_atom(void);
+void intel_pmu_lbr_init_snb(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
int p4_pmu_init(void);
int p6_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 0397b23..dd002fa 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,4 +1,5 @@
#include <linux/perf_event.h>
+#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -138,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -357,7 +361,9 @@ static void amd_pmu_cpu_starting(int cpu)
struct amd_nb *nb;
int i, nb_id;
- if (boot_cpu_data.x86_max_cores < 2)
+ cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+
+ if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15)
return;
nb_id = amd_get_nb_id(cpu);
@@ -587,9 +593,9 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
.put_event_constraints = amd_put_event_constraints,
.cpu_prepare = amd_pmu_cpu_prepare,
- .cpu_starting = amd_pmu_cpu_starting,
.cpu_dead = amd_pmu_cpu_dead,
#endif
+ .cpu_starting = amd_pmu_cpu_starting,
};
__init int amd_pmu_init(void)
@@ -621,3 +627,33 @@ __init int amd_pmu_init(void)
return 0;
}
+
+void amd_pmu_enable_virt(void)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ cpuc->perf_ctr_virt_mask = 0;
+
+ /* Reload all events */
+ x86_pmu_disable_all();
+ x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
+
+void amd_pmu_disable_virt(void)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ /*
+ * We only mask out the Host-only bit so that host-only counting works
+ * when SVM is disabled. If someone sets up a guest-only counter when
+ * SVM is disabled the Guest-only bits still gets set and the counter
+ * will not count anything.
+ */
+ cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+
+ /* Reload all events */
+ x86_pmu_disable_all();
+ x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3bd37bd..6a84e7f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -385,14 +385,15 @@ static __initconst const u64 westmere_hw_cache_event_ids
#define NHM_LOCAL_DRAM (1 << 14)
#define NHM_NON_DRAM (1 << 15)
-#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_REMOTE (NHM_REMOTE_DRAM)
#define NHM_DMND_READ (NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
-#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
static __initconst const u64 nehalem_hw_cache_extra_regs
@@ -416,16 +417,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
- [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
- [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
- [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
},
},
};
@@ -727,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+ /* user explicitly requested branch sampling */
+ if (has_branch_stack(event))
+ return true;
+
+ /* implicit branch sampling to correct PEBS skid */
+ if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+ return true;
+
+ return false;
+}
+
static void intel_pmu_disable_all(void)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -881,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+ /*
+ * must disable before any actual event
+ * because any event may be combined with LBR
+ */
+ if (intel_pmu_needs_lbr_smpl(event))
+ intel_pmu_lbr_disable(event);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
@@ -935,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
intel_pmu_enable_bts(hwc->config);
return;
}
+ /*
+ * must enabled before any actual event
+ * because any event may be combined with LBR
+ */
+ if (intel_pmu_needs_lbr_smpl(event))
+ intel_pmu_lbr_enable(event);
if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1057,6 +1084,9 @@ again:
data.period = event->hw.last_period;
+ if (has_branch_stack(event))
+ data.br_stack = &cpuc->lbr_stack;
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
@@ -1123,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
*/
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+ struct perf_event *event,
+ struct hw_perf_event_extra *reg)
{
struct event_constraint *c = &emptyconstraint;
- struct hw_perf_event_extra *reg = &event->hw.extra_reg;
struct er_account *era;
unsigned long flags;
int orig_idx = reg->idx;
/* already allocated shared msr */
if (reg->alloc)
- return &unconstrained;
+ return NULL; /* call x86_get_event_constraint() */
again:
era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1186,10 @@ again:
reg->alloc = 1;
/*
- * All events using extra_reg are unconstrained.
- * Avoids calling x86_get_event_constraints()
- *
- * Must revisit if extra_reg controlling events
- * ever have constraints. Worst case we go through
- * the regular event constraint table.
+ * need to call x86_get_event_constraint()
+ * to check if associated event has constraints
*/
- c = &unconstrained;
+ c = NULL;
} else if (intel_try_alt_er(event, orig_idx)) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
@@ -1200,11 +1226,23 @@ static struct event_constraint *
intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- struct event_constraint *c = NULL;
-
- if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
- c = __intel_shared_reg_get_constraints(cpuc, event);
-
+ struct event_constraint *c = NULL, *d;
+ struct hw_perf_event_extra *xreg, *breg;
+
+ xreg = &event->hw.extra_reg;
+ if (xreg->idx != EXTRA_REG_NONE) {
+ c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+ if (c == &emptyconstraint)
+ return c;
+ }
+ breg = &event->hw.branch_reg;
+ if (breg->idx != EXTRA_REG_NONE) {
+ d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+ if (d == &emptyconstraint) {
+ __intel_shared_reg_put_constraints(cpuc, xreg);
+ c = d;
+ }
+ }
return c;
}
@@ -1252,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
reg = &event->hw.extra_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
+
+ reg = &event->hw.branch_reg;
+ if (reg->idx != EXTRA_REG_NONE)
+ __intel_shared_reg_put_constraints(cpuc, reg);
}
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1287,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
*
* Thereby we gain a PEBS capable cycle counter.
*/
- u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
}
+ if (intel_pmu_needs_lbr_smpl(event)) {
+ ret = intel_pmu_setup_lbr_filter(event);
+ if (ret)
+ return ret;
+ }
+
if (event->attr.type != PERF_TYPE_RAW)
return 0;
@@ -1431,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- if (!x86_pmu.extra_regs)
+ if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
return NOTIFY_OK;
cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1453,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
*/
intel_pmu_lbr_reset();
- if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+ cpuc->lbr_sel = NULL;
+
+ if (!cpuc->shared_regs)
return;
- for_each_cpu(i, topology_thread_cpumask(cpu)) {
- struct intel_shared_regs *pc;
+ if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+ for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ struct intel_shared_regs *pc;
- pc = per_cpu(cpu_hw_events, i).shared_regs;
- if (pc && pc->core_id == core_id) {
- cpuc->kfree_on_online = cpuc->shared_regs;
- cpuc->shared_regs = pc;
- break;
+ pc = per_cpu(cpu_hw_events, i).shared_regs;
+ if (pc && pc->core_id == core_id) {
+ cpuc->kfree_on_online = cpuc->shared_regs;
+ cpuc->shared_regs = pc;
+ break;
+ }
}
+ cpuc->shared_regs->core_id = core_id;
+ cpuc->shared_regs->refcnt++;
}
- cpuc->shared_regs->core_id = core_id;
- cpuc->shared_regs->refcnt++;
+ if (x86_pmu.lbr_sel_map)
+ cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
}
static void intel_pmu_cpu_dying(int cpu)
@@ -1486,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}
+static void intel_pmu_flush_branch_stack(void)
+{
+ /*
+ * Intel LBR does not tag entries with the
+ * PID of the current task, then we need to
+ * flush it on ctxsw
+ * For now, we simply reset it
+ */
+ if (x86_pmu.lbr_nr)
+ intel_pmu_lbr_reset();
+}
+
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -1513,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
+ .flush_branch_stack = intel_pmu_flush_branch_stack,
};
static __init void intel_clovertown_quirk(void)
@@ -1689,9 +1757,11 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_nehalem_extra_regs;
/* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
x86_add_quirk(intel_nehalem_quirk);
@@ -1726,9 +1796,11 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_HAS_RSP_1;
/* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
pr_cont("Westmere events, ");
break;
@@ -1739,7 +1811,7 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
- intel_pmu_lbr_init_nhm();
+ intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
@@ -1749,9 +1821,11 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
pr_cont("SandyBridge events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 73da6b6..7f64df1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <asm/perf_event.h>
+#include <asm/insn.h>
#include "perf_event.h"
@@ -439,10 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
cpuc->pebs_enabled |= 1ULL << hwc->idx;
- WARN_ON_ONCE(cpuc->enabled);
-
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
- intel_pmu_lbr_enable(event);
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -455,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
- intel_pmu_lbr_disable(event);
}
void intel_pmu_pebs_enable_all(void)
@@ -476,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
-#include <asm/insn.h>
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
- return ip > PAGE_OFFSET;
-#else
- return (long)ip < 0;
-#endif
-}
-
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -573,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* both formats and we don't use the other fields in this
* routine.
*/
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_core *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
@@ -603,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else
regs.flags &= ~PERF_EFLAGS_EXACT;
+ if (has_branch_stack(event))
+ data.br_stack = &cpuc->lbr_stack;
+
if (perf_event_overflow(event, &data, &regs))
x86_pmu_stop(event, 0);
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 3fab3de..520b426 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@
#include <asm/perf_event.h>
#include <asm/msr.h>
+#include <asm/insn.h>
#include "perf_event.h"
@@ -14,6 +15,100 @@ enum {
};
/*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
+#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT 2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT 5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
+#define LBR_FAR_BIT 8 /* do not capture far branches */
+
+#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
+#define LBR_USER (1 << LBR_USER_BIT)
+#define LBR_JCC (1 << LBR_JCC_BIT)
+#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN (1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
+#define LBR_FAR (1 << LBR_FAR_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP -1 /* LBR filter not supported */
+#define LBR_IGN 0 /* ignored */
+
+#define LBR_ANY \
+ (LBR_JCC |\
+ LBR_REL_CALL |\
+ LBR_IND_CALL |\
+ LBR_RETURN |\
+ LBR_REL_JMP |\
+ LBR_IND_JMP |\
+ LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
+
+#define for_each_branch_sample_type(x) \
+ for ((x) = PERF_SAMPLE_BRANCH_USER; \
+ (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
+/*
+ * x86control flow change classification
+ * x86control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+ X86_BR_NONE = 0, /* unknown */
+
+ X86_BR_USER = 1 << 0, /* branch target is user */
+ X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
+
+ X86_BR_CALL = 1 << 2, /* call */
+ X86_BR_RET = 1 << 3, /* return */
+ X86_BR_SYSCALL = 1 << 4, /* syscall */
+ X86_BR_SYSRET = 1 << 5, /* syscall return */
+ X86_BR_INT = 1 << 6, /* sw interrupt */
+ X86_BR_IRET = 1 << 7, /* return from interrupt */
+ X86_BR_JCC = 1 << 8, /* conditional */
+ X86_BR_JMP = 1 << 9, /* jump */
+ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
+ X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+
+#define X86_BR_ANY \
+ (X86_BR_CALL |\
+ X86_BR_RET |\
+ X86_BR_SYSCALL |\
+ X86_BR_SYSRET |\
+ X86_BR_INT |\
+ X86_BR_IRET |\
+ X86_BR_JCC |\
+ X86_BR_JMP |\
+ X86_BR_IRQ |\
+ X86_BR_IND_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL \
+ (X86_BR_CALL |\
+ X86_BR_IND_CALL |\
+ X86_BR_SYSCALL |\
+ X86_BR_IRQ |\
+ X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
* otherwise it becomes near impossible to get a reliable stack.
*/
@@ -21,6 +116,10 @@ enum {
static void __intel_pmu_lbr_enable(void)
{
u64 debugctl;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ if (cpuc->lbr_sel)
+ wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -72,17 +171,15 @@ void intel_pmu_lbr_enable(struct perf_event *event)
if (!x86_pmu.lbr_nr)
return;
- WARN_ON_ONCE(cpuc->enabled);
-
/*
* Reset the LBR stack if we changed task context to
* avoid data leaks.
*/
-
if (event->ctx->task && cpuc->lbr_context != event->ctx) {
intel_pmu_lbr_reset();
cpuc->lbr_context = event->ctx;
}
+ cpuc->br_sel = event->hw.branch_reg.reg;
cpuc->lbr_users++;
}
@@ -97,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
- if (cpuc->enabled && !cpuc->lbr_users)
+ if (cpuc->enabled && !cpuc->lbr_users) {
__intel_pmu_lbr_disable();
+ /* avoid stale pointer */
+ cpuc->lbr_context = NULL;
+ }
}
void intel_pmu_lbr_enable_all(void)
@@ -117,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
__intel_pmu_lbr_disable();
}
+/*
+ * TOS = most recently recorded branch
+ */
static inline u64 intel_pmu_lbr_tos(void)
{
u64 tos;
@@ -144,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
- cpuc->lbr_entries[i].from = msr_lastbranch.from;
- cpuc->lbr_entries[i].to = msr_lastbranch.to;
- cpuc->lbr_entries[i].flags = 0;
+ cpuc->lbr_entries[i].from = msr_lastbranch.from;
+ cpuc->lbr_entries[i].to = msr_lastbranch.to;
+ cpuc->lbr_entries[i].mispred = 0;
+ cpuc->lbr_entries[i].predicted = 0;
+ cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
-#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
-
/*
* Due to lack of segmentation in Linux the effective address (offset)
* is the same as the linear address, allowing us to merge the LIP and EIP
@@ -167,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
- u64 from, to, flags = 0;
+ u64 from, to, mis = 0, pred = 0;
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
- flags = !!(from & LBR_FROM_FLAG_MISPRED);
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
from = (u64)((((s64)from) << 1) >> 1);
}
- cpuc->lbr_entries[i].from = from;
- cpuc->lbr_entries[i].to = to;
- cpuc->lbr_entries[i].flags = flags;
+ cpuc->lbr_entries[i].from = from;
+ cpuc->lbr_entries[i].to = to;
+ cpuc->lbr_entries[i].mispred = mis;
+ cpuc->lbr_entries[i].predicted = pred;
+ cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
@@ -195,28 +301,404 @@ void intel_pmu_lbr_read(void)
intel_pmu_lbr_read_32(cpuc);
else
intel_pmu_lbr_read_64(cpuc);
+
+ intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+ u64 br_type = event->attr.branch_sample_type;
+ int mask = 0;
+
+ if (br_type & PERF_SAMPLE_BRANCH_USER)
+ mask |= X86_BR_USER;
+
+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+ mask |= X86_BR_KERNEL;
+
+ /* we ignore BRANCH_HV here */
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY)
+ mask |= X86_BR_ANY;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+ mask |= X86_BR_ANY_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ mask |= X86_BR_IND_CALL;
+ /*
+ * stash actual user request into reg, it may
+ * be used by fixup code for some CPU
+ */
+ event->hw.branch_reg.reg = mask;
+}
+
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg;
+ u64 br_type = event->attr.branch_sample_type;
+ u64 mask = 0, m;
+ u64 v;
+
+ for_each_branch_sample_type(m) {
+ if (!(br_type & m))
+ continue;
+
+ v = x86_pmu.lbr_sel_map[m];
+ if (v == LBR_NOT_SUPP)
+ return -EOPNOTSUPP;
+
+ if (v != LBR_IGN)
+ mask |= v;
+ }
+ reg = &event->hw.branch_reg;
+ reg->idx = EXTRA_REG_LBR;
+
+ /* LBR_SELECT operates in suppress mode so invert mask */
+ reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+ return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+ int ret = 0;
+
+ /*
+ * no LBR on this PMU
+ */
+ if (!x86_pmu.lbr_nr)
+ return -EOPNOTSUPP;
+
+ /*
+ * setup SW LBR filter
+ */
+ intel_pmu_setup_sw_lbr_filter(event);
+
+ /*
+ * setup HW LBR filter, if any
+ */
+ if (x86_pmu.lbr_sel_map)
+ ret = intel_pmu_setup_hw_lbr_filter(event);
+
+ return ret;
+}
+
+/*
+ * return the type of control flow change at address "from"
+ * intruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to)
+{
+ struct insn insn;
+ void *addr;
+ int bytes, size = MAX_INSN_SIZE;
+ int ret = X86_BR_NONE;
+ int ext, to_plm, from_plm;
+ u8 buf[MAX_INSN_SIZE];
+ int is64 = 0;
+
+ to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+ from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+ /*
+ * maybe zero if lbr did not fill up after a reset by the time
+ * we get a PMU interrupt
+ */
+ if (from == 0 || to == 0)
+ return X86_BR_NONE;
+
+ if (from_plm == X86_BR_USER) {
+ /*
+ * can happen if measuring at the user level only
+ * and we interrupt in a kernel thread, e.g., idle.
+ */
+ if (!current->mm)
+ return X86_BR_NONE;
+
+ /* may fail if text not present */
+ bytes = copy_from_user_nmi(buf, (void __user *)from, size);
+ if (bytes != size)
+ return X86_BR_NONE;
+
+ addr = buf;
+ } else
+ addr = (void *)from;
+
+ /*
+ * decoder needs to know the ABI especially
+ * on 64-bit systems running 32-bit apps
+ */
+#ifdef CONFIG_X86_64
+ is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+ insn_init(&insn, addr, is64);
+ insn_get_opcode(&insn);
+
+ switch (insn.opcode.bytes[0]) {
+ case 0xf:
+ switch (insn.opcode.bytes[1]) {
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ ret = X86_BR_SYSCALL;
+ break;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ ret = X86_BR_SYSRET;
+ break;
+ case 0x80 ... 0x8f: /* conditional */
+ ret = X86_BR_JCC;
+ break;
+ default:
+ ret = X86_BR_NONE;
+ }
+ break;
+ case 0x70 ... 0x7f: /* conditional */
+ ret = X86_BR_JCC;
+ break;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ ret = X86_BR_RET;
+ break;
+ case 0xcf: /* iret */
+ ret = X86_BR_IRET;
+ break;
+ case 0xcc ... 0xce: /* int */
+ ret = X86_BR_INT;
+ break;
+ case 0xe8: /* call near rel */
+ case 0x9a: /* call far absolute */
+ ret = X86_BR_CALL;
+ break;
+ case 0xe0 ... 0xe3: /* loop jmp */
+ ret = X86_BR_JCC;
+ break;
+ case 0xe9 ... 0xeb: /* jmp */
+ ret = X86_BR_JMP;
+ break;
+ case 0xff: /* call near absolute, call far absolute ind */
+ insn_get_modrm(&insn);
+ ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ ret = X86_BR_IND_CALL;
+ break;
+ case 4:
+ case 5:
+ ret = X86_BR_JMP;
+ break;
+ }
+ break;
+ default:
+ ret = X86_BR_NONE;
+ }
+ /*
+ * interrupts, traps, faults (and thus ring transition) may
+ * occur on any instructions. Thus, to classify them correctly,
+ * we need to first look at the from and to priv levels. If they
+ * are different and to is in the kernel, then it indicates
+ * a ring transition. If the from instruction is not a ring
+ * transition instr (syscall, systenter, int), then it means
+ * it was a irq, trap or fault.
+ *
+ * we have no way of detecting kernel to kernel faults.
+ */
+ if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+ && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+ ret = X86_BR_IRQ;
+
+ /*
+ * branch priv level determined by target as
+ * is done by HW when LBR_SELECT is implemented
+ */
+ if (ret != X86_BR_NONE)
+ ret |= to_plm;
+
+ return ret;
+}
+
+/*
+ * implement actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+ u64 from, to;
+ int br_sel = cpuc->br_sel;
+ int i, j, type;
+ bool compress = false;
+
+ /* if sampling all branches, then nothing to filter */
+ if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+ return;
+
+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+ from = cpuc->lbr_entries[i].from;
+ to = cpuc->lbr_entries[i].to;
+
+ type = branch_type(from, to);
+
+ /* if type does not correspond, then discard */
+ if (type == X86_BR_NONE || (br_sel & type) != type) {
+ cpuc->lbr_entries[i].from = 0;
+ compress = true;
+ }
+ }
+
+ if (!compress)
+ return;
+
+ /* remove all entries with from=0 */
+ for (i = 0; i < cpuc->lbr_stack.nr; ) {
+ if (!cpuc->lbr_entries[i].from) {
+ j = i;
+ while (++j < cpuc->lbr_stack.nr)
+ cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+ cpuc->lbr_stack.nr--;
+ if (!cpuc->lbr_entries[i].from)
+ continue;
+ }
+ i++;
+ }
}
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+ [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
+ | LBR_IND_JMP | LBR_FAR,
+ /*
+ * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+ */
+ [PERF_SAMPLE_BRANCH_ANY_CALL] =
+ LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+ /*
+ * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+ */
+ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+ [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
+ | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
+};
+
+/* core */
void intel_pmu_lbr_init_core(void)
{
x86_pmu.lbr_nr = 4;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x40;
- x86_pmu.lbr_to = 0x60;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("4-deep LBR, ");
}
+/* nehalem/westmere */
void intel_pmu_lbr_init_nhm(void)
{
x86_pmu.lbr_nr = 16;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x680;
- x86_pmu.lbr_to = 0x6c0;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - workaround LBR_SEL errata (see above)
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+ x86_pmu.lbr_nr = 16;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("16-deep LBR, ");
}
+/* atom */
void intel_pmu_lbr_init_atom(void)
{
+ /*
+ * only models starting at stepping 10 seems
+ * to have an operational LBR which can freeze
+ * on PMU interrupt
+ */
+ if (boot_cpu_data.x86_mask < 10) {
+ pr_cont("LBR disabled due to erratum");
+ return;
+ }
+
x86_pmu.lbr_nr = 8;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x40;
- x86_pmu.lbr_to = 0x60;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("8-deep LBR, ");
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c7f64e6..addf9e8 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
+ { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 642f75a..11891ca 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -62,16 +62,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!userbuf) {
memcpy(buf, (vaddr + offset), csize);
- kunmap_atomic(vaddr, KM_PTE0);
+ kunmap_atomic(vaddr);
} else {
if (!kdump_buf_page) {
printk(KERN_WARNING "Kdump: Kdump buffer page not"
" allocated\n");
- kunmap_atomic(vaddr, KM_PTE0);
+ kunmap_atomic(vaddr);
return -EFAULT;
}
copy_page(kdump_buf_page, vaddr);
- kunmap_atomic(vaddr, KM_PTE0);
+ kunmap_atomic(vaddr);
if (copy_to_user(buf, (kdump_buf_page + offset), csize))
return -EFAULT;
}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 5282179..3ae2ced 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -4,6 +4,7 @@
#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/irqdomain.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/of.h>
@@ -17,64 +18,14 @@
#include <linux/initrd.h>
#include <asm/hpet.h>
-#include <asm/irq_controller.h>
#include <asm/apic.h>
#include <asm/pci_x86.h>
__initdata u64 initial_dtb;
char __initdata cmd_line[COMMAND_LINE_SIZE];
-static LIST_HEAD(irq_domains);
-static DEFINE_RAW_SPINLOCK(big_irq_lock);
int __initdata of_ioapic;
-#ifdef CONFIG_X86_IO_APIC
-static void add_interrupt_host(struct irq_domain *ih)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&big_irq_lock, flags);
- list_add(&ih->l, &irq_domains);
- raw_spin_unlock_irqrestore(&big_irq_lock, flags);
-}
-#endif
-
-static struct irq_domain *get_ih_from_node(struct device_node *controller)
-{
- struct irq_domain *ih, *found = NULL;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&big_irq_lock, flags);
- list_for_each_entry(ih, &irq_domains, l) {
- if (ih->controller == controller) {
- found = ih;
- break;
- }
- }
- raw_spin_unlock_irqrestore(&big_irq_lock, flags);
- return found;
-}
-
-unsigned int irq_create_of_mapping(struct device_node *controller,
- const u32 *intspec, unsigned int intsize)
-{
- struct irq_domain *ih;
- u32 virq, type;
- int ret;
-
- ih = get_ih_from_node(controller);
- if (!ih)
- return 0;
- ret = ih->xlate(ih, intspec, intsize, &virq, &type);
- if (ret)
- return 0;
- if (type == IRQ_TYPE_NONE)
- return virq;
- irq_set_irq_type(virq, type);
- return virq;
-}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-
unsigned long pci_address_to_pio(phys_addr_t address)
{
/*
@@ -354,36 +305,43 @@ static struct of_ioapic_type of_ioapic_type[] =
},
};
-static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
- u32 *out_hwirq, u32 *out_type)
+static int ioapic_xlate(struct irq_domain *domain,
+ struct device_node *controller,
+ const u32 *intspec, u32 intsize,
+ irq_hw_number_t *out_hwirq, u32 *out_type)
{
- struct mp_ioapic_gsi *gsi_cfg;
struct io_apic_irq_attr attr;
struct of_ioapic_type *it;
- u32 line, idx, type;
+ u32 line, idx;
+ int rc;
- if (intsize < 2)
+ if (WARN_ON(intsize < 2))
return -EINVAL;
- line = *intspec;
- idx = (u32) id->priv;
- gsi_cfg = mp_ioapic_gsi_routing(idx);
- *out_hwirq = line + gsi_cfg->gsi_base;
-
- intspec++;
- type = *intspec;
+ line = intspec[0];
- if (type >= ARRAY_SIZE(of_ioapic_type))
+ if (intspec[1] >= ARRAY_SIZE(of_ioapic_type))
return -EINVAL;
- it = of_ioapic_type + type;
- *out_type = it->out_type;
+ it = &of_ioapic_type[intspec[1]];
+ idx = (u32) domain->host_data;
set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
- return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
+ rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line),
+ cpu_to_node(0), &attr);
+ if (rc)
+ return rc;
+
+ *out_hwirq = line;
+ *out_type = it->out_type;
+ return 0;
}
+const struct irq_domain_ops ioapic_irq_domain_ops = {
+ .xlate = ioapic_xlate,
+};
+
static void __init ioapic_add_ofnode(struct device_node *np)
{
struct resource r;
@@ -399,13 +357,14 @@ static void __init ioapic_add_ofnode(struct device_node *np)
for (i = 0; i < nr_ioapics; i++) {
if (r.start == mpc_ioapic_addr(i)) {
struct irq_domain *id;
+ struct mp_ioapic_gsi *gsi_cfg;
+
+ gsi_cfg = mp_ioapic_gsi_routing(i);
- id = kzalloc(sizeof(*id), GFP_KERNEL);
+ id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
+ &ioapic_irq_domain_ops,
+ (void*)i);
BUG_ON(!id);
- id->controller = np;
- id->xlate = ioapic_xlate;
- id->priv = (void *)i;
- add_interrupt_host(id);
return;
}
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 1aae78f..4025fe4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -252,7 +252,8 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
unsigned short ss;
unsigned long sp;
#endif
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+ printk(KERN_DEFAULT
+ "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index c99f9ed..88ec912 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
int i;
print_modules();
- __show_regs(regs, 0);
+ __show_regs(regs, !user_mode_vm(regs));
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
TASK_COMM_LEN, current->comm, task_pid_nr(current),
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6d728d9..17107bd 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -129,7 +129,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
if (regs)
stack = (unsigned long *)regs->sp;
- else if (task && task != current)
+ else if (task != current)
stack = (unsigned long *)task->thread.sp;
else
stack = &dummy;
@@ -269,11 +269,11 @@ void show_registers(struct pt_regs *regs)
unsigned char c;
u8 *ip;
- printk(KERN_EMERG "Stack:\n");
+ printk(KERN_DEFAULT "Stack:\n");
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
- 0, KERN_EMERG);
+ 0, KERN_DEFAULT);
- printk(KERN_EMERG "Code: ");
+ printk(KERN_DEFAULT "Code: ");
ip = (u8 *)regs->ip - code_prologue;
if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 8071e2f..62d61e9 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -19,6 +19,7 @@
#include <linux/acpi.h>
#include <linux/firmware-map.h>
#include <linux/memblock.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/proto.h>
@@ -227,22 +228,38 @@ void __init e820_print_map(char *who)
* ____________________33__
* ______________________4_
*/
+struct change_member {
+ struct e820entry *pbios; /* pointer to original bios entry */
+ unsigned long long addr; /* address for this change point */
+};
+
+static int __init cpcompare(const void *a, const void *b)
+{
+ struct change_member * const *app = a, * const *bpp = b;
+ const struct change_member *ap = *app, *bp = *bpp;
+
+ /*
+ * Inputs are pointers to two elements of change_point[]. If their
+ * addresses are unequal, their difference dominates. If the addresses
+ * are equal, then consider one that represents the end of its region
+ * to be greater than one that does not.
+ */
+ if (ap->addr != bp->addr)
+ return ap->addr > bp->addr ? 1 : -1;
+
+ return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr);
+}
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
u32 *pnr_map)
{
- struct change_member {
- struct e820entry *pbios; /* pointer to original bios entry */
- unsigned long long addr; /* address for this change point */
- };
static struct change_member change_point_list[2*E820_X_MAX] __initdata;
static struct change_member *change_point[2*E820_X_MAX] __initdata;
static struct e820entry *overlap_list[E820_X_MAX] __initdata;
static struct e820entry new_bios[E820_X_MAX] __initdata;
- struct change_member *change_tmp;
unsigned long current_type, last_type;
unsigned long long last_addr;
- int chgidx, still_changing;
+ int chgidx;
int overlap_entries;
int new_bios_entry;
int old_nr, new_nr, chg_nr;
@@ -279,35 +296,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
chg_nr = chgidx;
/* sort change-point list by memory addresses (low -> high) */
- still_changing = 1;
- while (still_changing) {
- still_changing = 0;
- for (i = 1; i < chg_nr; i++) {
- unsigned long long curaddr, lastaddr;
- unsigned long long curpbaddr, lastpbaddr;
-
- curaddr = change_point[i]->addr;
- lastaddr = change_point[i - 1]->addr;
- curpbaddr = change_point[i]->pbios->addr;
- lastpbaddr = change_point[i - 1]->pbios->addr;
-
- /*
- * swap entries, when:
- *
- * curaddr > lastaddr or
- * curaddr == lastaddr and curaddr == curpbaddr and
- * lastaddr != lastpbaddr
- */
- if (curaddr < lastaddr ||
- (curaddr == lastaddr && curaddr == curpbaddr &&
- lastaddr != lastpbaddr)) {
- change_tmp = change_point[i];
- change_point[i] = change_point[i-1];
- change_point[i-1] = change_tmp;
- still_changing = 1;
- }
- }
- }
+ sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL);
/* create a new bios memory map, removing overlaps */
overlap_entries = 0; /* number of entries in the overlap table */
@@ -714,7 +703,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
}
#endif
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_ACPI
/**
* Mark ACPI NVS memory region, so that we can save/restore it during
* hibernation and the subsequent resume.
@@ -727,7 +716,7 @@ static int __init e820_mark_nvs_memory(void)
struct e820entry *ei = &e820.map[i];
if (ei->type == E820_NVS)
- suspend_nvs_register(ei->addr, ei->size);
+ acpi_nvs_register(ei->addr, ei->size);
}
return 0;
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 9d42a52..9b9f18b 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
-#ifdef CONFIG_EARLY_PRINTK_MRST
+#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
if (!strncmp(buf, "mrst", 4)) {
mrst_early_console_init();
early_console_register(&early_mrst_console, keep);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 22d0e21..79d97e6 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -42,6 +42,7 @@
*/
#include <linux/linkage.h>
+#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
@@ -81,8 +82,6 @@
* enough to patch inline, increasing performance.
*/
-#define nr_syscalls ((syscall_table_size)/4)
-
#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
@@ -423,7 +422,7 @@ sysenter_past_esp:
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz sysenter_audit
sysenter_do_call:
- cmpl $(nr_syscalls), %eax
+ cmpl $(NR_syscalls), %eax
jae syscall_badsys
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp)
@@ -455,7 +454,7 @@ sysenter_audit:
movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
movl %eax,%edx /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
pushl_cfi %ebx
movl PT_EAX(%esp),%eax /* reload syscall number */
jmp sysenter_do_call
@@ -466,11 +465,10 @@ sysexit_audit:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
movl %eax,%edx /* second arg, syscall return value */
- cmpl $0,%eax /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
+ cmpl $-MAX_ERRNO,%eax /* is it an error ? */
+ setbe %al /* 1 if so, 0 if not */
movzbl %al,%eax /* zero-extend that */
- inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
+ call __audit_syscall_exit
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
@@ -504,7 +502,7 @@ ENTRY(system_call)
# system call tracing in operation / emulation
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz syscall_trace_entry
- cmpl $(nr_syscalls), %eax
+ cmpl $(NR_syscalls), %eax
jae syscall_badsys
syscall_call:
call *sys_call_table(,%eax,4)
@@ -654,7 +652,7 @@ syscall_trace_entry:
movl %esp, %eax
call syscall_trace_enter
/* What it returned is what we'll actually use. */
- cmpl $(nr_syscalls), %eax
+ cmpl $(NR_syscalls), %eax
jnae syscall_call
jmp syscall_exit
END(syscall_trace_entry)
@@ -694,29 +692,28 @@ END(syscall_badsys)
* System calls that need a pt_regs pointer.
*/
#define PTREGSCALL0(name) \
- ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
leal 4(%esp),%eax; \
- jmp sys_##name;
+ jmp sys_##name; \
+ENDPROC(ptregs_##name)
#define PTREGSCALL1(name) \
- ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
leal 4(%esp),%edx; \
movl (PT_EBX+4)(%esp),%eax; \
- jmp sys_##name;
+ jmp sys_##name; \
+ENDPROC(ptregs_##name)
#define PTREGSCALL2(name) \
- ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
leal 4(%esp),%ecx; \
movl (PT_ECX+4)(%esp),%edx; \
movl (PT_EBX+4)(%esp),%eax; \
- jmp sys_##name;
+ jmp sys_##name; \
+ENDPROC(ptregs_##name)
#define PTREGSCALL3(name) \
- ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
CFI_STARTPROC; \
leal 4(%esp),%eax; \
pushl_cfi %eax; \
@@ -741,8 +738,7 @@ PTREGSCALL2(vm86)
PTREGSCALL1(vm86old)
/* Clone is an oddball. The 4th arg is in %edi */
- ALIGN;
-ptregs_clone:
+ENTRY(ptregs_clone)
CFI_STARTPROC
leal 4(%esp),%eax
pushl_cfi %eax
@@ -1213,11 +1209,6 @@ return_to_handler:
jmp *%ecx
#endif
-.section .rodata,"a"
-#include "syscall_table_32.S"
-
-syscall_table_size=(.-sys_call_table)
-
/*
* Some functions should be protected against kprobes
*/
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a20e1cb..734ebd1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>
+#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -319,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
movq %rsp, %rsi
leaq -RBP(%rsp),%rdi /* arg1 for handler */
- testl $3, CS(%rdi)
+ testl $3, CS-RBP(%rsi)
je 1f
SWAPGS
/*
@@ -329,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
* moving irq_enter into assembly, which would be too much work)
*/
1: incl PER_CPU_VAR(irq_count)
- jne 2f
- mov PER_CPU_VAR(irq_stack_ptr),%rsp
+ cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
CFI_DEF_CFA_REGISTER rsi
-2: /* Store previous stack value */
+ /* Store previous stack value */
pushq %rsi
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
0x77 /* DW_OP_breg7 */, 0, \
@@ -548,7 +548,7 @@ badsys:
#ifdef CONFIG_AUDITSYSCALL
/*
* Fast path for syscall audit without full syscall trace.
- * We just call audit_syscall_entry() directly, and then
+ * We just call __audit_syscall_entry() directly, and then
* jump back to the normal fast path.
*/
auditsys:
@@ -558,22 +558,21 @@ auditsys:
movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
movq %rax,%rsi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
LOAD_ARGS 0 /* reload call-clobbered registers */
jmp system_call_fastpath
/*
- * Return fast path for syscall audit. Call audit_syscall_exit()
+ * Return fast path for syscall audit. Call __audit_syscall_exit()
* directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
* masked off.
*/
sysret_audit:
movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
- cmpq $0,%rsi /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
+ cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
+ setbe %al /* 1 if so, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
- inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
+ call __audit_syscall_exit
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
@@ -813,7 +812,7 @@ ret_from_intr:
/* Restore saved previous stack */
popq %rsi
- CFI_DEF_CFA_REGISTER rsi
+ CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
leaq ARGOFFSET-RBP(%rsi), %rsp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
@@ -1480,60 +1479,225 @@ ENTRY(error_exit)
CFI_ENDPROC
END(error_exit)
+/*
+ * Test if a given stack is an NMI stack or not.
+ */
+ .macro test_in_nmi reg stack nmi_ret normal_ret
+ cmpq %\reg, \stack
+ ja \normal_ret
+ subq $EXCEPTION_STKSZ, %\reg
+ cmpq %\reg, \stack
+ jb \normal_ret
+ jmp \nmi_ret
+ .endm
/* runs on exception stack */
ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq_cfi $-1
+ /*
+ * We allow breakpoints in NMIs. If a breakpoint occurs, then
+ * the iretq it performs will take us out of NMI context.
+ * This means that we can have nested NMIs where the next
+ * NMI is using the top of the stack of the previous NMI. We
+ * can't let it execute because the nested NMI will corrupt the
+ * stack of the previous NMI. NMI handlers are not re-entrant
+ * anyway.
+ *
+ * To handle this case we do the following:
+ * Check the a special location on the stack that contains
+ * a variable that is set when NMIs are executing.
+ * The interrupted task's stack is also checked to see if it
+ * is an NMI stack.
+ * If the variable is not set and the stack is not the NMI
+ * stack then:
+ * o Set the special variable on the stack
+ * o Copy the interrupt frame into a "saved" location on the stack
+ * o Copy the interrupt frame into a "copy" location on the stack
+ * o Continue processing the NMI
+ * If the variable is set or the previous stack is the NMI stack:
+ * o Modify the "copy" location to jump to the repeate_nmi
+ * o return back to the first NMI
+ *
+ * Now on exit of the first NMI, we first clear the stack variable
+ * The NMI stack will tell any nested NMIs at that point that it is
+ * nested. Then we pop the stack normally with iret, and if there was
+ * a nested NMI that updated the copy interrupt stack frame, a
+ * jump will be made to the repeat_nmi code that will handle the second
+ * NMI.
+ */
+
+ /* Use %rdx as out temp variable throughout */
+ pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
+
+ /*
+ * If %cs was not the kernel segment, then the NMI triggered in user
+ * space, which means it is definitely not nested.
+ */
+ cmpl $__KERNEL_CS, 16(%rsp)
+ jne first_nmi
+
+ /*
+ * Check the special variable on the stack to see if NMIs are
+ * executing.
+ */
+ cmpl $1, -8(%rsp)
+ je nested_nmi
+
+ /*
+ * Now test if the previous stack was an NMI stack.
+ * We need the double check. We check the NMI stack to satisfy the
+ * race when the first NMI clears the variable before returning.
+ * We check the variable because the first NMI could be in a
+ * breakpoint routine using a breakpoint stack.
+ */
+ lea 6*8(%rsp), %rdx
+ test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+ CFI_REMEMBER_STATE
+
+nested_nmi:
+ /*
+ * Do nothing if we interrupted the fixup in repeat_nmi.
+ * It's about to repeat the NMI handler, so we are fine
+ * with ignoring this one.
+ */
+ movq $repeat_nmi, %rdx
+ cmpq 8(%rsp), %rdx
+ ja 1f
+ movq $end_repeat_nmi, %rdx
+ cmpq 8(%rsp), %rdx
+ ja nested_nmi_out
+
+1:
+ /* Set up the interrupted NMIs stack to jump to repeat_nmi */
+ leaq -6*8(%rsp), %rdx
+ movq %rdx, %rsp
+ CFI_ADJUST_CFA_OFFSET 6*8
+ pushq_cfi $__KERNEL_DS
+ pushq_cfi %rdx
+ pushfq_cfi
+ pushq_cfi $__KERNEL_CS
+ pushq_cfi $repeat_nmi
+
+ /* Put stack back */
+ addq $(11*8), %rsp
+ CFI_ADJUST_CFA_OFFSET -11*8
+
+nested_nmi_out:
+ popq_cfi %rdx
+ CFI_RESTORE rdx
+
+ /* No need to check faults here */
+ INTERRUPT_RETURN
+
+ CFI_RESTORE_STATE
+first_nmi:
+ /*
+ * Because nested NMIs will use the pushed location that we
+ * stored in rdx, we must keep that space available.
+ * Here's what our stack frame will look like:
+ * +-------------------------+
+ * | original SS |
+ * | original Return RSP |
+ * | original RFLAGS |
+ * | original CS |
+ * | original RIP |
+ * +-------------------------+
+ * | temp storage for rdx |
+ * +-------------------------+
+ * | NMI executing variable |
+ * +-------------------------+
+ * | Saved SS |
+ * | Saved Return RSP |
+ * | Saved RFLAGS |
+ * | Saved CS |
+ * | Saved RIP |
+ * +-------------------------+
+ * | copied SS |
+ * | copied Return RSP |
+ * | copied RFLAGS |
+ * | copied CS |
+ * | copied RIP |
+ * +-------------------------+
+ * | pt_regs |
+ * +-------------------------+
+ *
+ * The saved stack frame is used to fix up the copied stack frame
+ * that a nested NMI may change to make the interrupted NMI iret jump
+ * to the repeat_nmi. The original stack frame and the temp storage
+ * is also used by nested NMIs and can not be trusted on exit.
+ */
+ /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
+ movq (%rsp), %rdx
+ CFI_RESTORE rdx
+
+ /* Set the NMI executing variable on the stack. */
+ pushq_cfi $1
+
+ /* Copy the stack frame to the Saved frame */
+ .rept 5
+ pushq_cfi 6*8(%rsp)
+ .endr
+ CFI_DEF_CFA_OFFSET SS+8-RIP
+
+ /* Everything up to here is safe from nested NMIs */
+
+ /*
+ * If there was a nested NMI, the first NMI's iret will return
+ * here. But NMIs are still enabled and we can take another
+ * nested NMI. The nested NMI checks the interrupted RIP to see
+ * if it is between repeat_nmi and end_repeat_nmi, and if so
+ * it will just return, as we are about to repeat an NMI anyway.
+ * This makes it safe to copy to the stack frame that a nested
+ * NMI will update.
+ */
+repeat_nmi:
+ /*
+ * Update the stack variable to say we are still in NMI (the update
+ * is benign for the non-repeat case, where 1 was pushed just above
+ * to this very stack slot).
+ */
+ movq $1, 5*8(%rsp)
+
+ /* Make another copy, this one may be modified by nested NMIs */
+ .rept 5
+ pushq_cfi 4*8(%rsp)
+ .endr
+ CFI_DEF_CFA_OFFSET SS+8-RIP
+end_repeat_nmi:
+
+ /*
+ * Everything below this point can be preempted by a nested
+ * NMI if the first NMI took an exception and reset our iret stack
+ * so that we repeat another NMI.
+ */
+ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+ /*
+ * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+ * as we should not be calling schedule in NMI context.
+ * Even with normal interrupts enabled. An NMI should not be
+ * setting NEED_RESCHED or anything that normal interrupts and
+ * exceptions might do.
+ */
call save_paranoid
DEFAULT_FRAME 0
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi
movq $-1,%rsi
call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* paranoidexit; without TRACE_IRQS_OFF */
- /* ebx: no swapgs flag */
- DISABLE_INTERRUPTS(CLBR_NONE)
testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore
- testl $3,CS(%rsp)
- jnz nmi_userspace
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_ALL 8
+ /* Clear the NMI executing stack variable */
+ movq $0, 10*8(%rsp)
jmp irq_return
-nmi_userspace:
- GET_THREAD_INFO(%rcx)
- movl TI_flags(%rcx),%ebx
- andl $_TIF_WORK_MASK,%ebx
- jz nmi_swapgs
- movq %rsp,%rdi /* &pt_regs */
- call sync_regs
- movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
- jnz nmi_schedule
- movl %ebx,%edx /* arg3: thread flags */
- ENABLE_INTERRUPTS(CLBR_NONE)
- xorl %esi,%esi /* arg2: oldset */
- movq %rsp,%rdi /* arg1: &pt_regs */
- call do_notify_resume
- DISABLE_INTERRUPTS(CLBR_NONE)
- jmp nmi_userspace
-nmi_schedule:
- ENABLE_INTERRUPTS(CLBR_ANY)
- call schedule
- DISABLE_INTERRUPTS(CLBR_ANY)
- jmp nmi_userspace
CFI_ENDPROC
-#else
- jmp paranoid_exit
- CFI_ENDPROC
-#endif
END(nmi)
ENTRY(ignore_sysret)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index e11e394..40f4eb3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -417,6 +417,10 @@ ENTRY(phys_base)
ENTRY(idt_table)
.skip IDT_ENTRIES * 16
+ .align L1_CACHE_BYTES
+ENTRY(nmi_idt_table)
+ .skip IDT_ENTRIES * 16
+
__PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 739d859..7734bcb 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -16,6 +16,7 @@
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/user.h>
#ifdef CONFIG_X86_64
@@ -32,6 +33,86 @@
# define user32_fxsr_struct user_fxsr_struct
#endif
+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: the thread must not have fpu (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+static inline bool interrupted_kernel_fpu_idle(void)
+{
+ return !__thread_has_fpu(current) &&
+ (read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static inline bool interrupted_user_mode(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ return regs && user_mode_vm(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+bool irq_fpu_usable(void)
+{
+ return !in_interrupt() ||
+ interrupted_user_mode() ||
+ interrupted_kernel_fpu_idle();
+}
+EXPORT_SYMBOL(irq_fpu_usable);
+
+void kernel_fpu_begin(void)
+{
+ struct task_struct *me = current;
+
+ WARN_ON_ONCE(!irq_fpu_usable());
+ preempt_disable();
+ if (__thread_has_fpu(me)) {
+ __save_init_fpu(me);
+ __thread_clear_has_fpu(me);
+ /* We do 'stts()' in kernel_fpu_end() */
+ } else {
+ percpu_write(fpu_owner_task, NULL);
+ clts();
+ }
+}
+EXPORT_SYMBOL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ stts();
+ preempt_enable();
+}
+EXPORT_SYMBOL(kernel_fpu_end);
+
+void unlazy_fpu(struct task_struct *tsk)
+{
+ preempt_disable();
+ if (__thread_has_fpu(tsk)) {
+ __save_init_fpu(tsk);
+ __thread_fpu_end(tsk);
+ } else
+ tsk->fpu_counter = 0;
+ preempt_enable();
+}
+EXPORT_SYMBOL(unlazy_fpu);
+
#ifdef CONFIG_MATH_EMULATION
# define HAVE_HWFP (boot_cpu_data.hard_math)
#else
@@ -44,7 +125,7 @@ EXPORT_SYMBOL_GPL(xstate_size);
unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;
-void __cpuinit mxcsr_feature_mask_init(void)
+static void __cpuinit mxcsr_feature_mask_init(void)
{
unsigned long mask = 0;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 7209070..58b7f27 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
#ifdef CONFIG_DEBUG_STACKOVERFLOW
+
+int sysctl_panic_on_stackoverflow __read_mostly;
+
/* Debugging check for stack overflow: is there less than 1KB free? */
static int check_stack_overflow(void)
{
@@ -43,6 +46,8 @@ static void print_stack_overflow(void)
{
printk(KERN_WARNING "low stack detected by irq handler\n");
dump_stack();
+ if (sysctl_panic_on_stackoverflow)
+ panic("low stack detected by irq handler - check messages\n");
}
#else
@@ -95,13 +100,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
irqctx->tinfo.task = curctx->tinfo.task;
irqctx->tinfo.previous_esp = current_stack_pointer;
- /*
- * Copy the softirq bits in preempt_count so that the
- * softirq checks work in the hardirq context.
- */
- irqctx->tinfo.preempt_count =
- (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
- (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+ /* Copy the preempt_count so that the [soft]irq checks work. */
+ irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
@@ -191,7 +191,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
if (unlikely(!desc))
return false;
- if (!execute_on_irq_stack(overflow, desc, irq)) {
+ if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 69bca46..d04d3ecd 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
+int sysctl_panic_on_stackoverflow;
+
/*
* Probabilistic stack overflow check:
*
@@ -36,18 +38,39 @@ EXPORT_PER_CPU_SYMBOL(irq_regs);
static inline void stack_overflow_check(struct pt_regs *regs)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
+#define STACK_TOP_MARGIN 128
+ struct orig_ist *oist;
+ u64 irq_stack_top, irq_stack_bottom;
+ u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current);
if (user_mode_vm(regs))
return;
- WARN_ONCE(regs->sp >= curbase &&
- regs->sp <= curbase + THREAD_SIZE &&
- regs->sp < curbase + sizeof(struct thread_info) +
- sizeof(struct pt_regs) + 128,
+ if (regs->sp >= curbase + sizeof(struct thread_info) +
+ sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
+ regs->sp <= curbase + THREAD_SIZE)
+ return;
+
+ irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) +
+ STACK_TOP_MARGIN;
+ irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr);
+ if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
+ return;
+
+ oist = &__get_cpu_var(orig_ist);
+ estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
+ estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
+ if (regs->sp >= estack_top && regs->sp <= estack_bottom)
+ return;
+
+ WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+ current->comm, curbase, regs->sp,
+ irq_stack_top, irq_stack_bottom,
+ estack_top, estack_bottom);
- "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
- current->comm, curbase, regs->sp);
+ if (sysctl_panic_on_stackoverflow)
+ panic("low stack detected by irq handler - check messages\n");
#endif
}
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
new file mode 100644
index 0000000..3230b68
--- /dev/null
+++ b/arch/x86/kernel/kprobes-common.h
@@ -0,0 +1,102 @@
+#ifndef __X86_KERNEL_KPROBES_COMMON_H
+#define __X86_KERNEL_KPROBES_COMMON_H
+
+/* Kprobes and Optprobes common header */
+
+#ifdef CONFIG_X86_64
+#define SAVE_REGS_STRING \
+ /* Skip cs, ip, orig_ax. */ \
+ " subq $24, %rsp\n" \
+ " pushq %rdi\n" \
+ " pushq %rsi\n" \
+ " pushq %rdx\n" \
+ " pushq %rcx\n" \
+ " pushq %rax\n" \
+ " pushq %r8\n" \
+ " pushq %r9\n" \
+ " pushq %r10\n" \
+ " pushq %r11\n" \
+ " pushq %rbx\n" \
+ " pushq %rbp\n" \
+ " pushq %r12\n" \
+ " pushq %r13\n" \
+ " pushq %r14\n" \
+ " pushq %r15\n"
+#define RESTORE_REGS_STRING \
+ " popq %r15\n" \
+ " popq %r14\n" \
+ " popq %r13\n" \
+ " popq %r12\n" \
+ " popq %rbp\n" \
+ " popq %rbx\n" \
+ " popq %r11\n" \
+ " popq %r10\n" \
+ " popq %r9\n" \
+ " popq %r8\n" \
+ " popq %rax\n" \
+ " popq %rcx\n" \
+ " popq %rdx\n" \
+ " popq %rsi\n" \
+ " popq %rdi\n" \
+ /* Skip orig_ax, ip, cs */ \
+ " addq $24, %rsp\n"
+#else
+#define SAVE_REGS_STRING \
+ /* Skip cs, ip, orig_ax and gs. */ \
+ " subl $16, %esp\n" \
+ " pushl %fs\n" \
+ " pushl %es\n" \
+ " pushl %ds\n" \
+ " pushl %eax\n" \
+ " pushl %ebp\n" \
+ " pushl %edi\n" \
+ " pushl %esi\n" \
+ " pushl %edx\n" \
+ " pushl %ecx\n" \
+ " pushl %ebx\n"
+#define RESTORE_REGS_STRING \
+ " popl %ebx\n" \
+ " popl %ecx\n" \
+ " popl %edx\n" \
+ " popl %esi\n" \
+ " popl %edi\n" \
+ " popl %ebp\n" \
+ " popl %eax\n" \
+ /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
+ " addl $24, %esp\n"
+#endif
+
+/* Ensure if the instruction can be boostable */
+extern int can_boost(kprobe_opcode_t *instruction);
+/* Recover instruction if given address is probed */
+extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
+ unsigned long addr);
+/*
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
+ */
+extern int __copy_instruction(u8 *dest, u8 *src);
+
+/* Generate a relative-jump/call instruction */
+extern void synthesize_reljump(void *from, void *to);
+extern void synthesize_relcall(void *from, void *to);
+
+#ifdef CONFIG_OPTPROBES
+extern int arch_init_optprobes(void);
+extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
+extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
+#else /* !CONFIG_OPTPROBES */
+static inline int arch_init_optprobes(void)
+{
+ return 0;
+}
+static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+ return 0;
+}
+static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+ return addr;
+}
+#endif
+#endif
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c
new file mode 100644
index 0000000..c5e410e
--- /dev/null
+++ b/arch/x86/kernel/kprobes-opt.c
@@ -0,0 +1,512 @@
+/*
+ * Kernel Probes Jump Optimization (Optprobes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/kallsyms.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/debugreg.h>
+
+#include "kprobes-common.h"
+
+unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+ struct optimized_kprobe *op;
+ struct kprobe *kp;
+ long offs;
+ int i;
+
+ for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+ kp = get_kprobe((void *)addr - i);
+ /* This function only handles jump-optimized kprobe */
+ if (kp && kprobe_optimized(kp)) {
+ op = container_of(kp, struct optimized_kprobe, kp);
+ /* If op->list is not empty, op is under optimizing */
+ if (list_empty(&op->list))
+ goto found;
+ }
+ }
+
+ return addr;
+found:
+ /*
+ * If the kprobe can be optimized, original bytes which can be
+ * overwritten by jump destination address. In this case, original
+ * bytes must be recovered from op->optinsn.copied_insn buffer.
+ */
+ memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ if (addr == (unsigned long)kp->addr) {
+ buf[0] = kp->opcode;
+ memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ } else {
+ offs = addr - (unsigned long)kp->addr - 1;
+ memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+ }
+
+ return (unsigned long)buf;
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
+{
+#ifdef CONFIG_X86_64
+ *addr++ = 0x48;
+ *addr++ = 0xbf;
+#else
+ *addr++ = 0xb8;
+#endif
+ *(unsigned long *)addr = val;
+}
+
+static void __used __kprobes kprobes_optinsn_template_holder(void)
+{
+ asm volatile (
+ ".global optprobe_template_entry\n"
+ "optprobe_template_entry:\n"
+#ifdef CONFIG_X86_64
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rsi\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val:\n"
+ ASM_NOP5
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call:\n"
+ ASM_NOP5
+ /* Move flags to rsp */
+ " movq 144(%rsp), %rdx\n"
+ " movq %rdx, 152(%rsp)\n"
+ RESTORE_REGS_STRING
+ /* Skip flags entry */
+ " addq $8, %rsp\n"
+ " popfq\n"
+#else /* CONFIG_X86_32 */
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %edx\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val:\n"
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call:\n"
+ ASM_NOP5
+ RESTORE_REGS_STRING
+ " addl $4, %esp\n" /* skip cs */
+ " popf\n"
+#endif
+ ".global optprobe_template_end\n"
+ "optprobe_template_end:\n");
+}
+
+#define TMPL_MOVE_IDX \
+ ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+ ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+ ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+/* Optimized kprobe call back function: called from optinsn */
+static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long flags;
+
+ /* This is possible if op is under delayed unoptimizing */
+ if (kprobe_disabled(&op->kp))
+ return;
+
+ local_irq_save(flags);
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(&op->kp);
+ } else {
+ /* Save skipped registers */
+#ifdef CONFIG_X86_64
+ regs->cs = __KERNEL_CS;
+#else
+ regs->cs = __KERNEL_CS | get_kernel_rpl();
+ regs->gs = 0;
+#endif
+ regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+ regs->orig_ax = ~0UL;
+
+ __this_cpu_write(current_kprobe, &op->kp);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ opt_pre_handler(&op->kp, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+ local_irq_restore(flags);
+}
+
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+ int len = 0, ret;
+
+ while (len < RELATIVEJUMP_SIZE) {
+ ret = __copy_instruction(dest + len, src + len);
+ if (!ret || !can_boost(dest + len))
+ return -EINVAL;
+ len += ret;
+ }
+ /* Check whether the address range is reserved */
+ if (ftrace_text_reserved(src, src + len - 1) ||
+ alternatives_text_reserved(src, src + len - 1) ||
+ jump_label_text_reserved(src, src + len - 1))
+ return -EBUSY;
+
+ return len;
+}
+
+/* Check whether insn is indirect jump */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+ return ((insn->opcode.bytes[0] == 0xff &&
+ (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+ insn->opcode.bytes[0] == 0xea); /* Segment based jump */
+}
+
+/* Check whether insn jumps into specified address range */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+ unsigned long target = 0;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xe0: /* loopne */
+ case 0xe1: /* loope */
+ case 0xe2: /* loop */
+ case 0xe3: /* jcxz */
+ case 0xe9: /* near relative jump */
+ case 0xeb: /* short relative jump */
+ break;
+ case 0x0f:
+ if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
+ break;
+ return 0;
+ default:
+ if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
+ break;
+ return 0;
+ }
+ target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+ return (start <= target && target <= start + len);
+}
+
+/* Decode whole function to ensure any instructions don't jump into target */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+ unsigned long addr, size = 0, offset = 0;
+ struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+ /* Lookup symbol including addr */
+ if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
+ return 0;
+
+ /*
+ * Do not optimize in the entry code due to the unstable
+ * stack handling.
+ */
+ if ((paddr >= (unsigned long)__entry_text_start) &&
+ (paddr < (unsigned long)__entry_text_end))
+ return 0;
+
+ /* Check there is enough space for a relative jump. */
+ if (size - offset < RELATIVEJUMP_SIZE)
+ return 0;
+
+ /* Decode instructions */
+ addr = paddr - offset;
+ while (addr < paddr - offset + size) { /* Decode until function end */
+ if (search_exception_tables(addr))
+ /*
+ * Since some fixup code will jumps into this function,
+ * we can't optimize kprobe in this function.
+ */
+ return 0;
+ kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
+ insn_get_length(&insn);
+ /* Another subsystem puts a breakpoint */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
+ /* Recover address */
+ insn.kaddr = (void *)addr;
+ insn.next_byte = (void *)(addr + insn.length);
+ /* Check any instructions don't jump into target */
+ if (insn_is_indirect_jump(&insn) ||
+ insn_jump_into_range(&insn, paddr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE))
+ return 0;
+ addr += insn.length;
+ }
+
+ return 1;
+}
+
+/* Check optimized_kprobe can actually be optimized. */
+int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+ int i;
+ struct kprobe *p;
+
+ for (i = 1; i < op->optinsn.size; i++) {
+ p = get_kprobe(op->kp.addr + i);
+ if (p && !kprobe_disabled(p))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+/* Check the addr is within the optimized instructions. */
+int __kprobes
+arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
+{
+ return ((unsigned long)op->kp.addr <= addr &&
+ (unsigned long)op->kp.addr + op->optinsn.size > addr);
+}
+
+/* Free optimized instruction slot */
+static __kprobes
+void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+ if (op->optinsn.insn) {
+ free_optinsn_slot(op->optinsn.insn, dirty);
+ op->optinsn.insn = NULL;
+ op->optinsn.size = 0;
+ }
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+ __arch_remove_optimized_kprobe(op, 1);
+}
+
+/*
+ * Copy replacing target instructions
+ * Target instructions MUST be relocatable (checked inside)
+ * This is called when new aggr(opt)probe is allocated or reused.
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+ u8 *buf;
+ int ret;
+ long rel;
+
+ if (!can_optimize((unsigned long)op->kp.addr))
+ return -EILSEQ;
+
+ op->optinsn.insn = get_optinsn_slot();
+ if (!op->optinsn.insn)
+ return -ENOMEM;
+
+ /*
+ * Verify if the address gap is in 2GB range, because this uses
+ * a relative jump.
+ */
+ rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+ if (abs(rel) > 0x7fffffff)
+ return -ERANGE;
+
+ buf = (u8 *)op->optinsn.insn;
+
+ /* Copy instructions into the out-of-line buffer */
+ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
+ if (ret < 0) {
+ __arch_remove_optimized_kprobe(op, 0);
+ return ret;
+ }
+ op->optinsn.size = ret;
+
+ /* Copy arch-dep-instance from template */
+ memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+ /* Set probe information */
+ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+ /* Set probe function call */
+ synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+ /* Set returning jmp instruction at the tail of out-of-line buffer */
+ synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+ (u8 *)op->kp.addr + op->optinsn.size);
+
+ flush_icache_range((unsigned long) buf,
+ (unsigned long) buf + TMPL_END_IDX +
+ op->optinsn.size + RELATIVEJUMP_SIZE);
+ return 0;
+}
+
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+ u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ s32 rel = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
+ /* Backup instructions which will be replaced by jump address */
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE);
+
+ insn_buf[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buf[1]) = rel;
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must call with locking kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ WARN_ON(kprobe_disabled(&op->kp));
+ /* Setup param */
+ setup_optimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_del_init(&op->list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ /* Set int3 to first byte for kprobes */
+ insn_buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ /* Setup param */
+ setup_unoptimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_move(&op->list, done_list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
+/* Replace a relative jump with a breakpoint (int3). */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ u8 buf[RELATIVEJUMP_SIZE];
+
+ /* Set int3 to first byte for kprobes */
+ buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
+}
+
+int __kprobes
+setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+ struct optimized_kprobe *op;
+
+ if (p->flags & KPROBE_FLAG_OPTIMIZED) {
+ /* This kprobe is really able to run optimized path. */
+ op = container_of(p, struct optimized_kprobe, kp);
+ /* Detour through copied instructions */
+ regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+ if (!reenter)
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
+}
+
+int __kprobes arch_init_optprobes(void)
+{
+ /* Allocate code buffer and parameter array */
+ jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_bufs)
+ return -ENOMEM;
+
+ jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_params) {
+ kfree(jump_poke_bufs);
+ jump_poke_bufs = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d..e213fc8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -30,16 +30,15 @@
* <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
* <prasanna@in.ibm.com> added function-return probes.
* 2005-May Rusty Lynch <rusty.lynch@intel.com>
- * Added function return probes functionality
+ * Added function return probes functionality
* 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
- * kprobe-booster and kretprobe-booster for i386.
+ * kprobe-booster and kretprobe-booster for i386.
* 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
- * and kretprobe-booster for x86-64
+ * and kretprobe-booster for x86-64
* 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
- * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
- * unified x86 kprobes code.
+ * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
+ * unified x86 kprobes code.
*/
-
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
@@ -59,6 +58,8 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
+#include "kprobes-common.h"
+
void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
doesn't switch kernel stack.*/
{NULL, NULL} /* Terminator */
};
+
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
}
/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes synthesize_reljump(void *from, void *to)
+void __kprobes synthesize_reljump(void *from, void *to)
{
__synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
}
+/* Insert a call instruction at address 'from', which calls address 'to'.*/
+void __kprobes synthesize_relcall(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
/*
* Skip the prefixes of the instruction.
*/
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
* Returns non-zero if opcode is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode
*/
-static int __kprobes can_boost(kprobe_opcode_t *opcodes)
+int __kprobes can_boost(kprobe_opcode_t *opcodes)
{
kprobe_opcode_t opcode;
kprobe_opcode_t *orig_opcodes = opcodes;
@@ -207,13 +215,15 @@ retry:
}
}
-/* Recover the probed instruction at addr for further analysis. */
-static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+static unsigned long
+__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
struct kprobe *kp;
+
kp = get_kprobe((void *)addr);
+ /* There is no probe, return original address */
if (!kp)
- return -EINVAL;
+ return addr;
/*
* Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
*/
memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
buf[0] = kp->opcode;
- return 0;
+ return (unsigned long)buf;
+}
+
+/*
+ * Recover the probed instruction at addr for further analysis.
+ * Caller must lock kprobes by kprobe_mutex, or disable preemption
+ * for preventing to release referencing kprobes.
+ */
+unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+ unsigned long __addr;
+
+ __addr = __recover_optprobed_insn(buf, addr);
+ if (__addr != addr)
+ return __addr;
+
+ return __recover_probed_insn(buf, addr);
}
/* Check if paddr is at an instruction boundary */
static int __kprobes can_probe(unsigned long paddr)
{
- int ret;
- unsigned long addr, offset = 0;
+ unsigned long addr, __addr, offset = 0;
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr)
/* Decode instructions */
addr = paddr - offset;
while (addr < paddr) {
- kernel_insn_init(&insn, (void *)addr);
- insn_get_opcode(&insn);
-
/*
* Check if the instruction has been modified by another
* kprobe, in which case we replace the breakpoint by the
* original instruction in our buffer.
+ * Also, jump optimization will change the breakpoint to
+ * relative-jump. Since the relative-jump itself is
+ * normally used, we just go through if there is no kprobe.
*/
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf, addr);
- if (ret)
- /*
- * Another debugging subsystem might insert
- * this breakpoint. In that case, we can't
- * recover it.
- */
- return 0;
- kernel_insn_init(&insn, buf);
- }
+ __addr = recover_probed_instruction(buf, addr);
+ kernel_insn_init(&insn, (void *)__addr);
insn_get_length(&insn);
+
+ /*
+ * Another debugging subsystem might insert this breakpoint.
+ * In that case, we can't recover it.
+ */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
addr += insn.length;
}
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
* If not, return null.
* Only applicable to 64-bit x86.
*/
-static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
+int __kprobes __copy_instruction(u8 *dest, u8 *src)
{
struct insn insn;
- int ret;
kprobe_opcode_t buf[MAX_INSN_SIZE];
- kernel_insn_init(&insn, src);
- if (recover) {
- insn_get_opcode(&insn);
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf,
- (unsigned long)src);
- if (ret)
- return 0;
- kernel_insn_init(&insn, buf);
- }
- }
+ kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
insn_get_length(&insn);
+ /* Another subsystem puts a breakpoint, failed to recover */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
memcpy(dest, insn.kaddr, insn.length);
#ifdef CONFIG_X86_64
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
* extension of the original signed 32-bit displacement would
* have given.
*/
- newdisp = (u8 *) src + (s64) insn.displacement.value -
- (u8 *) dest;
+ newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
disp = (u8 *) dest + insn_offset_displacement(&insn);
*(s32 *) disp = (s32) newdisp;
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
static void __kprobes arch_copy_kprobe(struct kprobe *p)
{
+ /* Copy an instruction with recovering if other optprobe modifies it.*/
+ __copy_instruction(p->ainsn.insn, p->addr);
+
/*
- * Copy an instruction without recovering int3, because it will be
- * put by another subsystem.
+ * __copy_instruction can modify the displacement of the instruction,
+ * but it doesn't affect boostable check.
*/
- __copy_instruction(p->ainsn.insn, p->addr, 0);
-
- if (can_boost(p->addr))
+ if (can_boost(p->ainsn.insn))
p->ainsn.boostable = 0;
else
p->ainsn.boostable = -1;
- p->opcode = *p->addr;
+ /* Also, displacement change doesn't affect the first byte */
+ p->opcode = p->ainsn.insn[0];
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void)
}
}
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
+void __kprobes
+arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
unsigned long *sara = stack_addr(regs);
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
*sara = (unsigned long) &kretprobe_trampoline;
}
-#ifdef CONFIG_OPTPROBES
-static int __kprobes setup_detour_execution(struct kprobe *p,
- struct pt_regs *regs,
- int reenter);
-#else
-#define setup_detour_execution(p, regs, reenter) (0)
-#endif
-
-static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb, int reenter)
+static void __kprobes
+setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
{
if (setup_detour_execution(p, regs, reenter))
return;
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
* within the handler. We save the original kprobes variables and just single
* step on the instruction of the new probe without calling any user handlers.
*/
-static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
+static int __kprobes
+reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
return 0;
}
-#ifdef CONFIG_X86_64
-#define SAVE_REGS_STRING \
- /* Skip cs, ip, orig_ax. */ \
- " subq $24, %rsp\n" \
- " pushq %rdi\n" \
- " pushq %rsi\n" \
- " pushq %rdx\n" \
- " pushq %rcx\n" \
- " pushq %rax\n" \
- " pushq %r8\n" \
- " pushq %r9\n" \
- " pushq %r10\n" \
- " pushq %r11\n" \
- " pushq %rbx\n" \
- " pushq %rbp\n" \
- " pushq %r12\n" \
- " pushq %r13\n" \
- " pushq %r14\n" \
- " pushq %r15\n"
-#define RESTORE_REGS_STRING \
- " popq %r15\n" \
- " popq %r14\n" \
- " popq %r13\n" \
- " popq %r12\n" \
- " popq %rbp\n" \
- " popq %rbx\n" \
- " popq %r11\n" \
- " popq %r10\n" \
- " popq %r9\n" \
- " popq %r8\n" \
- " popq %rax\n" \
- " popq %rcx\n" \
- " popq %rdx\n" \
- " popq %rsi\n" \
- " popq %rdi\n" \
- /* Skip orig_ax, ip, cs */ \
- " addq $24, %rsp\n"
-#else
-#define SAVE_REGS_STRING \
- /* Skip cs, ip, orig_ax and gs. */ \
- " subl $16, %esp\n" \
- " pushl %fs\n" \
- " pushl %es\n" \
- " pushl %ds\n" \
- " pushl %eax\n" \
- " pushl %ebp\n" \
- " pushl %edi\n" \
- " pushl %esi\n" \
- " pushl %edx\n" \
- " pushl %ecx\n" \
- " pushl %ebx\n"
-#define RESTORE_REGS_STRING \
- " popl %ebx\n" \
- " popl %ecx\n" \
- " popl %edx\n" \
- " popl %esi\n" \
- " popl %edi\n" \
- " popl %ebp\n" \
- " popl %eax\n" \
- /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
- " addl $24, %esp\n"
-#endif
-
/*
* When a retprobed function returns, this code saves registers and
* calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
* jump instruction after the copied instruction, that jumps to the next
* instruction after the probepoint.
*/
-static void __kprobes resume_execution(struct kprobe *p,
- struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static void __kprobes
+resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
unsigned long *tos = stack_addr(regs);
unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
/*
* Wrapper routine for handling exceptions.
*/
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
+int __kprobes
+kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
{
struct die_args *args = data;
int ret = NOTIFY_DONE;
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-
-#ifdef CONFIG_OPTPROBES
-
-/* Insert a call instruction at address 'from', which calls address 'to'.*/
-static void __kprobes synthesize_relcall(void *from, void *to)
-{
- __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
-}
-
-/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
-static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
- unsigned long val)
-{
-#ifdef CONFIG_X86_64
- *addr++ = 0x48;
- *addr++ = 0xbf;
-#else
- *addr++ = 0xb8;
-#endif
- *(unsigned long *)addr = val;
-}
-
-static void __used __kprobes kprobes_optinsn_template_holder(void)
-{
- asm volatile (
- ".global optprobe_template_entry\n"
- "optprobe_template_entry: \n"
-#ifdef CONFIG_X86_64
- /* We don't bother saving the ss register */
- " pushq %rsp\n"
- " pushfq\n"
- SAVE_REGS_STRING
- " movq %rsp, %rsi\n"
- ".global optprobe_template_val\n"
- "optprobe_template_val: \n"
- ASM_NOP5
- ASM_NOP5
- ".global optprobe_template_call\n"
- "optprobe_template_call: \n"
- ASM_NOP5
- /* Move flags to rsp */
- " movq 144(%rsp), %rdx\n"
- " movq %rdx, 152(%rsp)\n"
- RESTORE_REGS_STRING
- /* Skip flags entry */
- " addq $8, %rsp\n"
- " popfq\n"
-#else /* CONFIG_X86_32 */
- " pushf\n"
- SAVE_REGS_STRING
- " movl %esp, %edx\n"
- ".global optprobe_template_val\n"
- "optprobe_template_val: \n"
- ASM_NOP5
- ".global optprobe_template_call\n"
- "optprobe_template_call: \n"
- ASM_NOP5
- RESTORE_REGS_STRING
- " addl $4, %esp\n" /* skip cs */
- " popf\n"
-#endif
- ".global optprobe_template_end\n"
- "optprobe_template_end: \n");
-}
-
-#define TMPL_MOVE_IDX \
- ((long)&optprobe_template_val - (long)&optprobe_template_entry)
-#define TMPL_CALL_IDX \
- ((long)&optprobe_template_call - (long)&optprobe_template_entry)
-#define TMPL_END_IDX \
- ((long)&optprobe_template_end - (long)&optprobe_template_entry)
-
-#define INT3_SIZE sizeof(kprobe_opcode_t)
-
-/* Optimized kprobe call back function: called from optinsn */
-static void __kprobes optimized_callback(struct optimized_kprobe *op,
- struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long flags;
-
- /* This is possible if op is under delayed unoptimizing */
- if (kprobe_disabled(&op->kp))
- return;
-
- local_irq_save(flags);
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(&op->kp);
- } else {
- /* Save skipped registers */
-#ifdef CONFIG_X86_64
- regs->cs = __KERNEL_CS;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
- regs->gs = 0;
-#endif
- regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
- regs->orig_ax = ~0UL;
-
- __this_cpu_write(current_kprobe, &op->kp);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- opt_pre_handler(&op->kp, regs);
- __this_cpu_write(current_kprobe, NULL);
- }
- local_irq_restore(flags);
-}
-
-static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
-{
- int len = 0, ret;
-
- while (len < RELATIVEJUMP_SIZE) {
- ret = __copy_instruction(dest + len, src + len, 1);
- if (!ret || !can_boost(dest + len))
- return -EINVAL;
- len += ret;
- }
- /* Check whether the address range is reserved */
- if (ftrace_text_reserved(src, src + len - 1) ||
- alternatives_text_reserved(src, src + len - 1) ||
- jump_label_text_reserved(src, src + len - 1))
- return -EBUSY;
-
- return len;
-}
-
-/* Check whether insn is indirect jump */
-static int __kprobes insn_is_indirect_jump(struct insn *insn)
-{
- return ((insn->opcode.bytes[0] == 0xff &&
- (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
- insn->opcode.bytes[0] == 0xea); /* Segment based jump */
-}
-
-/* Check whether insn jumps into specified address range */
-static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
-{
- unsigned long target = 0;
-
- switch (insn->opcode.bytes[0]) {
- case 0xe0: /* loopne */
- case 0xe1: /* loope */
- case 0xe2: /* loop */
- case 0xe3: /* jcxz */
- case 0xe9: /* near relative jump */
- case 0xeb: /* short relative jump */
- break;
- case 0x0f:
- if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
- break;
- return 0;
- default:
- if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
- break;
- return 0;
- }
- target = (unsigned long)insn->next_byte + insn->immediate.value;
-
- return (start <= target && target <= start + len);
-}
-
-/* Decode whole function to ensure any instructions don't jump into target */
-static int __kprobes can_optimize(unsigned long paddr)
-{
- int ret;
- unsigned long addr, size = 0, offset = 0;
- struct insn insn;
- kprobe_opcode_t buf[MAX_INSN_SIZE];
-
- /* Lookup symbol including addr */
- if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
- return 0;
-
- /*
- * Do not optimize in the entry code due to the unstable
- * stack handling.
- */
- if ((paddr >= (unsigned long )__entry_text_start) &&
- (paddr < (unsigned long )__entry_text_end))
- return 0;
-
- /* Check there is enough space for a relative jump. */
- if (size - offset < RELATIVEJUMP_SIZE)
- return 0;
-
- /* Decode instructions */
- addr = paddr - offset;
- while (addr < paddr - offset + size) { /* Decode until function end */
- if (search_exception_tables(addr))
- /*
- * Since some fixup code will jumps into this function,
- * we can't optimize kprobe in this function.
- */
- return 0;
- kernel_insn_init(&insn, (void *)addr);
- insn_get_opcode(&insn);
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf, addr);
- if (ret)
- return 0;
- kernel_insn_init(&insn, buf);
- }
- insn_get_length(&insn);
- /* Recover address */
- insn.kaddr = (void *)addr;
- insn.next_byte = (void *)(addr + insn.length);
- /* Check any instructions don't jump into target */
- if (insn_is_indirect_jump(&insn) ||
- insn_jump_into_range(&insn, paddr + INT3_SIZE,
- RELATIVE_ADDR_SIZE))
- return 0;
- addr += insn.length;
- }
-
- return 1;
-}
-
-/* Check optimized_kprobe can actually be optimized. */
-int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
-{
- int i;
- struct kprobe *p;
-
- for (i = 1; i < op->optinsn.size; i++) {
- p = get_kprobe(op->kp.addr + i);
- if (p && !kprobe_disabled(p))
- return -EEXIST;
- }
-
- return 0;
-}
-
-/* Check the addr is within the optimized instructions. */
-int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
- unsigned long addr)
-{
- return ((unsigned long)op->kp.addr <= addr &&
- (unsigned long)op->kp.addr + op->optinsn.size > addr);
-}
-
-/* Free optimized instruction slot */
-static __kprobes
-void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
-{
- if (op->optinsn.insn) {
- free_optinsn_slot(op->optinsn.insn, dirty);
- op->optinsn.insn = NULL;
- op->optinsn.size = 0;
- }
-}
-
-void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
-{
- __arch_remove_optimized_kprobe(op, 1);
-}
-
-/*
- * Copy replacing target instructions
- * Target instructions MUST be relocatable (checked inside)
- */
-int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
-{
- u8 *buf;
- int ret;
- long rel;
-
- if (!can_optimize((unsigned long)op->kp.addr))
- return -EILSEQ;
-
- op->optinsn.insn = get_optinsn_slot();
- if (!op->optinsn.insn)
- return -ENOMEM;
-
- /*
- * Verify if the address gap is in 2GB range, because this uses
- * a relative jump.
- */
- rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
- if (abs(rel) > 0x7fffffff)
- return -ERANGE;
-
- buf = (u8 *)op->optinsn.insn;
-
- /* Copy instructions into the out-of-line buffer */
- ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
- if (ret < 0) {
- __arch_remove_optimized_kprobe(op, 0);
- return ret;
- }
- op->optinsn.size = ret;
-
- /* Copy arch-dep-instance from template */
- memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
-
- /* Set probe information */
- synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
-
- /* Set probe function call */
- synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
-
- /* Set returning jmp instruction at the tail of out-of-line buffer */
- synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
- (u8 *)op->kp.addr + op->optinsn.size);
-
- flush_icache_range((unsigned long) buf,
- (unsigned long) buf + TMPL_END_IDX +
- op->optinsn.size + RELATIVEJUMP_SIZE);
- return 0;
-}
-
-#define MAX_OPTIMIZE_PROBES 256
-static struct text_poke_param *jump_poke_params;
-static struct jump_poke_buffer {
- u8 buf[RELATIVEJUMP_SIZE];
-} *jump_poke_bufs;
-
-static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
-{
- s32 rel = (s32)((long)op->optinsn.insn -
- ((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
- /* Backup instructions which will be replaced by jump address */
- memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
- RELATIVE_ADDR_SIZE);
-
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
-}
-
-/*
- * Replace breakpoints (int3) with relative jumps.
- * Caller must call with locking kprobe_mutex and text_mutex.
- */
-void __kprobes arch_optimize_kprobes(struct list_head *oplist)
-{
- struct optimized_kprobe *op, *tmp;
- int c = 0;
-
- list_for_each_entry_safe(op, tmp, oplist, list) {
- WARN_ON(kprobe_disabled(&op->kp));
- /* Setup param */
- setup_optimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
- list_del_init(&op->list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
- }
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
-}
-
-static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
-{
- /* Set int3 to first byte for kprobes */
- insn_buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
-}
-
-/*
- * Recover original instructions and breakpoints from relative jumps.
- * Caller must call with locking kprobe_mutex.
- */
-extern void arch_unoptimize_kprobes(struct list_head *oplist,
- struct list_head *done_list)
-{
- struct optimized_kprobe *op, *tmp;
- int c = 0;
-
- list_for_each_entry_safe(op, tmp, oplist, list) {
- /* Setup param */
- setup_unoptimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
- list_move(&op->list, done_list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
- }
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
-}
-
-/* Replace a relative jump with a breakpoint (int3). */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
-{
- u8 buf[RELATIVEJUMP_SIZE];
-
- /* Set int3 to first byte for kprobes */
- buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
-}
-
-static int __kprobes setup_detour_execution(struct kprobe *p,
- struct pt_regs *regs,
- int reenter)
-{
- struct optimized_kprobe *op;
-
- if (p->flags & KPROBE_FLAG_OPTIMIZED) {
- /* This kprobe is really able to run optimized path. */
- op = container_of(p, struct optimized_kprobe, kp);
- /* Detour through copied instructions */
- regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
- if (!reenter)
- reset_current_kprobe();
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-
-static int __kprobes init_poke_params(void)
-{
- /* Allocate code buffer and parameter array */
- jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_bufs)
- return -ENOMEM;
-
- jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_params) {
- kfree(jump_poke_bufs);
- jump_poke_bufs = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
-#else /* !CONFIG_OPTPROBES */
-static int __kprobes init_poke_params(void)
-{
- return 0;
-}
-#endif
-
int __init arch_init_kprobes(void)
{
- return init_poke_params();
+ return arch_init_optprobes();
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f0c6fd6..694d801 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -438,9 +438,9 @@ void __init kvm_guest_init(void)
static __init int activate_jump_labels(void)
{
if (has_steal_clock) {
- jump_label_inc(&paravirt_steal_enabled);
+ static_key_slow_inc(&paravirt_steal_enabled);
if (steal_acc)
- jump_label_inc(&paravirt_steal_rq_enabled);
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
}
return 0;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index fe86493..73465aa 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -311,13 +311,33 @@ out:
return state;
}
+/*
+ * AMD microcode firmware naming convention, up to family 15h they are in
+ * the legacy file:
+ *
+ * amd-ucode/microcode_amd.bin
+ *
+ * This legacy file is always smaller than 2K in size.
+ *
+ * Starting at family 15h they are in family specific firmware files:
+ *
+ * amd-ucode/microcode_amd_fam15h.bin
+ * amd-ucode/microcode_amd_fam16h.bin
+ * ...
+ *
+ * These might be larger than 2K.
+ */
static enum ucode_state request_microcode_amd(int cpu, struct device *device)
{
- const char *fw_name = "amd-ucode/microcode_amd.bin";
+ char fw_name[36] = "amd-ucode/microcode_amd.bin";
const struct firmware *fw;
enum ucode_state ret = UCODE_NFOUND;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86 >= 0x15)
+ snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
- if (request_firmware(&fw, fw_name, device)) {
+ if (request_firmware(&fw, (const char *)fw_name, device)) {
pr_err("failed to load file %s\n", fw_name);
goto out;
}
@@ -340,7 +360,6 @@ out:
static enum ucode_state
request_microcode_user(int cpu, const void __user *buf, size_t size)
{
- pr_info("AMD microcode update via /dev/cpu/microcode not supported\n");
return UCODE_ERROR;
}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fda91c3..87a0f86 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -86,6 +86,7 @@
#include <asm/microcode.h>
#include <asm/processor.h>
+#include <asm/cpu_device_id.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -504,6 +505,20 @@ static struct notifier_block __refdata mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
+#ifdef MODULE
+/* Autoload on Intel and AMD systems */
+static const struct x86_cpu_id microcode_id[] = {
+#ifdef CONFIG_MICROCODE_INTEL
+ { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
+#endif
+#ifdef CONFIG_MICROCODE_AMD
+ { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
+#endif
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, microcode_id);
+#endif
+
static int __init microcode_init(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e88f37b..47acaf3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
unknown_nmi_error(reason, regs);
}
+/*
+ * NMIs can hit breakpoints which will cause it to lose its
+ * NMI context with the CPU when the breakpoint does an iret.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * For i386, NMIs use the same stack as the kernel, and we can
+ * add a workaround to the iret problem in C. Simply have 3 states
+ * the NMI can be in.
+ *
+ * 1) not running
+ * 2) executing
+ * 3) latched
+ *
+ * When no NMI is in progress, it is in the "not running" state.
+ * When an NMI comes in, it goes into the "executing" state.
+ * Normally, if another NMI is triggered, it does not interrupt
+ * the running NMI and the HW will simply latch it so that when
+ * the first NMI finishes, it will restart the second NMI.
+ * (Note, the latch is binary, thus multiple NMIs triggering,
+ * when one is running, are ignored. Only one NMI is restarted.)
+ *
+ * If an NMI hits a breakpoint that executes an iret, another
+ * NMI can preempt it. We do not want to allow this new NMI
+ * to run, but we want to execute it when the first one finishes.
+ * We set the state to "latched", and the first NMI will perform
+ * an cmpxchg on the state, and if it doesn't successfully
+ * reset the state to "not running" it will restart the next
+ * NMI.
+ */
+enum nmi_states {
+ NMI_NOT_RUNNING,
+ NMI_EXECUTING,
+ NMI_LATCHED,
+};
+static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+
+#define nmi_nesting_preprocess(regs) \
+ do { \
+ if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \
+ __get_cpu_var(nmi_state) = NMI_LATCHED; \
+ return; \
+ } \
+ nmi_restart: \
+ __get_cpu_var(nmi_state) = NMI_EXECUTING; \
+ } while (0)
+
+#define nmi_nesting_postprocess() \
+ do { \
+ if (cmpxchg(&__get_cpu_var(nmi_state), \
+ NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
+ goto nmi_restart; \
+ } while (0)
+#else /* x86_64 */
+/*
+ * In x86_64 things are a bit more difficult. This has the same problem
+ * where an NMI hitting a breakpoint that calls iret will remove the
+ * NMI context, allowing a nested NMI to enter. What makes this more
+ * difficult is that both NMIs and breakpoints have their own stack.
+ * When a new NMI or breakpoint is executed, the stack is set to a fixed
+ * point. If an NMI is nested, it will have its stack set at that same
+ * fixed address that the first NMI had, and will start corrupting the
+ * stack. This is handled in entry_64.S, but the same problem exists with
+ * the breakpoint stack.
+ *
+ * If a breakpoint is being processed, and the debug stack is being used,
+ * if an NMI comes in and also hits a breakpoint, the stack pointer
+ * will be set to the same fixed address as the breakpoint that was
+ * interrupted, causing that stack to be corrupted. To handle this case,
+ * check if the stack that was interrupted is the debug stack, and if
+ * so, change the IDT so that new breakpoints will use the current stack
+ * and not switch to the fixed address. On return of the NMI, switch back
+ * to the original IDT.
+ */
+static DEFINE_PER_CPU(int, update_debug_stack);
+
+static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+{
+ /*
+ * If we interrupted a breakpoint, it is possible that
+ * the nmi handler will have breakpoints too. We need to
+ * change the IDT such that breakpoints that happen here
+ * continue to use the NMI stack.
+ */
+ if (unlikely(is_debug_stack(regs->sp))) {
+ debug_stack_set_zero();
+ __get_cpu_var(update_debug_stack) = 1;
+ }
+}
+
+static inline void nmi_nesting_postprocess(void)
+{
+ if (unlikely(__get_cpu_var(update_debug_stack)))
+ debug_stack_reset();
+}
+#endif
+
dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
+ nmi_nesting_preprocess(regs);
+
nmi_enter();
inc_irq_stat(__nmi_count);
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
default_do_nmi(regs);
nmi_exit();
+
+ /* On i386, may loop back to preprocess */
+ nmi_nesting_postprocess();
}
void stop_nmi(void)
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
new file mode 100644
index 0000000..2c39dcd
--- /dev/null
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -0,0 +1,181 @@
+/*
+ * arch/x86/kernel/nmi-selftest.c
+ *
+ * Testsuite for NMI: IPIs
+ *
+ * Started by Don Zickus:
+ * (using lib/locking-selftest.c as a guide)
+ *
+ * Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com>
+ */
+
+#include <linux/smp.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+
+#include <asm/apic.h>
+#include <asm/nmi.h>
+
+#define SUCCESS 0
+#define FAILURE 1
+#define TIMEOUT 2
+
+static int __initdata nmi_fail;
+
+/* check to see if NMI IPIs work on this machine */
+static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __initdata;
+
+static int __initdata testcase_total;
+static int __initdata testcase_successes;
+static int __initdata expected_testcase_failures;
+static int __initdata unexpected_testcase_failures;
+static int __initdata unexpected_testcase_unknowns;
+
+static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
+{
+ unexpected_testcase_unknowns++;
+ return NMI_HANDLED;
+}
+
+static void __init init_nmi_testsuite(void)
+{
+ /* trap all the unknown NMIs we may generate */
+ register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+}
+
+static void __init cleanup_nmi_testsuite(void)
+{
+ unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk");
+}
+
+static int __init test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
+{
+ int cpu = raw_smp_processor_id();
+
+ if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask)))
+ return NMI_HANDLED;
+
+ return NMI_DONE;
+}
+
+static void __init test_nmi_ipi(struct cpumask *mask)
+{
+ unsigned long timeout;
+
+ if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+ NMI_FLAG_FIRST, "nmi_selftest")) {
+ nmi_fail = FAILURE;
+ return;
+ }
+
+ /* sync above data before sending NMI */
+ wmb();
+
+ apic->send_IPI_mask(mask, NMI_VECTOR);
+
+ /* Don't wait longer than a second */
+ timeout = USEC_PER_SEC;
+ while (!cpumask_empty(mask) && timeout--)
+ udelay(1);
+
+ /* What happens if we timeout, do we still unregister?? */
+ unregister_nmi_handler(NMI_LOCAL, "nmi_selftest");
+
+ if (!timeout)
+ nmi_fail = TIMEOUT;
+ return;
+}
+
+static void __init remote_ipi(void)
+{
+ cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
+ if (!cpumask_empty(to_cpumask(nmi_ipi_mask)))
+ test_nmi_ipi(to_cpumask(nmi_ipi_mask));
+}
+
+static void __init local_ipi(void)
+{
+ cpumask_clear(to_cpumask(nmi_ipi_mask));
+ cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
+ test_nmi_ipi(to_cpumask(nmi_ipi_mask));
+}
+
+static void __init reset_nmi(void)
+{
+ nmi_fail = 0;
+}
+
+static void __init dotest(void (*testcase_fn)(void), int expected)
+{
+ testcase_fn();
+ /*
+ * Filter out expected failures:
+ */
+ if (nmi_fail != expected) {
+ unexpected_testcase_failures++;
+
+ if (nmi_fail == FAILURE)
+ printk("FAILED |");
+ else if (nmi_fail == TIMEOUT)
+ printk("TIMEOUT|");
+ else
+ printk("ERROR |");
+ dump_stack();
+ } else {
+ testcase_successes++;
+ printk(" ok |");
+ }
+ testcase_total++;
+
+ reset_nmi();
+}
+
+static inline void __init print_testname(const char *testname)
+{
+ printk("%12s:", testname);
+}
+
+void __init nmi_selftest(void)
+{
+ init_nmi_testsuite();
+
+ /*
+ * Run the testsuite:
+ */
+ printk("----------------\n");
+ printk("| NMI testsuite:\n");
+ printk("--------------------\n");
+
+ print_testname("remote IPI");
+ dotest(remote_ipi, SUCCESS);
+ printk("\n");
+ print_testname("local IPI");
+ dotest(local_ipi, SUCCESS);
+ printk("\n");
+
+ cleanup_nmi_testsuite();
+
+ if (unexpected_testcase_failures) {
+ printk("--------------------\n");
+ printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
+ unexpected_testcase_failures, testcase_total);
+ printk("-----------------------------------------------------------------\n");
+ } else if (expected_testcase_failures && testcase_successes) {
+ printk("--------------------\n");
+ printk("%3d out of %3d testcases failed, as expected. |\n",
+ expected_testcase_failures, testcase_total);
+ printk("----------------------------------------------------\n");
+ } else if (expected_testcase_failures && !testcase_successes) {
+ printk("--------------------\n");
+ printk("All %3d testcases failed, as expected. |\n",
+ expected_testcase_failures);
+ printk("----------------------------------------\n");
+ } else {
+ printk("--------------------\n");
+ printk("Good, all %3d testcases passed! |\n",
+ testcase_successes);
+ printk("---------------------------------\n");
+ }
+}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d90272e..ada2f99 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr)
__native_flush_tlb_single(addr);
}
-struct jump_label_key paravirt_steal_enabled;
-struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
static u64 native_steal_clock(int cpu)
{
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 34e06e8..0bc72e2 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/export.h>
+#include <asm/probe_roms.h>
#include <asm/pci-direct.h>
#include <asm/e820.h>
#include <asm/mmzone.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 15763af..14baf78 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -21,6 +21,7 @@
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
struct kmem_cache *task_xstate_cachep;
@@ -377,8 +378,8 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
if (hlt_use_halt()) {
- trace_power_start(POWER_CSTATE, 1, smp_processor_id());
- trace_cpu_idle(1, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -391,8 +392,8 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else {
local_irq_enable();
/* loop is done by the caller */
@@ -450,8 +451,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
static void mwait_idle(void)
{
if (!need_resched()) {
- trace_power_start(POWER_CSTATE, 1, smp_processor_id());
- trace_cpu_idle(1, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -461,8 +462,8 @@ static void mwait_idle(void)
__sti_mwait(0, 0);
else
local_irq_enable();
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else
local_irq_enable();
}
@@ -474,13 +475,13 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
- trace_power_start(POWER_CSTATE, 0, smp_processor_id());
- trace_cpu_idle(0, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
+ trace_cpu_idle_rcuidle(0, smp_processor_id());
local_irq_enable();
while (!need_resched())
cpu_relax();
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 485204f..9d7d484 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -45,6 +45,7 @@
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
@@ -119,9 +120,7 @@ void cpu_idle(void)
}
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
@@ -214,6 +213,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
task_user_gs(p) = get_user_gs(regs);
+ p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
tsk = current;
err = -ENOMEM;
@@ -299,22 +299,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
- bool preload_fpu;
+ fpu_switch_t fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- __unlazy_fpu(prev_p);
-
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
+ fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/*
* Reload esp0.
@@ -354,11 +343,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
- /* If we're going to preload the fpu context, make sure clts
- is run while we're batching the cpu state updates. */
- if (preload_fpu)
- clts();
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -368,15 +352,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);
- if (preload_fpu)
- __math_state_restore();
-
/*
* Restore %gs if needed (which is common)
*/
if (prev->gs | next->gs)
lazy_load_gs(next->gs);
+ switch_fpu_finish(next_p, fpu);
+
percpu_write(current_task, next_p);
return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9b9fe4a..292da13 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -43,6 +43,7 @@
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
@@ -156,9 +157,7 @@ void cpu_idle(void)
}
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
@@ -286,6 +285,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
set_tsk_thread_flag(p, TIF_FORK);
+ p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
@@ -341,6 +341,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
loadsegment(es, _ds);
loadsegment(ds, _ds);
load_gs_index(0);
+ current->thread.usersp = new_sp;
regs->ip = new_ip;
regs->sp = new_sp;
percpu_write(old_rsp, new_sp);
@@ -386,18 +387,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
- bool preload_fpu;
+ fpu_switch_t fpu;
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
+ fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/*
* Reload esp0, LDT and the page table pointer:
@@ -427,13 +419,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
load_TLS(next, cpu);
- /* Must be after DS reload */
- __unlazy_fpu(prev_p);
-
- /* Make sure cpu is ready for new context */
- if (preload_fpu)
- clts();
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -474,6 +459,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;
+ switch_fpu_finish(next_p, fpu);
+
/*
* Switch the PDA and FPU contexts.
*/
@@ -492,13 +479,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
- /*
- * Preload the FPU context, now that we've determined that the
- * task is likely to be using it.
- */
- if (preload_fpu)
- __math_state_restore();
-
return prev_p;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 89a04c7..78f05e4 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -27,6 +27,7 @@
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
#include <asm/ldt.h>
#include <asm/desc.h>
@@ -1392,20 +1393,18 @@ long syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);
- if (unlikely(current->audit_context)) {
- if (IS_IA32)
- audit_syscall_entry(AUDIT_ARCH_I386,
- regs->orig_ax,
- regs->bx, regs->cx,
- regs->dx, regs->si);
+ if (IS_IA32)
+ audit_syscall_entry(AUDIT_ARCH_I386,
+ regs->orig_ax,
+ regs->bx, regs->cx,
+ regs->dx, regs->si);
#ifdef CONFIG_X86_64
- else
- audit_syscall_entry(AUDIT_ARCH_X86_64,
- regs->orig_ax,
- regs->di, regs->si,
- regs->dx, regs->r10);
+ else
+ audit_syscall_entry(AUDIT_ARCH_X86_64,
+ regs->orig_ax,
+ regs->di, regs->si,
+ regs->dx, regs->r10);
#endif
- }
return ret ?: regs->orig_ax;
}
@@ -1414,8 +1413,7 @@ void syscall_trace_leave(struct pt_regs *regs)
{
bool step;
- if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+ audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->ax);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 37a458b..d840e69 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -39,6 +39,14 @@ static int reboot_mode;
enum reboot_type reboot_type = BOOT_ACPI;
int reboot_force;
+/* This variable is used privately to keep track of whether or not
+ * reboot_type is still set to its default value (i.e., reboot= hasn't
+ * been set on the command line). This is needed so that we can
+ * suppress DMI scanning for reboot quirks. Without it, it's
+ * impossible to override a faulty reboot quirk without recompiling.
+ */
+static int reboot_default = 1;
+
#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
static int reboot_cpu = -1;
#endif
@@ -67,6 +75,12 @@ bool port_cf9_safe = false;
static int __init reboot_setup(char *str)
{
for (;;) {
+ /* Having anything passed on the command line via
+ * reboot= will cause us to disable DMI checking
+ * below.
+ */
+ reboot_default = 0;
+
switch (*str) {
case 'w':
reboot_mode = 0x1234;
@@ -295,14 +309,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
},
},
- { /* Handle problems with rebooting on VersaLogic Menlow boards */
- .callback = set_bios_reboot,
- .ident = "VersaLogic Menlow based board",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
- DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
- },
- },
{ /* Handle reboot issue on Acer Aspire one */
.callback = set_kbd_reboot,
.ident = "Acer Aspire One A110",
@@ -316,7 +322,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
static int __init reboot_init(void)
{
- dmi_check_system(reboot_dmi_table);
+ /* Only do the DMI check if reboot_type hasn't been overridden
+ * on the command line
+ */
+ if (reboot_default) {
+ dmi_check_system(reboot_dmi_table);
+ }
return 0;
}
core_initcall(reboot_init);
@@ -465,7 +476,12 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
static int __init pci_reboot_init(void)
{
- dmi_check_system(pci_reboot_dmi_table);
+ /* Only do the DMI check if reboot_type hasn't been overridden
+ * on the command line
+ */
+ if (reboot_default) {
+ dmi_check_system(pci_reboot_dmi_table);
+ }
return 0;
}
core_initcall(pci_reboot_init);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d05444a..8863888 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -749,15 +749,16 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-#ifdef CONFIG_X86_32
- "EL32",
-#else
- "EL64",
-#endif
- 4)) {
+ "EL32", 4)) {
+ efi_enabled = 1;
+ efi_64bit = false;
+ } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+ "EL64", 4)) {
efi_enabled = 1;
- efi_memblock_x86_reserve_range();
+ efi_64bit = true;
}
+ if (efi_enabled && efi_memblock_x86_reserve_range())
+ efi_enabled = 0;
#endif
x86_init.oem.arch_setup();
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 46a01bdc..25edcfc 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -24,6 +24,7 @@
#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/vdso.h>
#include <asm/mce.h>
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 16204dc..66c74f4 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -29,6 +29,7 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
*
@@ -148,6 +149,60 @@ void native_send_call_func_ipi(const struct cpumask *mask)
free_cpumask_var(allbutself);
}
+static atomic_t stopping_cpu = ATOMIC_INIT(-1);
+
+static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
+{
+ /* We are registered on stopping cpu too, avoid spurious NMI */
+ if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
+ return NMI_HANDLED;
+
+ stop_this_cpu(NULL);
+
+ return NMI_HANDLED;
+}
+
+static void native_nmi_stop_other_cpus(int wait)
+{
+ unsigned long flags;
+ unsigned long timeout;
+
+ if (reboot_force)
+ return;
+
+ /*
+ * Use an own vector here because smp_call_function
+ * does lots of things not suitable in a panic situation.
+ */
+ if (num_online_cpus() > 1) {
+ /* did someone beat us here? */
+ if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
+ return;
+
+ if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
+ NMI_FLAG_FIRST, "smp_stop"))
+ /* Note: we ignore failures here */
+ return;
+
+ /* sync above data before sending NMI */
+ wmb();
+
+ apic->send_IPI_allbutself(NMI_VECTOR);
+
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
+ udelay(1);
+ }
+
+ local_irq_save(flags);
+ disable_local_APIC();
+ local_irq_restore(flags);
+}
+
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
@@ -160,7 +215,7 @@ asmlinkage void smp_reboot_interrupt(void)
irq_exit();
}
-static void native_stop_other_cpus(int wait)
+static void native_irq_stop_other_cpus(int wait)
{
unsigned long flags;
unsigned long timeout;
@@ -194,6 +249,11 @@ static void native_stop_other_cpus(int wait)
local_irq_restore(flags);
}
+static void native_smp_disable_nmi_ipi(void)
+{
+ smp_ops.stop_other_cpus = native_irq_stop_other_cpus;
+}
+
/*
* Reschedule call back.
*/
@@ -225,12 +285,20 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
irq_exit();
}
+static int __init nonmi_ipi_setup(char *str)
+{
+ native_smp_disable_nmi_ipi();
+ return 1;
+}
+
+__setup("nonmi_ipi", nonmi_ipi_setup);
+
struct smp_ops smp_ops = {
.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
- .stop_other_cpus = native_stop_other_cpus,
+ .stop_other_cpus = native_nmi_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e38e217..e578a79 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -207,23 +207,29 @@ static void __cpuinit smp_callin(void)
* Need to setup vector mappings before we enable interrupts.
*/
setup_vector_irq(smp_processor_id());
+
+ /*
+ * Save our processor parameters. Note: this information
+ * is needed for clock calibration.
+ */
+ smp_store_cpu_info(cpuid);
+
/*
* Get our bogomips.
+ * Update loops_per_jiffy in cpu_data. Previous call to
+ * smp_store_cpu_info() stored a value that is close but not as
+ * accurate as the value just calculated.
*
* Need to enable IRQs because it can take longer and then
* the NMI watchdog might kill us.
*/
local_irq_enable();
calibrate_delay();
+ cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
local_irq_disable();
pr_debug("Stack at about %p\n", &cpuid);
/*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
- /*
* This must be done before setting cpu_online_mask
* or calling notify_cpu_starting.
*/
@@ -285,19 +291,6 @@ notrace static void __cpuinit start_secondary(void *unused)
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
x86_platform.nmi_init();
- /*
- * Wait until the cpu which brought this one up marked it
- * online before enabling interrupts. If we don't do that then
- * we can end up waking up the softirq thread before this cpu
- * reached the active state, which makes the scheduler unhappy
- * and schedule the softirq thread on the wrong cpu. This is
- * only observable with forced threaded interrupts, but in
- * theory it could also happen w/o them. It's just way harder
- * to achieve.
- */
- while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
- cpu_relax();
-
/* enable local interrupts */
local_irq_enable();
@@ -734,8 +727,6 @@ do_rest:
* the targeted processor.
*/
- printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip);
-
atomic_set(&init_deasserted, 0);
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
@@ -785,9 +776,10 @@ do_rest:
schedule();
}
- if (cpumask_test_cpu(cpu, cpu_callin_mask))
+ if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
+ print_cpu_msr(&cpu_data(cpu));
pr_debug("CPU%d: has booted.\n", cpu);
- else {
+ } else {
boot_error = 1;
if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
== 0xA5A5A5A5)
@@ -841,7 +833,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
!physid_isset(apicid, phys_cpu_present_map) ||
- (!x2apic_mode && apicid >= 255)) {
+ !apic->apic_id_valid(apicid)) {
printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
return -EINVAL;
}
@@ -1143,6 +1135,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
{
pr_debug("Boot done.\n");
+ nmi_selftest();
impress_friends();
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 0514890..ef59642 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -195,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- unsigned long addr = addr0;
+ unsigned long addr = addr0, start_addr;
/* requested length too big for entire address space */
if (len > TASK_SIZE)
@@ -223,25 +223,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
mm->free_area_cache = mm->mmap_base;
}
+try_again:
/* either no address requested or can't fit in requested address hole */
- addr = mm->free_area_cache;
-
- /* make sure it can fit in the remaining address space */
- if (addr > len) {
- unsigned long tmp_addr = align_addr(addr - len, filp,
- ALIGN_TOPDOWN);
-
- vma = find_vma(mm, tmp_addr);
- if (!vma || tmp_addr + len <= vma->vm_start)
- /* remember the address as a hint for next time */
- return mm->free_area_cache = tmp_addr;
- }
-
- if (mm->mmap_base < len)
- goto bottomup;
+ start_addr = addr = mm->free_area_cache;
- addr = mm->mmap_base-len;
+ if (addr < len)
+ goto fail;
+ addr -= len;
do {
addr = align_addr(addr, filp, ALIGN_TOPDOWN);
@@ -263,6 +252,17 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
addr = vma->vm_start-len;
} while (len < vma->vm_start);
+fail:
+ /*
+ * if hint left us with no space for the requested
+ * mapping then try again:
+ */
+ if (start_addr != mm->mmap_base) {
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = 0;
+ goto try_again;
+ }
+
bottomup:
/*
* A failed mmap() very likely causes application failure,
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c
new file mode 100644
index 0000000..147fcd4
--- /dev/null
+++ b/arch/x86/kernel/syscall_32.c
@@ -0,0 +1,25 @@
+/* System call table for i386. */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <linux/cache.h>
+#include <asm/asm-offsets.h>
+
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
+#include <asm/syscalls_32.h>
+#undef __SYSCALL_I386
+
+#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
+
+typedef asmlinkage void (*sys_call_ptr_t)(void);
+
+extern asmlinkage void sys_ni_syscall(void);
+
+const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_syscall_max] = &sys_ni_syscall,
+#include <asm/syscalls_32.h>
+};
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index de87d60..7ac7943 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -5,15 +5,11 @@
#include <linux/cache.h>
#include <asm/asm-offsets.h>
-#define __NO_STUBS
+#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ;
+#include <asm/syscalls_64.h>
+#undef __SYSCALL_64
-#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_UNISTD_64_H
-#include <asm/unistd_64.h>
-
-#undef __SYSCALL
-#define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_UNISTD_64_H
+#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
typedef void (*sys_call_ptr_t)(void);
@@ -21,9 +17,9 @@ extern void sys_ni_syscall(void);
const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
- *Smells like a like a compiler bug -- it doesn't work
- *when the & below is removed.
- */
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
[0 ... __NR_syscall_max] = &sys_ni_syscall,
-#include <asm/unistd_64.h>
+#include <asm/syscalls_64.h>
};
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
deleted file mode 100644
index 9a0e312..0000000
--- a/arch/x86/kernel/syscall_table_32.S
+++ /dev/null
@@ -1,350 +0,0 @@
-ENTRY(sys_call_table)
- .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
- .long sys_exit
- .long ptregs_fork
- .long sys_read
- .long sys_write
- .long sys_open /* 5 */
- .long sys_close
- .long sys_waitpid
- .long sys_creat
- .long sys_link
- .long sys_unlink /* 10 */
- .long ptregs_execve
- .long sys_chdir
- .long sys_time
- .long sys_mknod
- .long sys_chmod /* 15 */
- .long sys_lchown16
- .long sys_ni_syscall /* old break syscall holder */
- .long sys_stat
- .long sys_lseek
- .long sys_getpid /* 20 */
- .long sys_mount
- .long sys_oldumount
- .long sys_setuid16
- .long sys_getuid16
- .long sys_stime /* 25 */
- .long sys_ptrace
- .long sys_alarm
- .long sys_fstat
- .long sys_pause
- .long sys_utime /* 30 */
- .long sys_ni_syscall /* old stty syscall holder */
- .long sys_ni_syscall /* old gtty syscall holder */
- .long sys_access
- .long sys_nice
- .long sys_ni_syscall /* 35 - old ftime syscall holder */
- .long sys_sync
- .long sys_kill
- .long sys_rename
- .long sys_mkdir
- .long sys_rmdir /* 40 */
- .long sys_dup
- .long sys_pipe
- .long sys_times
- .long sys_ni_syscall /* old prof syscall holder */
- .long sys_brk /* 45 */
- .long sys_setgid16
- .long sys_getgid16
- .long sys_signal
- .long sys_geteuid16
- .long sys_getegid16 /* 50 */
- .long sys_acct
- .long sys_umount /* recycled never used phys() */
- .long sys_ni_syscall /* old lock syscall holder */
- .long sys_ioctl
- .long sys_fcntl /* 55 */
- .long sys_ni_syscall /* old mpx syscall holder */
- .long sys_setpgid
- .long sys_ni_syscall /* old ulimit syscall holder */
- .long sys_olduname
- .long sys_umask /* 60 */
- .long sys_chroot
- .long sys_ustat
- .long sys_dup2
- .long sys_getppid
- .long sys_getpgrp /* 65 */
- .long sys_setsid
- .long sys_sigaction
- .long sys_sgetmask
- .long sys_ssetmask
- .long sys_setreuid16 /* 70 */
- .long sys_setregid16
- .long sys_sigsuspend
- .long sys_sigpending
- .long sys_sethostname
- .long sys_setrlimit /* 75 */
- .long sys_old_getrlimit
- .long sys_getrusage
- .long sys_gettimeofday
- .long sys_settimeofday
- .long sys_getgroups16 /* 80 */
- .long sys_setgroups16
- .long sys_old_select
- .long sys_symlink
- .long sys_lstat
- .long sys_readlink /* 85 */
- .long sys_uselib
- .long sys_swapon
- .long sys_reboot
- .long sys_old_readdir
- .long sys_old_mmap /* 90 */
- .long sys_munmap
- .long sys_truncate
- .long sys_ftruncate
- .long sys_fchmod
- .long sys_fchown16 /* 95 */
- .long sys_getpriority
- .long sys_setpriority
- .long sys_ni_syscall /* old profil syscall holder */
- .long sys_statfs
- .long sys_fstatfs /* 100 */
- .long sys_ioperm
- .long sys_socketcall
- .long sys_syslog
- .long sys_setitimer
- .long sys_getitimer /* 105 */
- .long sys_newstat
- .long sys_newlstat
- .long sys_newfstat
- .long sys_uname
- .long ptregs_iopl /* 110 */
- .long sys_vhangup
- .long sys_ni_syscall /* old "idle" system call */
- .long ptregs_vm86old
- .long sys_wait4
- .long sys_swapoff /* 115 */
- .long sys_sysinfo
- .long sys_ipc
- .long sys_fsync
- .long ptregs_sigreturn
- .long ptregs_clone /* 120 */
- .long sys_setdomainname
- .long sys_newuname
- .long sys_modify_ldt
- .long sys_adjtimex
- .long sys_mprotect /* 125 */
- .long sys_sigprocmask
- .long sys_ni_syscall /* old "create_module" */
- .long sys_init_module
- .long sys_delete_module
- .long sys_ni_syscall /* 130: old "get_kernel_syms" */
- .long sys_quotactl
- .long sys_getpgid
- .long sys_fchdir
- .long sys_bdflush
- .long sys_sysfs /* 135 */
- .long sys_personality
- .long sys_ni_syscall /* reserved for afs_syscall */
- .long sys_setfsuid16
- .long sys_setfsgid16
- .long sys_llseek /* 140 */
- .long sys_getdents
- .long sys_select
- .long sys_flock
- .long sys_msync
- .long sys_readv /* 145 */
- .long sys_writev
- .long sys_getsid
- .long sys_fdatasync
- .long sys_sysctl
- .long sys_mlock /* 150 */
- .long sys_munlock
- .long sys_mlockall
- .long sys_munlockall
- .long sys_sched_setparam
- .long sys_sched_getparam /* 155 */
- .long sys_sched_setscheduler
- .long sys_sched_getscheduler
- .long sys_sched_yield
- .long sys_sched_get_priority_max
- .long sys_sched_get_priority_min /* 160 */
- .long sys_sched_rr_get_interval
- .long sys_nanosleep
- .long sys_mremap
- .long sys_setresuid16
- .long sys_getresuid16 /* 165 */
- .long ptregs_vm86
- .long sys_ni_syscall /* Old sys_query_module */
- .long sys_poll
- .long sys_ni_syscall /* Old nfsservctl */
- .long sys_setresgid16 /* 170 */
- .long sys_getresgid16
- .long sys_prctl
- .long ptregs_rt_sigreturn
- .long sys_rt_sigaction
- .long sys_rt_sigprocmask /* 175 */
- .long sys_rt_sigpending
- .long sys_rt_sigtimedwait
- .long sys_rt_sigqueueinfo
- .long sys_rt_sigsuspend
- .long sys_pread64 /* 180 */
- .long sys_pwrite64
- .long sys_chown16
- .long sys_getcwd
- .long sys_capget
- .long sys_capset /* 185 */
- .long ptregs_sigaltstack
- .long sys_sendfile
- .long sys_ni_syscall /* reserved for streams1 */
- .long sys_ni_syscall /* reserved for streams2 */
- .long ptregs_vfork /* 190 */
- .long sys_getrlimit
- .long sys_mmap_pgoff
- .long sys_truncate64
- .long sys_ftruncate64
- .long sys_stat64 /* 195 */
- .long sys_lstat64
- .long sys_fstat64
- .long sys_lchown
- .long sys_getuid
- .long sys_getgid /* 200 */
- .long sys_geteuid
- .long sys_getegid
- .long sys_setreuid
- .long sys_setregid
- .long sys_getgroups /* 205 */
- .long sys_setgroups
- .long sys_fchown
- .long sys_setresuid
- .long sys_getresuid
- .long sys_setresgid /* 210 */
- .long sys_getresgid
- .long sys_chown
- .long sys_setuid
- .long sys_setgid
- .long sys_setfsuid /* 215 */
- .long sys_setfsgid
- .long sys_pivot_root
- .long sys_mincore
- .long sys_madvise
- .long sys_getdents64 /* 220 */
- .long sys_fcntl64
- .long sys_ni_syscall /* reserved for TUX */
- .long sys_ni_syscall
- .long sys_gettid
- .long sys_readahead /* 225 */
- .long sys_setxattr
- .long sys_lsetxattr
- .long sys_fsetxattr
- .long sys_getxattr
- .long sys_lgetxattr /* 230 */
- .long sys_fgetxattr
- .long sys_listxattr
- .long sys_llistxattr
- .long sys_flistxattr
- .long sys_removexattr /* 235 */
- .long sys_lremovexattr
- .long sys_fremovexattr
- .long sys_tkill
- .long sys_sendfile64
- .long sys_futex /* 240 */
- .long sys_sched_setaffinity
- .long sys_sched_getaffinity
- .long sys_set_thread_area
- .long sys_get_thread_area
- .long sys_io_setup /* 245 */
- .long sys_io_destroy
- .long sys_io_getevents
- .long sys_io_submit
- .long sys_io_cancel
- .long sys_fadvise64 /* 250 */
- .long sys_ni_syscall
- .long sys_exit_group
- .long sys_lookup_dcookie
- .long sys_epoll_create
- .long sys_epoll_ctl /* 255 */
- .long sys_epoll_wait
- .long sys_remap_file_pages
- .long sys_set_tid_address
- .long sys_timer_create
- .long sys_timer_settime /* 260 */
- .long sys_timer_gettime
- .long sys_timer_getoverrun
- .long sys_timer_delete
- .long sys_clock_settime
- .long sys_clock_gettime /* 265 */
- .long sys_clock_getres
- .long sys_clock_nanosleep
- .long sys_statfs64
- .long sys_fstatfs64
- .long sys_tgkill /* 270 */
- .long sys_utimes
- .long sys_fadvise64_64
- .long sys_ni_syscall /* sys_vserver */
- .long sys_mbind
- .long sys_get_mempolicy
- .long sys_set_mempolicy
- .long sys_mq_open
- .long sys_mq_unlink
- .long sys_mq_timedsend
- .long sys_mq_timedreceive /* 280 */
- .long sys_mq_notify
- .long sys_mq_getsetattr
- .long sys_kexec_load
- .long sys_waitid
- .long sys_ni_syscall /* 285 */ /* available */
- .long sys_add_key
- .long sys_request_key
- .long sys_keyctl
- .long sys_ioprio_set
- .long sys_ioprio_get /* 290 */
- .long sys_inotify_init
- .long sys_inotify_add_watch
- .long sys_inotify_rm_watch
- .long sys_migrate_pages
- .long sys_openat /* 295 */
- .long sys_mkdirat
- .long sys_mknodat
- .long sys_fchownat
- .long sys_futimesat
- .long sys_fstatat64 /* 300 */
- .long sys_unlinkat
- .long sys_renameat
- .long sys_linkat
- .long sys_symlinkat
- .long sys_readlinkat /* 305 */
- .long sys_fchmodat
- .long sys_faccessat
- .long sys_pselect6
- .long sys_ppoll
- .long sys_unshare /* 310 */
- .long sys_set_robust_list
- .long sys_get_robust_list
- .long sys_splice
- .long sys_sync_file_range
- .long sys_tee /* 315 */
- .long sys_vmsplice
- .long sys_move_pages
- .long sys_getcpu
- .long sys_epoll_pwait
- .long sys_utimensat /* 320 */
- .long sys_signalfd
- .long sys_timerfd_create
- .long sys_eventfd
- .long sys_fallocate
- .long sys_timerfd_settime /* 325 */
- .long sys_timerfd_gettime
- .long sys_signalfd4
- .long sys_eventfd2
- .long sys_epoll_create1
- .long sys_dup3 /* 330 */
- .long sys_pipe2
- .long sys_inotify_init1
- .long sys_preadv
- .long sys_pwritev
- .long sys_rt_tgsigqueueinfo /* 335 */
- .long sys_perf_event_open
- .long sys_recvmmsg
- .long sys_fanotify_init
- .long sys_fanotify_mark
- .long sys_prlimit64 /* 340 */
- .long sys_name_to_handle_at
- .long sys_open_by_handle_at
- .long sys_clock_adjtime
- .long sys_syncfs
- .long sys_sendmmsg /* 345 */
- .long sys_setns
- .long sys_process_vm_readv
- .long sys_process_vm_writev
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dd5fbf4..c6eba2b 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -57,9 +57,6 @@ EXPORT_SYMBOL(profile_pc);
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
- /* Keep nmi watchdog up to date */
- inc_irq_stat(irq0_irqs);
-
global_clock_event->event_handler(global_clock_event);
/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fa1191fb..ec61d4c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,6 +54,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/mach_traps.h>
@@ -311,9 +312,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
== NOTIFY_STOP)
return;
+ /*
+ * Let others (NMI) know that the debug stack is in use
+ * as we may switch to the interrupt stack.
+ */
+ debug_stack_usage_inc();
preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
+ debug_stack_usage_dec();
}
#ifdef CONFIG_X86_64
@@ -406,6 +413,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
SIGTRAP) == NOTIFY_STOP)
return;
+ /*
+ * Let others (NMI) know that the debug stack is in use
+ * as we may switch to the interrupt stack.
+ */
+ debug_stack_usage_inc();
+
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
@@ -413,6 +426,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
preempt_conditional_cli(regs);
+ debug_stack_usage_dec();
return;
}
@@ -432,6 +446,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
+ debug_stack_usage_dec();
return;
}
@@ -557,41 +572,18 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
}
/*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use. Used during context switch.
- */
-void __math_state_restore(void)
-{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
-
- /*
- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
- */
- if (unlikely(restore_fpu_checking(tsk))) {
- stts();
- force_sig(SIGSEGV, tsk);
- return;
- }
-
- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
- tsk->fpu_counter++;
-}
-
-/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
*
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
* Don't touch unless you *really* know how it works.
*
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (eg with local
+ * local interrupts as in the case of do_device_not_available).
*/
-asmlinkage void math_state_restore(void)
+void math_state_restore(void)
{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;
if (!tsk_used_math(tsk)) {
local_irq_enable();
@@ -608,9 +600,17 @@ asmlinkage void math_state_restore(void)
local_irq_disable();
}
- clts(); /* Allow maths ops (or we recurse) */
+ __thread_fpu_begin(tsk);
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+ __thread_fpu_end(tsk);
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
- __math_state_restore();
+ tsk->fpu_counter++;
}
EXPORT_SYMBOL_GPL(math_state_restore);
@@ -718,4 +718,10 @@ void __init trap_init(void)
cpu_init();
x86_init.irqs.trap_init();
+
+#ifdef CONFIG_X86_64
+ memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+ set_nmi_gate(1, &debug);
+ set_nmi_gate(3, &int3);
+#endif
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 2c9cf0f..183c592 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -290,14 +290,15 @@ static inline int pit_verify_msb(unsigned char val)
static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
int count;
- u64 tsc = 0;
+ u64 tsc = 0, prev_tsc = 0;
for (count = 0; count < 50000; count++) {
if (!pit_verify_msb(val))
break;
+ prev_tsc = tsc;
tsc = get_cycles();
}
- *deltap = get_cycles() - tsc;
+ *deltap = get_cycles() - prev_tsc;
*tscp = tsc;
/*
@@ -311,9 +312,9 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
* How many MSB values do we want to see? We aim for
* a maximum error rate of 500ppm (in practice the
* real error is much smaller), but refuse to spend
- * more than 25ms on it.
+ * more than 50ms on it.
*/
-#define MAX_QUICK_PIT_MS 25
+#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
static unsigned long quick_pit_calibrate(void)
@@ -383,15 +384,12 @@ success:
*
* As a result, we can depend on there not being
* any odd delays anywhere, and the TSC reads are
- * reliable (within the error). We also adjust the
- * delta to the middle of the error bars, just
- * because it looks nicer.
+ * reliable (within the error).
*
* kHz = ticks / time-in-seconds / 1000;
* kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
* kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
*/
- delta += (long)(d2 - d1)/2;
delta *= PIT_TICK_RATE;
do_div(delta, i*256*1000);
printk("Fast TSC calibration using PIT\n");
@@ -622,7 +620,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
if (cpu_khz) {
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
- *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+ *offset = ns_now - mult_frac(tsc_now, *scale,
+ (1UL << CYC2NS_SCALE_FACTOR));
}
sched_clock_idle_wakeup_event(0);
@@ -995,3 +994,23 @@ void __init tsc_init(void)
check_system_tsc_reliable();
}
+#ifdef CONFIG_SMP
+/*
+ * If we have a constant TSC and are using the TSC for the delay loop,
+ * we can skip clock calibration if another cpu in the same socket has already
+ * been calibrated. This assumes that CONSTANT_TSC applies to all
+ * cpus in the socket - this should be a safe assumption.
+ */
+unsigned long __cpuinit calibrate_delay_is_known(void)
+{
+ int i, cpu = smp_processor_id();
+
+ if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
+ return 0;
+
+ for_each_online_cpu(i)
+ if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
+ return cpu_data(i).loops_per_jiffy;
+ return 0;
+}
+#endif
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9eba29b..fc25e60 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -42,7 +42,7 @@ static __cpuinitdata int nr_warps;
/*
* TSC-warp measurement loop running on both CPUs:
*/
-static __cpuinit void check_tsc_warp(void)
+static __cpuinit void check_tsc_warp(unsigned int timeout)
{
cycles_t start, now, prev, end;
int i;
@@ -51,9 +51,9 @@ static __cpuinit void check_tsc_warp(void)
start = get_cycles();
rdtsc_barrier();
/*
- * The measurement runs for 20 msecs:
+ * The measurement runs for 'timeout' msecs:
*/
- end = start + tsc_khz * 20ULL;
+ end = start + (cycles_t) tsc_khz * timeout;
now = start;
for (i = 0; ; i++) {
@@ -99,6 +99,25 @@ static __cpuinit void check_tsc_warp(void)
}
/*
+ * If the target CPU coming online doesn't have any of its core-siblings
+ * online, a timeout of 20msec will be used for the TSC-warp measurement
+ * loop. Otherwise a smaller timeout of 2msec will be used, as we have some
+ * information about this socket already (and this information grows as we
+ * have more and more logical-siblings in that socket).
+ *
+ * Ideally we should be able to skip the TSC sync check on the other
+ * core-siblings, if the first logical CPU in a socket passed the sync test.
+ * But as the TSC is per-logical CPU and can potentially be modified wrongly
+ * by the bios, TSC sync test for smaller duration should be able
+ * to catch such errors. Also this will catch the condition where all the
+ * cores in the socket doesn't get reset at the same time.
+ */
+static inline unsigned int loop_timeout(int cpu)
+{
+ return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
+}
+
+/*
* Source CPU calls into this - it waits for the freshly booted
* target CPU to arrive and then starts the measurement:
*/
@@ -135,7 +154,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
*/
atomic_inc(&start_count);
- check_tsc_warp();
+ check_tsc_warp(loop_timeout(cpu));
while (atomic_read(&stop_count) != cpus-1)
cpu_relax();
@@ -183,7 +202,7 @@ void __cpuinit check_tsc_sync_target(void)
while (atomic_read(&start_count) != cpus)
cpu_relax();
- check_tsc_warp();
+ check_tsc_warp(loop_timeout(smp_processor_id()));
/*
* Ok, we are done:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f875..328cb37 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
spinlock_t *ptl;
int i;
+ down_write(&mm->mmap_sem);
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
}
pte_unmap_unlock(pte, ptl);
out:
+ up_write(&mm->mmap_sem);
flush_tlb();
}
@@ -335,9 +337,11 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
if (info->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
- /*call audit_syscall_exit since we do not exit via the normal paths */
+ /*call __audit_syscall_exit since we do not exit via the normal paths */
+#ifdef CONFIG_AUDITSYSCALL
if (unlikely(current->audit_context))
- audit_syscall_exit(AUDITSC_RESULT(0), 0);
+ __audit_syscall_exit(1, 0);
+#endif
__asm__ __volatile__(
"movl %0,%%esp\n\t"
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a391134..e62728e 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -6,6 +6,7 @@
#include <linux/bootmem.h>
#include <linux/compat.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#ifdef CONFIG_IA32_EMULATION
#include <asm/sigcontext32.h>
#endif
@@ -47,7 +48,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
if (!fx)
return;
- BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
+ BUG_ON(__thread_has_fpu(tsk));
xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
@@ -168,7 +169,7 @@ int save_i387_xstate(void __user *buf)
if (!used_math())
return 0;
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ if (user_has_fpu()) {
if (use_xsave())
err = xsave_user(buf);
else
@@ -176,8 +177,7 @@ int save_i387_xstate(void __user *buf)
if (err)
return err;
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ user_fpu_end();
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
@@ -292,10 +292,7 @@ int restore_i387_xstate(void __user *buf)
return err;
}
- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
- clts();
- task_thread_info(current)->status |= TS_USEDFPU;
- }
+ user_fpu_begin();
if (use_xsave())
err = restore_user_xstate(buf);
else
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 05a562b..0982507 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1891,6 +1891,51 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
ss->p = 1;
}
+static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
+{
+ struct x86_emulate_ops *ops = ctxt->ops;
+ u32 eax, ebx, ecx, edx;
+
+ /*
+ * syscall should always be enabled in longmode - so only become
+ * vendor specific (cpuid) if other modes are active...
+ */
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ return true;
+
+ eax = 0x00000000;
+ ecx = 0x00000000;
+ if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) {
+ /*
+ * Intel ("GenuineIntel")
+ * remark: Intel CPUs only support "syscall" in 64bit
+ * longmode. Also an 64bit guest with a
+ * 32bit compat-app running will #UD !! While this
+ * behaviour can be fixed (by emulating) into AMD
+ * response - CPUs of AMD can't behave like Intel.
+ */
+ if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
+ return false;
+
+ /* AMD ("AuthenticAMD") */
+ if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+ return true;
+
+ /* AMD ("AMDisbetter!") */
+ if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
+ return true;
+ }
+
+ /* default: (not Intel, not AMD), apply Intel's stricter rules... */
+ return false;
+}
+
static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
struct x86_emulate_ops *ops = ctxt->ops;
@@ -1904,9 +1949,15 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->mode == X86EMUL_MODE_VM86)
return emulate_ud(ctxt);
+ if (!(em_syscall_is_enabled(ctxt)))
+ return emulate_ud(ctxt);
+
ops->get_msr(ctxt, MSR_EFER, &efer);
setup_syscalls_segments(ctxt, &cs, &ss);
+ if (!(efer & EFER_SCE))
+ return emulate_ud(ctxt);
+
ops->get_msr(ctxt, MSR_STAR, &msr_data);
msr_data >>= 32;
cs_sel = (u16)(msr_data & 0xfffc);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cfdc6e0..31bfc69 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1283,9 +1283,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
return;
- vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+ vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
- kunmap_atomic(vapic, KM_USER0);
+ kunmap_atomic(vapic);
apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
@@ -1310,9 +1310,9 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
max_isr = 0;
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
- vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+ vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
- kunmap_atomic(vapic, KM_USER0);
+ kunmap_atomic(vapic);
}
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2a2a9b4..224b02c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -74,7 +74,7 @@ enum {
#endif
#ifdef MMU_DEBUG
-static int dbg = 0;
+static bool dbg = 0;
module_param(dbg, bool, 0644);
#endif
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index fe15dcc..ea7b4fd 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
}
static bool mmu_audit;
-static struct jump_label_key mmu_audit_key;
+static struct static_key mmu_audit_key;
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
- if (static_branch((&mmu_audit_key)))
+ if (static_key_false((&mmu_audit_key)))
__kvm_mmu_audit(vcpu, point);
}
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
if (mmu_audit)
return;
- jump_label_inc(&mmu_audit_key);
+ static_key_slow_inc(&mmu_audit_key);
mmu_audit = true;
}
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
if (!mmu_audit)
return;
- jump_label_dec(&mmu_audit_key);
+ static_key_slow_dec(&mmu_audit_key);
mmu_audit = false;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1561028..df5a703 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -92,9 +92,9 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
if (unlikely(npages != 1))
return -EFAULT;
- table = kmap_atomic(page, KM_USER0);
+ table = kmap_atomic(page);
ret = CMPXCHG(&table[index], orig_pte, new_pte);
- kunmap_atomic(table, KM_USER0);
+ kunmap_atomic(table);
kvm_release_page_dirty(page);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5fa553b..e385214 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -29,6 +29,7 @@
#include <linux/ftrace_event.h>
#include <linux/slab.h>
+#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/kvm_para.h>
@@ -575,6 +576,8 @@ static void svm_hardware_disable(void *garbage)
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
cpu_svm_disable();
+
+ amd_pmu_disable_virt();
}
static int svm_hardware_enable(void *garbage)
@@ -622,6 +625,8 @@ static int svm_hardware_enable(void *garbage)
svm_init_erratum_383();
+ amd_pmu_enable_virt();
+
return 0;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 906a7e8..246490f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,29 +51,29 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
-static int __read_mostly enable_vpid = 1;
+static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
-static int __read_mostly flexpriority_enabled = 1;
+static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
-static int __read_mostly enable_ept = 1;
+static bool __read_mostly enable_ept = 1;
module_param_named(ept, enable_ept, bool, S_IRUGO);
-static int __read_mostly enable_unrestricted_guest = 1;
+static bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
enable_unrestricted_guest, bool, S_IRUGO);
-static int __read_mostly emulate_invalid_guest_state = 0;
+static bool __read_mostly emulate_invalid_guest_state = 0;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
-static int __read_mostly vmm_exclusive = 1;
+static bool __read_mostly vmm_exclusive = 1;
module_param(vmm_exclusive, bool, S_IRUGO);
-static int __read_mostly yield_on_hlt = 1;
+static bool __read_mostly yield_on_hlt = 1;
module_param(yield_on_hlt, bool, S_IRUGO);
-static int __read_mostly fasteoi = 1;
+static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
/*
@@ -81,7 +81,7 @@ module_param(fasteoi, bool, S_IRUGO);
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
* use VMX instructions.
*/
-static int __read_mostly nested = 0;
+static bool __read_mostly nested = 0;
module_param(nested, bool, S_IRUGO);
#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
@@ -1457,7 +1457,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
- if (current_thread_info()->status & TS_USEDFPU)
+ if (user_has_fpu())
clts();
load_gdt(&__get_cpu_var(host_gdt));
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1171def..54696b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -57,6 +57,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h> /* Ugh! */
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
@@ -88,8 +89,8 @@ static void process_nmi(struct kvm_vcpu *vcpu);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
-int ignore_msrs = 0;
-module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
+static bool ignore_msrs = 0;
+module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
@@ -1162,12 +1163,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
*/
vcpu->hv_clock.version += 2;
- shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
+ shared_kaddr = kmap_atomic(vcpu->time_page);
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
sizeof(vcpu->hv_clock));
- kunmap_atomic(shared_kaddr, KM_USER0);
+ kunmap_atomic(shared_kaddr);
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
return 0;
@@ -1495,6 +1496,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
+ bool pr = false;
+
switch (msr) {
case MSR_EFER:
return set_efer(vcpu, data);
@@ -1635,6 +1638,18 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
break;
+ case MSR_P6_PERFCTR0:
+ case MSR_P6_PERFCTR1:
+ pr = true;
+ case MSR_P6_EVNTSEL0:
+ case MSR_P6_EVNTSEL1:
+ if (kvm_pmu_msr(vcpu, msr))
+ return kvm_pmu_set_msr(vcpu, msr, data);
+
+ if (pr || data != 0)
+ pr_unimpl(vcpu, "disabled perfctr wrmsr: "
+ "0x%x data 0x%llx\n", msr, data);
+ break;
case MSR_K7_CLK_CTL:
/*
* Ignore all writes to this no longer documented MSR.
@@ -1835,6 +1850,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_FAM10H_MMIO_CONF_BASE:
data = 0;
break;
+ case MSR_P6_PERFCTR0:
+ case MSR_P6_PERFCTR1:
+ case MSR_P6_EVNTSEL0:
+ case MSR_P6_EVNTSEL1:
+ if (kvm_pmu_msr(vcpu, msr))
+ return kvm_pmu_get_msr(vcpu, msr, pdata);
+ data = 0;
+ break;
case MSR_IA32_UCODE_REV:
data = 0x100000000ULL;
break;
@@ -3826,7 +3849,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
goto emul_write;
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
kaddr += offset_in_page(gpa);
switch (bytes) {
case 1:
@@ -3844,7 +3867,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
default:
BUG();
}
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
kvm_release_page_dirty(page);
if (!exchanged)
@@ -4180,6 +4203,28 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
}
+static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+ struct kvm_cpuid_entry2 *cpuid = NULL;
+
+ if (eax && ecx)
+ cpuid = kvm_find_cpuid_entry(emul_to_vcpu(ctxt),
+ *eax, *ecx);
+
+ if (cpuid) {
+ *eax = cpuid->eax;
+ *ecx = cpuid->ecx;
+ if (ebx)
+ *ebx = cpuid->ebx;
+ if (edx)
+ *edx = cpuid->edx;
+ return true;
+ }
+
+ return false;
+}
+
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt_system,
.write_std = kvm_write_guest_virt_system,
@@ -4211,6 +4256,7 @@ static struct x86_emulate_ops emulate_ops = {
.get_fpu = emulator_get_fpu,
.put_fpu = emulator_put_fpu,
.intercept = emulator_intercept,
+ .get_cpuid = emulator_get_cpuid,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index cf4603b..642d880 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -856,18 +856,23 @@ static void __init lguest_init_IRQ(void)
}
/*
- * With CONFIG_SPARSE_IRQ, interrupt descriptors are allocated as-needed, so
- * rather than set them in lguest_init_IRQ we are called here every time an
- * lguest device needs an interrupt.
- *
- * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should
- * pass that up!
+ * Interrupt descriptors are allocated as-needed, but low-numbered ones are
+ * reserved by the generic x86 code. So we ignore irq_alloc_desc_at if it
+ * tells us the irq is already used: other errors (ie. ENOMEM) we take
+ * seriously.
*/
-void lguest_setup_irq(unsigned int irq)
+int lguest_setup_irq(unsigned int irq)
{
- irq_alloc_desc_at(irq, 0);
+ int err;
+
+ /* Returns -ve error or vector number. */
+ err = irq_alloc_desc_at(irq, 0);
+ if (err < 0 && err != -EEXIST)
+ return err;
+
irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
handle_level_irq, "level");
+ return 0;
}
/*
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 042f682..a0b4a35 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -1,59 +1,4 @@
-#include <linux/compiler.h>
-#include <linux/module.h>
-#include <linux/types.h>
+#define ATOMIC64_EXPORT EXPORT_SYMBOL
-#include <asm/processor.h>
-#include <asm/cmpxchg.h>
+#include <linux/export.h>
#include <linux/atomic.h>
-
-long long atomic64_read_cx8(long long, const atomic64_t *v);
-EXPORT_SYMBOL(atomic64_read_cx8);
-long long atomic64_set_cx8(long long, const atomic64_t *v);
-EXPORT_SYMBOL(atomic64_set_cx8);
-long long atomic64_xchg_cx8(long long, unsigned high);
-EXPORT_SYMBOL(atomic64_xchg_cx8);
-long long atomic64_add_return_cx8(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_add_return_cx8);
-long long atomic64_sub_return_cx8(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_sub_return_cx8);
-long long atomic64_inc_return_cx8(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_return_cx8);
-long long atomic64_dec_return_cx8(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_return_cx8);
-long long atomic64_dec_if_positive_cx8(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_if_positive_cx8);
-int atomic64_inc_not_zero_cx8(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_not_zero_cx8);
-int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u);
-EXPORT_SYMBOL(atomic64_add_unless_cx8);
-
-#ifndef CONFIG_X86_CMPXCHG64
-long long atomic64_read_386(long long, const atomic64_t *v);
-EXPORT_SYMBOL(atomic64_read_386);
-long long atomic64_set_386(long long, const atomic64_t *v);
-EXPORT_SYMBOL(atomic64_set_386);
-long long atomic64_xchg_386(long long, unsigned high);
-EXPORT_SYMBOL(atomic64_xchg_386);
-long long atomic64_add_return_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_add_return_386);
-long long atomic64_sub_return_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_sub_return_386);
-long long atomic64_inc_return_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_return_386);
-long long atomic64_dec_return_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_return_386);
-long long atomic64_add_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_add_386);
-long long atomic64_sub_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_sub_386);
-long long atomic64_inc_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_386);
-long long atomic64_dec_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_386);
-long long atomic64_dec_if_positive_386(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_if_positive_386);
-int atomic64_inc_not_zero_386(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_not_zero_386);
-int atomic64_add_unless_386(atomic64_t *v, long long a, long long u);
-EXPORT_SYMBOL(atomic64_add_unless_386);
-#endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e8e7e0d..00933d5 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -137,13 +137,13 @@ BEGIN(dec_return)
RET_ENDP
#undef v
-#define v %ecx
+#define v %esi
BEGIN(add_unless)
- addl %eax, %esi
+ addl %eax, %ecx
adcl %edx, %edi
addl (v), %eax
adcl 4(v), %edx
- cmpl %eax, %esi
+ cmpl %eax, %ecx
je 3f
1:
movl %eax, (v)
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 391a083..f5cc9eb 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
ENTRY(atomic64_xchg_cx8)
CFI_STARTPROC
- movl %ebx, %eax
- movl %ecx, %edx
1:
LOCK_PREFIX
cmpxchg8b (%esi)
@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
movl %edx, %edi
movl %ecx, %ebp
- read64 %ebp
+ read64 %ecx
1:
movl %eax, %ebx
movl %edx, %ecx
@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
SAVE ebx
/* these just push these two parameters on the stack */
SAVE edi
- SAVE esi
+ SAVE ecx
- movl %ecx, %ebp
- movl %eax, %esi
+ movl %eax, %ebp
movl %edx, %edi
- read64 %ebp
+ read64 %esi
1:
cmpl %eax, 0(%esp)
je 4f
2:
movl %eax, %ebx
movl %edx, %ecx
- addl %esi, %ebx
+ addl %ebp, %ebx
adcl %edi, %ecx
LOCK_PREFIX
- cmpxchg8b (%ebp)
+ cmpxchg8b (%esi)
jne 1b
movl $1, %eax
@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
read64 %esi
1:
- testl %eax, %eax
- je 4f
-2:
+ movl %eax, %ecx
+ orl %edx, %ecx
+ jz 3f
movl %eax, %ebx
- movl %edx, %ecx
+ xorl %ecx, %ecx
addl $1, %ebx
- adcl $0, %ecx
+ adcl %edx, %ecx
LOCK_PREFIX
cmpxchg8b (%esi)
jne 1b
@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
3:
RESTORE ebx
ret
-4:
- testl %edx, %edx
- jne 2b
- jmp 3b
CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 01c805b..6b34d04 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
ENTRY(copy_page)
CFI_STARTPROC
- subq $3*8,%rsp
- CFI_ADJUST_CFA_OFFSET 3*8
+ subq $2*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 2*8
movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13, 2*8
movl $(4096/64)-5,%ecx
.p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
CFI_RESTORE rbx
movq 1*8(%rsp),%r12
CFI_RESTORE r12
- movq 2*8(%rsp),%r13
- CFI_RESTORE r13
- addq $3*8,%rsp
- CFI_ADJUST_CFA_OFFSET -3*8
+ addq $2*8,%rsp
+ CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
CFI_ENDPROC
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fc45ba8..e395693 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops)
}
/* TSC based delay: */
-static void delay_tsc(unsigned long loops)
+static void delay_tsc(unsigned long __loops)
{
- unsigned long bclock, now;
+ u32 bclock, now, loops = __loops;
int cpu;
preempt_disable();
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 88ad5fb..c1f01a8 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
return inat_primary_table[opcode];
}
-insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+ insn_attr_t lpfx_attr;
+
+ lpfx_attr = inat_get_opcode_attribute(last_pfx);
+ return inat_last_prefix_id(lpfx_attr);
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
insn_attr_t esc_attr)
{
const insn_attr_t *table;
- insn_attr_t lpfx_attr;
- int n, m = 0;
+ int n;
n = inat_escape_id(esc_attr);
- if (last_pfx) {
- lpfx_attr = inat_get_opcode_attribute(last_pfx);
- m = inat_last_prefix_id(lpfx_attr);
- }
+
table = inat_escape_tables[n][0];
if (!table)
return 0;
- if (inat_has_variant(table[opcode]) && m) {
- table = inat_escape_tables[n][m];
+ if (inat_has_variant(table[opcode]) && lpfx_id) {
+ table = inat_escape_tables[n][lpfx_id];
if (!table)
return 0;
}
return table[opcode];
}
-insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
insn_attr_t grp_attr)
{
const insn_attr_t *table;
- insn_attr_t lpfx_attr;
- int n, m = 0;
+ int n;
n = inat_group_id(grp_attr);
- if (last_pfx) {
- lpfx_attr = inat_get_opcode_attribute(last_pfx);
- m = inat_last_prefix_id(lpfx_attr);
- }
+
table = inat_group_tables[n][0];
if (!table)
return inat_group_common_attribute(grp_attr);
- if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
- table = inat_group_tables[n][m];
+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+ table = inat_group_tables[n][lpfx_id];
if (!table)
return inat_group_common_attribute(grp_attr);
}
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 5a1f9f3..25feb1a 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -185,7 +185,8 @@ err_out:
void insn_get_opcode(struct insn *insn)
{
struct insn_field *opcode = &insn->opcode;
- insn_byte_t op, pfx;
+ insn_byte_t op;
+ int pfx_id;
if (opcode->got)
return;
if (!insn->prefixes.got)
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn)
/* Get escaped opcode */
op = get_next(insn_byte_t, insn);
opcode->bytes[opcode->nbytes++] = op;
- pfx = insn_last_prefix(insn);
- insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
}
if (inat_must_vex(insn->attr))
insn->attr = 0; /* This instruction is bad */
@@ -235,7 +236,7 @@ err_out:
void insn_get_modrm(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
- insn_byte_t pfx, mod;
+ insn_byte_t pfx_id, mod;
if (modrm->got)
return;
if (!insn->opcode.got)
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn)
modrm->value = mod;
modrm->nbytes = 1;
if (inat_is_group(insn->attr)) {
- pfx = insn_last_prefix(insn);
- insn->attr = inat_get_group_attribute(mod, pfx,
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
insn->attr = 0; /* This is bad */
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0..1c273be 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
-
- movl %edx, %ecx
- shrl $3, %ecx
+ movq %rdx, %rcx
+ shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
@@ -48,8 +47,7 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
movq %rdi, %rax
-
- movl %edx, %ecx
+ movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
CFI_STARTPROC
movq %rdi, %rax
- /*
- * Use 32bit CMP here to avoid long NOP padding.
- */
- cmp $0x20, %edx
+ cmpq $0x20, %rdx
jb .Lhandle_tail
/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
*/
cmp %dil, %sil
jl .Lcopy_backward
- subl $0x20, %edx
+ subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx
@@ -91,7 +86,7 @@ ENTRY(memcpy)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
- addq $0x20, %rdx
+ addl $0x20, %edx
jmp .Lhandle_tail
.Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
/*
* Calculate copy position to head.
*/
- addq $0x20, %rdx
+ addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
- cmpq $16, %rdx
+ cmpl $16, %edx
jb .Lless_16bytes
/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_16bytes:
- cmpq $8, %rdx
+ cmpl $8, %edx
jb .Lless_8bytes
/*
* Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_8bytes:
- cmpq $4, %rdx
+ cmpl $4, %edx
jb .Lless_3bytes
/*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_3bytes:
- cmpl $0, %edx
- je .Lend
+ subl $1, %edx
+ jb .Lend
/*
* Move data from 1 bytes to 3 bytes.
*/
-.Lloop_1:
- movb (%rsi), %r8b
- movb %r8b, (%rdi)
- incq %rdi
- incq %rsi
- decl %edx
- jnz .Lloop_1
+ movzbl (%rsi), %ecx
+ jz .Lstore_1byte
+ movzbq 1(%rsi), %r8
+ movzbq (%rsi, %rdx), %r9
+ movb %r8b, 1(%rdi)
+ movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+ movb %cl, (%rdi)
.Lend:
retq
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454..2dcb380 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
movq %rdi,%r9
- movl %edx,%r8d
- andl $7,%r8d
- movl %edx,%ecx
- shrl $3,%ecx
+ movq %rdx,%rcx
+ andl $7,%edx
+ shrq $3,%rcx
/* expand byte value */
movzbl %sil,%esi
movabs $0x0101010101010101,%rax
- mulq %rsi /* with rax, clobbers rdx */
+ imulq %rsi,%rax
rep stosq
- movl %r8d,%ecx
+ movl %edx,%ecx
rep stosb
movq %r9,%rax
ret
@@ -50,7 +49,7 @@
.Lmemset_c_e:
movq %rdi,%r9
movb %sil,%al
- movl %edx,%ecx
+ movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
@@ -61,12 +60,11 @@ ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC
movq %rdi,%r10
- movq %rdx,%r11
/* expand byte value */
movzbl %sil,%ecx
movabs $0x0101010101010101,%rax
- mul %rcx /* with rax, clobbers rdx */
+ imulq %rcx,%rax
/* align dst */
movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
CFI_REMEMBER_STATE
.Lafter_bad_alignment:
- movl %r11d,%ecx
- shrl $6,%ecx
+ movq %rdx,%rcx
+ shrq $6,%rcx
jz .Lhandle_tail
.p2align 4
.Lloop_64:
- decl %ecx
+ decq %rcx
movq %rax,(%rdi)
movq %rax,8(%rdi)
movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
to predict jump tables. */
.p2align 4
.Lhandle_tail:
- movl %r11d,%ecx
+ movl %edx,%ecx
andl $63&(~7),%ecx
jz .Lhandle_7
shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
jnz .Lloop_8
.Lhandle_7:
- movl %r11d,%ecx
- andl $7,%ecx
+ andl $7,%edx
jz .Lende
.p2align 4
.Lloop_1:
- decl %ecx
+ decl %edx
movb %al,(%rdi)
leaq 1(%rdi),%rdi
jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
CFI_RESTORE_STATE
.Lbad_alignment:
- cmpq $7,%r11
+ cmpq $7,%rdx
jbe .Lhandle_7
movq %rax,(%rdi) /* unaligned store */
movq $8,%r8
subq %r9,%r8
addq %r8,%rdi
- subq %r8,%r11
+ subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index e218d5d..d9b094c 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -760,9 +760,9 @@ survive:
break;
}
- maddr = kmap_atomic(pg, KM_USER0);
+ maddr = kmap_atomic(pg);
memcpy(maddr + offset, from, len);
- kunmap_atomic(maddr, KM_USER0);
+ kunmap_atomic(maddr);
set_page_dirty_lock(pg);
put_page(pg);
up_read(&current->mm->mmap_sem);
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 5b83c51..8191379 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -219,7 +219,9 @@ ab: STOS/W/D/Q Yv,rAX
ac: LODS/B AL,Xb
ad: LODS/W/D/Q rAX,Xv
ae: SCAS/B AL,Yb
-af: SCAS/W/D/Q rAX,Xv
+# Note: The May 2011 Intel manual shows Xv for the second parameter of the
+# next instruction but Yv is correct
+af: SCAS/W/D/Q rAX,Yv
# 0xb0 - 0xbf
b0: MOV AL/R8L,Ib
b1: MOV CL/R9L,Ib
@@ -729,8 +731,8 @@ de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
-f3: ANDN Gy,By,Ey (v)
-f4: Grp17 (1A)
+f2: ANDN Gy,By,Ey (v)
+f3: Grp17 (1A)
f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
f6: MULX By,Gy,rDX,Ey (F2),(v)
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9d74824..f0b4caf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -673,7 +673,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
stackend = end_of_stack(tsk);
if (tsk != &init_task && *stackend != STACK_END_MAGIC)
- printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
@@ -684,7 +684,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
sig = 0;
/* Executive summary in case the body of the oops scrolled away */
- printk(KERN_EMERG "CR2: %016lx\n", address);
+ printk(KERN_DEFAULT "CR2: %016lx\n", address);
oops_end(flags, regs, sig);
}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index f4f29b1..6f31ee5 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -51,11 +51,11 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
}
EXPORT_SYMBOL(kmap_atomic_prot);
-void *__kmap_atomic(struct page *page)
+void *kmap_atomic(struct page *page)
{
return kmap_atomic_prot(page, kmap_prot);
}
-EXPORT_SYMBOL(__kmap_atomic);
+EXPORT_SYMBOL(kmap_atomic);
/*
* This is the same as kmap_atomic() but can map memory that doesn't
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f581a18..f6679a7 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -308,10 +308,11 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
{
struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma, *prev_vma;
- unsigned long base = mm->mmap_base, addr = addr0;
+ struct vm_area_struct *vma;
+ unsigned long base = mm->mmap_base;
+ unsigned long addr = addr0;
unsigned long largest_hole = mm->cached_hole_size;
- int first_time = 1;
+ unsigned long start_addr;
/* don't allow allocations above current base */
if (mm->free_area_cache > base)
@@ -322,6 +323,8 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
mm->free_area_cache = base;
}
try_again:
+ start_addr = mm->free_area_cache;
+
/* make sure it can fit in the remaining address space */
if (mm->free_area_cache < len)
goto fail;
@@ -333,24 +336,18 @@ try_again:
* Lookup failure means no vma is above this address,
* i.e. return with success:
*/
- if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
+ vma = find_vma(mm, addr);
+ if (!vma)
return addr;
- /*
- * new region fits between prev_vma->vm_end and
- * vma->vm_start, use it:
- */
- if (addr + len <= vma->vm_start &&
- (!prev_vma || (addr >= prev_vma->vm_end))) {
+ if (addr + len <= vma->vm_start) {
/* remember the address as a hint for next time */
mm->cached_hole_size = largest_hole;
return (mm->free_area_cache = addr);
- } else {
+ } else if (mm->free_area_cache == vma->vm_end) {
/* pull free_area_cache down to the first hole */
- if (mm->free_area_cache == vma->vm_end) {
- mm->free_area_cache = vma->vm_start;
- mm->cached_hole_size = largest_hole;
- }
+ mm->free_area_cache = vma->vm_start;
+ mm->cached_hole_size = largest_hole;
}
/* remember the largest hole we saw so far */
@@ -366,10 +363,9 @@ fail:
* if hint left us with no space for the requested
* mapping then try again:
*/
- if (first_time) {
+ if (start_addr != base) {
mm->free_area_cache = base;
largest_hole = 0;
- first_time = 0;
goto try_again;
}
/*
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index a298914..6cabf65 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -3,6 +3,7 @@
#include <linux/ioport.h>
#include <linux/swap.h>
#include <linux/memblock.h>
+#include <linux/bootmem.h> /* for max_low_pfn */
#include <asm/cacheflush.h>
#include <asm/e820.h>
@@ -15,6 +16,7 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/proto.h>
+#include <asm/dma.h> /* for MAX_DMA_PFN */
unsigned long __initdata pgt_buf_start;
unsigned long __meminitdata pgt_buf_end;
@@ -392,3 +394,24 @@ void free_initrd_mem(unsigned long start, unsigned long end)
free_init_pages("initrd memory", start, PAGE_ALIGN(end));
}
#endif
+
+void __init zone_sizes_init(void)
+{
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
+#ifdef CONFIG_ZONE_DMA
+ max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+#endif
+#ifdef CONFIG_ZONE_DMA32
+ max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
+#endif
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+#ifdef CONFIG_HIGHMEM
+ max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
+#endif
+
+ free_area_init_nodes(max_zone_pfns);
+}
+
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0c1da39..8663f6c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -668,22 +668,6 @@ void __init initmem_init(void)
}
#endif /* !CONFIG_NEED_MULTIPLE_NODES */
-static void __init zone_sizes_init(void)
-{
- unsigned long max_zone_pfns[MAX_NR_ZONES];
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-#ifdef CONFIG_ZONE_DMA
- max_zone_pfns[ZONE_DMA] =
- virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-#endif
- max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-#ifdef CONFIG_HIGHMEM
- max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-#endif
-
- free_area_init_nodes(max_zone_pfns);
-}
-
void __init setup_bootmem_allocator(void)
{
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
@@ -754,6 +738,17 @@ void __init mem_init(void)
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
+ /*
+ * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
+ * be done before free_all_bootmem(). Memblock use free low memory for
+ * temporary data (see find_range_array()) and for this purpose can use
+ * pages that was already passed to the buddy allocator, hence marked as
+ * not accessible in the page tables when compiled with
+ * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
+ * important here.
+ */
+ set_highmem_pages_init();
+
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
@@ -765,8 +760,6 @@ void __init mem_init(void)
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
reservedpages++;
- set_highmem_pages_init();
-
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a8a56ce..436a030 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -614,15 +614,6 @@ void __init initmem_init(void)
void __init paging_init(void)
{
- unsigned long max_zone_pfns[MAX_NR_ZONES];
-
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-#ifdef CONFIG_ZONE_DMA
- max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
-#endif
- max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
- max_zone_pfns[ZONE_NORMAL] = max_pfn;
-
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
@@ -634,7 +625,7 @@ void __init paging_init(void)
*/
node_clear_state(0, N_NORMAL_MEMORY);
- free_area_init_nodes(max_zone_pfns);
+ zone_sizes_init();
}
/*
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 4b5ba85..845df68 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -75,9 +75,9 @@ static unsigned long mmap_rnd(void)
*/
if (current->flags & PF_RANDOMIZE) {
if (mmap_is_ia32())
- rnd = (long)get_random_int() % (1<<8);
+ rnd = get_random_int() % (1<<8);
else
- rnd = (long)(get_random_int() % (1<<28));
+ rnd = get_random_int() % (1<<28);
}
return rnd << PAGE_SHIFT;
}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index de54b9b..dc0b727 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -75,8 +75,8 @@ static LIST_HEAD(trace_list); /* struct remap_trace */
/* module parameters */
static unsigned long filter_offset;
-static int nommiotrace;
-static int trace_pc;
+static bool nommiotrace;
+static bool trace_pc;
module_param(filter_offset, ulong, 0);
module_param(nommiotrace, bool, 0);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 496f494..19d3fa0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -110,7 +110,7 @@ void __cpuinit numa_clear_node(int cpu)
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
- * Note: node_to_cpumask() is not valid until after this is done.
+ * Note: cpumask_of_node() is not valid until after this is done.
* (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
*/
void __init setup_node_to_cpumask_map(void)
@@ -422,8 +422,9 @@ static int __init numa_alloc_distance(void)
* calls are ignored until the distance table is reset with
* numa_reset_distance().
*
- * If @from or @to is higher than the highest known node at the time of
- * table creation or @distance doesn't make sense, the call is ignored.
+ * If @from or @to is higher than the highest known node or lower than zero
+ * at the time of table creation or @distance doesn't make sense, the call
+ * is ignored.
* This is to allow simplification of specific NUMA config implementations.
*/
void __init numa_set_distance(int from, int to, int distance)
@@ -431,8 +432,9 @@ void __init numa_set_distance(int from, int to, int distance)
if (!numa_distance && numa_alloc_distance() < 0)
return;
- if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
- printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
+ from < 0 || to < 0) {
+ pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
from, to, distance);
return;
}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 46db568..53489ff 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,7 +28,7 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
return -ENOENT;
}
-static u64 mem_hole_size(u64 start, u64 end)
+static u64 __init mem_hole_size(u64 start, u64 end)
{
unsigned long start_pfn = PFN_UP(start);
unsigned long end_pfn = PFN_DOWN(end);
@@ -60,7 +60,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
eb->nid = nid;
if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
- emu_nid_to_phys[nid] = pb->nid;
+ emu_nid_to_phys[nid] = nid;
pb->start += size;
if (pb->start >= pb->end) {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index eda2acb..e1ebde3 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1334,12 +1334,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
}
/*
- * If page allocator is not up yet then do not call c_p_a():
- */
- if (!debug_pagealloc_enabled)
- return;
-
- /*
* The return value is ignored as the calls cannot fail.
* Large pages for identity mappings are not used at boot time
* and hence no memory allocations during large page split.
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index fd61b3f..1c1c4f4 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -109,6 +109,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return;
pxm = pa->proximity_domain_lo;
+ if (acpi_srat_revision >= 2)
+ pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -160,6 +162,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
+ if (acpi_srat_revision <= 1)
+ pxm &= 0xff;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7b65f75..5671752 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -151,17 +151,18 @@ void bpf_jit_compile(struct sk_filter *fp)
cleanup_addr = proglen; /* epilogue address */
for (pass = 0; pass < 10; pass++) {
+ u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
/* no prologue/epilogue for trivial filters (RET something) */
proglen = 0;
prog = temp;
- if (seen) {
+ if (seen_or_pass0) {
EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
/* note : must save %rbx in case bpf_error is hit */
- if (seen & (SEEN_XREG | SEEN_DATAREF))
+ if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
- if (seen & SEEN_XREG)
+ if (seen_or_pass0 & SEEN_XREG)
CLEAR_X(); /* make sure we dont leek kernel memory */
/*
@@ -170,7 +171,7 @@ void bpf_jit_compile(struct sk_filter *fp)
* r9 = skb->len - skb->data_len
* r8 = skb->data
*/
- if (seen & SEEN_DATAREF) {
+ if (seen_or_pass0 & SEEN_DATAREF) {
if (offsetof(struct sk_buff, len) <= 127)
/* mov off8(%rdi),%r9d */
EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
@@ -260,9 +261,14 @@ void bpf_jit_compile(struct sk_filter *fp)
case BPF_S_ALU_DIV_X: /* A /= X; */
seen |= SEEN_XREG;
EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
- if (pc_ret0 != -1)
- EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4));
- else {
+ if (pc_ret0 > 0) {
+ /* addrs[pc_ret0 - 1] is start address of target
+ * (addrs[i] - 4) is the address following this jmp
+ * ("xor %edx,%edx; div %ebx" being 4 bytes long)
+ */
+ EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
+ (addrs[i] - 4));
+ } else {
EMIT_COND_JMP(X86_JNE, 2 + 5);
CLEAR_A();
EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
@@ -335,12 +341,12 @@ void bpf_jit_compile(struct sk_filter *fp)
}
/* fallinto */
case BPF_S_RET_A:
- if (seen) {
+ if (seen_or_pass0) {
if (i != flen - 1) {
EMIT_JMP(cleanup_addr - addrs[i]);
break;
}
- if (seen & SEEN_XREG)
+ if (seen_or_pass0 & SEEN_XREG)
EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */
EMIT1(0xc9); /* leaveq */
}
@@ -469,8 +475,10 @@ void bpf_jit_compile(struct sk_filter *fp)
case BPF_S_LD_W_ABS:
func = sk_load_word;
common_load: seen |= SEEN_DATAREF;
- if ((int)K < 0)
+ if ((int)K < 0) {
+ /* Abort the JIT because __load_pointer() is needed. */
goto out;
+ }
t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call */
@@ -483,13 +491,8 @@ common_load: seen |= SEEN_DATAREF;
goto common_load;
case BPF_S_LDX_B_MSH:
if ((int)K < 0) {
- if (pc_ret0 != -1) {
- EMIT_JMP(addrs[pc_ret0] - addrs[i]);
- break;
- }
- CLEAR_A();
- EMIT_JMP(cleanup_addr - addrs[i]);
- break;
+ /* Abort the JIT because __load_pointer() is needed. */
+ goto out;
}
seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = sk_load_byte_msh - (image + addrs[i]);
@@ -599,13 +602,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
* use it to give the cleanup instruction(s) addr
*/
cleanup_addr = proglen - 1; /* ret */
- if (seen)
+ if (seen_or_pass0)
cleanup_addr -= 1; /* leaveq */
- if (seen & SEEN_XREG)
+ if (seen_or_pass0 & SEEN_XREG)
cleanup_addr -= 4; /* mov -8(%rbp),%rbx */
if (image) {
- WARN_ON(proglen != oldproglen);
+ if (proglen != oldproglen)
+ pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen);
break;
}
if (proglen == oldproglen) {
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 6b8759f..e76e18c 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -15,11 +15,12 @@ obj-$(CONFIG_X86_VISWS) += visws.o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
-obj-$(CONFIG_X86_MRST) += mrst.o
+obj-$(CONFIG_X86_INTEL_MID) += mrst.o
obj-y += common.o early.o
-obj-y += amd_bus.o bus_numa.o
+obj-y += bus_numa.o
+obj-$(CONFIG_AMD_NB) += amd_bus.o
obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o
ifeq ($(CONFIG_PCI_DEBUG),y)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index a312e76..49a5cb5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -60,6 +60,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
},
},
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
+ {
+ .callback = set_use_crs,
+ .ident = "MSI MS-7253",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
+ DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+ },
+ },
/* Now for the blacklist.. */
@@ -282,9 +292,6 @@ static void add_resources(struct pci_root_info *info)
int i;
struct resource *res, *root, *conflict;
- if (!pci_use_crs)
- return;
-
coalesce_windows(info, IORESOURCE_MEM);
coalesce_windows(info, IORESOURCE_IO);
@@ -336,8 +343,13 @@ get_current_resources(struct acpi_device *device, int busnum,
acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
&info);
- add_resources(&info);
- return;
+ if (pci_use_crs) {
+ add_resources(&info);
+
+ return;
+ }
+
+ kfree(info.name);
name_alloc_fail:
kfree(info.res);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 249a5ae..7415aa9 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -400,7 +400,7 @@ int __init pci_xen_init(void)
int __init pci_xen_hvm_init(void)
{
- if (!xen_feature(XENFEAT_hvm_pirqs))
+ if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
return 0;
#ifdef CONFIG_ACPI
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 4cf9bd0..92660eda 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -26,6 +26,8 @@
* Skip non-WB memory and ignore empty memory ranges.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/efi.h>
@@ -47,7 +49,6 @@
#include <asm/x86_init.h>
#define EFI_DEBUG 1
-#define PFX "EFI: "
int efi_enabled;
EXPORT_SYMBOL(efi_enabled);
@@ -67,6 +68,9 @@ EXPORT_SYMBOL(efi);
struct efi_memory_map memmap;
+bool efi_64bit;
+static bool efi_native;
+
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
@@ -254,7 +258,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
status = efi.get_time(&eft, &cap);
if (status != EFI_SUCCESS) {
- printk(KERN_ERR "Oops: efitime: can't read time!\n");
+ pr_err("Oops: efitime: can't read time!\n");
return -1;
}
@@ -268,7 +272,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
status = efi.set_time(&eft);
if (status != EFI_SUCCESS) {
- printk(KERN_ERR "Oops: efitime: can't write time!\n");
+ pr_err("Oops: efitime: can't write time!\n");
return -1;
}
return 0;
@@ -282,7 +286,7 @@ unsigned long efi_get_time(void)
status = efi.get_time(&eft, &cap);
if (status != EFI_SUCCESS)
- printk(KERN_ERR "Oops: efitime: can't read time!\n");
+ pr_err("Oops: efitime: can't read time!\n");
return mktime(eft.year, eft.month, eft.day, eft.hour,
eft.minute, eft.second);
@@ -338,11 +342,16 @@ static void __init do_add_efi_memmap(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
-void __init efi_memblock_x86_reserve_range(void)
+int __init efi_memblock_x86_reserve_range(void)
{
unsigned long pmap;
#ifdef CONFIG_X86_32
+ /* Can't handle data above 4GB at this time */
+ if (boot_params.efi_info.efi_memmap_hi) {
+ pr_err("Memory map is above 4GB, disabling EFI.\n");
+ return -EINVAL;
+ }
pmap = boot_params.efi_info.efi_memmap;
#else
pmap = (boot_params.efi_info.efi_memmap |
@@ -354,6 +363,8 @@ void __init efi_memblock_x86_reserve_range(void)
memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+
+ return 0;
}
#if EFI_DEBUG
@@ -367,7 +378,7 @@ static void __init print_efi_memmap(void)
p < memmap.map_end;
p += memmap.desc_size, i++) {
md = p;
- printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, "
+ pr_info("mem%02u: type=%u, attr=0x%llx, "
"range=[0x%016llx-0x%016llx) (%lluMB)\n",
i, md->type, md->attribute, md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
@@ -400,7 +411,7 @@ void __init efi_reserve_boot_services(void)
memblock_is_region_reserved(start, size)) {
/* Could not reserve, skip it */
md->num_pages = 0;
- memblock_dbg(PFX "Could not reserve boot range "
+ memblock_dbg("Could not reserve boot range "
"[0x%010llx-0x%010llx]\n",
start, start+size-1);
} else
@@ -429,103 +440,172 @@ static void __init efi_free_boot_services(void)
}
}
-void __init efi_init(void)
+static int __init efi_systab_init(void *phys)
{
- efi_config_table_t *config_tables;
- efi_runtime_services_t *runtime;
- efi_char16_t *c16;
- char vendor[100] = "unknown";
- int i = 0;
- void *tmp;
+ if (efi_64bit) {
+ efi_system_table_64_t *systab64;
+ u64 tmp = 0;
+
+ systab64 = early_ioremap((unsigned long)phys,
+ sizeof(*systab64));
+ if (systab64 == NULL) {
+ pr_err("Couldn't map the system table!\n");
+ return -ENOMEM;
+ }
+ efi_systab.hdr = systab64->hdr;
+ efi_systab.fw_vendor = systab64->fw_vendor;
+ tmp |= systab64->fw_vendor;
+ efi_systab.fw_revision = systab64->fw_revision;
+ efi_systab.con_in_handle = systab64->con_in_handle;
+ tmp |= systab64->con_in_handle;
+ efi_systab.con_in = systab64->con_in;
+ tmp |= systab64->con_in;
+ efi_systab.con_out_handle = systab64->con_out_handle;
+ tmp |= systab64->con_out_handle;
+ efi_systab.con_out = systab64->con_out;
+ tmp |= systab64->con_out;
+ efi_systab.stderr_handle = systab64->stderr_handle;
+ tmp |= systab64->stderr_handle;
+ efi_systab.stderr = systab64->stderr;
+ tmp |= systab64->stderr;
+ efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
+ tmp |= systab64->runtime;
+ efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
+ tmp |= systab64->boottime;
+ efi_systab.nr_tables = systab64->nr_tables;
+ efi_systab.tables = systab64->tables;
+ tmp |= systab64->tables;
+
+ early_iounmap(systab64, sizeof(*systab64));
#ifdef CONFIG_X86_32
- efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
-#else
- efi_phys.systab = (efi_system_table_t *)
- (boot_params.efi_info.efi_systab |
- ((__u64)boot_params.efi_info.efi_systab_hi<<32));
+ if (tmp >> 32) {
+ pr_err("EFI data located above 4GB, disabling EFI.\n");
+ return -EINVAL;
+ }
#endif
+ } else {
+ efi_system_table_32_t *systab32;
+
+ systab32 = early_ioremap((unsigned long)phys,
+ sizeof(*systab32));
+ if (systab32 == NULL) {
+ pr_err("Couldn't map the system table!\n");
+ return -ENOMEM;
+ }
+
+ efi_systab.hdr = systab32->hdr;
+ efi_systab.fw_vendor = systab32->fw_vendor;
+ efi_systab.fw_revision = systab32->fw_revision;
+ efi_systab.con_in_handle = systab32->con_in_handle;
+ efi_systab.con_in = systab32->con_in;
+ efi_systab.con_out_handle = systab32->con_out_handle;
+ efi_systab.con_out = systab32->con_out;
+ efi_systab.stderr_handle = systab32->stderr_handle;
+ efi_systab.stderr = systab32->stderr;
+ efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
+ efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
+ efi_systab.nr_tables = systab32->nr_tables;
+ efi_systab.tables = systab32->tables;
+
+ early_iounmap(systab32, sizeof(*systab32));
+ }
- efi.systab = early_ioremap((unsigned long)efi_phys.systab,
- sizeof(efi_system_table_t));
- if (efi.systab == NULL)
- printk(KERN_ERR "Couldn't map the EFI system table!\n");
- memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
- early_iounmap(efi.systab, sizeof(efi_system_table_t));
efi.systab = &efi_systab;
/*
* Verify the EFI Table
*/
- if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- printk(KERN_ERR "EFI system table signature incorrect!\n");
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
+ pr_err("System table signature incorrect!\n");
+ return -EINVAL;
+ }
if ((efi.systab->hdr.revision >> 16) == 0)
- printk(KERN_ERR "Warning: EFI system table version "
+ pr_err("Warning: System table version "
"%d.%02d, expected 1.00 or greater!\n",
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff);
- /*
- * Show what we know for posterity
- */
- c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
- if (c16) {
- for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
- vendor[i] = *c16++;
- vendor[i] = '\0';
- } else
- printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
- early_iounmap(tmp, 2);
+ return 0;
+}
- printk(KERN_INFO "EFI v%u.%.02u by %s\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff, vendor);
+static int __init efi_config_init(u64 tables, int nr_tables)
+{
+ void *config_tables, *tablep;
+ int i, sz;
+
+ if (efi_64bit)
+ sz = sizeof(efi_config_table_64_t);
+ else
+ sz = sizeof(efi_config_table_32_t);
/*
* Let's see what config tables the firmware passed to us.
*/
- config_tables = early_ioremap(
- efi.systab->tables,
- efi.systab->nr_tables * sizeof(efi_config_table_t));
- if (config_tables == NULL)
- printk(KERN_ERR "Could not map EFI Configuration Table!\n");
+ config_tables = early_ioremap(tables, nr_tables * sz);
+ if (config_tables == NULL) {
+ pr_err("Could not map Configuration table!\n");
+ return -ENOMEM;
+ }
- printk(KERN_INFO);
+ tablep = config_tables;
+ pr_info("");
for (i = 0; i < efi.systab->nr_tables; i++) {
- if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
- efi.mps = config_tables[i].table;
- printk(" MPS=0x%lx ", config_tables[i].table);
- } else if (!efi_guidcmp(config_tables[i].guid,
- ACPI_20_TABLE_GUID)) {
- efi.acpi20 = config_tables[i].table;
- printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
- } else if (!efi_guidcmp(config_tables[i].guid,
- ACPI_TABLE_GUID)) {
- efi.acpi = config_tables[i].table;
- printk(" ACPI=0x%lx ", config_tables[i].table);
- } else if (!efi_guidcmp(config_tables[i].guid,
- SMBIOS_TABLE_GUID)) {
- efi.smbios = config_tables[i].table;
- printk(" SMBIOS=0x%lx ", config_tables[i].table);
+ efi_guid_t guid;
+ unsigned long table;
+
+ if (efi_64bit) {
+ u64 table64;
+ guid = ((efi_config_table_64_t *)tablep)->guid;
+ table64 = ((efi_config_table_64_t *)tablep)->table;
+ table = table64;
+#ifdef CONFIG_X86_32
+ if (table64 >> 32) {
+ pr_cont("\n");
+ pr_err("Table located above 4GB, disabling EFI.\n");
+ early_iounmap(config_tables,
+ efi.systab->nr_tables * sz);
+ return -EINVAL;
+ }
+#endif
+ } else {
+ guid = ((efi_config_table_32_t *)tablep)->guid;
+ table = ((efi_config_table_32_t *)tablep)->table;
+ }
+ if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
+ efi.mps = table;
+ pr_cont(" MPS=0x%lx ", table);
+ } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
+ efi.acpi20 = table;
+ pr_cont(" ACPI 2.0=0x%lx ", table);
+ } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
+ efi.acpi = table;
+ pr_cont(" ACPI=0x%lx ", table);
+ } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
+ efi.smbios = table;
+ pr_cont(" SMBIOS=0x%lx ", table);
#ifdef CONFIG_X86_UV
- } else if (!efi_guidcmp(config_tables[i].guid,
- UV_SYSTEM_TABLE_GUID)) {
- efi.uv_systab = config_tables[i].table;
- printk(" UVsystab=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
+ efi.uv_systab = table;
+ pr_cont(" UVsystab=0x%lx ", table);
#endif
- } else if (!efi_guidcmp(config_tables[i].guid,
- HCDP_TABLE_GUID)) {
- efi.hcdp = config_tables[i].table;
- printk(" HCDP=0x%lx ", config_tables[i].table);
- } else if (!efi_guidcmp(config_tables[i].guid,
- UGA_IO_PROTOCOL_GUID)) {
- efi.uga = config_tables[i].table;
- printk(" UGA=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
+ efi.hcdp = table;
+ pr_cont(" HCDP=0x%lx ", table);
+ } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
+ efi.uga = table;
+ pr_cont(" UGA=0x%lx ", table);
}
+ tablep += sz;
}
- printk("\n");
- early_iounmap(config_tables,
- efi.systab->nr_tables * sizeof(efi_config_table_t));
+ pr_cont("\n");
+ early_iounmap(config_tables, efi.systab->nr_tables * sz);
+ return 0;
+}
+
+static int __init efi_runtime_init(void)
+{
+ efi_runtime_services_t *runtime;
/*
* Check out the runtime services table. We need to map
@@ -535,43 +615,116 @@ void __init efi_init(void)
*/
runtime = early_ioremap((unsigned long)efi.systab->runtime,
sizeof(efi_runtime_services_t));
- if (runtime != NULL) {
- /*
- * We will only need *early* access to the following
- * two EFI runtime services before set_virtual_address_map
- * is invoked.
- */
- efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
- efi_phys.set_virtual_address_map =
- (efi_set_virtual_address_map_t *)
- runtime->set_virtual_address_map;
- /*
- * Make efi_get_time can be called before entering
- * virtual mode.
- */
- efi.get_time = phys_efi_get_time;
- } else
- printk(KERN_ERR "Could not map the EFI runtime service "
- "table!\n");
+ if (!runtime) {
+ pr_err("Could not map the runtime service table!\n");
+ return -ENOMEM;
+ }
+ /*
+ * We will only need *early* access to the following
+ * two EFI runtime services before set_virtual_address_map
+ * is invoked.
+ */
+ efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
+ efi_phys.set_virtual_address_map =
+ (efi_set_virtual_address_map_t *)
+ runtime->set_virtual_address_map;
+ /*
+ * Make efi_get_time can be called before entering
+ * virtual mode.
+ */
+ efi.get_time = phys_efi_get_time;
early_iounmap(runtime, sizeof(efi_runtime_services_t));
+ return 0;
+}
+
+static int __init efi_memmap_init(void)
+{
/* Map the EFI memory map */
memmap.map = early_ioremap((unsigned long)memmap.phys_map,
memmap.nr_map * memmap.desc_size);
- if (memmap.map == NULL)
- printk(KERN_ERR "Could not map the EFI memory map!\n");
+ if (memmap.map == NULL) {
+ pr_err("Could not map the memory map!\n");
+ return -ENOMEM;
+ }
memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
- if (memmap.desc_size != sizeof(efi_memory_desc_t))
- printk(KERN_WARNING
- "Kernel-defined memdesc doesn't match the one from EFI!\n");
-
if (add_efi_memmap)
do_add_efi_memmap();
+ return 0;
+}
+
+void __init efi_init(void)
+{
+ efi_char16_t *c16;
+ char vendor[100] = "unknown";
+ int i = 0;
+ void *tmp;
+
+#ifdef CONFIG_X86_32
+ if (boot_params.efi_info.efi_systab_hi ||
+ boot_params.efi_info.efi_memmap_hi) {
+ pr_info("Table located above 4GB, disabling EFI.\n");
+ efi_enabled = 0;
+ return;
+ }
+ efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
+ efi_native = !efi_64bit;
+#else
+ efi_phys.systab = (efi_system_table_t *)
+ (boot_params.efi_info.efi_systab |
+ ((__u64)boot_params.efi_info.efi_systab_hi<<32));
+ efi_native = efi_64bit;
+#endif
+
+ if (efi_systab_init(efi_phys.systab)) {
+ efi_enabled = 0;
+ return;
+ }
+
+ /*
+ * Show what we know for posterity
+ */
+ c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
+ if (c16) {
+ for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
+ vendor[i] = *c16++;
+ vendor[i] = '\0';
+ } else
+ pr_err("Could not map the firmware vendor!\n");
+ early_iounmap(tmp, 2);
+
+ pr_info("EFI v%u.%.02u by %s\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff, vendor);
+
+ if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) {
+ efi_enabled = 0;
+ return;
+ }
+
+ /*
+ * Note: We currently don't support runtime services on an EFI
+ * that doesn't match the kernel 32/64-bit mode.
+ */
+
+ if (!efi_native)
+ pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
+ else if (efi_runtime_init()) {
+ efi_enabled = 0;
+ return;
+ }
+
+ if (efi_memmap_init()) {
+ efi_enabled = 0;
+ return;
+ }
#ifdef CONFIG_X86_32
- x86_platform.get_wallclock = efi_get_time;
- x86_platform.set_wallclock = efi_set_rtc_mmss;
+ if (efi_native) {
+ x86_platform.get_wallclock = efi_get_time;
+ x86_platform.set_wallclock = efi_set_rtc_mmss;
+ }
#endif
#if EFI_DEBUG
@@ -629,6 +782,14 @@ void __init efi_enter_virtual_mode(void)
efi.systab = NULL;
+ /*
+ * We don't do virtual mode, since we don't do runtime services, on
+ * non-native EFI
+ */
+
+ if (!efi_native)
+ goto out;
+
/* Merge contiguous regions of the same type and attribute */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
u64 prev_size;
@@ -677,7 +838,7 @@ void __init efi_enter_virtual_mode(void)
md->virt_addr = (u64) (unsigned long) va;
if (!va) {
- printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
+ pr_err("ioremap of 0x%llX failed!\n",
(unsigned long long)md->phys_addr);
continue;
}
@@ -711,8 +872,8 @@ void __init efi_enter_virtual_mode(void)
(efi_memory_desc_t *)__pa(new_memmap));
if (status != EFI_SUCCESS) {
- printk(KERN_ALERT "Unable to switch EFI into virtual mode "
- "(status=%lx)!\n", status);
+ pr_alert("Unable to switch EFI into virtual mode "
+ "(status=%lx)!\n", status);
panic("EFI call to SetVirtualAddressMap() failed!");
}
@@ -744,6 +905,8 @@ void __init efi_enter_virtual_mode(void)
efi.query_capsule_caps = virt_efi_query_capsule_caps;
if (__supported_pte_mask & _PAGE_NX)
runtime_code_page_mkexec();
+
+out:
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
kfree(new_memmap);
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile
index 07c9cd0..246b788 100644
--- a/arch/x86/platform/geode/Makefile
+++ b/arch/x86/platform/geode/Makefile
@@ -1 +1,2 @@
obj-$(CONFIG_ALIX) += alix.o
+obj-$(CONFIG_NET5501) += net5501.o
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index ca19736..90e23e7 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -6,6 +6,7 @@
*
* Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
* Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
+ * and Philip Prindeville <philipp@redfish-solutions.com>
*
* TODO: There are large similarities with leds-net5501.c
* by Alessandro Zummo <a.zummo@towertech.it>
@@ -24,14 +25,47 @@
#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
+#include <linux/dmi.h>
#include <asm/geode.h>
-static int force = 0;
+#define BIOS_SIGNATURE_TINYBIOS 0xf0000
+#define BIOS_SIGNATURE_COREBOOT 0x500
+#define BIOS_REGION_SIZE 0x10000
+
+static bool force = 0;
module_param(force, bool, 0444);
/* FIXME: Award bios is not automatically detected as Alix platform */
MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform");
+static struct gpio_keys_button alix_gpio_buttons[] = {
+ {
+ .code = KEY_RESTART,
+ .gpio = 24,
+ .active_low = 1,
+ .desc = "Reset button",
+ .type = EV_KEY,
+ .wakeup = 0,
+ .debounce_interval = 100,
+ .can_disable = 0,
+ }
+};
+static struct gpio_keys_platform_data alix_buttons_data = {
+ .buttons = alix_gpio_buttons,
+ .nbuttons = ARRAY_SIZE(alix_gpio_buttons),
+ .poll_interval = 20,
+};
+
+static struct platform_device alix_buttons_dev = {
+ .name = "gpio-keys-polled",
+ .id = 1,
+ .dev = {
+ .platform_data = &alix_buttons_data,
+ }
+};
+
static struct gpio_led alix_leds[] = {
{
.name = "alix:1",
@@ -64,17 +98,22 @@ static struct platform_device alix_leds_dev = {
.dev.platform_data = &alix_leds_data,
};
+static struct __initdata platform_device *alix_devs[] = {
+ &alix_buttons_dev,
+ &alix_leds_dev,
+};
+
static void __init register_alix(void)
{
/* Setup LED control through leds-gpio driver */
- platform_device_register(&alix_leds_dev);
+ platform_add_devices(alix_devs, ARRAY_SIZE(alix_devs));
}
-static int __init alix_present(unsigned long bios_phys,
+static bool __init alix_present(unsigned long bios_phys,
const char *alix_sig,
size_t alix_sig_len)
{
- const size_t bios_len = 0x00010000;
+ const size_t bios_len = BIOS_REGION_SIZE;
const char *bios_virt;
const char *scan_end;
const char *p;
@@ -84,7 +123,7 @@ static int __init alix_present(unsigned long bios_phys,
printk(KERN_NOTICE "%s: forced to skip BIOS test, "
"assume system is ALIX.2/ALIX.3\n",
KBUILD_MODNAME);
- return 1;
+ return true;
}
bios_virt = phys_to_virt(bios_phys);
@@ -109,15 +148,33 @@ static int __init alix_present(unsigned long bios_phys,
*a = '\0';
tail = p + alix_sig_len;
- if ((tail[0] == '2' || tail[0] == '3')) {
+ if ((tail[0] == '2' || tail[0] == '3' || tail[0] == '6')) {
printk(KERN_INFO
"%s: system is recognized as \"%s\"\n",
KBUILD_MODNAME, name);
- return 1;
+ return true;
}
}
- return 0;
+ return false;
+}
+
+static bool __init alix_present_dmi(void)
+{
+ const char *vendor, *product;
+
+ vendor = dmi_get_system_info(DMI_SYS_VENDOR);
+ if (!vendor || strcmp(vendor, "PC Engines"))
+ return false;
+
+ product = dmi_get_system_info(DMI_PRODUCT_NAME);
+ if (!product || (strcmp(product, "ALIX.2D") && strcmp(product, "ALIX.6")))
+ return false;
+
+ printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n",
+ KBUILD_MODNAME, vendor, product);
+
+ return true;
}
static int __init alix_init(void)
@@ -128,8 +185,9 @@ static int __init alix_init(void)
if (!is_geode())
return 0;
- if (alix_present(0xf0000, tinybios_sig, sizeof(tinybios_sig) - 1) ||
- alix_present(0x500, coreboot_sig, sizeof(coreboot_sig) - 1))
+ if (alix_present(BIOS_SIGNATURE_TINYBIOS, tinybios_sig, sizeof(tinybios_sig) - 1) ||
+ alix_present(BIOS_SIGNATURE_COREBOOT, coreboot_sig, sizeof(coreboot_sig) - 1) ||
+ alix_present_dmi())
register_alix();
return 0;
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
new file mode 100644
index 0000000..66d377e
--- /dev/null
+++ b/arch/x86/platform/geode/net5501.c
@@ -0,0 +1,154 @@
+/*
+ * System Specific setup for Soekris net5501
+ * At the moment this means setup of GPIO control of LEDs and buttons
+ * on net5501 boards.
+ *
+ *
+ * Copyright (C) 2008-2009 Tower Technologies
+ * Written by Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
+ * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
+ * and Philip Prindeville <philipp@redfish-solutions.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
+
+#include <asm/geode.h>
+
+#define BIOS_REGION_BASE 0xffff0000
+#define BIOS_REGION_SIZE 0x00010000
+
+static struct gpio_keys_button net5501_gpio_buttons[] = {
+ {
+ .code = KEY_RESTART,
+ .gpio = 24,
+ .active_low = 1,
+ .desc = "Reset button",
+ .type = EV_KEY,
+ .wakeup = 0,
+ .debounce_interval = 100,
+ .can_disable = 0,
+ }
+};
+static struct gpio_keys_platform_data net5501_buttons_data = {
+ .buttons = net5501_gpio_buttons,
+ .nbuttons = ARRAY_SIZE(net5501_gpio_buttons),
+ .poll_interval = 20,
+};
+
+static struct platform_device net5501_buttons_dev = {
+ .name = "gpio-keys-polled",
+ .id = 1,
+ .dev = {
+ .platform_data = &net5501_buttons_data,
+ }
+};
+
+static struct gpio_led net5501_leds[] = {
+ {
+ .name = "net5501:1",
+ .gpio = 6,
+ .default_trigger = "default-on",
+ .active_low = 1,
+ },
+};
+
+static struct gpio_led_platform_data net5501_leds_data = {
+ .num_leds = ARRAY_SIZE(net5501_leds),
+ .leds = net5501_leds,
+};
+
+static struct platform_device net5501_leds_dev = {
+ .name = "leds-gpio",
+ .id = -1,
+ .dev.platform_data = &net5501_leds_data,
+};
+
+static struct __initdata platform_device *net5501_devs[] = {
+ &net5501_buttons_dev,
+ &net5501_leds_dev,
+};
+
+static void __init register_net5501(void)
+{
+ /* Setup LED control through leds-gpio driver */
+ platform_add_devices(net5501_devs, ARRAY_SIZE(net5501_devs));
+}
+
+struct net5501_board {
+ u16 offset;
+ u16 len;
+ char *sig;
+};
+
+static struct net5501_board __initdata boards[] = {
+ { 0xb7b, 7, "net5501" }, /* net5501 v1.33/1.33c */
+ { 0xb1f, 7, "net5501" }, /* net5501 v1.32i */
+};
+
+static bool __init net5501_present(void)
+{
+ int i;
+ unsigned char *rombase, *bios;
+ bool found = false;
+
+ rombase = ioremap(BIOS_REGION_BASE, BIOS_REGION_SIZE - 1);
+ if (!rombase) {
+ printk(KERN_ERR "%s: failed to get rombase\n", KBUILD_MODNAME);
+ return found;
+ }
+
+ bios = rombase + 0x20; /* null terminated */
+
+ if (memcmp(bios, "comBIOS", 7))
+ goto unmap;
+
+ for (i = 0; i < ARRAY_SIZE(boards); i++) {
+ unsigned char *model = rombase + boards[i].offset;
+
+ if (!memcmp(model, boards[i].sig, boards[i].len)) {
+ printk(KERN_INFO "%s: system is recognized as \"%s\"\n",
+ KBUILD_MODNAME, model);
+
+ found = true;
+ break;
+ }
+ }
+
+unmap:
+ iounmap(rombase);
+ return found;
+}
+
+static int __init net5501_init(void)
+{
+ if (!is_geode())
+ return 0;
+
+ if (!net5501_present())
+ return 0;
+
+ register_net5501();
+
+ return 0;
+}
+
+module_init(net5501_init);
+
+MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
+MODULE_DESCRIPTION("Soekris net5501 System Setup");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index 1ba7f5e..5917eb5 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -42,7 +42,7 @@ MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
-static int force;
+static bool force;
module_param(force, bool, 0);
MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index 1ea3877..af1da7e 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1,4 +1,3 @@
-obj-$(CONFIG_X86_MRST) += mrst.o
-obj-$(CONFIG_X86_MRST) += vrtc.o
-obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
-obj-$(CONFIG_X86_MRST) += pmu.o
+obj-$(CONFIG_X86_INTEL_MID) += mrst.o
+obj-$(CONFIG_X86_INTEL_MID) += vrtc.o
+obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index ad4ec1c..e0a3723 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -28,6 +28,8 @@
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/mfd/intel_msic.h>
+#include <linux/gpio.h>
+#include <linux/i2c/tc35876x.h>
#include <asm/setup.h>
#include <asm/mpspec_def.h>
@@ -78,16 +80,11 @@ int sfi_mrtc_num;
static void mrst_power_off(void)
{
- if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
- intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 1);
}
static void mrst_reboot(void)
{
- if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
- intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
- else
- intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+ intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
}
/* parse all the mtimer info to a static mtimer array */
@@ -200,34 +197,28 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
static unsigned long __init mrst_calibrate_tsc(void)
{
- unsigned long flags, fast_calibrate;
- if (__mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) {
- u32 lo, hi, ratio, fsb;
-
- rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
- pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
- ratio = (hi >> 8) & 0x1f;
- pr_debug("ratio is %d\n", ratio);
- if (!ratio) {
- pr_err("read a zero ratio, should be incorrect!\n");
- pr_err("force tsc ratio to 16 ...\n");
- ratio = 16;
- }
- rdmsr(MSR_FSB_FREQ, lo, hi);
- if ((lo & 0x7) == 0x7)
- fsb = PENWELL_FSB_FREQ_83SKU;
- else
- fsb = PENWELL_FSB_FREQ_100SKU;
- fast_calibrate = ratio * fsb;
- pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
- lapic_timer_frequency = fsb * 1000 / HZ;
- /* mark tsc clocksource as reliable */
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
- } else {
- local_irq_save(flags);
- fast_calibrate = apbt_quick_calibrate();
- local_irq_restore(flags);
+ unsigned long fast_calibrate;
+ u32 lo, hi, ratio, fsb;
+
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
+ ratio = (hi >> 8) & 0x1f;
+ pr_debug("ratio is %d\n", ratio);
+ if (!ratio) {
+ pr_err("read a zero ratio, should be incorrect!\n");
+ pr_err("force tsc ratio to 16 ...\n");
+ ratio = 16;
}
+ rdmsr(MSR_FSB_FREQ, lo, hi);
+ if ((lo & 0x7) == 0x7)
+ fsb = PENWELL_FSB_FREQ_83SKU;
+ else
+ fsb = PENWELL_FSB_FREQ_100SKU;
+ fast_calibrate = ratio * fsb;
+ pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
+ lapic_timer_frequency = fsb * 1000 / HZ;
+ /* mark tsc clocksource as reliable */
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
if (fast_calibrate)
return fast_calibrate;
@@ -261,16 +252,11 @@ static void __cpuinit mrst_arch_setup(void)
{
if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
__mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
- else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
- __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
else {
- pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n",
+ pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
boot_cpu_data.x86, boot_cpu_data.x86_model);
- __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
+ __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
}
- pr_debug("Moorestown CPU %s identified\n",
- (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
- "Lincroft" : "Penwell");
}
/* MID systems don't have i8042 controller */
@@ -686,6 +672,24 @@ static void *msic_ocd_platform_data(void *info)
return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
}
+static void *msic_thermal_platform_data(void *info)
+{
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
+}
+
+/* tc35876x DSI-LVDS bridge chip and panel platform data */
+static void *tc35876x_platform_data(void *data)
+{
+ static struct tc35876x_platform_data pdata;
+
+ /* gpio pins set to -1 will not be used by the driver */
+ pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
+ pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
+ pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
+
+ return &pdata;
+}
+
static const struct devs_id __initconst device_ids[] = {
{"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data},
{"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
@@ -698,6 +702,7 @@ static const struct devs_id __initconst device_ids[] = {
{"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
{"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
{"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data},
+ {"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data},
/* MSIC subdevices */
{"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
@@ -705,6 +710,7 @@ static const struct devs_id __initconst device_ids[] = {
{"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data},
{"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data},
{"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data},
+ {"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data},
{},
};
@@ -848,8 +854,7 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry)
if (mrst_has_msic())
return;
- /* ID as IRQ is a hack that will go away */
- pdev = platform_device_alloc(entry->name, entry->irq);
+ pdev = platform_device_alloc(entry->name, 0);
if (pdev == NULL) {
pr_err("out of memory for SFI platform device '%s'.\n",
entry->name);
@@ -1030,6 +1035,7 @@ static int __init pb_keys_init(void)
num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
for (i = 0; i < num; i++) {
gb[i].gpio = get_gpio_by_name(gb[i].desc);
+ pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio);
if (gb[i].gpio == -1)
continue;
diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c
deleted file mode 100644
index c0ac06d..0000000
--- a/arch/x86/platform/mrst/pmu.c
+++ /dev/null
@@ -1,817 +0,0 @@
-/*
- * mrst/pmu.c - driver for MRST Power Management Unit
- *
- * Copyright (c) 2011, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/cpuidle.h>
-#include <linux/debugfs.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/seq_file.h>
-#include <linux/sfi.h>
-#include <asm/intel_scu_ipc.h>
-#include "pmu.h"
-
-#define IPCMSG_FW_REVISION 0xF4
-
-struct mrst_device {
- u16 pci_dev_num; /* DEBUG only */
- u16 lss;
- u16 latest_request;
- unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */
-};
-
-/*
- * comlete list of MRST PCI devices
- */
-static struct mrst_device mrst_devs[] = {
-/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */
-/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */
-/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */
-/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */
-/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */
-/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */
-/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */
-/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */
-/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */
-/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */
-/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */
-/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */
-/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */
-/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */
-/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */
-/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */
-/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */
-/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */
-/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */
-/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */
-/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */
-/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */
-
-/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */
-
-/* 23 */ { 0x4102, 0 }, /* Lincroft */
-/* 24 */ { 0x4110, 0 }, /* Lincroft */
-};
-
-/* n.b. We ignore PCI-id 0x815 in LSS9 b/c Linux has no driver for it */
-static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
-static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
- 0x0804, 0x0805, 0x080f, 0};
-
-/* handle concurrent SMP invokations of pmu_pci_set_power_state() */
-static spinlock_t mrst_pmu_power_state_lock;
-
-static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */
-static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */
-
-static int graphics_is_off;
-static int lss_s0i3_enabled;
-static bool mrst_pmu_s0i3_enable;
-
-/* debug counters */
-static u32 pmu_wait_ready_calls;
-static u32 pmu_wait_ready_udelays;
-static u32 pmu_wait_ready_udelays_max;
-static u32 pmu_wait_done_calls;
-static u32 pmu_wait_done_udelays;
-static u32 pmu_wait_done_udelays_max;
-static u32 pmu_set_power_state_entry;
-static u32 pmu_set_power_state_send_cmd;
-
-static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
-{
- int index = 0;
-
- if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
- index = pci_dev_num - 0x800;
- else if (pci_dev_num == 0x084F)
- index = 22;
- else if (pci_dev_num == 0x4102)
- index = 23;
- else if (pci_dev_num == 0x4110)
- index = 24;
-
- if (pci_dev_num != mrst_devs[index].pci_dev_num) {
- WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
- return 0;
- }
-
- return &mrst_devs[index];
-}
-
-/**
- * mrst_pmu_validate_cstates
- * @dev: cpuidle_device
- *
- * Certain states are not appropriate for governor to pick in some cases.
- * This function will be called as cpuidle_device's prepare callback and
- * thus tells governor to ignore such states when selecting the next state
- * to enter.
- */
-
-#define IDLE_STATE4_IS_C6 4
-#define IDLE_STATE5_IS_S0I3 5
-
-int mrst_pmu_invalid_cstates(void)
-{
- int cpu = smp_processor_id();
-
- /*
- * Demote to C4 if the PMU is busy.
- * Since LSS changes leave the busy bit clear...
- * busy means either the PMU is waiting for an ACK-C6 that
- * isn't coming due to an MWAIT that returned immediately;
- * or we returned from S0i3 successfully, and the PMU
- * is not done sending us interrupts.
- */
- if (pmu_read_busy_status())
- return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
-
- /*
- * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
- * or if device LSS is insufficient, or the GPU is active,
- * or if it has been explicitly disabled.
- */
- if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
- !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
- return 1 << IDLE_STATE5_IS_S0I3;
- else
- return 0;
-}
-
-/*
- * pmu_update_wake_counters(): read PM_WKS, update wake_counters[]
- * DEBUG only.
- */
-static void pmu_update_wake_counters(void)
-{
- int lss;
- u32 wake_status;
-
- wake_status = pmu_read_wks();
-
- for (lss = 0; lss < MRST_NUM_LSS; ++lss) {
- if (wake_status & (1 << lss))
- wake_counters[lss]++;
- }
-}
-
-int mrst_pmu_s0i3_entry(void)
-{
- int status;
-
- /* Clear any possible error conditions */
- pmu_write_ics(0x300);
-
- /* set wake control to current D-states */
- pmu_write_wssc(S0I3_SSS_TARGET);
-
- status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
- pmu_update_wake_counters();
- return status;
-}
-
-/* poll for maximum of 5ms for busy bit to clear */
-static int pmu_wait_ready(void)
-{
- int udelays;
-
- pmu_wait_ready_calls++;
-
- for (udelays = 0; udelays < 500; ++udelays) {
- if (udelays > pmu_wait_ready_udelays_max)
- pmu_wait_ready_udelays_max = udelays;
-
- if (pmu_read_busy_status() == 0)
- return 0;
-
- udelay(10);
- pmu_wait_ready_udelays++;
- }
-
- /*
- * if this fires, observe
- * /sys/kernel/debug/mrst_pmu_wait_ready_calls
- * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
- */
- WARN_ONCE(1, "SCU not ready for 5ms");
- return -EBUSY;
-}
-/* poll for maximum of 50ms us for busy bit to clear */
-static int pmu_wait_done(void)
-{
- int udelays;
-
- pmu_wait_done_calls++;
-
- for (udelays = 0; udelays < 500; ++udelays) {
- if (udelays > pmu_wait_done_udelays_max)
- pmu_wait_done_udelays_max = udelays;
-
- if (pmu_read_busy_status() == 0)
- return 0;
-
- udelay(100);
- pmu_wait_done_udelays++;
- }
-
- /*
- * if this fires, observe
- * /sys/kernel/debug/mrst_pmu_wait_done_calls
- * /sys/kernel/debug/mrst_pmu_wait_done_udelays
- */
- WARN_ONCE(1, "SCU not done for 50ms");
- return -EBUSY;
-}
-
-u32 mrst_pmu_msi_is_disabled(void)
-{
- return pmu_msi_is_disabled();
-}
-
-void mrst_pmu_enable_msi(void)
-{
- pmu_msi_enable();
-}
-
-/**
- * pmu_irq - pmu driver interrupt handler
- * Context: interrupt context
- */
-static irqreturn_t pmu_irq(int irq, void *dummy)
-{
- union pmu_pm_ics pmu_ics;
-
- pmu_ics.value = pmu_read_ics();
-
- if (!pmu_ics.bits.pending)
- return IRQ_NONE;
-
- switch (pmu_ics.bits.cause) {
- case INT_SPURIOUS:
- case INT_CMD_DONE:
- case INT_CMD_ERR:
- case INT_WAKE_RX:
- case INT_SS_ERROR:
- case INT_S0IX_MISS:
- case INT_NO_ACKC6:
- pmu_irq_stats[pmu_ics.bits.cause]++;
- break;
- default:
- pmu_irq_stats[INT_INVALID]++;
- }
-
- pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
-
- return IRQ_HANDLED;
-}
-
-/*
- * Translate PCI power management to MRST LSS D-states
- */
-static int pci_2_mrst_state(int lss, pci_power_t pci_state)
-{
- switch (pci_state) {
- case PCI_D0:
- if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
- return D0i1;
- else
- return D0;
- case PCI_D1:
- return D0i1;
- case PCI_D2:
- return D0i2;
- case PCI_D3hot:
- case PCI_D3cold:
- return D0i3;
- default:
- WARN(1, "pci_state %d\n", pci_state);
- return 0;
- }
-}
-
-static int pmu_issue_command(u32 pm_ssc)
-{
- union pmu_pm_set_cfg_cmd_t command;
-
- if (pmu_read_busy_status()) {
- pr_debug("pmu is busy, Operation not permitted\n");
- return -1;
- }
-
- /*
- * enable interrupts in PMU so that interrupts are
- * propagated when ioc bit for a particular set
- * command is set
- */
-
- pmu_irq_enable();
-
- /* Configure the sub systems for pmu2 */
-
- pmu_write_ssc(pm_ssc);
-
- /*
- * Send the set config command for pmu its configured
- * for mode CM_IMMEDIATE & hence with No Trigger
- */
-
- command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
- command.pmu2_params.d_param.cfg_delay = 0;
- command.pmu2_params.d_param.rsvd = 0;
-
- /* construct the command to send SET_CFG to particular PMU */
- command.pmu2_params.d_param.cmd = SET_CFG_CMD;
- command.pmu2_params.d_param.ioc = 0;
- command.pmu2_params.d_param.mode_id = 0;
- command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
-
- /* write the value of PM_CMD into particular PMU */
- pr_debug("pmu command being written %x\n",
- command.pmu_pm_set_cfg_cmd_value);
-
- pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value);
-
- return 0;
-}
-
-static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
-{
- u16 existing_request;
- int i;
-
- for (i = 0; ids[i]; ++i) {
- struct mrst_device *mrst_dev;
-
- mrst_dev = pci_id_2_mrst_dev(ids[i]);
- if (unlikely(!mrst_dev))
- continue;
-
- existing_request = mrst_dev->latest_request;
- if (existing_request < pci_state)
- pci_state = existing_request;
- }
- return pci_state;
-}
-
-/**
- * pmu_pci_set_power_state - Callback function is used by all the PCI devices
- * for a platform specific device power on/shutdown.
- */
-
-int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
-{
- u32 old_sss, new_sss;
- int status = 0;
- struct mrst_device *mrst_dev;
-
- pmu_set_power_state_entry++;
-
- BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
- BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
-
- mrst_dev = pci_id_2_mrst_dev(pdev->device);
- if (unlikely(!mrst_dev))
- return -ENODEV;
-
- mrst_dev->pci_state_counts[pci_state]++; /* count invocations */
-
- /* PMU driver calls self as part of PCI initialization, ignore */
- if (pdev->device == PCI_DEV_ID_MRST_PMU)
- return 0;
-
- BUG_ON(!pmu_reg); /* SW bug if called before initialized */
-
- spin_lock(&mrst_pmu_power_state_lock);
-
- if (pdev->d3_delay) {
- dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
- pdev->d3_delay);
- pdev->d3_delay = 0;
- }
- /*
- * If Lincroft graphics, simply remember state
- */
- if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY
- && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) {
- if (pci_state == PCI_D0)
- graphics_is_off = 0;
- else
- graphics_is_off = 1;
- goto ret;
- }
-
- if (!mrst_dev->lss)
- goto ret; /* device with no LSS */
-
- if (mrst_dev->latest_request == pci_state)
- goto ret; /* no change */
-
- mrst_dev->latest_request = pci_state; /* record latest request */
-
- /*
- * LSS9 and LSS10 contain multiple PCI devices.
- * Use the lowest numbered (highest power) state in the LSS
- */
- if (mrst_dev->lss == 9)
- pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state);
- else if (mrst_dev->lss == 10)
- pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
-
- status = pmu_wait_ready();
- if (status)
- goto ret;
-
- old_sss = pmu_read_sss();
- new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
- new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state),
- mrst_dev->lss);
-
- if (new_sss == old_sss)
- goto ret; /* nothing to do */
-
- pmu_set_power_state_send_cmd++;
-
- status = pmu_issue_command(new_sss);
-
- if (unlikely(status != 0)) {
- dev_err(&pdev->dev, "Failed to Issue a PM command\n");
- goto ret;
- }
-
- if (pmu_wait_done())
- goto ret;
-
- lss_s0i3_enabled =
- ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
-ret:
- spin_unlock(&mrst_pmu_power_state_lock);
- return status;
-}
-
-#ifdef CONFIG_DEBUG_FS
-static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"};
-
-static inline const char *d0ix_name(int state)
-{
- return d0ix_names[(int) state];
-}
-
-static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
-{
- struct pci_dev *pdev = NULL;
- u32 cur_pmsss;
- int lss;
-
- seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
-
- cur_pmsss = pmu_read_sss();
-
- seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
-
- seq_printf(s, "0x%08X Current SSS ", cur_pmsss);
- seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n");
-
- if (cpumask_equal(cpu_online_mask, cpumask_of(0)))
- seq_printf(s, "cpu0 is only cpu online\n");
- else
- seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n");
-
- seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]");
-
-
- for_each_pci_dev(pdev) {
- int pos;
- u16 pmcsr;
- struct mrst_device *mrst_dev;
- int i;
-
- mrst_dev = pci_id_2_mrst_dev(pdev->device);
-
- seq_printf(s, "%s %04x/%04X %-16.16s ",
- dev_name(&pdev->dev),
- pdev->vendor, pdev->device,
- dev_driver_string(&pdev->dev));
-
- if (unlikely (!mrst_dev)) {
- seq_printf(s, " UNKNOWN\n");
- continue;
- }
-
- if (mrst_dev->lss)
- seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
- d0ix_name(((cur_pmsss >>
- (mrst_dev->lss * 2)) & 0x3)));
- else
- seq_printf(s, " ");
-
- /* PCI PM config space setting */
- pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
- if (pos != 0) {
- pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
- seq_printf(s, "PCI-%-4s",
- pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK));
- } else {
- seq_printf(s, " ");
- }
-
- seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request));
- for (i = 0; i <= PCI_D3cold; ++i)
- seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
-
- if (mrst_dev->lss) {
- unsigned int lssmask;
-
- lssmask = SSMSK(D0i3, mrst_dev->lss);
-
- if ((lssmask & S0I3_SSS_TARGET) &&
- ((lssmask & cur_pmsss) !=
- (lssmask & S0I3_SSS_TARGET)))
- seq_printf(s , "[BLOCKS s0i3]");
- }
-
- seq_printf(s, "\n");
- }
- seq_printf(s, "Wake Counters:\n");
- for (lss = 0; lss < MRST_NUM_LSS; ++lss)
- seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]);
-
- seq_printf(s, "Interrupt Counters:\n");
- seq_printf(s,
- "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n"
- "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n"
- "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n"
- "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n",
- pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE],
- pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
- pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
- pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
-
- seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n",
- pmu_wait_ready_calls);
- seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n",
- pmu_wait_ready_udelays);
- seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n",
- pmu_wait_ready_udelays_max);
- seq_printf(s, "mrst_pmu_wait_done_calls %8d\n",
- pmu_wait_done_calls);
- seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n",
- pmu_wait_done_udelays);
- seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n",
- pmu_wait_done_udelays_max);
- seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n",
- pmu_set_power_state_entry);
- seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n",
- pmu_set_power_state_send_cmd);
- seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
-
- return 0;
-}
-
-static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
-{
- return single_open(file, debug_mrst_pmu_show, NULL);
-}
-
-static const struct file_operations devices_state_operations = {
- .open = debug_mrst_pmu_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* DEBUG_FS */
-
-/*
- * Validate SCU PCI shim PCI vendor capability byte
- * against LSS hard-coded in mrst_devs[] above.
- * DEBUG only.
- */
-static void pmu_scu_firmware_debug(void)
-{
- struct pci_dev *pdev = NULL;
-
- for_each_pci_dev(pdev) {
- struct mrst_device *mrst_dev;
- u8 pci_config_lss;
- int pos;
-
- mrst_dev = pci_id_2_mrst_dev(pdev->device);
- if (unlikely(!mrst_dev)) {
- printk(KERN_ERR FW_BUG "pmu: Unknown "
- "PCI device 0x%04X\n", pdev->device);
- continue;
- }
-
- if (mrst_dev->lss == 0)
- continue; /* no LSS in our table */
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
- if (!pos != 0) {
- printk(KERN_ERR FW_BUG "pmu: 0x%04X "
- "missing PCI Vendor Capability\n",
- pdev->device);
- continue;
- }
- pci_read_config_byte(pdev, pos + 4, &pci_config_lss);
- if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) {
- printk(KERN_ERR FW_BUG "pmu: 0x%04X "
- "invalid PCI Vendor Capability 0x%x "
- " expected LSS 0x%X\n",
- pdev->device, pci_config_lss, mrst_dev->lss);
- continue;
- }
- pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK;
-
- if (mrst_dev->lss == pci_config_lss)
- continue;
-
- printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n",
- pdev->device, pci_config_lss, mrst_dev->lss);
- }
-}
-
-/**
- * pmu_probe
- */
-static int __devinit pmu_probe(struct pci_dev *pdev,
- const struct pci_device_id *pci_id)
-{
- int ret;
- struct mrst_pmu_reg *pmu;
-
- /* Init the device */
- ret = pci_enable_device(pdev);
- if (ret) {
- dev_err(&pdev->dev, "Unable to Enable PCI device\n");
- return ret;
- }
-
- ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME);
- if (ret < 0) {
- dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n");
- goto out_err1;
- }
-
- /* Map the memory of PMU reg base */
- pmu = pci_iomap(pdev, 0, 0);
- if (!pmu) {
- dev_err(&pdev->dev, "Unable to map the PMU address space\n");
- ret = -ENOMEM;
- goto out_err2;
- }
-
-#ifdef CONFIG_DEBUG_FS
- /* /sys/kernel/debug/mrst_pmu */
- (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO,
- NULL, NULL, &devices_state_operations);
-#endif
- pmu_reg = pmu; /* success */
-
- if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) {
- dev_err(&pdev->dev, "Registering isr has failed\n");
- ret = -1;
- goto out_err3;
- }
-
- pmu_scu_firmware_debug();
-
- pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */
-
- pmu_wait_ready();
-
- pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */
- pmu_write_cmd(0x201);
-
- spin_lock_init(&mrst_pmu_power_state_lock);
-
- /* Enable the hardware interrupt */
- pmu_irq_enable();
- return 0;
-
-out_err3:
- free_irq(pdev->irq, NULL);
- pci_iounmap(pdev, pmu_reg);
- pmu_reg = NULL;
-out_err2:
- pci_release_region(pdev, 0);
-out_err1:
- pci_disable_device(pdev);
- return ret;
-}
-
-static void __devexit pmu_remove(struct pci_dev *pdev)
-{
- dev_err(&pdev->dev, "Mid PM pmu_remove called\n");
-
- /* Freeing up the irq */
- free_irq(pdev->irq, NULL);
-
- pci_iounmap(pdev, pmu_reg);
- pmu_reg = NULL;
-
- /* disable the current PCI device */
- pci_release_region(pdev, 0);
- pci_disable_device(pdev);
-}
-
-static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = {
- { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 },
- { }
-};
-
-MODULE_DEVICE_TABLE(pci, pmu_pci_ids);
-
-static struct pci_driver driver = {
- .name = MRST_PMU_DRV_NAME,
- .id_table = pmu_pci_ids,
- .probe = pmu_probe,
- .remove = __devexit_p(pmu_remove),
-};
-
-/**
- * pmu_pci_register - register the PMU driver as PCI device
- */
-static int __init pmu_pci_register(void)
-{
- return pci_register_driver(&driver);
-}
-
-/* Register and probe via fs_initcall() to preceed device_initcall() */
-fs_initcall(pmu_pci_register);
-
-static void __exit mid_pci_cleanup(void)
-{
- pci_unregister_driver(&driver);
-}
-
-static int ia_major;
-static int ia_minor;
-
-static int pmu_sfi_parse_oem(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
-
- sb = (struct sfi_table_simple *)table;
- ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
- ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
- printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
- ia_major, ia_minor);
-
- return 0;
-}
-
-static int __init scu_fw_check(void)
-{
- int ret;
- u32 fw_version;
-
- if (!pmu_reg)
- return 0; /* this driver didn't probe-out */
-
- sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
-
- if (ia_major < 0x6005 || ia_minor < 0x1525) {
- WARN(1, "mrst_pmu: IA FW version too old\n");
- return -1;
- }
-
- ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
- &fw_version, 1);
-
- if (ret) {
- WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
- } else {
- int scu_major = (fw_version >> 8) & 0xFF;
- int scu_minor = (fw_version >> 0) & 0xFF;
-
- printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
-
- if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
- printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
- mrst_pmu_s0i3_enable = true;
- } else {
- WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
- scu_major, scu_minor);
- }
- }
- return 0;
-}
-late_initcall(scu_fw_check);
-module_exit(mid_pci_cleanup);
diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h
deleted file mode 100644
index bfbfe64..0000000
--- a/arch/x86/platform/mrst/pmu.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c
- *
- * Copyright (c) 2011, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _MRST_PMU_H_
-#define _MRST_PMU_H_
-
-#define PCI_DEV_ID_MRST_PMU 0x0810
-#define MRST_PMU_DRV_NAME "mrst_pmu"
-#define PCI_SUB_CLASS_MASK 0xFF00
-
-#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F
-#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80
-
-#define SUB_SYS_ALL_D0I1 0x01155555
-#define S0I3_WAKE_SOURCES 0x00001FFF
-
-#define PM_S0I3_COMMAND \
- ((0 << 31) | /* Reserved */ \
- (0 << 30) | /* Core must be idle */ \
- (0xc2 << 22) | /* ACK C6 trigger */ \
- (3 << 19) | /* Trigger on DMI message */ \
- (3 << 16) | /* Enter S0i3 */ \
- (0 << 13) | /* Numeric mode ID (sw) */ \
- (3 << 9) | /* Trigger mode */ \
- (0 << 8) | /* Do not interrupt */ \
- (1 << 0)) /* Set configuration */
-
-#define LSS_DMI 0
-#define LSS_SD_HC0 1
-#define LSS_SD_HC1 2
-#define LSS_NAND 3
-#define LSS_IMAGING 4
-#define LSS_SECURITY 5
-#define LSS_DISPLAY 6
-#define LSS_USB_HC 7
-#define LSS_USB_OTG 8
-#define LSS_AUDIO 9
-#define LSS_AUDIO_LPE 9
-#define LSS_AUDIO_SSP 9
-#define LSS_I2C0 10
-#define LSS_I2C1 10
-#define LSS_I2C2 10
-#define LSS_KBD 10
-#define LSS_SPI0 10
-#define LSS_SPI1 10
-#define LSS_SPI2 10
-#define LSS_GPIO 10
-#define LSS_SRAM 11 /* used by SCU, do not touch */
-#define LSS_SD_HC2 12
-/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
-#define MRST_NUM_LSS 13
-
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-
-#define SSMSK(mask, lss) ((mask) << ((lss) * 2))
-#define D0 0
-#define D0i1 1
-#define D0i2 2
-#define D0i3 3
-
-#define S0I3_SSS_TARGET ( \
- SSMSK(D0i1, LSS_DMI) | \
- SSMSK(D0i3, LSS_SD_HC0) | \
- SSMSK(D0i3, LSS_SD_HC1) | \
- SSMSK(D0i3, LSS_NAND) | \
- SSMSK(D0i3, LSS_SD_HC2) | \
- SSMSK(D0i3, LSS_IMAGING) | \
- SSMSK(D0i3, LSS_SECURITY) | \
- SSMSK(D0i3, LSS_DISPLAY) | \
- SSMSK(D0i3, LSS_USB_HC) | \
- SSMSK(D0i3, LSS_USB_OTG) | \
- SSMSK(D0i3, LSS_AUDIO) | \
- SSMSK(D0i1, LSS_I2C0))
-
-/*
- * D0i1 on Langwell is Autonomous Clock Gating (ACG).
- * Enable ACG on every LSS except camera and audio
- */
-#define D0I1_ACG_SSS_TARGET \
- (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
-
-enum cm_mode {
- CM_NOP, /* ignore the config mode value */
- CM_IMMEDIATE,
- CM_DELAY,
- CM_TRIGGER,
- CM_INVALID
-};
-
-enum sys_state {
- SYS_STATE_S0I0,
- SYS_STATE_S0I1,
- SYS_STATE_S0I2,
- SYS_STATE_S0I3,
- SYS_STATE_S3,
- SYS_STATE_S5
-};
-
-#define SET_CFG_CMD 1
-
-enum int_status {
- INT_SPURIOUS = 0,
- INT_CMD_DONE = 1,
- INT_CMD_ERR = 2,
- INT_WAKE_RX = 3,
- INT_SS_ERROR = 4,
- INT_S0IX_MISS = 5,
- INT_NO_ACKC6 = 6,
- INT_INVALID = 7,
-};
-
-/* PMU register interface */
-static struct mrst_pmu_reg {
- u32 pm_sts; /* 0x00 */
- u32 pm_cmd; /* 0x04 */
- u32 pm_ics; /* 0x08 */
- u32 _resv1; /* 0x0C */
- u32 pm_wkc[2]; /* 0x10 */
- u32 pm_wks[2]; /* 0x18 */
- u32 pm_ssc[4]; /* 0x20 */
- u32 pm_sss[4]; /* 0x30 */
- u32 pm_wssc[4]; /* 0x40 */
- u32 pm_c3c4; /* 0x50 */
- u32 pm_c5c6; /* 0x54 */
- u32 pm_msi_disable; /* 0x58 */
-} *pmu_reg;
-
-static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); }
-static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); }
-static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); }
-static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); }
-
-static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
-static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
-static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
-static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
-static inline void pmu_write_wssc(u32 arg)
- { writel(arg, &pmu_reg->pm_wssc[0]); }
-
-static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
-static inline u32 pmu_msi_is_disabled(void)
- { return readl(&pmu_reg->pm_msi_disable); }
-
-union pmu_pm_ics {
- struct {
- u32 cause:8;
- u32 enable:1;
- u32 pending:1;
- u32 reserved:22;
- } bits;
- u32 value;
-};
-
-static inline void pmu_irq_enable(void)
-{
- union pmu_pm_ics pmu_ics;
-
- pmu_ics.value = pmu_read_ics();
- pmu_ics.bits.enable = 1;
- pmu_write_ics(pmu_ics.value);
-}
-
-union pmu_pm_status {
- struct {
- u32 pmu_rev:8;
- u32 pmu_busy:1;
- u32 mode_id:4;
- u32 Reserved:19;
- } pmu_status_parts;
- u32 pmu_status_value;
-};
-
-static inline int pmu_read_busy_status(void)
-{
- union pmu_pm_status result;
-
- result.pmu_status_value = pmu_read_sts();
-
- return result.pmu_status_parts.pmu_busy;
-}
-
-/* pmu set config parameters */
-struct cfg_delay_param_t {
- u32 cmd:8;
- u32 ioc:1;
- u32 cfg_mode:4;
- u32 mode_id:3;
- u32 sys_state:3;
- u32 cfg_delay:8;
- u32 rsvd:5;
-};
-
-struct cfg_trig_param_t {
- u32 cmd:8;
- u32 ioc:1;
- u32 cfg_mode:4;
- u32 mode_id:3;
- u32 sys_state:3;
- u32 cfg_trig_type:3;
- u32 cfg_trig_val:8;
- u32 cmbi:1;
- u32 rsvd1:1;
-};
-
-union pmu_pm_set_cfg_cmd_t {
- union {
- struct cfg_delay_param_t d_param;
- struct cfg_trig_param_t t_param;
- } pmu2_params;
- u32 pmu_pm_set_cfg_cmd_value;
-};
-
-#ifdef FUTURE_PATCH
-extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
-#else
-static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
-#endif
-#endif
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 2b235b7..23e5b9d 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -23,7 +23,66 @@
#define XO15_SCI_CLASS DRV_NAME
#define XO15_SCI_DEVICE_NAME "OLPC XO-1.5 SCI"
-static unsigned long xo15_sci_gpe;
+static unsigned long xo15_sci_gpe;
+static bool lid_wake_on_close;
+
+/*
+ * The normal ACPI LID wakeup behavior is wake-on-open, but not
+ * wake-on-close. This is implemented as standard by the XO-1.5 DSDT.
+ *
+ * We provide here a sysfs attribute that will additionally enable
+ * wake-on-close behavior. This is useful (e.g.) when we oportunistically
+ * suspend with the display running; if the lid is then closed, we want to
+ * wake up to turn the display off.
+ *
+ * This is controlled through a custom method in the XO-1.5 DSDT.
+ */
+static int set_lid_wake_behavior(bool wake_on_close)
+{
+ struct acpi_object_list arg_list;
+ union acpi_object arg;
+ acpi_status status;
+
+ arg_list.count = 1;
+ arg_list.pointer = &arg;
+ arg.type = ACPI_TYPE_INTEGER;
+ arg.integer.value = wake_on_close;
+
+ status = acpi_evaluate_object(NULL, "\\_SB.PCI0.LID.LIDW", &arg_list, NULL);
+ if (ACPI_FAILURE(status)) {
+ pr_warning(PFX "failed to set lid behavior\n");
+ return 1;
+ }
+
+ lid_wake_on_close = wake_on_close;
+
+ return 0;
+}
+
+static ssize_t
+lid_wake_on_close_show(struct kobject *s, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", lid_wake_on_close);
+}
+
+static ssize_t lid_wake_on_close_store(struct kobject *s,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned int val;
+
+ if (sscanf(buf, "%u", &val) != 1)
+ return -EINVAL;
+
+ set_lid_wake_behavior(!!val);
+
+ return n;
+}
+
+static struct kobj_attribute lid_wake_on_close_attr =
+ __ATTR(lid_wake_on_close, 0644,
+ lid_wake_on_close_show,
+ lid_wake_on_close_store);
static void battery_status_changed(void)
{
@@ -91,6 +150,7 @@ static int xo15_sci_add(struct acpi_device *device)
{
unsigned long long tmp;
acpi_status status;
+ int r;
if (!device)
return -EINVAL;
@@ -112,6 +172,10 @@ static int xo15_sci_add(struct acpi_device *device)
dev_info(&device->dev, "Initialized, GPE = 0x%lx\n", xo15_sci_gpe);
+ r = sysfs_create_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
+ if (r)
+ goto err_sysfs;
+
/* Flush queue, and enable all SCI events */
process_sci_queue();
olpc_ec_mask_write(EC_SCI_SRC_ALL);
@@ -123,6 +187,11 @@ static int xo15_sci_add(struct acpi_device *device)
device_init_wakeup(&device->dev, true);
return 0;
+
+err_sysfs:
+ acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
+ cancel_work_sync(&sci_work);
+ return r;
}
static int xo15_sci_remove(struct acpi_device *device, int type)
@@ -130,6 +199,7 @@ static int xo15_sci_remove(struct acpi_device *device, int type)
acpi_disable_gpe(NULL, xo15_sci_gpe);
acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
cancel_work_sync(&sci_work);
+ sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
return 0;
}
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 7e004ac..7a9ad30 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -17,8 +17,6 @@
/* Verify that the configuration block really is there */
#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
-#define NAME "scx200"
-
MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
MODULE_DESCRIPTION("NatSemi SCx200 Driver");
MODULE_LICENSE("GPL");
@@ -29,10 +27,10 @@ unsigned long scx200_gpio_shadow[2];
unsigned scx200_cb_base = 0;
static struct pci_device_id scx200_tbl[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
+ { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
+ { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
+ { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
+ { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
{ },
};
MODULE_DEVICE_TABLE(pci,scx200_tbl);
@@ -63,10 +61,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
base = pci_resource_start(pdev, 0);
- printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
+ pr_info("GPIO base 0x%x\n", base);
- if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) {
- printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
+ if (!request_region(base, SCx200_GPIO_SIZE,
+ "NatSemi SCx200 GPIO")) {
+ pr_err("can't allocate I/O for GPIOs\n");
return -EBUSY;
}
@@ -82,11 +81,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
if (scx200_cb_probe(base)) {
scx200_cb_base = base;
} else {
- printk(KERN_WARNING NAME ": Configuration Block not found\n");
+ pr_warn("Configuration Block not found\n");
return -ENODEV;
}
}
- printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
+ pr_info("Configuration Block base 0x%x\n", scx200_cb_base);
}
return 0;
@@ -111,8 +110,7 @@ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
static int __init scx200_init(void)
{
- printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
-
+ pr_info("NatSemi SCx200 Driver\n");
return pci_register_driver(&scx200_pci_driver);
}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 5b55219..3ae0e61 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -157,13 +157,14 @@ static int __init uvhub_to_first_apicid(int uvhub)
* clear of the Timeout bit (as well) will free the resource. No reply will
* be sent (the hardware will only do one reply per message).
*/
-static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp)
+static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
+ int do_acknowledge)
{
unsigned long dw;
struct bau_pq_entry *msg;
msg = mdp->msg;
- if (!msg->canceled) {
+ if (!msg->canceled && do_acknowledge) {
dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
write_mmr_sw_ack(dw);
}
@@ -212,8 +213,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
unsigned long mr;
/*
- * is the resource timed out?
- * make everyone ignore the cancelled message.
+ * Is the resource timed out?
+ * Make everyone ignore the cancelled message.
*/
msg2->canceled = 1;
stat->d_canceled++;
@@ -231,8 +232,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
* Do all the things a cpu should do for a TLB shootdown message.
* Other cpu's may come here at the same time for this message.
*/
-static void bau_process_message(struct msg_desc *mdp,
- struct bau_control *bcp)
+static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
+ int do_acknowledge)
{
short socket_ack_count = 0;
short *sp;
@@ -284,8 +285,9 @@ static void bau_process_message(struct msg_desc *mdp,
if (msg_ack_count == bcp->cpus_in_uvhub) {
/*
* All cpus in uvhub saw it; reply
+ * (unless we are in the UV2 workaround)
*/
- reply_to_message(mdp, bcp);
+ reply_to_message(mdp, bcp, do_acknowledge);
}
}
@@ -491,27 +493,138 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
/*
* UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
*/
-static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu)
+static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
{
unsigned long descriptor_status;
unsigned long descriptor_status2;
descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
- descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL;
+ descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
descriptor_status = (descriptor_status << 1) | descriptor_status2;
return descriptor_status;
}
+/*
+ * Return whether the status of the descriptor that is normally used for this
+ * cpu (the one indexed by its hub-relative cpu number) is busy.
+ * The status of the original 32 descriptors is always reflected in the 64
+ * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
+ * The bit provided by the activation_status_2 register is irrelevant to
+ * the status if it is only being tested for busy or not busy.
+ */
+int normal_busy(struct bau_control *bcp)
+{
+ int cpu = bcp->uvhub_cpu;
+ int mmr_offset;
+ int right_shift;
+
+ mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+ right_shift = cpu * UV_ACT_STATUS_SIZE;
+ return (((((read_lmmr(mmr_offset) >> right_shift) &
+ UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
+}
+
+/*
+ * Entered when a bau descriptor has gone into a permanent busy wait because
+ * of a hardware bug.
+ * Workaround the bug.
+ */
+int handle_uv2_busy(struct bau_control *bcp)
+{
+ int busy_one = bcp->using_desc;
+ int normal = bcp->uvhub_cpu;
+ int selected = -1;
+ int i;
+ unsigned long descriptor_status;
+ unsigned long status;
+ int mmr_offset;
+ struct bau_desc *bau_desc_old;
+ struct bau_desc *bau_desc_new;
+ struct bau_control *hmaster = bcp->uvhub_master;
+ struct ptc_stats *stat = bcp->statp;
+ cycles_t ttm;
+
+ stat->s_uv2_wars++;
+ spin_lock(&hmaster->uvhub_lock);
+ /* try for the original first */
+ if (busy_one != normal) {
+ if (!normal_busy(bcp))
+ selected = normal;
+ }
+ if (selected < 0) {
+ /* can't use the normal, select an alternate */
+ mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+ descriptor_status = read_lmmr(mmr_offset);
+
+ /* scan available descriptors 32-63 */
+ for (i = 0; i < UV_CPUS_PER_AS; i++) {
+ if ((hmaster->inuse_map & (1 << i)) == 0) {
+ status = ((descriptor_status >>
+ (i * UV_ACT_STATUS_SIZE)) &
+ UV_ACT_STATUS_MASK) << 1;
+ if (status != UV2H_DESC_BUSY) {
+ selected = i + UV_CPUS_PER_AS;
+ break;
+ }
+ }
+ }
+ }
+
+ if (busy_one != normal)
+ /* mark the busy alternate as not in-use */
+ hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
+
+ if (selected >= 0) {
+ /* switch to the selected descriptor */
+ if (selected != normal) {
+ /* set the selected alternate as in-use */
+ hmaster->inuse_map |=
+ (1 << (selected - UV_CPUS_PER_AS));
+ if (selected > stat->s_uv2_wars_hw)
+ stat->s_uv2_wars_hw = selected;
+ }
+ bau_desc_old = bcp->descriptor_base;
+ bau_desc_old += (ITEMS_PER_DESC * busy_one);
+ bcp->using_desc = selected;
+ bau_desc_new = bcp->descriptor_base;
+ bau_desc_new += (ITEMS_PER_DESC * selected);
+ *bau_desc_new = *bau_desc_old;
+ } else {
+ /*
+ * All are busy. Wait for the normal one for this cpu to
+ * free up.
+ */
+ stat->s_uv2_war_waits++;
+ spin_unlock(&hmaster->uvhub_lock);
+ ttm = get_cycles();
+ do {
+ cpu_relax();
+ } while (normal_busy(bcp));
+ spin_lock(&hmaster->uvhub_lock);
+ /* switch to the original descriptor */
+ bcp->using_desc = normal;
+ bau_desc_old = bcp->descriptor_base;
+ bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
+ bcp->using_desc = (ITEMS_PER_DESC * normal);
+ bau_desc_new = bcp->descriptor_base;
+ bau_desc_new += (ITEMS_PER_DESC * normal);
+ *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
+ }
+ spin_unlock(&hmaster->uvhub_lock);
+ return FLUSH_RETRY_BUSYBUG;
+}
+
static int uv2_wait_completion(struct bau_desc *bau_desc,
unsigned long mmr_offset, int right_shift,
struct bau_control *bcp, long try)
{
unsigned long descriptor_stat;
cycles_t ttm;
- int cpu = bcp->uvhub_cpu;
+ int desc = bcp->using_desc;
+ long busy_reps = 0;
struct ptc_stats *stat = bcp->statp;
- descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
+ descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
/* spin on the status MMR, waiting for it to go idle */
while (descriptor_stat != UV2H_DESC_IDLE) {
@@ -522,32 +635,35 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
* our message and its state will stay IDLE.
*/
if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
- (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) ||
(descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
stat->s_stimeout++;
return FLUSH_GIVEUP;
+ } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
+ stat->s_strongnacks++;
+ bcp->conseccompletes = 0;
+ return FLUSH_GIVEUP;
} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
stat->s_dtimeout++;
- ttm = get_cycles();
- /*
- * Our retries may be blocked by all destination
- * swack resources being consumed, and a timeout
- * pending. In that case hardware returns the
- * ERROR that looks like a destination timeout.
- */
- if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
- bcp->conseccompletes = 0;
- return FLUSH_RETRY_PLUGGED;
- }
bcp->conseccompletes = 0;
return FLUSH_RETRY_TIMEOUT;
} else {
+ busy_reps++;
+ if (busy_reps > 1000000) {
+ /* not to hammer on the clock */
+ busy_reps = 0;
+ ttm = get_cycles();
+ if ((ttm - bcp->send_message) >
+ (bcp->clocks_per_100_usec)) {
+ return handle_uv2_busy(bcp);
+ }
+ }
/*
* descriptor_stat is still BUSY
*/
cpu_relax();
}
- descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
+ descriptor_stat = uv2_read_status(mmr_offset, right_shift,
+ desc);
}
bcp->conseccompletes++;
return FLUSH_COMPLETE;
@@ -563,17 +679,17 @@ static int wait_completion(struct bau_desc *bau_desc,
{
int right_shift;
unsigned long mmr_offset;
- int cpu = bcp->uvhub_cpu;
+ int desc = bcp->using_desc;
- if (cpu < UV_CPUS_PER_AS) {
+ if (desc < UV_CPUS_PER_AS) {
mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
- right_shift = cpu * UV_ACT_STATUS_SIZE;
+ right_shift = desc * UV_ACT_STATUS_SIZE;
} else {
mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
- right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
+ right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
}
- if (is_uv1_hub())
+ if (bcp->uvhub_version == 1)
return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
bcp, try);
else
@@ -752,19 +868,22 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
* Returns 1 if it gives up entirely and the original cpu mask is to be
* returned to the kernel.
*/
-int uv_flush_send_and_wait(struct bau_desc *bau_desc,
- struct cpumask *flush_mask, struct bau_control *bcp)
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
{
int seq_number = 0;
int completion_stat = 0;
+ int uv1 = 0;
long try = 0;
unsigned long index;
cycles_t time1;
cycles_t time2;
struct ptc_stats *stat = bcp->statp;
struct bau_control *hmaster = bcp->uvhub_master;
+ struct uv1_bau_msg_header *uv1_hdr = NULL;
+ struct uv2_bau_msg_header *uv2_hdr = NULL;
+ struct bau_desc *bau_desc;
- if (is_uv1_hub())
+ if (bcp->uvhub_version == 1)
uv1_throttle(hmaster, stat);
while (hmaster->uvhub_quiesce)
@@ -772,22 +891,39 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
time1 = get_cycles();
do {
- if (try == 0) {
- bau_desc->header.msg_type = MSG_REGULAR;
+ bau_desc = bcp->descriptor_base;
+ bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+ if (bcp->uvhub_version == 1) {
+ uv1 = 1;
+ uv1_hdr = &bau_desc->header.uv1_hdr;
+ } else
+ uv2_hdr = &bau_desc->header.uv2_hdr;
+ if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
+ if (uv1)
+ uv1_hdr->msg_type = MSG_REGULAR;
+ else
+ uv2_hdr->msg_type = MSG_REGULAR;
seq_number = bcp->message_number++;
} else {
- bau_desc->header.msg_type = MSG_RETRY;
+ if (uv1)
+ uv1_hdr->msg_type = MSG_RETRY;
+ else
+ uv2_hdr->msg_type = MSG_RETRY;
stat->s_retry_messages++;
}
- bau_desc->header.sequence = seq_number;
- index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
+ if (uv1)
+ uv1_hdr->sequence = seq_number;
+ else
+ uv2_hdr->sequence = seq_number;
+ index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
bcp->send_message = get_cycles();
write_mmr_activation(index);
try++;
completion_stat = wait_completion(bau_desc, bcp, try);
+ /* UV2: wait_completion() may change the bcp->using_desc */
handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
@@ -798,6 +934,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
}
cpu_relax();
} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
+ (completion_stat == FLUSH_RETRY_BUSYBUG) ||
(completion_stat == FLUSH_RETRY_TIMEOUT));
time2 = get_cycles();
@@ -812,6 +949,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
record_send_stats(time1, time2, bcp, stat, completion_stat, try);
if (completion_stat == FLUSH_GIVEUP)
+ /* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
return 1;
return 0;
}
@@ -967,7 +1105,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
stat->s_ntargself++;
bau_desc = bcp->descriptor_base;
- bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu;
+ bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
return NULL;
@@ -980,13 +1118,86 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
* uv_flush_send_and_wait returns 0 if all cpu's were messaged,
* or 1 if it gave up and the original cpumask should be returned.
*/
- if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
+ if (!uv_flush_send_and_wait(flush_mask, bcp))
return NULL;
else
return cpumask;
}
/*
+ * Search the message queue for any 'other' message with the same software
+ * acknowledge resource bit vector.
+ */
+struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
+ struct bau_control *bcp, unsigned char swack_vec)
+{
+ struct bau_pq_entry *msg_next = msg + 1;
+
+ if (msg_next > bcp->queue_last)
+ msg_next = bcp->queue_first;
+ while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
+ if (msg_next->swack_vec == swack_vec)
+ return msg_next;
+ msg_next++;
+ if (msg_next > bcp->queue_last)
+ msg_next = bcp->queue_first;
+ }
+ return NULL;
+}
+
+/*
+ * UV2 needs to work around a bug in which an arriving message has not
+ * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
+ * Such a message must be ignored.
+ */
+void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
+{
+ unsigned long mmr_image;
+ unsigned char swack_vec;
+ struct bau_pq_entry *msg = mdp->msg;
+ struct bau_pq_entry *other_msg;
+
+ mmr_image = read_mmr_sw_ack();
+ swack_vec = msg->swack_vec;
+
+ if ((swack_vec & mmr_image) == 0) {
+ /*
+ * This message was assigned a swack resource, but no
+ * reserved acknowlegment is pending.
+ * The bug has prevented this message from setting the MMR.
+ * And no other message has used the same sw_ack resource.
+ * Do the requested shootdown but do not reply to the msg.
+ * (the 0 means make no acknowledge)
+ */
+ bau_process_message(mdp, bcp, 0);
+ return;
+ }
+
+ /*
+ * Some message has set the MMR 'pending' bit; it might have been
+ * another message. Look for that message.
+ */
+ other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
+ if (other_msg) {
+ /* There is another. Do not ack the current one. */
+ bau_process_message(mdp, bcp, 0);
+ /*
+ * Let the natural processing of that message acknowledge
+ * it. Don't get the processing of sw_ack's out of order.
+ */
+ return;
+ }
+
+ /*
+ * There is no other message using this sw_ack, so it is safe to
+ * acknowledge it.
+ */
+ bau_process_message(mdp, bcp, 1);
+
+ return;
+}
+
+/*
* The BAU message interrupt comes here. (registered by set_intr_gate)
* See entry_64.S
*
@@ -1009,6 +1220,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
struct ptc_stats *stat;
struct msg_desc msgdesc;
+ ack_APIC_irq();
time_start = get_cycles();
bcp = &per_cpu(bau_control, smp_processor_id());
@@ -1022,9 +1234,11 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
count++;
msgdesc.msg_slot = msg - msgdesc.queue_first;
- msgdesc.swack_slot = ffs(msg->swack_vec) - 1;
msgdesc.msg = msg;
- bau_process_message(&msgdesc, bcp);
+ if (bcp->uvhub_version == 2)
+ process_uv2_message(&msgdesc, bcp);
+ else
+ bau_process_message(&msgdesc, bcp, 1);
msg++;
if (msg > msgdesc.queue_last)
@@ -1036,8 +1250,6 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
stat->d_nomsg++;
else if (count > 1)
stat->d_multmsg++;
-
- ack_APIC_irq();
}
/*
@@ -1083,7 +1295,7 @@ static void __init enable_timeouts(void)
*/
mmr_image |= (1L << SOFTACK_MSHIFT);
if (is_uv2_hub()) {
- mmr_image |= (1L << UV2_LEG_SHFT);
+ mmr_image &= ~(1L << UV2_LEG_SHFT);
mmr_image |= (1L << UV2_EXT_SHFT);
}
write_mmr_misc_control(pnode, mmr_image);
@@ -1136,13 +1348,13 @@ static int ptc_seq_show(struct seq_file *file, void *data)
seq_printf(file,
"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
seq_printf(file,
- "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok ");
+ "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
seq_printf(file,
"resetp resett giveup sto bz throt swack recv rtime ");
seq_printf(file,
"all one mult none retry canc nocan reset rcan ");
seq_printf(file,
- "disable enable\n");
+ "disable enable wars warshw warwaits\n");
}
if (cpu < num_possible_cpus() && cpu_online(cpu)) {
stat = &per_cpu(ptcstats, cpu);
@@ -1154,10 +1366,10 @@ static int ptc_seq_show(struct seq_file *file, void *data)
stat->s_ntargremotes, stat->s_ntargcpu,
stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
stat->s_ntarguvhub, stat->s_ntarguvhub16);
- seq_printf(file, "%ld %ld %ld %ld %ld ",
+ seq_printf(file, "%ld %ld %ld %ld %ld %ld ",
stat->s_ntarguvhub8, stat->s_ntarguvhub4,
stat->s_ntarguvhub2, stat->s_ntarguvhub1,
- stat->s_dtimeout);
+ stat->s_dtimeout, stat->s_strongnacks);
seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
stat->s_retry_messages, stat->s_retriesok,
stat->s_resets_plug, stat->s_resets_timeout,
@@ -1173,8 +1385,10 @@ static int ptc_seq_show(struct seq_file *file, void *data)
stat->d_nomsg, stat->d_retries, stat->d_canceled,
stat->d_nocanceled, stat->d_resets,
stat->d_rcanceled);
- seq_printf(file, "%ld %ld\n",
- stat->s_bau_disabled, stat->s_bau_reenabled);
+ seq_printf(file, "%ld %ld %ld %ld %ld\n",
+ stat->s_bau_disabled, stat->s_bau_reenabled,
+ stat->s_uv2_wars, stat->s_uv2_wars_hw,
+ stat->s_uv2_war_waits);
}
return 0;
}
@@ -1432,12 +1646,15 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
{
int i;
int cpu;
+ int uv1 = 0;
unsigned long gpa;
unsigned long m;
unsigned long n;
size_t dsize;
struct bau_desc *bau_desc;
struct bau_desc *bd2;
+ struct uv1_bau_msg_header *uv1_hdr;
+ struct uv2_bau_msg_header *uv2_hdr;
struct bau_control *bcp;
/*
@@ -1451,6 +1668,8 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
gpa = uv_gpa(bau_desc);
n = uv_gpa_to_gnode(gpa);
m = uv_gpa_to_offset(gpa);
+ if (is_uv1_hub())
+ uv1 = 1;
/* the 14-bit pnode */
write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
@@ -1461,21 +1680,33 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
*/
for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
memset(bd2, 0, sizeof(struct bau_desc));
- bd2->header.swack_flag = 1;
- /*
- * The base_dest_nasid set in the message header is the nasid
- * of the first uvhub in the partition. The bit map will
- * indicate destination pnode numbers relative to that base.
- * They may not be consecutive if nasid striding is being used.
- */
- bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
- bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
- bd2->header.command = UV_NET_ENDPOINT_INTD;
- bd2->header.int_both = 1;
- /*
- * all others need to be set to zero:
- * fairness chaining multilevel count replied_to
- */
+ if (uv1) {
+ uv1_hdr = &bd2->header.uv1_hdr;
+ uv1_hdr->swack_flag = 1;
+ /*
+ * The base_dest_nasid set in the message header
+ * is the nasid of the first uvhub in the partition.
+ * The bit map will indicate destination pnode numbers
+ * relative to that base. They may not be consecutive
+ * if nasid striding is being used.
+ */
+ uv1_hdr->base_dest_nasid =
+ UV_PNODE_TO_NASID(base_pnode);
+ uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID;
+ uv1_hdr->command = UV_NET_ENDPOINT_INTD;
+ uv1_hdr->int_both = 1;
+ /*
+ * all others need to be set to zero:
+ * fairness chaining multilevel count replied_to
+ */
+ } else {
+ uv2_hdr = &bd2->header.uv2_hdr;
+ uv2_hdr->swack_flag = 1;
+ uv2_hdr->base_dest_nasid =
+ UV_PNODE_TO_NASID(base_pnode);
+ uv2_hdr->dest_subnodeid = UV_LB_SUBNODEID;
+ uv2_hdr->command = UV_NET_ENDPOINT_INTD;
+ }
}
for_each_present_cpu(cpu) {
if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
@@ -1531,6 +1762,7 @@ static void pq_init(int node, int pnode)
write_mmr_payload_first(pnode, pn_first);
write_mmr_payload_tail(pnode, first);
write_mmr_payload_last(pnode, last);
+ write_gmmr_sw_ack(pnode, 0xffffUL);
/* in effect, all msg_type's are set to MSG_NOOP */
memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
@@ -1584,14 +1816,14 @@ static int calculate_destination_timeout(void)
ts_ns = base * mult1 * mult2;
ret = ts_ns / 1000;
} else {
- /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */
- mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
+ /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */
+ mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
- mult1 = 80;
+ base = 80;
else
- mult1 = 10;
- base = mmr_image & UV2_ACK_MASK;
+ base = 10;
+ mult1 = mmr_image & UV2_ACK_MASK;
ret = mult1 * base;
}
return ret;
@@ -1618,6 +1850,9 @@ static void __init init_per_cpu_tunables(void)
bcp->cong_response_us = congested_respns_us;
bcp->cong_reps = congested_reps;
bcp->cong_period = congested_period;
+ bcp->clocks_per_100_usec = usec_2_cycles(100);
+ spin_lock_init(&bcp->queue_lock);
+ spin_lock_init(&bcp->uvhub_lock);
}
}
@@ -1728,8 +1963,17 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
bcp->cpus_in_socket = sdp->num_cpus;
bcp->socket_master = *smasterp;
bcp->uvhub = bdp->uvhub;
+ if (is_uv1_hub())
+ bcp->uvhub_version = 1;
+ else if (is_uv2_hub())
+ bcp->uvhub_version = 2;
+ else {
+ printk(KERN_EMERG "uvhub version not 1 or 2\n");
+ return 1;
+ }
bcp->uvhub_master = *hmasterp;
bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ bcp->using_desc = bcp->uvhub_cpu;
if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
printk(KERN_EMERG "%d cpus per uvhub invalid\n",
bcp->uvhub_cpu);
@@ -1845,6 +2089,8 @@ static int __init uv_bau_init(void)
uv_base_pnode = uv_blade_to_pnode(uvhub);
}
+ enable_timeouts();
+
if (init_per_cpu(nuvhubs, uv_base_pnode)) {
nobau = 1;
return 0;
@@ -1855,7 +2101,6 @@ static int __init uv_bau_init(void)
if (uv_blade_nr_possible_cpus(uvhub))
init_uvhub(uvhub, vector, uv_base_pnode);
- enable_timeouts();
alloc_intr_gate(vector, uv_bau_message_intr1);
for_each_possible_blade(uvhub) {
@@ -1867,7 +2112,8 @@ static int __init uv_bau_init(void)
val = 1L << 63;
write_gmmr_activation(pnode, val);
mmr = 1; /* should be 1 to broadcast to both sockets */
- write_mmr_data_broadcast(pnode, mmr);
+ if (!is_uv1_hub())
+ write_mmr_data_broadcast(pnode, mmr);
}
}
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 374a05d..f25c276 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -25,7 +25,7 @@ struct uv_irq_2_mmr_pnode{
int irq;
};
-static spinlock_t uv_irq_lock;
+static DEFINE_SPINLOCK(uv_irq_lock);
static struct rb_root uv_irq_root;
static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 9f29a01..5032e0d 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -37,7 +37,7 @@ static void uv_rtc_timer_setup(enum clock_event_mode,
static struct clocksource clocksource_uv = {
.name = RTC_NAME,
- .rating = 400,
+ .rating = 299,
.read = uv_read_rtc,
.mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -379,10 +379,6 @@ static __init int uv_rtc_setup_clock(void)
if (!is_uv_system())
return -ENODEV;
- /* If single blade, prefer tsc */
- if (uv_num_possible_blades() == 1)
- clocksource_uv.rating = 250;
-
rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
if (rc)
printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index f10c0af..4889655 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -20,6 +20,7 @@
#include <asm/xcr.h>
#include <asm/suspend.h>
#include <asm/debugreg.h>
+#include <asm/fpu-internal.h> /* pcntxt_mask */
#ifdef CONFIG_X86_32
static struct saved_context saved_context;
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
new file mode 100644
index 0000000..564b247
--- /dev/null
+++ b/arch/x86/syscalls/Makefile
@@ -0,0 +1,43 @@
+out := $(obj)/../include/generated/asm
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+syscall32 := $(srctree)/$(src)/syscall_32.tbl
+syscall64 := $(srctree)/$(src)/syscall_64.tbl
+
+syshdr := $(srctree)/$(src)/syscallhdr.sh
+systbl := $(srctree)/$(src)/syscalltbl.sh
+
+quiet_cmd_syshdr = SYSHDR $@
+ cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' $< $@ \
+ $(syshdr_abi_$(basetarget)) $(syshdr_pfx_$(basetarget))
+quiet_cmd_systbl = SYSTBL $@
+ cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
+
+syshdr_abi_unistd_32 := i386
+$(out)/unistd_32.h: $(syscall32) $(syshdr)
+ $(call if_changed,syshdr)
+
+syshdr_abi_unistd_32_ia32 := i386
+syshdr_pfx_unistd_32_ia32 := ia32_
+$(out)/unistd_32_ia32.h: $(syscall32) $(syshdr)
+ $(call if_changed,syshdr)
+
+syshdr_abi_unistd_64 := 64
+$(out)/unistd_64.h: $(syscall64) $(syshdr)
+ $(call if_changed,syshdr)
+
+$(out)/syscalls_32.h: $(syscall32) $(systbl)
+ $(call if_changed,systbl)
+$(out)/syscalls_64.h: $(syscall64) $(systbl)
+ $(call if_changed,systbl)
+
+syshdr-y += unistd_32.h unistd_64.h
+syshdr-y += syscalls_32.h
+syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h
+syshdr-$(CONFIG_X86_64) += syscalls_64.h
+
+targets += $(syshdr-y)
+
+all: $(addprefix $(out)/,$(targets))
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
new file mode 100644
index 0000000..ce98e28
--- /dev/null
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -0,0 +1,357 @@
+#
+# 32-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point> <compat entry point>
+#
+# The abi is always "i386" for this file.
+#
+0 i386 restart_syscall sys_restart_syscall
+1 i386 exit sys_exit
+2 i386 fork ptregs_fork stub32_fork
+3 i386 read sys_read
+4 i386 write sys_write
+5 i386 open sys_open compat_sys_open
+6 i386 close sys_close
+7 i386 waitpid sys_waitpid sys32_waitpid
+8 i386 creat sys_creat
+9 i386 link sys_link
+10 i386 unlink sys_unlink
+11 i386 execve ptregs_execve stub32_execve
+12 i386 chdir sys_chdir
+13 i386 time sys_time compat_sys_time
+14 i386 mknod sys_mknod
+15 i386 chmod sys_chmod
+16 i386 lchown sys_lchown16
+17 i386 break
+18 i386 oldstat sys_stat
+19 i386 lseek sys_lseek sys32_lseek
+20 i386 getpid sys_getpid
+21 i386 mount sys_mount compat_sys_mount
+22 i386 umount sys_oldumount
+23 i386 setuid sys_setuid16
+24 i386 getuid sys_getuid16
+25 i386 stime sys_stime compat_sys_stime
+26 i386 ptrace sys_ptrace compat_sys_ptrace
+27 i386 alarm sys_alarm
+28 i386 oldfstat sys_fstat
+29 i386 pause sys_pause
+30 i386 utime sys_utime compat_sys_utime
+31 i386 stty
+32 i386 gtty
+33 i386 access sys_access
+34 i386 nice sys_nice
+35 i386 ftime
+36 i386 sync sys_sync
+37 i386 kill sys_kill sys32_kill
+38 i386 rename sys_rename
+39 i386 mkdir sys_mkdir
+40 i386 rmdir sys_rmdir
+41 i386 dup sys_dup
+42 i386 pipe sys_pipe
+43 i386 times sys_times compat_sys_times
+44 i386 prof
+45 i386 brk sys_brk
+46 i386 setgid sys_setgid16
+47 i386 getgid sys_getgid16
+48 i386 signal sys_signal
+49 i386 geteuid sys_geteuid16
+50 i386 getegid sys_getegid16
+51 i386 acct sys_acct
+52 i386 umount2 sys_umount
+53 i386 lock
+54 i386 ioctl sys_ioctl compat_sys_ioctl
+55 i386 fcntl sys_fcntl compat_sys_fcntl64
+56 i386 mpx
+57 i386 setpgid sys_setpgid
+58 i386 ulimit
+59 i386 oldolduname sys_olduname
+60 i386 umask sys_umask
+61 i386 chroot sys_chroot
+62 i386 ustat sys_ustat compat_sys_ustat
+63 i386 dup2 sys_dup2
+64 i386 getppid sys_getppid
+65 i386 getpgrp sys_getpgrp
+66 i386 setsid sys_setsid
+67 i386 sigaction sys_sigaction sys32_sigaction
+68 i386 sgetmask sys_sgetmask
+69 i386 ssetmask sys_ssetmask
+70 i386 setreuid sys_setreuid16
+71 i386 setregid sys_setregid16
+72 i386 sigsuspend sys_sigsuspend sys32_sigsuspend
+73 i386 sigpending sys_sigpending compat_sys_sigpending
+74 i386 sethostname sys_sethostname
+75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
+76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
+77 i386 getrusage sys_getrusage compat_sys_getrusage
+78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 i386 settimeofday sys_settimeofday compat_sys_settimeofday
+80 i386 getgroups sys_getgroups16
+81 i386 setgroups sys_setgroups16
+82 i386 select sys_old_select compat_sys_old_select
+83 i386 symlink sys_symlink
+84 i386 oldlstat sys_lstat
+85 i386 readlink sys_readlink
+86 i386 uselib sys_uselib
+87 i386 swapon sys_swapon
+88 i386 reboot sys_reboot
+89 i386 readdir sys_old_readdir compat_sys_old_readdir
+90 i386 mmap sys_old_mmap sys32_mmap
+91 i386 munmap sys_munmap
+92 i386 truncate sys_truncate
+93 i386 ftruncate sys_ftruncate
+94 i386 fchmod sys_fchmod
+95 i386 fchown sys_fchown16
+96 i386 getpriority sys_getpriority
+97 i386 setpriority sys_setpriority
+98 i386 profil
+99 i386 statfs sys_statfs compat_sys_statfs
+100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
+101 i386 ioperm sys_ioperm
+102 i386 socketcall sys_socketcall compat_sys_socketcall
+103 i386 syslog sys_syslog
+104 i386 setitimer sys_setitimer compat_sys_setitimer
+105 i386 getitimer sys_getitimer compat_sys_getitimer
+106 i386 stat sys_newstat compat_sys_newstat
+107 i386 lstat sys_newlstat compat_sys_newlstat
+108 i386 fstat sys_newfstat compat_sys_newfstat
+109 i386 olduname sys_uname
+110 i386 iopl ptregs_iopl stub32_iopl
+111 i386 vhangup sys_vhangup
+112 i386 idle
+113 i386 vm86old ptregs_vm86old sys32_vm86_warning
+114 i386 wait4 sys_wait4 compat_sys_wait4
+115 i386 swapoff sys_swapoff
+116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
+117 i386 ipc sys_ipc sys32_ipc
+118 i386 fsync sys_fsync
+119 i386 sigreturn ptregs_sigreturn stub32_sigreturn
+120 i386 clone ptregs_clone stub32_clone
+121 i386 setdomainname sys_setdomainname
+122 i386 uname sys_newuname
+123 i386 modify_ldt sys_modify_ldt
+124 i386 adjtimex sys_adjtimex compat_sys_adjtimex
+125 i386 mprotect sys_mprotect sys32_mprotect
+126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
+127 i386 create_module
+128 i386 init_module sys_init_module
+129 i386 delete_module sys_delete_module
+130 i386 get_kernel_syms
+131 i386 quotactl sys_quotactl sys32_quotactl
+132 i386 getpgid sys_getpgid
+133 i386 fchdir sys_fchdir
+134 i386 bdflush sys_bdflush
+135 i386 sysfs sys_sysfs
+136 i386 personality sys_personality
+137 i386 afs_syscall
+138 i386 setfsuid sys_setfsuid16
+139 i386 setfsgid sys_setfsgid16
+140 i386 _llseek sys_llseek
+141 i386 getdents sys_getdents compat_sys_getdents
+142 i386 _newselect sys_select compat_sys_select
+143 i386 flock sys_flock
+144 i386 msync sys_msync
+145 i386 readv sys_readv compat_sys_readv
+146 i386 writev sys_writev compat_sys_writev
+147 i386 getsid sys_getsid
+148 i386 fdatasync sys_fdatasync
+149 i386 _sysctl sys_sysctl compat_sys_sysctl
+150 i386 mlock sys_mlock
+151 i386 munlock sys_munlock
+152 i386 mlockall sys_mlockall
+153 i386 munlockall sys_munlockall
+154 i386 sched_setparam sys_sched_setparam
+155 i386 sched_getparam sys_sched_getparam
+156 i386 sched_setscheduler sys_sched_setscheduler
+157 i386 sched_getscheduler sys_sched_getscheduler
+158 i386 sched_yield sys_sched_yield
+159 i386 sched_get_priority_max sys_sched_get_priority_max
+160 i386 sched_get_priority_min sys_sched_get_priority_min
+161 i386 sched_rr_get_interval sys_sched_rr_get_interval sys32_sched_rr_get_interval
+162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
+163 i386 mremap sys_mremap
+164 i386 setresuid sys_setresuid16
+165 i386 getresuid sys_getresuid16
+166 i386 vm86 ptregs_vm86 sys32_vm86_warning
+167 i386 query_module
+168 i386 poll sys_poll
+169 i386 nfsservctl
+170 i386 setresgid sys_setresgid16
+171 i386 getresgid sys_getresgid16
+172 i386 prctl sys_prctl
+173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn
+174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction
+175 i386 rt_sigprocmask sys_rt_sigprocmask sys32_rt_sigprocmask
+176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending
+177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
+178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo
+179 i386 rt_sigsuspend sys_rt_sigsuspend
+180 i386 pread64 sys_pread64 sys32_pread
+181 i386 pwrite64 sys_pwrite64 sys32_pwrite
+182 i386 chown sys_chown16
+183 i386 getcwd sys_getcwd
+184 i386 capget sys_capget
+185 i386 capset sys_capset
+186 i386 sigaltstack ptregs_sigaltstack stub32_sigaltstack
+187 i386 sendfile sys_sendfile sys32_sendfile
+188 i386 getpmsg
+189 i386 putpmsg
+190 i386 vfork ptregs_vfork stub32_vfork
+191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
+192 i386 mmap2 sys_mmap_pgoff
+193 i386 truncate64 sys_truncate64 sys32_truncate64
+194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
+195 i386 stat64 sys_stat64 sys32_stat64
+196 i386 lstat64 sys_lstat64 sys32_lstat64
+197 i386 fstat64 sys_fstat64 sys32_fstat64
+198 i386 lchown32 sys_lchown
+199 i386 getuid32 sys_getuid
+200 i386 getgid32 sys_getgid
+201 i386 geteuid32 sys_geteuid
+202 i386 getegid32 sys_getegid
+203 i386 setreuid32 sys_setreuid
+204 i386 setregid32 sys_setregid
+205 i386 getgroups32 sys_getgroups
+206 i386 setgroups32 sys_setgroups
+207 i386 fchown32 sys_fchown
+208 i386 setresuid32 sys_setresuid
+209 i386 getresuid32 sys_getresuid
+210 i386 setresgid32 sys_setresgid
+211 i386 getresgid32 sys_getresgid
+212 i386 chown32 sys_chown
+213 i386 setuid32 sys_setuid
+214 i386 setgid32 sys_setgid
+215 i386 setfsuid32 sys_setfsuid
+216 i386 setfsgid32 sys_setfsgid
+217 i386 pivot_root sys_pivot_root
+218 i386 mincore sys_mincore
+219 i386 madvise sys_madvise
+220 i386 getdents64 sys_getdents64 compat_sys_getdents64
+221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
+# 222 is unused
+# 223 is unused
+224 i386 gettid sys_gettid
+225 i386 readahead sys_readahead sys32_readahead
+226 i386 setxattr sys_setxattr
+227 i386 lsetxattr sys_lsetxattr
+228 i386 fsetxattr sys_fsetxattr
+229 i386 getxattr sys_getxattr
+230 i386 lgetxattr sys_lgetxattr
+231 i386 fgetxattr sys_fgetxattr
+232 i386 listxattr sys_listxattr
+233 i386 llistxattr sys_llistxattr
+234 i386 flistxattr sys_flistxattr
+235 i386 removexattr sys_removexattr
+236 i386 lremovexattr sys_lremovexattr
+237 i386 fremovexattr sys_fremovexattr
+238 i386 tkill sys_tkill
+239 i386 sendfile64 sys_sendfile64
+240 i386 futex sys_futex compat_sys_futex
+241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+243 i386 set_thread_area sys_set_thread_area
+244 i386 get_thread_area sys_get_thread_area
+245 i386 io_setup sys_io_setup compat_sys_io_setup
+246 i386 io_destroy sys_io_destroy
+247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
+248 i386 io_submit sys_io_submit compat_sys_io_submit
+249 i386 io_cancel sys_io_cancel
+250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
+# 251 is available for reuse (was briefly sys_set_zone_reclaim)
+252 i386 exit_group sys_exit_group
+253 i386 lookup_dcookie sys_lookup_dcookie sys32_lookup_dcookie
+254 i386 epoll_create sys_epoll_create
+255 i386 epoll_ctl sys_epoll_ctl
+256 i386 epoll_wait sys_epoll_wait
+257 i386 remap_file_pages sys_remap_file_pages
+258 i386 set_tid_address sys_set_tid_address
+259 i386 timer_create sys_timer_create compat_sys_timer_create
+260 i386 timer_settime sys_timer_settime compat_sys_timer_settime
+261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime
+262 i386 timer_getoverrun sys_timer_getoverrun
+263 i386 timer_delete sys_timer_delete
+264 i386 clock_settime sys_clock_settime compat_sys_clock_settime
+265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime
+266 i386 clock_getres sys_clock_getres compat_sys_clock_getres
+267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
+268 i386 statfs64 sys_statfs64 compat_sys_statfs64
+269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+270 i386 tgkill sys_tgkill
+271 i386 utimes sys_utimes compat_sys_utimes
+272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
+273 i386 vserver
+274 i386 mbind sys_mbind
+275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+276 i386 set_mempolicy sys_set_mempolicy
+277 i386 mq_open sys_mq_open compat_sys_mq_open
+278 i386 mq_unlink sys_mq_unlink
+279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
+280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
+281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
+282 i386 mq_getsetaddr sys_mq_getsetattr compat_sys_mq_getsetattr
+283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
+284 i386 waitid sys_waitid compat_sys_waitid
+# 285 sys_setaltroot
+286 i386 add_key sys_add_key
+287 i386 request_key sys_request_key
+288 i386 keyctl sys_keyctl
+289 i386 ioprio_set sys_ioprio_set
+290 i386 ioprio_get sys_ioprio_get
+291 i386 inotify_init sys_inotify_init
+292 i386 inotify_add_watch sys_inotify_add_watch
+293 i386 inotify_rm_watch sys_inotify_rm_watch
+294 i386 migrate_pages sys_migrate_pages
+295 i386 openat sys_openat compat_sys_openat
+296 i386 mkdirat sys_mkdirat
+297 i386 mknodat sys_mknodat
+298 i386 fchownat sys_fchownat
+299 i386 futimesat sys_futimesat compat_sys_futimesat
+300 i386 fstatat64 sys_fstatat64 sys32_fstatat
+301 i386 unlinkat sys_unlinkat
+302 i386 renameat sys_renameat
+303 i386 linkat sys_linkat
+304 i386 symlinkat sys_symlinkat
+305 i386 readlinkat sys_readlinkat
+306 i386 fchmodat sys_fchmodat
+307 i386 faccessat sys_faccessat
+308 i386 pselect6 sys_pselect6 compat_sys_pselect6
+309 i386 ppoll sys_ppoll compat_sys_ppoll
+310 i386 unshare sys_unshare
+311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
+312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
+313 i386 splice sys_splice
+314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
+315 i386 tee sys_tee
+316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
+317 i386 move_pages sys_move_pages compat_sys_move_pages
+318 i386 getcpu sys_getcpu
+319 i386 epoll_pwait sys_epoll_pwait
+320 i386 utimensat sys_utimensat compat_sys_utimensat
+321 i386 signalfd sys_signalfd compat_sys_signalfd
+322 i386 timerfd_create sys_timerfd_create
+323 i386 eventfd sys_eventfd
+324 i386 fallocate sys_fallocate sys32_fallocate
+325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
+326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
+327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
+328 i386 eventfd2 sys_eventfd2
+329 i386 epoll_create1 sys_epoll_create1
+330 i386 dup3 sys_dup3
+331 i386 pipe2 sys_pipe2
+332 i386 inotify_init1 sys_inotify_init1
+333 i386 preadv sys_preadv compat_sys_preadv
+334 i386 pwritev sys_pwritev compat_sys_pwritev
+335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+336 i386 perf_event_open sys_perf_event_open
+337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg
+338 i386 fanotify_init sys_fanotify_init
+339 i386 fanotify_mark sys_fanotify_mark sys32_fanotify_mark
+340 i386 prlimit64 sys_prlimit64
+341 i386 name_to_handle_at sys_name_to_handle_at
+342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
+344 i386 syncfs sys_syncfs
+345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
+346 i386 setns sys_setns
+347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
+348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
new file mode 100644
index 0000000..b440a8f
--- /dev/null
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -0,0 +1,320 @@
+#
+# 64-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The abi is always "64" for this file (for now.)
+#
+0 64 read sys_read
+1 64 write sys_write
+2 64 open sys_open
+3 64 close sys_close
+4 64 stat sys_newstat
+5 64 fstat sys_newfstat
+6 64 lstat sys_newlstat
+7 64 poll sys_poll
+8 64 lseek sys_lseek
+9 64 mmap sys_mmap
+10 64 mprotect sys_mprotect
+11 64 munmap sys_munmap
+12 64 brk sys_brk
+13 64 rt_sigaction sys_rt_sigaction
+14 64 rt_sigprocmask sys_rt_sigprocmask
+15 64 rt_sigreturn stub_rt_sigreturn
+16 64 ioctl sys_ioctl
+17 64 pread64 sys_pread64
+18 64 pwrite64 sys_pwrite64
+19 64 readv sys_readv
+20 64 writev sys_writev
+21 64 access sys_access
+22 64 pipe sys_pipe
+23 64 select sys_select
+24 64 sched_yield sys_sched_yield
+25 64 mremap sys_mremap
+26 64 msync sys_msync
+27 64 mincore sys_mincore
+28 64 madvise sys_madvise
+29 64 shmget sys_shmget
+30 64 shmat sys_shmat
+31 64 shmctl sys_shmctl
+32 64 dup sys_dup
+33 64 dup2 sys_dup2
+34 64 pause sys_pause
+35 64 nanosleep sys_nanosleep
+36 64 getitimer sys_getitimer
+37 64 alarm sys_alarm
+38 64 setitimer sys_setitimer
+39 64 getpid sys_getpid
+40 64 sendfile sys_sendfile64
+41 64 socket sys_socket
+42 64 connect sys_connect
+43 64 accept sys_accept
+44 64 sendto sys_sendto
+45 64 recvfrom sys_recvfrom
+46 64 sendmsg sys_sendmsg
+47 64 recvmsg sys_recvmsg
+48 64 shutdown sys_shutdown
+49 64 bind sys_bind
+50 64 listen sys_listen
+51 64 getsockname sys_getsockname
+52 64 getpeername sys_getpeername
+53 64 socketpair sys_socketpair
+54 64 setsockopt sys_setsockopt
+55 64 getsockopt sys_getsockopt
+56 64 clone stub_clone
+57 64 fork stub_fork
+58 64 vfork stub_vfork
+59 64 execve stub_execve
+60 64 exit sys_exit
+61 64 wait4 sys_wait4
+62 64 kill sys_kill
+63 64 uname sys_newuname
+64 64 semget sys_semget
+65 64 semop sys_semop
+66 64 semctl sys_semctl
+67 64 shmdt sys_shmdt
+68 64 msgget sys_msgget
+69 64 msgsnd sys_msgsnd
+70 64 msgrcv sys_msgrcv
+71 64 msgctl sys_msgctl
+72 64 fcntl sys_fcntl
+73 64 flock sys_flock
+74 64 fsync sys_fsync
+75 64 fdatasync sys_fdatasync
+76 64 truncate sys_truncate
+77 64 ftruncate sys_ftruncate
+78 64 getdents sys_getdents
+79 64 getcwd sys_getcwd
+80 64 chdir sys_chdir
+81 64 fchdir sys_fchdir
+82 64 rename sys_rename
+83 64 mkdir sys_mkdir
+84 64 rmdir sys_rmdir
+85 64 creat sys_creat
+86 64 link sys_link
+87 64 unlink sys_unlink
+88 64 symlink sys_symlink
+89 64 readlink sys_readlink
+90 64 chmod sys_chmod
+91 64 fchmod sys_fchmod
+92 64 chown sys_chown
+93 64 fchown sys_fchown
+94 64 lchown sys_lchown
+95 64 umask sys_umask
+96 64 gettimeofday sys_gettimeofday
+97 64 getrlimit sys_getrlimit
+98 64 getrusage sys_getrusage
+99 64 sysinfo sys_sysinfo
+100 64 times sys_times
+101 64 ptrace sys_ptrace
+102 64 getuid sys_getuid
+103 64 syslog sys_syslog
+104 64 getgid sys_getgid
+105 64 setuid sys_setuid
+106 64 setgid sys_setgid
+107 64 geteuid sys_geteuid
+108 64 getegid sys_getegid
+109 64 setpgid sys_setpgid
+110 64 getppid sys_getppid
+111 64 getpgrp sys_getpgrp
+112 64 setsid sys_setsid
+113 64 setreuid sys_setreuid
+114 64 setregid sys_setregid
+115 64 getgroups sys_getgroups
+116 64 setgroups sys_setgroups
+117 64 setresuid sys_setresuid
+118 64 getresuid sys_getresuid
+119 64 setresgid sys_setresgid
+120 64 getresgid sys_getresgid
+121 64 getpgid sys_getpgid
+122 64 setfsuid sys_setfsuid
+123 64 setfsgid sys_setfsgid
+124 64 getsid sys_getsid
+125 64 capget sys_capget
+126 64 capset sys_capset
+127 64 rt_sigpending sys_rt_sigpending
+128 64 rt_sigtimedwait sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
+130 64 rt_sigsuspend sys_rt_sigsuspend
+131 64 sigaltstack stub_sigaltstack
+132 64 utime sys_utime
+133 64 mknod sys_mknod
+134 64 uselib
+135 64 personality sys_personality
+136 64 ustat sys_ustat
+137 64 statfs sys_statfs
+138 64 fstatfs sys_fstatfs
+139 64 sysfs sys_sysfs
+140 64 getpriority sys_getpriority
+141 64 setpriority sys_setpriority
+142 64 sched_setparam sys_sched_setparam
+143 64 sched_getparam sys_sched_getparam
+144 64 sched_setscheduler sys_sched_setscheduler
+145 64 sched_getscheduler sys_sched_getscheduler
+146 64 sched_get_priority_max sys_sched_get_priority_max
+147 64 sched_get_priority_min sys_sched_get_priority_min
+148 64 sched_rr_get_interval sys_sched_rr_get_interval
+149 64 mlock sys_mlock
+150 64 munlock sys_munlock
+151 64 mlockall sys_mlockall
+152 64 munlockall sys_munlockall
+153 64 vhangup sys_vhangup
+154 64 modify_ldt sys_modify_ldt
+155 64 pivot_root sys_pivot_root
+156 64 _sysctl sys_sysctl
+157 64 prctl sys_prctl
+158 64 arch_prctl sys_arch_prctl
+159 64 adjtimex sys_adjtimex
+160 64 setrlimit sys_setrlimit
+161 64 chroot sys_chroot
+162 64 sync sys_sync
+163 64 acct sys_acct
+164 64 settimeofday sys_settimeofday
+165 64 mount sys_mount
+166 64 umount2 sys_umount
+167 64 swapon sys_swapon
+168 64 swapoff sys_swapoff
+169 64 reboot sys_reboot
+170 64 sethostname sys_sethostname
+171 64 setdomainname sys_setdomainname
+172 64 iopl stub_iopl
+173 64 ioperm sys_ioperm
+174 64 create_module
+175 64 init_module sys_init_module
+176 64 delete_module sys_delete_module
+177 64 get_kernel_syms
+178 64 query_module
+179 64 quotactl sys_quotactl
+180 64 nfsservctl
+181 64 getpmsg
+182 64 putpmsg
+183 64 afs_syscall
+184 64 tuxcall
+185 64 security
+186 64 gettid sys_gettid
+187 64 readahead sys_readahead
+188 64 setxattr sys_setxattr
+189 64 lsetxattr sys_lsetxattr
+190 64 fsetxattr sys_fsetxattr
+191 64 getxattr sys_getxattr
+192 64 lgetxattr sys_lgetxattr
+193 64 fgetxattr sys_fgetxattr
+194 64 listxattr sys_listxattr
+195 64 llistxattr sys_llistxattr
+196 64 flistxattr sys_flistxattr
+197 64 removexattr sys_removexattr
+198 64 lremovexattr sys_lremovexattr
+199 64 fremovexattr sys_fremovexattr
+200 64 tkill sys_tkill
+201 64 time sys_time
+202 64 futex sys_futex
+203 64 sched_setaffinity sys_sched_setaffinity
+204 64 sched_getaffinity sys_sched_getaffinity
+205 64 set_thread_area
+206 64 io_setup sys_io_setup
+207 64 io_destroy sys_io_destroy
+208 64 io_getevents sys_io_getevents
+209 64 io_submit sys_io_submit
+210 64 io_cancel sys_io_cancel
+211 64 get_thread_area
+212 64 lookup_dcookie sys_lookup_dcookie
+213 64 epoll_create sys_epoll_create
+214 64 epoll_ctl_old
+215 64 epoll_wait_old
+216 64 remap_file_pages sys_remap_file_pages
+217 64 getdents64 sys_getdents64
+218 64 set_tid_address sys_set_tid_address
+219 64 restart_syscall sys_restart_syscall
+220 64 semtimedop sys_semtimedop
+221 64 fadvise64 sys_fadvise64
+222 64 timer_create sys_timer_create
+223 64 timer_settime sys_timer_settime
+224 64 timer_gettime sys_timer_gettime
+225 64 timer_getoverrun sys_timer_getoverrun
+226 64 timer_delete sys_timer_delete
+227 64 clock_settime sys_clock_settime
+228 64 clock_gettime sys_clock_gettime
+229 64 clock_getres sys_clock_getres
+230 64 clock_nanosleep sys_clock_nanosleep
+231 64 exit_group sys_exit_group
+232 64 epoll_wait sys_epoll_wait
+233 64 epoll_ctl sys_epoll_ctl
+234 64 tgkill sys_tgkill
+235 64 utimes sys_utimes
+236 64 vserver
+237 64 mbind sys_mbind
+238 64 set_mempolicy sys_set_mempolicy
+239 64 get_mempolicy sys_get_mempolicy
+240 64 mq_open sys_mq_open
+241 64 mq_unlink sys_mq_unlink
+242 64 mq_timedsend sys_mq_timedsend
+243 64 mq_timedreceive sys_mq_timedreceive
+244 64 mq_notify sys_mq_notify
+245 64 mq_getsetattr sys_mq_getsetattr
+246 64 kexec_load sys_kexec_load
+247 64 waitid sys_waitid
+248 64 add_key sys_add_key
+249 64 request_key sys_request_key
+250 64 keyctl sys_keyctl
+251 64 ioprio_set sys_ioprio_set
+252 64 ioprio_get sys_ioprio_get
+253 64 inotify_init sys_inotify_init
+254 64 inotify_add_watch sys_inotify_add_watch
+255 64 inotify_rm_watch sys_inotify_rm_watch
+256 64 migrate_pages sys_migrate_pages
+257 64 openat sys_openat
+258 64 mkdirat sys_mkdirat
+259 64 mknodat sys_mknodat
+260 64 fchownat sys_fchownat
+261 64 futimesat sys_futimesat
+262 64 newfstatat sys_newfstatat
+263 64 unlinkat sys_unlinkat
+264 64 renameat sys_renameat
+265 64 linkat sys_linkat
+266 64 symlinkat sys_symlinkat
+267 64 readlinkat sys_readlinkat
+268 64 fchmodat sys_fchmodat
+269 64 faccessat sys_faccessat
+270 64 pselect6 sys_pselect6
+271 64 ppoll sys_ppoll
+272 64 unshare sys_unshare
+273 64 set_robust_list sys_set_robust_list
+274 64 get_robust_list sys_get_robust_list
+275 64 splice sys_splice
+276 64 tee sys_tee
+277 64 sync_file_range sys_sync_file_range
+278 64 vmsplice sys_vmsplice
+279 64 move_pages sys_move_pages
+280 64 utimensat sys_utimensat
+281 64 epoll_pwait sys_epoll_pwait
+282 64 signalfd sys_signalfd
+283 64 timerfd_create sys_timerfd_create
+284 64 eventfd sys_eventfd
+285 64 fallocate sys_fallocate
+286 64 timerfd_settime sys_timerfd_settime
+287 64 timerfd_gettime sys_timerfd_gettime
+288 64 accept4 sys_accept4
+289 64 signalfd4 sys_signalfd4
+290 64 eventfd2 sys_eventfd2
+291 64 epoll_create1 sys_epoll_create1
+292 64 dup3 sys_dup3
+293 64 pipe2 sys_pipe2
+294 64 inotify_init1 sys_inotify_init1
+295 64 preadv sys_preadv
+296 64 pwritev sys_pwritev
+297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+298 64 perf_event_open sys_perf_event_open
+299 64 recvmmsg sys_recvmmsg
+300 64 fanotify_init sys_fanotify_init
+301 64 fanotify_mark sys_fanotify_mark
+302 64 prlimit64 sys_prlimit64
+303 64 name_to_handle_at sys_name_to_handle_at
+304 64 open_by_handle_at sys_open_by_handle_at
+305 64 clock_adjtime sys_clock_adjtime
+306 64 syncfs sys_syncfs
+307 64 sendmmsg sys_sendmmsg
+308 64 setns sys_setns
+309 64 getcpu sys_getcpu
+310 64 process_vm_readv sys_process_vm_readv
+311 64 process_vm_writev sys_process_vm_writev
diff --git a/arch/x86/syscalls/syscallhdr.sh b/arch/x86/syscalls/syscallhdr.sh
new file mode 100644
index 0000000..31fd5f1
--- /dev/null
+++ b/arch/x86/syscalls/syscallhdr.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+in="$1"
+out="$2"
+my_abis=`echo "($3)" | tr ',' '|'`
+prefix="$4"
+offset="$5"
+
+fileguard=_ASM_X86_`basename "$out" | sed \
+ -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
+ -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
+grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
+ echo "#ifndef ${fileguard}"
+ echo "#define ${fileguard} 1"
+ echo ""
+
+ while read nr abi name entry ; do
+ if [ -z "$offset" ]; then
+ echo "#define __NR_${prefix}${name} $nr"
+ else
+ echo "#define __NR_${prefix}${name} ($offset + $nr)"
+ fi
+ done
+
+ echo ""
+ echo "#endif /* ${fileguard} */"
+) > "$out"
diff --git a/arch/x86/syscalls/syscalltbl.sh b/arch/x86/syscalls/syscalltbl.sh
new file mode 100644
index 0000000..0e7f8ec
--- /dev/null
+++ b/arch/x86/syscalls/syscalltbl.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+in="$1"
+out="$2"
+
+grep '^[0-9]' "$in" | sort -n | (
+ while read nr abi name entry compat; do
+ abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
+ if [ -n "$compat" ]; then
+ echo "__SYSCALL_${abi}($nr, $entry, $compat)"
+ elif [ -n "$entry" ]; then
+ echo "__SYSCALL_${abi}($nr, $entry, $entry)"
+ fi
+ done
+) > "$out"
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 1d97bd8..b2b54d2 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -6,14 +6,6 @@ menu "UML-specific options"
menu "Host processor type and features"
-config CMPXCHG_LOCAL
- bool
- default n
-
-config CMPXCHG_DOUBLE
- bool
- default n
-
source "arch/x86/Kconfig.cpu"
endmenu
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 8fb5840..5d065b2 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -37,7 +37,8 @@ subarch-$(CONFIG_MODULES) += ../kernel/module.o
USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o
extra-y += user-offsets.s
-$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS)
+$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) \
+ -Iarch/x86/include/generated
UNPROFILE_OBJS := stub_segv.o
CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 711b162..2bbe1ec2 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -1,5 +1,15 @@
+#ifndef __SYSDEP_X86_PTRACE_H
+#define __SYSDEP_X86_PTRACE_H
+
#ifdef __i386__
#include "ptrace_32.h"
#else
#include "ptrace_64.h"
#endif
+
+static inline long regs_return_value(struct uml_pt_regs *regs)
+{
+ return UPT_SYSCALL_RET(regs);
+}
+
+#endif /* __SYSDEP_X86_PTRACE_H */
diff --git a/arch/x86/um/sys_call_table_32.S b/arch/x86/um/sys_call_table_32.S
deleted file mode 100644
index a7ca80d..0000000
--- a/arch/x86/um/sys_call_table_32.S
+++ /dev/null
@@ -1,26 +0,0 @@
-#include <linux/linkage.h>
-/* Steal i386 syscall table for our purposes, but with some slight changes.*/
-
-#define sys_iopl sys_ni_syscall
-#define sys_ioperm sys_ni_syscall
-
-#define sys_vm86old sys_ni_syscall
-#define sys_vm86 sys_ni_syscall
-
-#define old_mmap sys_old_mmap
-
-#define ptregs_fork sys_fork
-#define ptregs_execve sys_execve
-#define ptregs_iopl sys_iopl
-#define ptregs_vm86old sys_vm86old
-#define ptregs_clone sys_clone
-#define ptregs_vm86 sys_vm86
-#define ptregs_sigaltstack sys_sigaltstack
-#define ptregs_vfork sys_vfork
-
-.section .rodata,"a"
-
-#include "../kernel/syscall_table_32.S"
-
-ENTRY(syscall_table_size)
-.long .-sys_call_table
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
new file mode 100644
index 0000000..416bd40
--- /dev/null
+++ b/arch/x86/um/sys_call_table_32.c
@@ -0,0 +1,55 @@
+/*
+ * System call table for UML/i386, copied from arch/x86/kernel/syscall_*.c
+ * with some changes for UML.
+ */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <linux/cache.h>
+#include <generated/user_constants.h>
+
+#define __NO_STUBS
+
+/*
+ * Below you can see, in terms of #define's, the differences between the x86-64
+ * and the UML syscall table.
+ */
+
+/* Not going to be implemented by UML, since we have no hardware. */
+#define sys_iopl sys_ni_syscall
+#define sys_ioperm sys_ni_syscall
+
+#define sys_vm86old sys_ni_syscall
+#define sys_vm86 sys_ni_syscall
+
+#define old_mmap sys_old_mmap
+
+#define ptregs_fork sys_fork
+#define ptregs_execve sys_execve
+#define ptregs_iopl sys_iopl
+#define ptregs_vm86old sys_vm86old
+#define ptregs_clone sys_clone
+#define ptregs_vm86 sys_vm86
+#define ptregs_sigaltstack sys_sigaltstack
+#define ptregs_vfork sys_vfork
+
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
+#include <asm/syscalls_32.h>
+
+#undef __SYSCALL_I386
+#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
+
+typedef void (*sys_call_ptr_t)(void);
+
+extern void sys_ni_syscall(void);
+
+const sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_syscall_max] = &sys_ni_syscall,
+#include <asm/syscalls_32.h>
+};
+
+int syscall_table_size = sizeof(sys_call_table);
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 99522f7..fe626c3 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -1,11 +1,12 @@
/*
- * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c
+ * System call table for UML/x86-64, copied from arch/x86/kernel/syscall_*.c
* with some changes for UML.
*/
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
+#include <generated/user_constants.h>
#define __NO_STUBS
@@ -34,31 +35,23 @@
#define stub_sigaltstack sys_sigaltstack
#define stub_rt_sigreturn sys_rt_sigreturn
-#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_UNISTD_64_H
-#include "../../x86/include/asm/unistd_64.h"
+#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ;
+#include <asm/syscalls_64.h>
-#undef __SYSCALL
-#define __SYSCALL(nr, sym) [ nr ] = sym,
-#undef _ASM_X86_UNISTD_64_H
+#undef __SYSCALL_64
+#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
typedef void (*sys_call_ptr_t)(void);
extern void sys_ni_syscall(void);
-/*
- * We used to have a trick here which made sure that holes in the
- * x86_64 table were filled in with sys_ni_syscall, but a comment in
- * unistd_64.h says that holes aren't allowed, so the trick was
- * removed.
- * The trick looked like this
- * [0 ... UM_NR_syscall_max] = &sys_ni_syscall
- * before including unistd_64.h - the later initializations overwrote
- * the sys_ni_syscall filler.
- */
-
-sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
-#include <asm/unistd_64.h>
+const sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_syscall_max] = &sys_ni_syscall,
+#include <asm/syscalls_64.h>
};
int syscall_table_size = sizeof(sys_call_table);
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index ca49be8..5edf4f4 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -8,6 +8,18 @@
#include <asm/ptrace.h>
#include <asm/types.h>
+#ifdef __i386__
+#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+static char syscalls[] = {
+#include <asm/syscalls_32.h>
+};
+#else
+#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
+static char syscalls[] = {
+#include <asm/syscalls_64.h>
+};
+#endif
+
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -77,4 +89,7 @@ void foo(void)
DEFINE(UM_PROT_READ, PROT_READ);
DEFINE(UM_PROT_WRITE, PROT_WRITE);
DEFINE(UM_PROT_EXEC, PROT_EXEC);
+
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+ DEFINE(NR_syscalls, sizeof(syscalls));
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fe06bf4..b132ade 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1232,7 +1232,9 @@ asmlinkage void __init xen_start_kernel(void)
/* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
+#if 0
if (!xen_initial_domain())
+#endif
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
__supported_pte_mask |= _PAGE_IOMAP;
@@ -1295,10 +1297,6 @@ asmlinkage void __init xen_start_kernel(void)
pgd = (pgd_t *)xen_start_info->pt_base;
- if (!xen_initial_domain())
- __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
-
- __supported_pte_mask |= _PAGE_IOMAP;
/* Don't do the full vcpu_info placement stuff until we have a
possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 1a309ee..988828b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -415,13 +415,13 @@ static pteval_t iomap_pte(pteval_t val)
static pteval_t xen_pte_val(pte_t pte)
{
pteval_t pteval = pte.pte;
-
+#if 0
/* If this is a WC pte, convert back from Xen WC to Linux WC */
if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
WARN_ON(!pat_enabled);
pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
}
-
+#endif
if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
return pteval;
@@ -463,7 +463,7 @@ void xen_set_pat(u64 pat)
static pte_t xen_make_pte(pteval_t pte)
{
phys_addr_t addr = (pte & PTE_PFN_MASK);
-
+#if 0
/* If Linux is trying to set a WC pte, then map to the Xen WC.
* If _PAGE_PAT is set, then it probably means it is really
* _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
@@ -476,7 +476,7 @@ static pte_t xen_make_pte(pteval_t pte)
if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
}
-
+#endif
/*
* Unprivileged domains are allowed to do IOMAPpings for
* PCI passthrough, but not map ISA space. The ISA
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 449f868..315d8fa 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -409,6 +409,13 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
cpu_bringup();
+ /*
+ * Balance out the preempt calls - as we are running in cpu_idle
+ * loop which has been called at bootup from cpu_bringup_and_idle.
+ * The cpucpu_bringup_and_idle called cpu_bringup which made a
+ * preempt_disable() So this preempt_enable will balance it out.
+ */
+ preempt_enable();
}
#else /* !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cc9b1e1..d69cc6c 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -116,9 +116,26 @@ static inline void spin_time_accum_blocked(u64 start)
}
#endif /* CONFIG_XEN_DEBUG_FS */
+/*
+ * Size struct xen_spinlock so it's the same as arch_spinlock_t.
+ */
+#if NR_CPUS < 256
+typedef u8 xen_spinners_t;
+# define inc_spinners(xl) \
+ asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory");
+# define dec_spinners(xl) \
+ asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory");
+#else
+typedef u16 xen_spinners_t;
+# define inc_spinners(xl) \
+ asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory");
+# define dec_spinners(xl) \
+ asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
+#endif
+
struct xen_spinlock {
unsigned char lock; /* 0 -> free; 1 -> locked */
- unsigned short spinners; /* count of waiting cpus */
+ xen_spinners_t spinners; /* count of waiting cpus */
};
static int xen_spin_is_locked(struct arch_spinlock *lock)
@@ -164,8 +181,7 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
wmb(); /* set lock of interest before count */
- asm(LOCK_PREFIX " incw %0"
- : "+m" (xl->spinners) : : "memory");
+ inc_spinners(xl);
return prev;
}
@@ -176,8 +192,7 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
*/
static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
{
- asm(LOCK_PREFIX " decw %0"
- : "+m" (xl->spinners) : : "memory");
+ dec_spinners(xl);
wmb(); /* decrement count before restoring lock */
__this_cpu_write(lock_spinners, prev);
}
@@ -373,6 +388,8 @@ void xen_uninit_lock_cpu(int cpu)
void __init xen_init_spinlocks(void)
{
+ BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
+
pv_lock_ops.spin_is_locked = xen_spin_is_locked;
pv_lock_ops.spin_is_contended = xen_spin_is_contended;
pv_lock_ops.spin_lock = xen_spin_lock;
OpenPOWER on IntegriCloud